Files
ragflow/internal/engine/elasticsearch/index.go
Yingfeng 4ee0702aed Feat: add skills space to context engine (#13908)
### What problem does this PR solve?

issue #13714

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-04-30 12:36:03 +08:00

364 lines
9.4 KiB
Go

//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package elasticsearch
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"os"
"github.com/elastic/go-elasticsearch/v8/esapi"
)
// CreateDataset creates a new Elasticsearch index called indexName.
//
// The index definition depends on datasetID: the literal value "skill"
// selects the mapping returned by loadSkillMapping, any other value gets a
// minimal definition with one shard and zero replicas. vectorSize and
// parserID are accepted but not used by this implementation.
//
// An error is returned when indexName is empty, when the index already
// exists, when the mapping cannot be loaded or encoded, when the request
// fails or Elasticsearch answers with an error status, or when the response
// does not carry "acknowledged": true.
func (e *elasticsearchEngine) CreateDataset(ctx context.Context, indexName, datasetID string, vectorSize int, parserID string) error {
	if indexName == "" {
		return fmt.Errorf("index name cannot be empty")
	}

	// Refuse to proceed when the index is already there.
	switch found, err := e.TableExists(ctx, indexName); {
	case err != nil:
		return fmt.Errorf("failed to check index existence: %w", err)
	case found:
		return fmt.Errorf("index '%s' already exists", indexName)
	}

	// Choose the index definition for this kind of dataset.
	var indexDef map[string]interface{}
	if datasetID != "skill" {
		indexDef = map[string]interface{}{
			"settings": map[string]interface{}{
				"number_of_shards":   1,
				"number_of_replicas": 0,
			},
		}
	} else {
		loaded, err := loadSkillMapping()
		if err != nil {
			return fmt.Errorf("failed to load skill mapping: %w", err)
		}
		indexDef = loaded
	}

	// A nil definition leaves the request without a body.
	var payload io.Reader
	if indexDef != nil {
		encoded, err := json.Marshal(indexDef)
		if err != nil {
			return fmt.Errorf("failed to marshal mapping: %w", err)
		}
		payload = bytes.NewReader(encoded)
	}

	createReq := esapi.IndicesCreateRequest{
		Index: indexName,
		Body:  payload,
	}
	resp, err := createReq.Do(ctx, e.client)
	if err != nil {
		return fmt.Errorf("failed to create index: %w", err)
	}
	defer resp.Body.Close()

	if resp.IsError() {
		// Prefer the reason extracted from the error payload; otherwise
		// report the status line together with the raw body.
		raw, _ := io.ReadAll(resp.Body)
		if reason := extractErrorReason(raw); reason != "" {
			return fmt.Errorf("elasticsearch error: %s", reason)
		}
		return fmt.Errorf("elasticsearch returned error: %s, body: %s", resp.Status(), string(raw))
	}

	// The creation only counts when the response explicitly acknowledges it.
	var parsed map[string]interface{}
	if err := json.NewDecoder(resp.Body).Decode(&parsed); err != nil {
		return fmt.Errorf("failed to parse response: %w", err)
	}
	if ack, isBool := parsed["acknowledged"].(bool); !isBool || !ack {
		return fmt.Errorf("index creation not acknowledged")
	}
	return nil
}
// loadSkillMapping returns the index definition used for the skill index.
//
// It looks for skill_es_mapping.json in a fixed list of locations (relative
// to the working directory first, then the absolute /app/conf path) and
// decodes the first file that can be read. When none of the candidates
// exists, the built-in definition from getDefaultSkillMapping is returned.
//
// An error is returned when the file that was read does not contain valid
// JSON, or when no candidate could be read and at least one of them failed
// for a reason other than "does not exist" (for example permission denied).
// Such a failure is surfaced instead of being hidden behind the built-in
// default, because it means a mapping file is present but unusable.
func loadSkillMapping() (map[string]interface{}, error) {
	candidates := []string{
		"conf/skill_es_mapping.json",
		"../conf/skill_es_mapping.json",
		"/app/conf/skill_es_mapping.json",
	}

	// readErr remembers the first failure that is not a plain "not found".
	var readErr error
	for _, path := range candidates {
		data, err := os.ReadFile(path)
		if err != nil {
			if !os.IsNotExist(err) && readErr == nil {
				readErr = fmt.Errorf("failed to read skill mapping %q: %w", path, err)
			}
			// Keep looking: a readable file in a later location still wins.
			continue
		}

		var mapping map[string]interface{}
		if err := json.Unmarshal(data, &mapping); err != nil {
			return nil, fmt.Errorf("failed to parse skill mapping: %w", err)
		}
		return mapping, nil
	}

	if readErr != nil {
		return nil, readErr
	}
	// No mapping file anywhere: fall back to the built-in definition.
	return getDefaultSkillMapping(), nil
}
// getDefaultSkillMapping builds the built-in definition of the skill index.
//
// The result has two top-level keys:
//   - "settings": one shard, zero replicas and a refresh interval of 1000ms;
//   - "mappings": dynamic mapping switched off and a fixed set of properties.
//
// The properties are stored keyword fields (skill_id, version, status),
// stored long fields (create_time, update_time), four stored text fields
// that are not indexed (name, tags, description, content), each paired with
// a "_tks" text field using the whitespace analyzer, and indexed cosine
// dense_vector fields named q_<dims>_vec for several dimensions.
//
// Every call returns freshly allocated maps.
func getDefaultSkillMapping() map[string]interface{} {
	// storedField returns a stored field of the given type.
	storedField := func(fieldType string) map[string]interface{} {
		return map[string]interface{}{
			"type":  fieldType,
			"store": true,
		}
	}
	// rawText returns a stored text field that is excluded from the index.
	rawText := func() map[string]interface{} {
		field := storedField("text")
		field["index"] = false
		return field
	}
	// tokenText returns a stored text field analyzed on whitespace.
	tokenText := func() map[string]interface{} {
		field := storedField("text")
		field["analyzer"] = "whitespace"
		return field
	}

	props := map[string]interface{}{
		"skill_id":    storedField("keyword"),
		"version":     storedField("keyword"),
		"status":      storedField("keyword"),
		"create_time": storedField("long"),
		"update_time": storedField("long"),
	}

	// Each textual attribute comes as a raw field plus its "_tks" companion.
	for _, name := range []string{"name", "tags", "description", "content"} {
		props[name] = rawText()
		props[name+"_tks"] = tokenText()
	}

	// One vector field per supported dimension.
	for _, dims := range []int{3072, 2560, 1536, 1024, 768, 512, 256} {
		props[fmt.Sprintf("q_%d_vec", dims)] = map[string]interface{}{
			"type":       "dense_vector",
			"dims":       dims,
			"index":      true,
			"similarity": "cosine",
		}
	}

	return map[string]interface{}{
		"settings": map[string]interface{}{
			"index": map[string]interface{}{
				"number_of_shards":   1,
				"number_of_replicas": 0,
				"refresh_interval":   "1000ms",
			},
		},
		"mappings": map[string]interface{}{
			"dynamic":    false,
			"properties": props,
		},
	}
}
// DropTable deletes the Elasticsearch index called indexName.
//
// An error is returned when indexName is empty, when the existence check
// fails, when the index does not exist, when the delete request cannot be
// executed, or when Elasticsearch answers with an error status.
func (e *elasticsearchEngine) DropTable(ctx context.Context, indexName string) error {
	if indexName == "" {
		return fmt.Errorf("index name cannot be empty")
	}

	// Only an index that is actually present is deleted.
	found, err := e.TableExists(ctx, indexName)
	switch {
	case err != nil:
		return fmt.Errorf("failed to check index existence: %w", err)
	case !found:
		return fmt.Errorf("index '%s' does not exist", indexName)
	}

	deleteReq := esapi.IndicesDeleteRequest{
		Index: []string{indexName},
	}
	resp, err := deleteReq.Do(ctx, e.client)
	if err != nil {
		return fmt.Errorf("failed to delete index: %w", err)
	}
	defer resp.Body.Close()

	if !resp.IsError() {
		return nil
	}

	// Prefer the reason extracted from the error payload over the bare status.
	raw, _ := io.ReadAll(resp.Body)
	if reason := extractErrorReason(raw); reason != "" {
		return fmt.Errorf("elasticsearch error: %s", reason)
	}
	return fmt.Errorf("elasticsearch returned error: %s", resp.Status())
}
// TableExists reports whether the Elasticsearch index called indexName exists.
//
// A 200 response yields (true, nil) and a 404 response yields (false, nil).
// An empty indexName, a request that cannot be executed, or any other status
// code yields false together with an error.
func (e *elasticsearchEngine) TableExists(ctx context.Context, indexName string) (bool, error) {
	if indexName == "" {
		return false, fmt.Errorf("index name cannot be empty")
	}

	existsReq := esapi.IndicesExistsRequest{
		Index: []string{indexName},
	}
	resp, err := existsReq.Do(ctx, e.client)
	if err != nil {
		return false, fmt.Errorf("failed to check index existence: %w", err)
	}
	defer resp.Body.Close()

	switch resp.StatusCode {
	case 200:
		return true, nil
	case 404:
		return false, nil
	}

	// Any other status is an error; use the extracted reason when there is one.
	raw, _ := io.ReadAll(resp.Body)
	if reason := extractErrorReason(raw); reason != "" {
		return false, fmt.Errorf("elasticsearch error: %s", reason)
	}
	return false, fmt.Errorf("elasticsearch returned error: %s", resp.Status())
}
// CreateMetadata is meant to create the document metadata index.
//
// It is currently an unimplemented stub: ctx and indexName are ignored,
// nothing is sent to Elasticsearch, and nil is always returned.
func (e *elasticsearchEngine) CreateMetadata(ctx context.Context, indexName string) error {
// TODO: implement; until then this is a no-op that reports success.
return nil
}
// InsertDataset is meant to insert documents into a dataset index.
//
// It is currently an unimplemented stub: all arguments are ignored, nothing
// is sent to Elasticsearch, and an empty (non-nil) string slice is returned
// together with a nil error.
func (e *elasticsearchEngine) InsertDataset(ctx context.Context, documents []map[string]interface{}, indexName string, knowledgebaseID string) ([]string, error) {
// TODO: implement; until then this is a no-op that reports success.
return []string{}, nil
}
// InsertMetadata is meant to insert documents into a tenant's metadata index.
//
// It is currently an unimplemented stub: all arguments are ignored, nothing
// is sent to Elasticsearch, and an empty (non-nil) string slice is returned
// together with a nil error.
func (e *elasticsearchEngine) InsertMetadata(ctx context.Context, documents []map[string]interface{}, tenantID string) ([]string, error) {
// TODO: implement; until then this is a no-op that reports success.
return []string{}, nil
}
// UpdateDataset is meant to update a chunk selected by condition.
//
// It is currently an unimplemented stub: all arguments are ignored, nothing
// is sent to Elasticsearch, and nil is always returned.
func (e *elasticsearchEngine) UpdateDataset(ctx context.Context, condition map[string]interface{}, newValue map[string]interface{}, tableNamePrefix string, knowledgebaseID string) error {
// TODO: implement; until then this is a no-op that reports success.
return nil
}
// UpdateMetadata is meant to update document metadata in a tenant's
// metadata index.
//
// It is currently an unimplemented stub: all arguments are ignored, nothing
// is sent to Elasticsearch, and nil is always returned.
func (e *elasticsearchEngine) UpdateMetadata(ctx context.Context, docID string, kbID string, metaFields map[string]interface{}, tenantID string) error {
// TODO: implement; until then this is a no-op that reports success.
return nil
}
// Delete is meant to delete rows from either a dataset index or a metadata
// index.
//
// It is currently an unimplemented stub: all arguments are ignored, nothing
// is sent to Elasticsearch, and 0 is returned together with a nil error.
// NOTE(review): the int64 result is presumably the number of deleted rows;
// confirm against the engine interface.
func (e *elasticsearchEngine) Delete(ctx context.Context, condition map[string]interface{}, indexName string, datasetID string) (int64, error) {
// TODO: implement; until then this is a no-op that reports success.
return 0, nil
}