RAGFlow Go API server (#13240)
# RAGFlow Go Implementation Plan 🚀

This repository tracks the progress of porting RAGFlow to Go. We'll implement core features and provide performance comparisons between Python and Go versions.

## Implementation Checklist

- [x] User Management APIs
- [x] Dataset Management Operations
- [x] Retrieval Test
- [x] Chat Management Operations
- [x] Infinity Go SDK

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
Co-authored-by: Yingfeng Zhang <yingfeng.zhang@gmail.com>
internal/engine/elasticsearch/client.go (new file, 103 lines)
```go
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

package elasticsearch

import (
	"context"
	"fmt"
	"net/http"
	"time"

	"github.com/elastic/go-elasticsearch/v8"
	"github.com/elastic/go-elasticsearch/v8/esapi"

	"ragflow/internal/server"
)

// elasticsearchEngine is the Elasticsearch engine implementation.
type elasticsearchEngine struct {
	client *elasticsearch.Client
	config *server.ElasticsearchConfig
}

// NewEngine creates an Elasticsearch engine.
func NewEngine(cfg interface{}) (*elasticsearchEngine, error) {
	esConfig, ok := cfg.(*server.ElasticsearchConfig)
	if !ok {
		return nil, fmt.Errorf("invalid Elasticsearch config type, expected *server.ElasticsearchConfig")
	}

	// Create the ES client
	client, err := elasticsearch.NewClient(elasticsearch.Config{
		Addresses: []string{esConfig.Hosts},
		Username:  esConfig.Username,
		Password:  esConfig.Password,
		Transport: &http.Transport{
			MaxIdleConnsPerHost:   10,
			ResponseHeaderTimeout: 30 * time.Second,
		},
	})
	if err != nil {
		return nil, fmt.Errorf("failed to create Elasticsearch client: %w", err)
	}

	// Check the connection
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	req := esapi.InfoRequest{}
	res, err := req.Do(ctx, client)
	if err != nil {
		return nil, fmt.Errorf("failed to ping Elasticsearch: %w", err)
	}
	defer res.Body.Close()

	if res.IsError() {
		return nil, fmt.Errorf("elasticsearch returned error: %s", res.Status())
	}

	engine := &elasticsearchEngine{
		client: client,
		config: esConfig,
	}

	return engine, nil
}

// Type returns the engine type.
func (e *elasticsearchEngine) Type() string {
	return "elasticsearch"
}

// Ping performs a health check.
func (e *elasticsearchEngine) Ping(ctx context.Context) error {
	req := esapi.InfoRequest{}
	res, err := req.Do(ctx, e.client)
	if err != nil {
		return err
	}
	defer res.Body.Close()
	if res.IsError() {
		return fmt.Errorf("elasticsearch ping failed: %s", res.Status())
	}
	return nil
}

// Close closes the connection. The go-elasticsearch client has no Close
// method; connections are managed by the underlying transport.
func (e *elasticsearchEngine) Close() error {
	return nil
}
```
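For orientation, a minimal usage sketch (not part of the commit) of wiring the engine up and health-checking it. The `server.ElasticsearchConfig` literal uses only the fields `client.go` actually reads (`Hosts`, `Username`, `Password`); the values are illustrative.

```go
package main

import (
	"context"
	"log"
	"time"

	es "ragflow/internal/engine/elasticsearch"
	"ragflow/internal/server"
)

func main() {
	// Illustrative values; only these three fields are known (from client.go)
	// to exist on server.ElasticsearchConfig.
	cfg := &server.ElasticsearchConfig{
		Hosts:    "http://localhost:9200",
		Username: "elastic",
		Password: "changeme",
	}

	eng, err := es.NewEngine(cfg)
	if err != nil {
		log.Fatalf("init elasticsearch engine: %v", err)
	}
	defer eng.Close()

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	if err := eng.Ping(ctx); err != nil {
		log.Fatalf("elasticsearch unhealthy: %v", err)
	}
	log.Printf("engine type: %s", eng.Type())
}
```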
internal/engine/elasticsearch/document.go (new file, 238 lines)
```go
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

package elasticsearch

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"

	"github.com/elastic/go-elasticsearch/v8/esapi"
)

// IndexDocument indexes a single document.
func (e *elasticsearchEngine) IndexDocument(ctx context.Context, indexName, docID string, doc interface{}) error {
	if indexName == "" {
		return fmt.Errorf("index name cannot be empty")
	}
	if docID == "" {
		return fmt.Errorf("document id cannot be empty")
	}
	if doc == nil {
		return fmt.Errorf("document cannot be nil")
	}

	// Serialize the document
	data, err := json.Marshal(doc)
	if err != nil {
		return fmt.Errorf("failed to marshal document: %w", err)
	}

	// Index the document
	req := esapi.IndexRequest{
		Index:      indexName,
		DocumentID: docID,
		Body:       bytes.NewReader(data),
		Refresh:    "true",
	}

	res, err := req.Do(ctx, e.client)
	if err != nil {
		return fmt.Errorf("failed to index document: %w", err)
	}
	defer res.Body.Close()

	if res.IsError() {
		return fmt.Errorf("elasticsearch returned error: %s", res.Status())
	}

	return nil
}

// BulkIndex indexes documents in bulk. Each document must be a
// map[string]interface{} carrying an "_id" field, which is lifted into the
// bulk action metadata and removed from the document body.
func (e *elasticsearchEngine) BulkIndex(ctx context.Context, indexName string, docs []interface{}) (interface{}, error) {
	if indexName == "" {
		return nil, fmt.Errorf("index name cannot be empty")
	}
	if len(docs) == 0 {
		return nil, fmt.Errorf("documents cannot be empty")
	}

	// Build the NDJSON bulk request body: one action line followed by one
	// document line per document.
	var buf bytes.Buffer
	for _, doc := range docs {
		docMap, ok := doc.(map[string]interface{})
		if !ok {
			return nil, fmt.Errorf("document must be map[string]interface{}")
		}

		docID, hasID := docMap["_id"]
		if !hasID {
			return nil, fmt.Errorf("document missing _id field")
		}

		// Remove the _id field so it is not duplicated in the document body
		delete(docMap, "_id")

		// Add the index action. The bulk API requires the action name as the
		// top-level key of the metadata line.
		meta := map[string]interface{}{
			"index": map[string]interface{}{
				"_index": indexName,
				"_id":    docID,
			},
		}
		metaData, _ := json.Marshal(meta)
		docData, _ := json.Marshal(docMap)

		buf.Write(metaData)
		buf.WriteByte('\n')
		buf.Write(docData)
		buf.WriteByte('\n')
	}

	// Execute the bulk request
	req := esapi.BulkRequest{
		Body:    &buf,
		Refresh: "true",
	}

	res, err := req.Do(ctx, e.client)
	if err != nil {
		return nil, fmt.Errorf("bulk index failed: %w", err)
	}
	defer res.Body.Close()

	if res.IsError() {
		return nil, fmt.Errorf("elasticsearch returned error: %s", res.Status())
	}

	// Parse the response
	var result map[string]interface{}
	if err := json.NewDecoder(res.Body).Decode(&result); err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}

	// Check for per-item errors and surface the first reason found
	if errors, ok := result["errors"].(bool); ok && errors {
		if items, ok := result["items"].([]interface{}); ok && len(items) > 0 {
			for _, item := range items {
				if itemMap, ok := item.(map[string]interface{}); ok {
					for _, op := range itemMap {
						if opMap, ok := op.(map[string]interface{}); ok {
							if errInfo, ok := opMap["error"].(map[string]interface{}); ok {
								if reason, ok := errInfo["reason"].(string); ok {
									return nil, fmt.Errorf("bulk index error: %s", reason)
								}
							}
						}
					}
				}
			}
		}
		return nil, fmt.Errorf("bulk index has errors")
	}

	response := &BulkResponse{
		Took:    int64(result["took"].(float64)),
		Errors:  result["errors"].(bool),
		Indexed: len(docs),
	}

	return response, nil
}
```
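The `_id` convention above is easy to miss from the signature alone, so here is a hedged sketch of the input `BulkIndex` expects, continuing `eng` and `ctx` from the earlier sketch. Field names other than `_id` are illustrative (borrowed from the field names used in `search.go`).

```go
// Each element is a map[string]interface{} whose "_id" becomes the
// Elasticsearch document ID; the rest is the document body.
docs := []interface{}{
	map[string]interface{}{
		"_id":           "chunk-0001",
		"content_ltks":  "vector database retrieval",
		"kb_id":         "kb-123",
		"available_int": 1,
	},
	map[string]interface{}{
		"_id":           "chunk-0002",
		"content_ltks":  "hybrid keyword and knn search",
		"kb_id":         "kb-123",
		"available_int": 1,
	},
}

resp, err := eng.BulkIndex(ctx, "ragflow_chunks", docs)
if err != nil {
	log.Fatalf("bulk index: %v", err)
}
log.Printf("bulk result: %+v", resp) // *BulkResponse{Took, Errors, Indexed}
```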
```go
// BulkResponse is the bulk operation response.
type BulkResponse struct {
	Took    int64
	Errors  bool
	Indexed int
}

// GetDocument gets a document.
func (e *elasticsearchEngine) GetDocument(ctx context.Context, indexName, docID string) (interface{}, error) {
	if indexName == "" {
		return nil, fmt.Errorf("index name cannot be empty")
	}
	if docID == "" {
		return nil, fmt.Errorf("document id cannot be empty")
	}

	// Get the document
	req := esapi.GetRequest{
		Index:      indexName,
		DocumentID: docID,
	}

	res, err := req.Do(ctx, e.client)
	if err != nil {
		return nil, fmt.Errorf("failed to get document: %w", err)
	}
	defer res.Body.Close()

	if res.StatusCode == 404 {
		return nil, fmt.Errorf("document not found")
	}

	if res.IsError() {
		return nil, fmt.Errorf("elasticsearch returned error: %s", res.Status())
	}

	// Parse the response
	var result map[string]interface{}
	if err := json.NewDecoder(res.Body).Decode(&result); err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}

	if found, ok := result["found"].(bool); !ok || !found {
		return nil, fmt.Errorf("document not found")
	}

	return result["_source"], nil
}

// DeleteDocument deletes a document.
func (e *elasticsearchEngine) DeleteDocument(ctx context.Context, indexName, docID string) error {
	if indexName == "" {
		return fmt.Errorf("index name cannot be empty")
	}
	if docID == "" {
		return fmt.Errorf("document id cannot be empty")
	}

	// Delete the document
	req := esapi.DeleteRequest{
		Index:      indexName,
		DocumentID: docID,
		Refresh:    "true",
	}

	res, err := req.Do(ctx, e.client)
	if err != nil {
		return fmt.Errorf("failed to delete document: %w", err)
	}
	defer res.Body.Close()

	if res.StatusCode == 404 {
		return fmt.Errorf("document not found")
	}

	if res.IsError() {
		return fmt.Errorf("elasticsearch returned error: %s", res.Status())
	}

	return nil
}
```
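Under the same assumptions, a single-document round trip through the three methods above:

```go
// Index, fetch, and delete one document (illustrative values).
doc := map[string]interface{}{"content_ltks": "hello world", "kb_id": "kb-123"}
if err := eng.IndexDocument(ctx, "ragflow_chunks", "chunk-0003", doc); err != nil {
	log.Fatalf("index: %v", err)
}

src, err := eng.GetDocument(ctx, "ragflow_chunks", "chunk-0003")
if err != nil {
	log.Fatalf("get: %v", err)
}
log.Printf("_source: %v", src)

if err := eng.DeleteDocument(ctx, "ragflow_chunks", "chunk-0003"); err != nil {
	log.Fatalf("delete: %v", err)
}
```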
internal/engine/elasticsearch/index.go (new file, 144 lines)
```go
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

package elasticsearch

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"

	"github.com/elastic/go-elasticsearch/v8/esapi"
)

// CreateIndex creates an index. The mapping may be a raw JSON string or any
// value that marshals to the index settings/mappings body; nil creates the
// index with defaults.
func (e *elasticsearchEngine) CreateIndex(ctx context.Context, indexName string, mapping interface{}) error {
	if indexName == "" {
		return fmt.Errorf("index name cannot be empty")
	}

	// Check whether the index already exists
	exists, err := e.IndexExists(ctx, indexName)
	if err != nil {
		return fmt.Errorf("failed to check index existence: %w", err)
	}
	if exists {
		return fmt.Errorf("index '%s' already exists", indexName)
	}

	// Prepare the request body
	var body io.Reader
	if mapping != nil {
		if str, ok := mapping.(string); ok {
			body = bytes.NewBufferString(str)
		} else {
			data, err := json.Marshal(mapping)
			if err != nil {
				return fmt.Errorf("failed to marshal mapping: %w", err)
			}
			body = bytes.NewReader(data)
		}
	}

	// Create the index
	req := esapi.IndicesCreateRequest{
		Index: indexName,
		Body:  body,
	}

	res, err := req.Do(ctx, e.client)
	if err != nil {
		return fmt.Errorf("failed to create index: %w", err)
	}
	defer res.Body.Close()

	if res.IsError() {
		return fmt.Errorf("elasticsearch returned error: %s", res.Status())
	}

	// Parse the response
	var result map[string]interface{}
	if err := json.NewDecoder(res.Body).Decode(&result); err != nil {
		return fmt.Errorf("failed to parse response: %w", err)
	}

	acknowledged, ok := result["acknowledged"].(bool)
	if !ok || !acknowledged {
		return fmt.Errorf("index creation not acknowledged")
	}

	return nil
}

// DeleteIndex deletes an index.
func (e *elasticsearchEngine) DeleteIndex(ctx context.Context, indexName string) error {
	if indexName == "" {
		return fmt.Errorf("index name cannot be empty")
	}

	// Check whether the index exists
	exists, err := e.IndexExists(ctx, indexName)
	if err != nil {
		return fmt.Errorf("failed to check index existence: %w", err)
	}
	if !exists {
		return fmt.Errorf("index '%s' does not exist", indexName)
	}

	// Delete the index
	req := esapi.IndicesDeleteRequest{
		Index: []string{indexName},
	}

	res, err := req.Do(ctx, e.client)
	if err != nil {
		return fmt.Errorf("failed to delete index: %w", err)
	}
	defer res.Body.Close()

	if res.IsError() {
		return fmt.Errorf("elasticsearch returned error: %s", res.Status())
	}

	return nil
}

// IndexExists reports whether the index exists.
func (e *elasticsearchEngine) IndexExists(ctx context.Context, indexName string) (bool, error) {
	if indexName == "" {
		return false, fmt.Errorf("index name cannot be empty")
	}

	req := esapi.IndicesExistsRequest{
		Index: []string{indexName},
	}

	res, err := req.Do(ctx, e.client)
	if err != nil {
		return false, fmt.Errorf("failed to check index existence: %w", err)
	}
	defer res.Body.Close()

	switch res.StatusCode {
	case 200:
		return true, nil
	case 404:
		return false, nil
	}

	return false, fmt.Errorf("elasticsearch returned error: %s", res.Status())
}
```
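A hedged sketch of calling `CreateIndex` with a mapping whose vector field matches the `q_<dims>_vec` naming used by `search.go`. The field set is illustrative (not RAGFlow's actual mapping); the `dense_vector` parameters follow standard Elasticsearch 8 mapping syntax. Continues `eng` and `ctx` from the sketches above.

```go
// Illustrative mapping: a text field, a keyword filter field, and a 768-dim
// dense_vector named to match search.go's q_<dims>_vec convention.
mapping := map[string]interface{}{
	"mappings": map[string]interface{}{
		"properties": map[string]interface{}{
			"content_ltks": map[string]interface{}{"type": "text"},
			"kb_id":        map[string]interface{}{"type": "keyword"},
			"q_768_vec": map[string]interface{}{
				"type":       "dense_vector",
				"dims":       768,
				"index":      true,
				"similarity": "cosine",
			},
		},
	},
}

if err := eng.CreateIndex(ctx, "ragflow_chunks", mapping); err != nil {
	log.Fatalf("create index: %v", err)
}
```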
internal/engine/elasticsearch/search.go (new file, 528 lines)
```go
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

package elasticsearch

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"

	"github.com/elastic/go-elasticsearch/v8/esapi"
	"go.uber.org/zap"

	"ragflow/internal/engine/types"
	"ragflow/internal/logger"
)

// SearchRequest is the Elasticsearch search request (legacy, kept for backward compatibility).
type SearchRequest struct {
	IndexNames []string
	Query      map[string]interface{}
	Filters    map[string]interface{} // Filter conditions (e.g., kb_id, doc_id, available_int)
	Size       int
	From       int
	Highlight  map[string]interface{}
	Source     []string
	Sort       []interface{}
}

// SearchResponse is the Elasticsearch search response.
type SearchResponse struct {
	Hits struct {
		Total struct {
			Value int64 `json:"value"`
		} `json:"total"`
		Hits []struct {
			ID     string                 `json:"_id"`
			Score  float64                `json:"_score"`
			Source map[string]interface{} `json:"_source"`
		} `json:"hits"`
	} `json:"hits"`
	Aggregations map[string]interface{} `json:"aggregations"`
}

// Search executes a search. It accepts both the unified *types.SearchRequest
// and the legacy *SearchRequest.
func (e *elasticsearchEngine) Search(ctx context.Context, req interface{}) (interface{}, error) {
	switch searchReq := req.(type) {
	case *types.SearchRequest:
		return e.searchUnified(ctx, searchReq)
	case *SearchRequest:
		return e.searchLegacy(ctx, searchReq)
	default:
		return nil, fmt.Errorf("invalid search request type: %T", req)
	}
}

// searchUnified handles the unified types.SearchRequest.
func (e *elasticsearchEngine) searchUnified(ctx context.Context, req *types.SearchRequest) (*types.SearchResponse, error) {
	if len(req.IndexNames) == 0 {
		return nil, fmt.Errorf("index names cannot be empty")
	}

	// Build pagination parameters
	offset, limit := calculatePagination(req.Page, req.Size, req.TopK)

	// Build filter clauses (default: available=1, meaning available_int >= 1)
	// Reference: rag/utils/es_conn.py L60-L78
	filterClauses := buildFilterClauses(req.KbIDs, req.DocIDs, 1)

	// Build the search query body
	queryBody := make(map[string]interface{})

	// Use MatchText if available (from QueryBuilder), otherwise the original Question
	matchText := req.MatchText
	if matchText == "" {
		matchText = req.Question
	}

	var vectorFieldName string
	if req.KeywordOnly || len(req.Vector) == 0 {
		// Keyword-only search
		queryBody["query"] = buildESKeywordQuery(matchText, filterClauses, 1.0)
	} else {
		// Hybrid search: keyword + vector.
		// The text clause is weighted by 1 - VectorSimilarityWeight.
		textWeight := 1.0 - req.VectorSimilarityWeight
		// Build the boolean query for text match and filters
		boolQuery := buildESKeywordQuery(matchText, filterClauses, 1.0)
		// Add the boost to the bool query (as in the Python code)
		if boolMap, ok := boolQuery["bool"].(map[string]interface{}); ok {
			boolMap["boost"] = textWeight
		}

		// Build the kNN query; the vector field is named q_<dims>_vec
		dimension := len(req.Vector)
		vectorFieldName = fmt.Sprintf("q_%d_vec", dimension)

		k := req.TopK
		if k <= 0 {
			k = 1024
		}
		numCandidates := k * 2

		knnQuery := map[string]interface{}{
			"field":          vectorFieldName,
			"query_vector":   req.Vector,
			"k":              k,
			"num_candidates": numCandidates,
			"filter":         boolQuery,
			"similarity":     req.SimilarityThreshold,
		}

		queryBody["knn"] = knnQuery
		queryBody["query"] = boolQuery
	}

	queryBody["size"] = limit
	queryBody["from"] = offset

	// Serialize the query
	var buf bytes.Buffer
	if err := json.NewEncoder(&buf).Encode(queryBody); err != nil {
		return nil, fmt.Errorf("error encoding query: %w", err)
	}

	// Log search details
	logger.Debug("Elasticsearch searching indices", zap.Strings("indices", req.IndexNames))
	logger.Debug("Elasticsearch DSL", zap.Any("dsl", queryBody))

	// Build the search request
	reqES := esapi.SearchRequest{
		Index: req.IndexNames,
		Body:  &buf,
	}

	// Execute the search
	res, err := reqES.Do(ctx, e.client)
	if err != nil {
		return nil, fmt.Errorf("search failed: %w", err)
	}
	defer res.Body.Close()

	if res.IsError() {
		bodyBytes, err := io.ReadAll(res.Body)
		if err != nil {
			logger.Error("Elasticsearch failed to read error response body", err)
		} else {
			logger.Warn("Elasticsearch error response", zap.String("body", string(bodyBytes)))
		}
		return nil, fmt.Errorf("elasticsearch returned error: %s", res.Status())
	}

	// Parse the response
	var esResp SearchResponse
	if err := json.NewDecoder(res.Body).Decode(&esResp); err != nil {
		return nil, fmt.Errorf("error parsing response: %w", err)
	}

	// Convert to the unified response
	chunks := convertESResponse(&esResp, vectorFieldName)
	return &types.SearchResponse{
		Chunks: chunks,
		Total:  esResp.Hits.Total.Value,
	}, nil
}
```
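A hedged sketch of driving the unified path. The `types.SearchRequest` fields are exactly those read by `searchUnified` above; the literal values are illustrative, and `queryVec` stands in for a precomputed query embedding. Continues `eng` and `ctx` from the earlier sketches.

```go
// Hybrid search over one index (illustrative values).
req := &types.SearchRequest{
	IndexNames:             []string{"ragflow_chunks"},
	Question:               "how does hybrid retrieval work",
	Vector:                 queryVec, // e.g. 768 dims, targeting field q_768_vec
	TopK:                   1024,
	Page:                   1,
	Size:                   30,
	KbIDs:                  []string{"kb-123"},
	VectorSimilarityWeight: 0.7, // text clause boosted by 1 - 0.7 = 0.3
	SimilarityThreshold:    0.2,
}

out, err := eng.Search(ctx, req)
if err != nil {
	log.Fatalf("search: %v", err)
}
resp := out.(*types.SearchResponse)
log.Printf("total=%d, returned=%d", resp.Total, len(resp.Chunks))
```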
```go
// searchLegacy handles the legacy elasticsearch.SearchRequest (backward compatibility).
func (e *elasticsearchEngine) searchLegacy(ctx context.Context, searchReq *SearchRequest) (*SearchResponse, error) {
	if len(searchReq.IndexNames) == 0 {
		return nil, fmt.Errorf("index names cannot be empty")
	}

	// Build the search query
	queryBody := make(map[string]interface{})

	// Process Filters first: convert them to Elasticsearch filter clauses
	var filterClauses []map[string]interface{}
	for field, value := range searchReq.Filters {
		switch v := value.(type) {
		case map[string]interface{}:
			filterClauses = append(filterClauses, map[string]interface{}{
				field: v,
			})
		default:
			filterClauses = append(filterClauses, map[string]interface{}{
				"term": map[string]interface{}{
					field: v,
				},
			})
		}
	}

	if searchReq.Query != nil {
		queryCopy := make(map[string]interface{})
		for k, v := range searchReq.Query {
			queryCopy[k] = v
		}

		// A top-level "knn" key is lifted out of the query body
		if knnValue, ok := queryCopy["knn"]; ok {
			queryBody["knn"] = knnValue
			delete(queryCopy, "knn")
		}

		if len(queryCopy) > 0 {
			if len(filterClauses) > 0 {
				queryBody["query"] = map[string]interface{}{
					"bool": map[string]interface{}{
						"must":   queryCopy,
						"filter": filterClauses,
					},
				}
			} else {
				queryBody["query"] = queryCopy
			}
		} else if len(filterClauses) > 0 {
			queryBody["query"] = map[string]interface{}{
				"bool": map[string]interface{}{
					"filter": filterClauses,
				},
			}
		}
	} else if len(filterClauses) > 0 {
		queryBody["query"] = map[string]interface{}{
			"bool": map[string]interface{}{
				"filter": filterClauses,
			},
		}
	}
	if searchReq.Size > 0 {
		queryBody["size"] = searchReq.Size
	}
	if searchReq.From > 0 {
		queryBody["from"] = searchReq.From
	}
	if searchReq.Highlight != nil {
		queryBody["highlight"] = searchReq.Highlight
	}
	if len(searchReq.Source) > 0 {
		queryBody["_source"] = searchReq.Source
	}
	if len(searchReq.Sort) > 0 {
		queryBody["sort"] = searchReq.Sort
	}

	var buf bytes.Buffer
	if err := json.NewEncoder(&buf).Encode(queryBody); err != nil {
		return nil, fmt.Errorf("error encoding query: %w", err)
	}

	logger.Debug("Elasticsearch searching indices", zap.Strings("indices", searchReq.IndexNames))
	logger.Debug("Elasticsearch DSL", zap.Any("dsl", queryBody))

	reqES := esapi.SearchRequest{
		Index: searchReq.IndexNames,
		Body:  &buf,
	}

	res, err := reqES.Do(ctx, e.client)
	if err != nil {
		return nil, fmt.Errorf("search failed: %w", err)
	}
	defer res.Body.Close()

	if res.IsError() {
		bodyBytes, err := io.ReadAll(res.Body)
		if err != nil {
			logger.Error("Elasticsearch failed to read error response body", err)
		} else {
			logger.Warn("Elasticsearch error response", zap.String("body", string(bodyBytes)))
		}
		return nil, fmt.Errorf("elasticsearch returned error: %s", res.Status())
	}

	var response SearchResponse
	if err := json.NewDecoder(res.Body).Decode(&response); err != nil {
		return nil, fmt.Errorf("error parsing response: %w", err)
	}

	return &response, nil
}

// calculatePagination derives the offset and limit from page, size, and topK.
// The limit is a rerank window: at least 30, snapped to the largest multiple
// of size that fits in 64, never below size, and never above topK.
func calculatePagination(page, size, topK int) (int, int) {
	if page < 1 {
		page = 1
	}
	if size <= 0 {
		size = 30
	}
	if topK <= 0 {
		topK = 1024
	}

	rerankLimit := max(30, (64/size)*size)
	if rerankLimit < size {
		rerankLimit = size
	}
	if rerankLimit > topK {
		rerankLimit = topK
	}

	offset := (page - 1) * rerankLimit
	if offset < 0 {
		offset = 0
	}

	return offset, rerankLimit
}
```
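The rerank-window arithmetic is easy to get wrong, so here is a sketch of a table test for `calculatePagination` (not part of the commit). The expected values are worked out by hand from the function body above; note the built-in `max` requires Go 1.21+.

```go
package elasticsearch

import "testing"

// TestCalculatePagination checks a few hand-computed cases.
func TestCalculatePagination(t *testing.T) {
	cases := []struct {
		page, size, topK      int
		wantOffset, wantLimit int
	}{
		{1, 10, 1024, 0, 60},   // (64/10)*10 = 60
		{2, 10, 1024, 60, 60},  // offset = (page-1) * limit
		{1, 100, 1024, 0, 100}, // (64/100)*100 = 0 -> max(30,0)=30 -> raised to size
		{1, 30, 40, 0, 40},     // max(30,60)=60 -> clamped down to topK
	}
	for _, c := range cases {
		offset, limit := calculatePagination(c.page, c.size, c.topK)
		if offset != c.wantOffset || limit != c.wantLimit {
			t.Errorf("calculatePagination(%d,%d,%d) = (%d,%d), want (%d,%d)",
				c.page, c.size, c.topK, offset, limit, c.wantOffset, c.wantLimit)
		}
	}
}
```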
```go
// buildFilterClauses builds ES filter clauses from kb_ids, doc_ids, and available_int.
// Reference: rag/utils/es_conn.py L60-L78
// When available == 0: available_int < 1
// When available != 0: NOT (available_int < 1), i.e. available_int >= 1
func buildFilterClauses(kbIDs, docIDs []string, available int) []map[string]interface{} {
	var filters []map[string]interface{}

	if len(kbIDs) > 0 {
		filters = append(filters, map[string]interface{}{
			"terms": map[string]interface{}{"kb_id": kbIDs},
		})
	}

	if len(docIDs) > 0 {
		filters = append(filters, map[string]interface{}{
			"terms": map[string]interface{}{"doc_id": docIDs},
		})
	}

	// Add the available_int filter
	// Reference: rag/utils/es_conn.py L63-L68
	if available == 0 {
		// available_int < 1
		filters = append(filters, map[string]interface{}{
			"range": map[string]interface{}{
				"available_int": map[string]interface{}{
					"lt": 1,
				},
			},
		})
	} else {
		// must_not: available_int < 1 (i.e., available_int >= 1)
		filters = append(filters, map[string]interface{}{
			"bool": map[string]interface{}{
				"must_not": []map[string]interface{}{
					{
						"range": map[string]interface{}{
							"available_int": map[string]interface{}{
								"lt": 1,
							},
						},
					},
				},
			},
		})
	}

	return filters
}

// buildESKeywordQuery builds the keyword search query for ES as a bool query:
// a query_string clause over the weighted text fields (must), plus the given
// filter clauses. boost is applied to the query_string clause.
func buildESKeywordQuery(matchText string, filterClauses []map[string]interface{}, boost float64) map[string]interface{} {
	// Use query_string so complex query syntax in matchText is honored
	mustClause := map[string]interface{}{
		"query_string": map[string]interface{}{
			"query":                matchText,
			"fields":               []string{"title_tks^10", "title_sm_tks^5", "important_kwd^30", "important_tks^20", "question_tks^20", "content_ltks^2", "content_sm_ltks"},
			"type":                 "best_fields",
			"minimum_should_match": "30%",
			"boost":                boost,
		},
	}

	return map[string]interface{}{
		"bool": map[string]interface{}{
			"must":   mustClause,
			"filter": filterClauses,
		},
	}
}

// convertESResponse converts an ES SearchResponse to the unified chunks format.
func convertESResponse(esResp *SearchResponse, vectorFieldName string) []map[string]interface{} {
	if esResp == nil || esResp.Hits.Hits == nil {
		return []map[string]interface{}{}
	}

	chunks := make([]map[string]interface{}, len(esResp.Hits.Hits))
	for i, hit := range esResp.Hits.Hits {
		chunks[i] = hit.Source
		chunks[i]["_score"] = hit.Score
		chunks[i]["_id"] = hit.ID
		// The vector field (vectorFieldName) is decoded as []float64 by
		// encoding/json; converting it to []float32 is left to callers.
		// vectorField := hit.Source[vectorFieldName]
		// chunks[i][vectorFieldName] = utility.Float64ToFloat32(vectorField)
	}
	return chunks
}

// Helper query builder functions (legacy)

// BuildMatchTextQuery builds a text match query.
func BuildMatchTextQuery(fields []string, text string, fuzziness string) map[string]interface{} {
	query := map[string]interface{}{
		"multi_match": map[string]interface{}{
			"query":  text,
			"fields": fields,
		},
	}

	if fuzziness != "" {
		if multiMatch, ok := query["multi_match"].(map[string]interface{}); ok {
			multiMatch["fuzziness"] = fuzziness
		}
	}

	return query
}

// BuildTermQuery builds a term query.
func BuildTermQuery(field string, value interface{}) map[string]interface{} {
	return map[string]interface{}{
		"term": map[string]interface{}{
			field: value,
		},
	}
}

// BuildRangeQuery builds a range query.
func BuildRangeQuery(field string, from, to interface{}) map[string]interface{} {
	rangeQuery := make(map[string]interface{})
	if from != nil {
		rangeQuery["gte"] = from
	}
	if to != nil {
		rangeQuery["lte"] = to
	}

	return map[string]interface{}{
		"range": map[string]interface{}{
			field: rangeQuery,
		},
	}
}

// BuildBoolQuery builds an empty bool query.
func BuildBoolQuery() map[string]interface{} {
	return map[string]interface{}{
		"bool": make(map[string]interface{}),
	}
}

// AddMust adds must clauses to a bool query.
func AddMust(query map[string]interface{}, clauses ...map[string]interface{}) {
	if boolQuery, ok := query["bool"].(map[string]interface{}); ok {
		if _, exists := boolQuery["must"]; !exists {
			boolQuery["must"] = []map[string]interface{}{}
		}
		if must, ok := boolQuery["must"].([]map[string]interface{}); ok {
			boolQuery["must"] = append(must, clauses...)
		}
	}
}

// AddShould adds should clauses to a bool query.
func AddShould(query map[string]interface{}, clauses ...map[string]interface{}) {
	if boolQuery, ok := query["bool"].(map[string]interface{}); ok {
		if _, exists := boolQuery["should"]; !exists {
			boolQuery["should"] = []map[string]interface{}{}
		}
		if should, ok := boolQuery["should"].([]map[string]interface{}); ok {
			boolQuery["should"] = append(should, clauses...)
		}
	}
}

// AddFilter adds filter clauses to a bool query.
func AddFilter(query map[string]interface{}, clauses ...map[string]interface{}) {
	if boolQuery, ok := query["bool"].(map[string]interface{}); ok {
		if _, exists := boolQuery["filter"]; !exists {
			boolQuery["filter"] = []map[string]interface{}{}
		}
		if filter, ok := boolQuery["filter"].([]map[string]interface{}); ok {
			boolQuery["filter"] = append(filter, clauses...)
		}
	}
}

// AddMustNot adds must_not clauses to a bool query.
func AddMustNot(query map[string]interface{}, clauses ...map[string]interface{}) {
	if boolQuery, ok := query["bool"].(map[string]interface{}); ok {
		if _, exists := boolQuery["must_not"]; !exists {
			boolQuery["must_not"] = []map[string]interface{}{}
		}
		if mustNot, ok := boolQuery["must_not"].([]map[string]interface{}); ok {
			boolQuery["must_not"] = append(mustNot, clauses...)
		}
	}
}
```
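Finally, a short sketch (not from the PR) of how the legacy builder helpers compose into a legacy `SearchRequest`. It assumes in-package context (e.g., a test in package `elasticsearch`) plus an engine `eng` and context `ctx` as in the earlier sketches; field and index names are illustrative.

```go
// Compose a bool query with the legacy helpers, then run a legacy search.
q := BuildBoolQuery()
AddMust(q, BuildMatchTextQuery([]string{"title_tks^10", "content_ltks^2"}, "vector database", "AUTO"))
AddFilter(q,
	BuildTermQuery("kb_id", "kb-123"),
	BuildRangeQuery("available_int", 1, nil),
)

legacyReq := &SearchRequest{
	IndexNames: []string{"ragflow_chunks"},
	Query:      q,
	Size:       10,
}
res, err := eng.Search(ctx, legacyReq)
if err != nil {
	log.Fatalf("legacy search: %v", err)
}
log.Printf("hits: %d", res.(*SearchResponse).Hits.Total.Value)
```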