ragflow/internal/engine/infinity/search.go

//
//  Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
//  Licensed under the Apache License, Version 2.0 (the "License");
//  you may not use this file except in compliance with the License.
//  You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
//  Unless required by applicable law or agreed to in writing, software
//  distributed under the License is distributed on an "AS IS" BASIS,
//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//  See the License for the specific language governing permissions and
//  limitations under the License.
//

package infinity

import (
	"context"
	"encoding/json"
	"fmt"
	"ragflow/internal/common"
	"ragflow/internal/engine/types"
	"ragflow/internal/utility"
	"regexp"
	"slices"
	"sort"
	"strconv"
	"strings"
	"unicode"

	infinity "github.com/infiniflow/infinity-go-sdk"
	"go.uber.org/zap"
)

// Search searches the Infinity engine for matching chunks.
// It supports three matching types: MatchTextExpr (full-text), MatchDenseExpr (vector), and FusionExpr (combined).
// If no match expressions are provided, Search relies solely on filter (e.g., doc_id, available_int) to find results.
func (e *infinityEngine) Search(ctx context.Context, req *types.SearchRequest) (*types.SearchResult, error) {
	common.Debug("Search in Infinity started", zap.Any("indexNames", req.IndexNames))
	if common.IsDebugEnabled() {
		// Format match expressions for logging
		var matchExprsStr string
		for i, expr := range req.MatchExprs {
			switch e := expr.(type) {
			case *types.MatchTextExpr:
				matchExprsStr += fmt.Sprintf("    [%d] MatchTextExpr: fields=%v, matchingText=%s, topN=%d, extraOptions=%v\n", i, e.Fields, e.MatchingText, e.TopN, e.ExtraOptions)
			case *types.MatchDenseExpr:
				matchExprsStr += fmt.Sprintf("    [%d] MatchDenseExpr: vectorColumn=%s, vectorSize=%d, topN=%d, extraOptions=%v\n", i, e.VectorColumnName, len(e.EmbeddingData), e.TopN, e.ExtraOptions)
			case *types.FusionExpr:
				matchExprsStr += fmt.Sprintf("    [%d] FusionExpr: method=%s, topN=%d, fusionParams=%v\n", i, e.Method, e.TopN, e.FusionParams)
			default:
				matchExprsStr += fmt.Sprintf("    [%d] unknown type\n", i)
			}
		}
		common.Debug(fmt.Sprintf("Search request:\n"+
			"    indexNames=%v\n"+
			"    KbIDs=%v\n"+
			"    offset=%d, limit=%d\n"+
			"    SelectFields=%v\n"+
			"    Filter=%v\n"+
			"    MatchExprs:\n%s    orderBy=%v\n"+
			"    RankFeature=%v",
			req.IndexNames, req.KbIDs, req.Offset, req.Limit, req.SelectFields, req.Filter, matchExprsStr, req.OrderBy, req.RankFeature))
	}

	if len(req.IndexNames) == 0 {
		return nil, fmt.Errorf("index names cannot be empty")
	}

	// Get retrieval parameters with defaults
	pageSize := req.Limit
	if pageSize <= 0 {
		pageSize = 30
	}

	offset := req.Offset
	if offset < 0 {
		offset = 0
	}

	db, err := e.client.conn.GetDatabase(e.client.dbName)
	if err != nil {
		return nil, fmt.Errorf("failed to get database: %w", err)
	}

	isMetadataTable := false
	isSkillIndex := false
	for _, idx := range req.IndexNames {
		if strings.HasPrefix(idx, "ragflow_doc_meta_") {
			isMetadataTable = true
			break
		}
		if strings.HasPrefix(idx, "skill_") {
			isSkillIndex = true
			break
		}
	}

	var outputColumns []string
	if isMetadataTable {
		outputColumns = []string{"id", "kb_id", "meta_fields"}
	} else if isSkillIndex {
		outputColumns = []string{
			"skill_id", "space_id", "folder_id", "name", "tags", "description", "content",
			"version", "status", "create_time", "update_time",
		}
		outputColumns = convertSelectFields(outputColumns, true)
	} else {
		outputColumns = []string{
			"id", "doc_id", "kb_id", "content_ltks", "content_with_weight",
			"title_tks", "docnm_kwd", "img_id", "available_int", "important_kwd",
			"position_int", "page_num_int", "top_int", "chunk_order_int",
			"create_timestamp_flt", "knowledge_graph_kwd", "question_kwd", "question_tks",
			"doc_type_kwd", "mom_id", "tag_kwd", "pagerank_fea", "tag_feas",
		}
		outputColumns = convertSelectFields(outputColumns)
	}

	hasTextMatch := false
	hasVectorMatch := false
	var matchText *types.MatchTextExpr
	var matchDense *types.MatchDenseExpr
	if req.MatchExprs != nil && len(req.MatchExprs) > 0 {
		for _, expr := range req.MatchExprs {
			if expr == nil {
				continue
			}
			switch e := expr.(type) {
			case string:
				if e != "" {
					hasTextMatch = true
					matchText = &types.MatchTextExpr{
						MatchingText: e,
						TopN:         pageSize,
					}
				}
			case *types.MatchTextExpr:
				if e.MatchingText != "" {
					hasTextMatch = true
					matchText = e
				}
			case *types.MatchDenseExpr:
				if len(e.EmbeddingData) > 0 {
					hasVectorMatch = true
					matchDense = e
				}
			}
		}
	}

	if hasTextMatch || hasVectorMatch {
		if hasTextMatch {
			outputColumns = append(outputColumns, "score()")
		}
		// similarity() is only allowed by Infinity when there is ONLY MATCH VECTOR.
		// When both text and vector matches exist (hybrid search with Fusion),
		// only score() is valid — Fusion produces a unified SCORE column.
		if hasVectorMatch && !hasTextMatch {
			outputColumns = append(outputColumns, "similarity()")
		}
		// Skill index does not have pagerank_fea and tag_feas columns
		if !isSkillIndex {
			if !slices.Contains(outputColumns, common.PAGERANK_FLD) {
				outputColumns = append(outputColumns, common.PAGERANK_FLD)
			}
			if !slices.Contains(outputColumns, common.TAG_FLD) {
				outputColumns = append(outputColumns, common.TAG_FLD)
			}
		}
	}

	if !slices.Contains(outputColumns, "row_id") && !slices.Contains(outputColumns, "row_id()") {
		outputColumns = append(outputColumns, "row_id()")
	}

	outputColumns = convertSelectFields(outputColumns, isSkillIndex)
	if hasVectorMatch && matchDense != nil && matchDense.VectorColumnName != "" {
		outputColumns = append(outputColumns, matchDense.VectorColumnName)
	}

	var filterParts []string
	if isMetadataTable && len(req.KbIDs) > 0 && req.KbIDs[0] != "" {
		kbIDs := req.KbIDs
		if len(kbIDs) == 1 {
			filterParts = append(filterParts, fmt.Sprintf("kb_id = '%s'", kbIDs[0]))
		} else {
			kbIDStr := strings.Join(kbIDs, "', '")
			filterParts = append(filterParts, fmt.Sprintf("kb_id IN ('%s')", kbIDStr))
		}
	}

	if !isMetadataTable && (hasTextMatch || hasVectorMatch) {
		if req.Filter != nil {
			if availInt, ok := req.Filter["available_int"]; ok {
				filterParts = append(filterParts, fmt.Sprintf("available_int=%v", availInt))
			} else if status, ok := req.Filter["status"]; ok {
				filterParts = append(filterParts, fmt.Sprintf("status='%s'", status))
			} else {
				if isSkillIndex {
					filterParts = append(filterParts, "status='1'")
				} else {
					filterParts = append(filterParts, "available_int=1")
				}
			}
		} else {
			if isSkillIndex {
				filterParts = append(filterParts, "status='1'")
			} else {
				filterParts = append(filterParts, "available_int=1")
			}
		}
	}

	// Build filter string from req.Filter
	if req.Filter != nil {
		filterCopy := req.Filter
		if !isMetadataTable {
			filterCopy = make(map[string]interface{})
			for k, v := range req.Filter {
				if k != "kb_id" {
					filterCopy[k] = v
				}
			}
		}

		condStr := equivalentConditionToStr(filterCopy)
		if condStr != "" {
			filterParts = append(filterParts, condStr)
		}
	}
	filterStr := strings.Join(filterParts, " AND ")

	orderBy := req.OrderBy
	var rankFeature map[string]float64
	if req.RankFeature != nil {
		rankFeature = req.RankFeature
	}

	var fusionExpr *types.FusionExpr
	if len(req.MatchExprs) > 2 {
		if fe, ok := req.MatchExprs[2].(*types.FusionExpr); ok {
			fusionExpr = fe
		}
	}

	var allResults []map[string]interface{}
	totalHits := int64(0)

	for _, indexName := range req.IndexNames {
		var tableNames []string
		if strings.HasPrefix(indexName, "ragflow_doc_meta_") {
			tableNames = []string{indexName}
		} else {
			kbIDs := req.KbIDs
			if len(kbIDs) == 0 {
				kbIDs = []string{""}
			}
			for _, kbID := range kbIDs {
				if kbID == "" {
					tableNames = append(tableNames, indexName)
				} else {
					tableNames = append(tableNames, fmt.Sprintf("%s_%s", indexName, kbID))
				}
			}
		}

		minMatch := 0.3

		var questionText string
		var vectorData []float64
		textTopN := pageSize
		var originalQuery string
		if matchText != nil {
			questionText = matchText.MatchingText
			textTopN = int(matchText.TopN)
			if matchText.ExtraOptions != nil {
				if oq, ok := matchText.ExtraOptions["original_query"].(string); ok {
					originalQuery = oq
				}
			}
		}
		if matchDense != nil {
			vectorData = matchDense.EmbeddingData
		}

		for _, tableName := range tableNames {
			tbl, err := db.GetTable(tableName)
			if err != nil {
				continue
			}
			table := tbl.Output(outputColumns)

			var textFields []string
			if matchText != nil && len(matchText.Fields) > 0 {
				textFields = matchText.Fields
			} else if isSkillIndex {
				textFields = []string{
					"name^10",
					"tags^5",
					"description^3",
					"content^1",
				}
			} else {
				textFields = []string{
					"title_tks^10",
					"title_sm_tks^5",
					"important_kwd^30",
					"important_tks^20",
					"question_tks^20",
					"content_ltks^2",
					"content_sm_ltks",
				}
			}

			// Convert field names for Infinity
			var convertedFields []string
			for _, f := range textFields {
				cf := convertMatchingField(f)
				convertedFields = append(convertedFields, cf)
			}
			fields := strings.Join(convertedFields, ",")

			hasTextMatch := questionText != ""
			hasVectorMatch := len(vectorData) > 0
			// Add text match if question is provided
			if hasTextMatch {
				extraOptions := map[string]string{
					"minimum_should_match": fmt.Sprintf("%d%%", int(minMatch*100)),
				}

				if filterStr != "" {
					extraOptions["filter"] = filterStr
				}

				if rankFeature != nil {
					var rankFeaturesList []string
					for featureName, weight := range rankFeature {
						rankFeaturesList = append(rankFeaturesList, fmt.Sprintf("%s^%s^%.0f", common.TAG_FLD, featureName, weight))
					}
					if len(rankFeaturesList) > 0 {
						extraOptions["rank_features"] = strings.Join(rankFeaturesList, ",")
					}
				}

				if originalQuery != "" {
					extraOptions["original_query"] = originalQuery
				}

				table = table.MatchText(fields, questionText, textTopN, extraOptions)

				common.Debug(fmt.Sprintf(
					"MatchTextExpr:\n"+
						"    fields=%s\n"+
						"    matching_text=%s\n"+
						"    topn=%d\n"+
						"    extra_options=%v",
					fields, questionText, textTopN, extraOptions,
				))
			}

			// Add vector match if provided
			if hasVectorMatch {
				vectorSize := len(vectorData)
				fieldName := fmt.Sprintf("q_%d_vec", vectorSize)
				dataType := "float"
				distanceType := "cosine"

				if matchDense != nil {
					if matchDense.VectorColumnName != "" {
						fieldName = matchDense.VectorColumnName
					}
					if matchDense.EmbeddingDataType != "" {
						dataType = matchDense.EmbeddingDataType
					}
					if matchDense.DistanceType != "" {
						distanceType = matchDense.DistanceType
					}
				}

				vectorTopN := pageSize
				if matchDense != nil && matchDense.TopN > 0 {
					vectorTopN = int(matchDense.TopN)
				}

				denseFilterStr := filterStr
				if denseFilterStr == "" {
					if isSkillIndex {
						denseFilterStr = "status='1'"
					} else {
						denseFilterStr = "available_int=1"
					}
				}

				if hasTextMatch && fusionExpr == nil {
					fieldsStr := strings.Join(convertedFields, ",")
					filterFulltext := fmt.Sprintf("filter_fulltext('%s', '%s')", fieldsStr, questionText)
					denseFilterStr = fmt.Sprintf("(%s) AND %s", denseFilterStr, filterFulltext)
				}
				extraOptions := map[string]string{
					"threshold": utility.FloatToString(0.0),
					"filter":    denseFilterStr,
				}

				common.Debug("MatchDense for hybrid search",
					zap.String("fieldName", fieldName),
					zap.String("distanceType", distanceType),
					zap.Int("topN", vectorTopN),
					zap.Bool("hasFusion", fusionExpr != nil))

				table = table.MatchDense(fieldName, vectorData, dataType, distanceType, vectorTopN, extraOptions)
			}

			// Add fusion (for text + vector combination)
			if hasTextMatch && hasVectorMatch && fusionExpr != nil {
				fusionMethod := fusionExpr.Method
				fusionTopK := fusionExpr.TopN
				if fusionTopK == 0 {
					fusionTopK = pageSize
				}
				fusionParams := map[string]interface{}{
					"normalize": "atan",
				}
				if fusionExpr.FusionParams != nil {
					for k, v := range fusionExpr.FusionParams {
						fusionParams[k] = v
					}
				}

				common.Debug("Applying Fusion for hybrid search",
					zap.String("method", fusionMethod),
					zap.Int("topN", fusionTopK),
					zap.Any("params", fusionParams))

				table = table.Fusion(fusionMethod, fusionTopK, fusionParams)
			}

			// Add order_by if provided
			if orderBy != nil && len(orderBy.Fields) > 0 {
				var sortFields [][2]interface{}
				for _, orderField := range orderBy.Fields {
					sortType := infinity.SortTypeAsc
					if orderField.Type == types.SortDesc {
						sortType = infinity.SortTypeDesc
					}
					sortFields = append(sortFields, [2]interface{}{orderField.Field, sortType})
				}
				table = table.Sort(sortFields)
			}

			// Add filter when there's no text/vector match (like metadata queries)
			if !hasTextMatch && !hasVectorMatch && filterStr != "" {
				common.Debug(fmt.Sprintf("Adding filter for no-match query: %s", filterStr))
				table = table.Filter(filterStr)
			}

			// Set limit and offset
			table = table.Limit(pageSize)
			if offset > 0 {
				table = table.Offset(offset)
			}

			// Request total_hits_count from Infinity
			table = table.Option(map[string]interface{}{"total_hits_count": true})

			// Execute query
			df, err := table.ToDataFrame()
			if err != nil {
				common.Warn("Infinity query failed",
					zap.String("tableName", tableName),
					zap.Bool("hasTextMatch", hasTextMatch),
					zap.Bool("hasVectorMatch", hasVectorMatch),
					zap.Bool("hasFusion", fusionExpr != nil),
					zap.Error(err))
				continue
			}

			// Convert DataFrame to chunks format (column-oriented to row-oriented)
			chunks := make([]map[string]interface{}, 0)
			for colName, colData := range df.ColumnData {
				for i, val := range colData {
					for len(chunks) <= i {
						chunks = append(chunks, make(map[string]interface{}))
					}
					chunks[i][colName] = val
				}
			}

			// Apply field name mapping and row_id handling
			// Skill index uses different schema
			// so we skip the document-specific field mappings
			if !isSkillIndex {
				GetFields(chunks, nil)
			} else {
				// For skill index, only handle ROW_ID -> row_id() mapping
				for _, chunk := range chunks {
					if val, ok := chunk["ROW_ID"]; ok {
						chunk["row_id()"] = val
						delete(chunk, "ROW_ID")
					}
				}
			}

			// Parse total_hits_count from ExtraInfo
			var tableTotal int64
			if df.ExtraInfo != "" {
				var extraResult map[string]interface{}
				if err := json.Unmarshal([]byte(df.ExtraInfo), &extraResult); err == nil {
					if count, ok := extraResult["total_hits_count"].(float64); ok {
						tableTotal = int64(count)
					}
				}
			}

			searchResult := &types.SearchResult{
				Chunks: chunks,
				Total:  tableTotal,
			}

			allResults = append(allResults, searchResult.Chunks...)
			totalHits += searchResult.Total
		}
	}

	if hasTextMatch || hasVectorMatch {
		scoreColumn := ""
		if hasTextMatch && hasVectorMatch {
			scoreColumn = "SCORE"
		} else if hasTextMatch {
			scoreColumn = "SCORE"
		} else if hasVectorMatch {
			scoreColumn = "SIMILARITY"
		}
		pagerankField := common.PAGERANK_FLD
		if isSkillIndex {
			pagerankField = "" // Skill index has no pagerank field
		}

		allResults = calculateScores(allResults, scoreColumn, pagerankField)
		allResults = sortByScore(allResults, len(allResults))
	}

	if len(allResults) > pageSize {
		allResults = allResults[:pageSize]
	}

	common.Debug("Search in Infinity completed", zap.Int("returnedRows", len(allResults)), zap.Int64("totalHits", totalHits))

	return &types.SearchResult{
		Chunks: allResults,
		Total:  totalHits,
	}, nil
}

// convertSelectFields converts field names to Infinity format
// isSkillIndex indicates if this is a skill index (uses skill_id instead of id)
func convertSelectFields(output []string, isSkillIndex ...bool) []string {
	fieldMapping := map[string]string{
		"docnm_kwd":           "docnm",
		"title_tks":           "docnm",
		"title_sm_tks":        "docnm",
		"important_kwd":       "important_keywords",
		"important_tks":       "important_keywords",
		"question_kwd":        "questions",
		"question_tks":        "questions",
		"content_with_weight": "content",
		"content_ltks":        "content",
		"content_sm_ltks":     "content",
		"authors_tks":         "authors",
		"authors_sm_tks":      "authors",
	}

	skillIndex := false
	if len(isSkillIndex) > 0 {
		skillIndex = isSkillIndex[0]
	}

	needEmptyCount := false
	for i, field := range output {
		if field == "important_kwd" {
			needEmptyCount = true
		}
		if newField, ok := fieldMapping[field]; ok {
			output[i] = newField
		}
	}

	// Remove duplicates
	seen := make(map[string]bool)
	result := []string{}
	for _, f := range output {
		if f != "" && !seen[f] {
			seen[f] = true
			result = append(result, f)
		}
	}

	// Add id and empty count if needed
	// For skill index, use skill_id instead of id
	hasID := false
	idField := "id"
	if skillIndex {
		idField = "skill_id"
	}
	for _, f := range result {
		if f == idField {
			hasID = true
			break
		}
	}
	if !hasID {
		result = append([]string{idField}, result...)
	}

	if needEmptyCount {
		result = append(result, "important_kwd_empty_count")
	}

	return result
}

// convertMatchingField converts field names for matching
// For regular document indices: maps _tks/_kwd fields to column@index_name format
// For skill indices: maps raw field names to column@index_name format
// Infinity requires column@index_name when a column has multiple full-text indexes
func convertMatchingField(fieldWeightStr string) string {
	// Split on ^ to get field name
	parts := strings.Split(fieldWeightStr, "^")
	field := parts[0]

	// Field name conversion
	fieldMapping := map[string]string{
		"docnm_kwd":           "docnm@ft_docnm_rag_coarse",
		"title_tks":           "docnm@ft_docnm_rag_coarse",
		"title_sm_tks":        "docnm@ft_docnm_rag_fine",
		"important_kwd":       "important_keywords@ft_important_keywords_rag_coarse",
		"important_tks":       "important_keywords@ft_important_keywords_rag_fine",
		"question_kwd":        "questions@ft_questions_rag_coarse",
		"question_tks":        "questions@ft_questions_rag_fine",
		"content_with_weight": "content@ft_content_rag_coarse",
		"content_ltks":        "content@ft_content_rag_coarse",
		"content_sm_ltks":     "content@ft_content_rag_fine",
		"authors_tks":         "authors@ft_authors_rag_coarse",
		"authors_sm_tks":      "authors@ft_authors_rag_fine",
		"tag_kwd":             "tag_kwd@ft_tag_kwd_whitespace__",
		// Skill index fields
		"name":        "name@ft_name_rag_coarse",
		"tags":        "tags@ft_tags_rag_coarse",
		"description": "description@ft_description_rag_coarse",
		"content":     "content@ft_content_rag_coarse",
	}

	if newField, ok := fieldMapping[field]; ok {
		parts[0] = newField
	}

	return strings.Join(parts, "^")
}

// escapeFilterValue escapes single quotes for filter values
func escapeFilterValue(s string) string {
	return strings.ReplaceAll(s, "'", "''")
}

// equivalentConditionToStr converts a condition map to an Infinity filter string
func equivalentConditionToStr(condition map[string]interface{}) string {
	if len(condition) == 0 {
		return ""
	}

	var cond []string

	for k, v := range condition {
		if k == "_id" || utility.IsEmpty(v) {
			continue
		}

		// Handle must_not specially
		if k == "must_not" {
			if m, ok := v.(map[string]interface{}); ok {
				for kk, vv := range m {
					if kk == "exists" {
						// For must_not exists, use !='' since we don't have table schema
						cond = append(cond, fmt.Sprintf("NOT (%v!='')", vv))
					}
				}
			}
			continue
		}

		// Handle exists specially (without table schema, use string comparison)
		if k == "exists" {
			cond = append(cond, fmt.Sprintf("%v!=''", v))
			continue
		}

		// Handle keyword fields (using full-text filter)
		if fieldKeyword(k) {
			// For keyword fields, values are always treated as strings for filter_fulltext
			switch val := v.(type) {
			case []string:
				var inCond []string
				for _, item := range val {
					inCond = append(inCond, fmt.Sprintf("filter_fulltext('%s', '%s')",
						convertMatchingField(k), escapeFilterValue(item)))
				}
				if len(inCond) > 0 {
					cond = append(cond, "("+strings.Join(inCond, " or ")+")")
				}
			case []interface{}:
				var inCond []string
				for _, item := range val {
					if s, ok := item.(string); ok {
						inCond = append(inCond, fmt.Sprintf("filter_fulltext('%s', '%s')",
							convertMatchingField(k), escapeFilterValue(s)))
					} else {
						inCond = append(inCond, fmt.Sprintf("filter_fulltext('%s', '%s')",
							convertMatchingField(k), escapeFilterValue(fmt.Sprintf("%v", item))))
					}
				}
				if len(inCond) > 0 {
					cond = append(cond, "("+strings.Join(inCond, " or ")+")")
				}
			case string:
				cond = append(cond, fmt.Sprintf("filter_fulltext('%s', '%s')",
					convertMatchingField(k), escapeFilterValue(val)))
			default:
				cond = append(cond, fmt.Sprintf("filter_fulltext('%s', '%s')",
					convertMatchingField(k), escapeFilterValue(fmt.Sprintf("%v", v))))
			}
			continue
		}

		// Handle list values (mixed types - strings get quotes, numbers don't)
		if list, ok := v.([]interface{}); ok && len(list) > 0 {
			var strItems, numItems []string
			for _, item := range list {
				if s, ok := item.(string); ok {
					strItems = append(strItems, fmt.Sprintf("'%s'", escapeFilterValue(s)))
				} else if n, ok := item.(int); ok {
					numItems = append(numItems, strconv.Itoa(n))
				} else if n, ok := item.(int64); ok {
					numItems = append(numItems, strconv.FormatInt(n, 10))
				} else if f, ok := item.(float64); ok {
					numItems = append(numItems, strconv.FormatFloat(f, 'f', -1, 64))
				} else if s, ok := item.(fmt.Stringer); ok {
					strItems = append(strItems, fmt.Sprintf("'%s'", escapeFilterValue(s.String())))
				} else {
					strItems = append(strItems, fmt.Sprintf("'%s'", escapeFilterValue(fmt.Sprintf("%v", item))))
				}
			}
			if len(strItems) > 0 {
				if len(strItems) == 1 {
					cond = append(cond, fmt.Sprintf("%s=%s", k, strItems[0]))
				} else {
					cond = append(cond, fmt.Sprintf("%s IN (%s)", k, strings.Join(strItems, ", ")))
				}
			}
			if len(numItems) > 0 {
				if len(numItems) == 1 {
					cond = append(cond, fmt.Sprintf("%s=%s", k, numItems[0]))
				} else {
					cond = append(cond, fmt.Sprintf("%s IN (%s)", k, strings.Join(numItems, ", ")))
				}
			}
			continue
		}

		if list, ok := v.([]string); ok && len(list) > 0 {
			if len(list) == 1 {
				cond = append(cond, fmt.Sprintf("%s='%s'", k, escapeFilterValue(list[0])))
			} else {
				var items []string
				for _, item := range list {
					items = append(items, fmt.Sprintf("'%s'", escapeFilterValue(item)))
				}
				cond = append(cond, fmt.Sprintf("%s IN (%s)", k, strings.Join(items, ", ")))
			}
			continue
		}

		if list, ok := v.([]int); ok && len(list) > 0 {
			if len(list) == 1 {
				cond = append(cond, fmt.Sprintf("%s=%d", k, list[0]))
			} else {
				var strs []string
				for _, n := range list {
					strs = append(strs, strconv.Itoa(n))
				}
				cond = append(cond, fmt.Sprintf("%s IN (%s)", k, strings.Join(strs, ", ")))
			}
			continue
		}

		// Handle numeric values (no quotes)
		if utility.IsNumericValue(v) {
			cond = append(cond, fmt.Sprintf("%s=%v", k, v))
			continue
		}

		// Handle string values (with quotes and escaping)
		if str, ok := v.(string); ok {
			cond = append(cond, fmt.Sprintf("%s='%s'", k, escapeFilterValue(str)))
			continue
		}

		// Fallback: treat as string
		cond = append(cond, fmt.Sprintf("%s='%s'", k, escapeFilterValue(fmt.Sprintf("%v", v))))
	}

	if len(cond) == 0 {
		return ""
	}
	return strings.Join(cond, " AND ")
}

// calculateScores calculates _score = score_column + pagerank
func calculateScores(chunks []map[string]interface{}, scoreColumn, pagerankField string) []map[string]interface{} {
	for i := range chunks {
		score := 0.0
		if scoreVal, ok := chunks[i][scoreColumn]; ok {
			if f, ok := utility.ToFloat64(scoreVal); ok {
				score += f
			}
		}
		if pagerankField != "" {
			if prVal, ok := chunks[i][pagerankField]; ok {
				if f, ok := utility.ToFloat64(prVal); ok {
					score += f
				}
			}
		}
		chunks[i]["_score"] = score
	}
	return chunks
}

// sortByScore sorts by _score descending and limits
func sortByScore(chunks []map[string]interface{}, limit int) []map[string]interface{} {
	if len(chunks) == 0 {
		return chunks
	}

	// Sort by _score descending
	sort.Slice(chunks, func(i, j int) bool {
		scoreI := getChunkScore(chunks[i])
		scoreJ := getChunkScore(chunks[j])
		return scoreI > scoreJ
	})

	// Limit
	if len(chunks) > limit && limit > 0 {
		chunks = chunks[:limit]
	}

	return chunks
}

// getChunkScore extracts the score from a chunk
func getChunkScore(chunk map[string]interface{}) float64 {
	if v, ok := chunk["_score"].(float64); ok {
		return v
	}
	if v, ok := chunk["SCORE"].(float64); ok {
		return v
	}
	if v, ok := chunk["SIMILARITY"].(float64); ok {
		return v
	}
	return 0.0
}

// GetAggregation aggregates field values from search results.
//
// Example:
// input chunks:
//
//	[{"docnm_kwd": "docA"}, {"docnm_kwd": "docA"}, {"docnm_kwd": "docB"}]
//
// GetAggregation(chunks, "docnm_kwd") returns:
//
//	[{"key": "docA", "count": 2}, {"key": "docB", "count": 1}]
//
// For tag_kwd field, splits values by "###" separator.
// For other fields, uses comma separation.
func (e *infinityEngine) GetAggregation(chunks []map[string]interface{}, fieldName string) []map[string]interface{} {
	if len(chunks) == 0 {
		return []map[string]interface{}{}
	}

	// Check if field exists in first chunk
	hasField := false
	for _, chunk := range chunks {
		if _, ok := chunk[fieldName]; ok {
			hasField = true
			break
		}
	}
	if !hasField {
		return []map[string]interface{}{}
	}

	// Count occurrences
	tagCounts := make(map[string]int)
	for _, chunk := range chunks {
		value, ok := chunk[fieldName]
		if !ok || value == nil {
			continue
		}

		// Handle string value
		if valueStr, ok := value.(string); ok {
			if valueStr == "" {
				continue
			}

			var tags []string
			// Split by "###" for tag_kwd field
			if fieldName == "tag_kwd" && strings.Contains(valueStr, "###") {
				for _, tag := range strings.Split(valueStr, "###") {
					tag = strings.TrimSpace(tag)
					if tag != "" {
						tags = append(tags, tag)
					}
				}
			} else {
				// Fallback to comma separation
				for _, tag := range strings.Split(valueStr, ",") {
					tag = strings.TrimSpace(tag)
					if tag != "" {
						tags = append(tags, tag)
					}
				}
			}

			for _, tag := range tags {
				tagCounts[tag]++
			}
			continue
		}

		// Handle list value
		if valueList, ok := value.([]interface{}); ok {
			for _, item := range valueList {
				if itemStr, ok := item.(string); ok {
					tag := strings.TrimSpace(itemStr)
					if tag != "" {
						tagCounts[tag]++
					}
				}
			}
		}
	}

	if len(tagCounts) == 0 {
		return []map[string]interface{}{}
	}

	// Convert to slice and sort by count descending
	type tagCountPair struct {
		tag   string
		count int
	}
	pairs := make([]tagCountPair, 0, len(tagCounts))
	for tag, count := range tagCounts {
		pairs = append(pairs, tagCountPair{tag, count})
	}
	sort.Slice(pairs, func(i, j int) bool {
		return pairs[i].count > pairs[j].count
	})

	// Convert to []map[string]interface{} directly
	result := make([]map[string]interface{}, len(pairs))
	for i, p := range pairs {
		result[i] = map[string]interface{}{"key": p.tag, "count": p.count}
	}

	return result
}

// GetDocIDs extracts document IDs from search results.
// Extracts "id" field from each chunk and returns as a list.
func (e *infinityEngine) GetDocIDs(chunks []map[string]interface{}) []string {
	if len(chunks) == 0 {
		return nil
	}
	ids := make([]string, 0, len(chunks))
	for _, chunk := range chunks {
		if id, ok := chunk["id"].(string); ok {
			ids = append(ids, id)
		}
	}
	return ids
}

// GetHighlight generates highlighted text snippets for search results.
// Matches keywords in text and wraps them with <em> tags.
func (e *infinityEngine) GetHighlight(chunks []map[string]interface{}, keywords []string, fieldName string) map[string]string {
	result := make(map[string]string)
	if len(chunks) == 0 || len(keywords) == 0 {
		return result
	}

	// Check if field exists
	hasField := false
	for _, chunk := range chunks {
		if _, ok := chunk[fieldName]; ok {
			hasField = true
			break
		}
	}
	if !hasField {
		// Try alternative field names
		if fieldName == "content_with_weight" {
			if _, ok := chunks[0]["content"]; ok {
				fieldName = "content"
				hasField = true
			}
		}
	}
	if !hasField {
		return result
	}

	emTag := regexp.MustCompile(`<em>[^<>]+</em>`)

	for _, chunk := range chunks {
		id := ""
		if idVal, ok := chunk["id"].(string); ok {
			id = idVal
		}

		txt, ok := chunk[fieldName].(string)
		if !ok || txt == "" {
			continue
		}

		// Check if already highlighted
		if emTag.MatchString(txt) {
			result[id] = txt
			continue
		}

		// Replace newlines with spaces
		txt = regexp.MustCompile(`[\r\n]`).ReplaceAllString(txt, " ")

		// Split by sentence delimiters
		delimiters := regexp.MustCompile(`[.?!;\n]`)
		segments := delimiters.Split(txt, -1)

		var highlightedSegments []string
		for _, segment := range segments {
			// Check if segment is English or contains keywords
			englishCount := 0
			totalCount := 0
			for _, r := range segment {
				if unicode.IsLetter(r) {
					totalCount++
					if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') {
						englishCount++
					}
				}
			}
			isEnglish := totalCount > 0 && float64(englishCount)/float64(totalCount) > 0.5
			segmentToCheck := segment
			if isEnglish {
				// For English: match whole words with boundaries
				for _, kw := range keywords {
					re := regexp.MustCompile(`(^|[ .?/'\"\(\)!,:;-])` + regexp.QuoteMeta(kw) + `([ .?/'\"\(\)!,:;-]|$)`)
					segmentToCheck = re.ReplaceAllString(segmentToCheck, "$1<em>"+kw+"</em>$2")
				}
			} else {
				// For non-English: simple keyword replacement (sorted by length desc for longer matches first)
				sortedKeywords := make([]string, len(keywords))
				copy(sortedKeywords, keywords)
				sort.Slice(sortedKeywords, func(i, j int) bool {
					return len(sortedKeywords[i]) > len(sortedKeywords[j])
				})
				for _, kw := range sortedKeywords {
					re := regexp.MustCompile(regexp.QuoteMeta(kw))
					segmentToCheck = re.ReplaceAllString(segmentToCheck, "<em>"+kw+"</em>")
				}
			}

			// Check if any keywords were highlighted
			if emTag.MatchString(segmentToCheck) {
				highlightedSegments = append(highlightedSegments, segmentToCheck)
			}
		}

		if len(highlightedSegments) > 0 {
			result[id] = "..." + strings.Join(highlightedSegments, "...") + "..."
		} else {
			result[id] = txt
		}
	}

	return result
}