Call get_flatted_meta_by_kbs in dify retrieval (#13509)

### What problem does this PR solve?

Fix https://github.com/infiniflow/ragflow/issues/13388

Call `get_flatted_meta_by_kbs` instead of `get_meta_by_kbs` in Dify retrieval, and remove `get_meta_by_kbs`.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
qinling0210
2026-03-11 13:42:24 +08:00
committed by GitHub
parent 2d2d3cdbcf
commit 1815f5950b
3 changed files with 10 additions and 81 deletions

View File

@ -123,7 +123,7 @@ async def retrieval(tenant_id):
similarity_threshold = float(retrieval_setting.get("score_threshold", 0.0))
top = int(retrieval_setting.get("top_k", 1024))
metadata_condition = req.get("metadata_condition", {}) or {}
metas = DocMetadataService.get_meta_by_kbs([kb_id])
metas = DocMetadataService.get_flatted_meta_by_kbs([kb_id])
doc_ids = []
try:

View File

@ -694,82 +694,6 @@ class DocMetadataService:
logging.error(f"Error getting metadata for document {doc_id}: {e}")
return {}
@classmethod
@DB.connection_context()
def get_meta_by_kbs(cls, kb_ids: List[str]) -> Dict:
    """
    Aggregate document metadata across knowledge bases (legacy semantics).

    Unlike a flattening aggregator, list values are NOT expanded: the whole
    list is stringified and used as a single key. Scalar values are wrapped
    in a one-element list first, so they are keyed the same way.

    Examples:
        {"tags": ["foo", "bar"]} -> meta["tags"]["['foo', 'bar']"] = [doc_id]
        {"author": "bob"}        -> meta["author"]["['bob']"] = [doc_id]

    Values that contain a nested list or dict are skipped; the field name
    is still registered in the result (possibly mapping to an empty dict).

    The tenant (and therefore the metadata index) is resolved from the
    first entry of ``kb_ids`` only, and a single search for at most 10000
    results is issued.

    Args:
        kb_ids: List of knowledge base IDs.

    Returns:
        Metadata dictionary in the format {field_name: {value: [doc_ids]}}.
        An empty dict is returned when ``kb_ids`` is empty, when the first
        knowledge base cannot be resolved, or when any error occurs (the
        error is logged, not raised).
    """
    # Nothing to look up. Handle this explicitly instead of letting
    # kb_ids[0] raise into the broad handler below and log a bogus error.
    if not kb_ids:
        return {}

    try:
        # The tenant is taken from the first KB only.
        # NOTE(review): if Knowledgebase.get_by_id raises for a missing row
        # rather than returning a falsy value, the `not kb` branch is never
        # taken and the except clause handles it instead — confirm.
        kb = Knowledgebase.get_by_id(kb_ids[0])
        if not kb:
            return {}
        index_name = cls._get_doc_meta_index_name(kb.tenant_id)

        # Single query with a large fixed limit (no pagination).
        results = settings.docStoreConn.search(
            select_fields=["*"],
            highlight_fields=[],
            condition={"kb_id": kb_ids},
            match_expressions=[],
            order_by=OrderByExpr(),
            offset=0,
            limit=10000,
            index_names=index_name,
            knowledgebase_ids=kb_ids,
        )
        logging.debug(f"[get_meta_by_kbs] index_name: {index_name}, kb_ids: {kb_ids}")

        meta = {}
        # _iter_search_results yields (doc_id, doc) pairs from the search
        # result; _extract_metadata returns the per-document metadata dict.
        for doc_id, doc in cls._iter_search_results(results):
            for field, value in cls._extract_metadata(doc).items():
                # Register the field before the nested-value check so the
                # field name appears in the result even if every value for
                # it ends up being skipped (matches the legacy output).
                docs_by_value = meta.setdefault(field, {})
                values = value if isinstance(value, list) else [value]
                # Nested lists/dicts have no stable legacy key; skip them.
                if any(isinstance(item, (list, dict)) for item in values):
                    continue
                # Legacy: the entire list, stringified, is the key.
                docs_by_value.setdefault(str(values), []).append(doc_id)

        logging.debug(f"[get_meta_by_kbs] KBs: {kb_ids}, Returning metadata: {meta}")
        return meta
    except Exception as e:
        # Best-effort API: callers get an empty result rather than an error.
        logging.error(f"Error getting metadata for KBs {kb_ids}: {e}")
        return {}
@classmethod
@DB.connection_context()
def get_flatted_meta_by_kbs(cls, kb_ids: List[str]) -> Dict: