mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-03-15 11:57:15 +08:00
Call get_flatted_meta_by_kbs in dify retrieval (#13509)
### What problem does this PR solve?

Fix https://github.com/infiniflow/ragflow/issues/13388

Call get_flatted_meta_by_kbs in dify retrieval. Remove get_meta_by_kbs.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -123,7 +123,7 @@ async def retrieval(tenant_id):
|
||||
similarity_threshold = float(retrieval_setting.get("score_threshold", 0.0))
|
||||
top = int(retrieval_setting.get("top_k", 1024))
|
||||
metadata_condition = req.get("metadata_condition", {}) or {}
|
||||
metas = DocMetadataService.get_meta_by_kbs([kb_id])
|
||||
metas = DocMetadataService.get_flatted_meta_by_kbs([kb_id])
|
||||
|
||||
doc_ids = []
|
||||
try:
|
||||
|
||||
@ -694,82 +694,6 @@ class DocMetadataService:
|
||||
logging.error(f"Error getting metadata for document {doc_id}: {e}")
|
||||
return {}
|
||||
|
||||
@classmethod
@DB.connection_context()
def get_meta_by_kbs(cls, kb_ids: List[str]) -> Dict:
    """
    Get metadata for documents in knowledge bases (Legacy).

    Legacy metadata aggregator (backward-compatible).
    - Does NOT expand list values: a list is kept as one string key.
      Example: {"tags": ["foo","bar"]} -> meta["tags"]["['foo', 'bar']"] = [doc_id]
    - A scalar value is wrapped in a one-element list before being
      stringified, so {"author": "alice"} -> meta["author"]["['alice']"] = [doc_id]
    - Dict values, and lists that contain a list or dict, are skipped
      (the field name still appears in the result, possibly with no values).
    - Expects meta_fields is a dict.
    Use when existing callers rely on the old list-as-string semantics.

    Args:
        kb_ids: List of knowledge base IDs. The tenant, and therefore the
            metadata index that is searched, is taken from the first ID.

    Returns:
        Metadata dictionary in format: {field_name: {value: [doc_ids]}}.
        An empty dict is returned when kb_ids is empty, when the first
        knowledge base cannot be loaded, or when any error occurs.
    """
    # Nothing to look up. Without this guard kb_ids[0] raises IndexError,
    # which the except branch below would report as an error.
    if not kb_ids:
        return {}

    try:
        # Get tenant_id from first KB
        kb = Knowledgebase.get_by_id(kb_ids[0])
        if not kb:
            return {}

        index_name = cls._get_doc_meta_index_name(kb.tenant_id)

        # Query with large limit; at most 10000 metadata rows are aggregated.
        results = settings.docStoreConn.search(
            select_fields=["*"],
            highlight_fields=[],
            condition={"kb_id": kb_ids},
            match_expressions=[],
            order_by=OrderByExpr(),
            offset=0,
            limit=10000,
            index_names=index_name,
            knowledgebase_ids=kb_ids,
        )

        logging.debug("[get_meta_by_kbs] index_name: %s, kb_ids: %s", index_name, kb_ids)

        # Aggregate metadata (legacy: keeps lists as string keys)
        meta = {}

        # Use helper to iterate over results in any format
        for doc_id, doc in cls._iter_search_results(results):
            # Extract metadata fields (exclude system fields)
            doc_meta = cls._extract_metadata(doc)

            for field, value in doc_meta.items():
                # Register the field before the skip check below, so a field
                # whose values are all skipped still maps to an empty dict.
                doc_ids_by_value = meta.setdefault(field, {})

                # If not list, make it a list
                values = value if isinstance(value, list) else [value]

                # Skip nested lists/dicts
                if any(isinstance(item, (list, dict)) for item in values):
                    continue

                # Legacy: use the entire list, stringified, as the key
                doc_ids_by_value.setdefault(str(values), []).append(doc_id)

        # Lazy %-args: the potentially large dict is only formatted when
        # debug logging is actually enabled.
        logging.debug("[get_meta_by_kbs] KBs: %s, Returning metadata: %s", kb_ids, meta)
        return meta

    except Exception as e:
        # Best-effort API: callers get an empty result rather than an exception.
        logging.error(f"Error getting metadata for KBs {kb_ids}: {e}")
        return {}
|
||||
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def get_flatted_meta_by_kbs(cls, kb_ids: List[str]) -> Dict:
|
||||
|
||||
@ -161,6 +161,11 @@ def _load_dify_retrieval_module(monkeypatch):
|
||||
|
||||
tenant_llm_service_mod.TenantService = _StubTenantService
|
||||
tenant_llm_service_mod.TenantLLMService = _StubTenantLLMService
|
||||
|
||||
class _StubLLMFactoriesService:
|
||||
pass
|
||||
|
||||
tenant_llm_service_mod.LLMFactoriesService = _StubLLMFactoriesService
|
||||
monkeypatch.setitem(sys.modules, "api.db.services.tenant_llm_service", tenant_llm_service_mod)
|
||||
|
||||
# Mock llm_service for LLMService
|
||||
@ -265,7 +270,7 @@ def test_retrieval_success_with_metadata_and_kg(monkeypatch):
|
||||
)
|
||||
|
||||
monkeypatch.setattr(module, "jsonify", lambda payload: payload)
|
||||
monkeypatch.setattr(module.DocMetadataService, "get_meta_by_kbs", lambda _kb_ids: [{"doc_id": "doc-1"}])
|
||||
monkeypatch.setattr(module.DocMetadataService, "get_flatted_meta_by_kbs", lambda _kbs: [{"doc_id": "doc-1"}])
|
||||
monkeypatch.setattr(module.KnowledgebaseService, "get_by_id", lambda _kb_id: (True, _DummyKB()))
|
||||
monkeypatch.setattr(module, "convert_conditions", lambda cond: cond.get("conditions", []))
|
||||
monkeypatch.setattr(module, "meta_filter", lambda *_args, **_kwargs: [])
|
||||
@ -303,7 +308,7 @@ def test_retrieval_success_with_metadata_and_kg(monkeypatch):
|
||||
def test_retrieval_kb_not_found(monkeypatch):
|
||||
module = _load_dify_retrieval_module(monkeypatch)
|
||||
_set_request_json(monkeypatch, module, {"knowledge_id": "kb-missing", "query": "hello"})
|
||||
monkeypatch.setattr(module.DocMetadataService, "get_meta_by_kbs", lambda _kb_ids: [])
|
||||
monkeypatch.setattr(module.DocMetadataService, "get_flatted_meta_by_kbs", lambda _kbs: [])
|
||||
monkeypatch.setattr(module.KnowledgebaseService, "get_by_id", lambda _kb_id: (False, None))
|
||||
|
||||
res = _run(inspect.unwrap(module.retrieval)("tenant-1"))
|
||||
@ -315,7 +320,7 @@ def test_retrieval_kb_not_found(monkeypatch):
|
||||
def test_retrieval_not_found_exception_mapping(monkeypatch):
|
||||
module = _load_dify_retrieval_module(monkeypatch)
|
||||
_set_request_json(monkeypatch, module, {"knowledge_id": "kb-1", "query": "hello"})
|
||||
monkeypatch.setattr(module.DocMetadataService, "get_meta_by_kbs", lambda _kb_ids: [])
|
||||
monkeypatch.setattr(module.DocMetadataService, "get_flatted_meta_by_kbs", lambda _kbs: [])
|
||||
monkeypatch.setattr(module.KnowledgebaseService, "get_by_id", lambda _kb_id: (True, _DummyKB()))
|
||||
monkeypatch.setattr(module, "label_question", lambda *_args, **_kwargs: [])
|
||||
|
||||
@ -334,7 +339,7 @@ def test_retrieval_not_found_exception_mapping(monkeypatch):
|
||||
def test_retrieval_generic_exception_mapping(monkeypatch):
|
||||
module = _load_dify_retrieval_module(monkeypatch)
|
||||
_set_request_json(monkeypatch, module, {"knowledge_id": "kb-1", "query": "hello"})
|
||||
monkeypatch.setattr(module.DocMetadataService, "get_meta_by_kbs", lambda _kb_ids: [])
|
||||
monkeypatch.setattr(module.DocMetadataService, "get_flatted_meta_by_kbs", lambda _kbs: [])
|
||||
monkeypatch.setattr(module.KnowledgebaseService, "get_by_id", lambda _kb_id: (True, _DummyKB()))
|
||||
monkeypatch.setattr(module, "label_question", lambda *_args, **_kwargs: [])
|
||||
|
||||
|
||||
Reference in New Issue
Block a user