mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-05-03 00:37:48 +08:00
Fix retrieval function when metadata_condition is specified in retrieval API (#13473)
### What problem does this PR solve?

Fix https://github.com/infiniflow/ragflow/issues/13388

The following command returns an empty result even when a document with the matching metadata exists:

```
curl --request POST \
  --url http://localhost:9222/api/v1/retrieval \
  --header 'Content-Type: application/json' \
  --header 'Authorization: Bearer ragflow-fO3mPFePfLgUYg8-9gjBVVXbvHqrvMPLGaW0P86PvAk' \
  --data '{
    "question": "any question",
    "dataset_ids": ["9bb4f0591b8811f18a4a84ba59049aa3"],
    "metadata_condition": {
      "logic": "and",
      "conditions": [
        { "name": "character", "comparison_operator": "is", "value": "刘备" }
      ]
    }
  }'
```

When metadata_condition is specified in the retrieval API, it is converted to doc_ids, and doc_ids is passed to the retrieval function. In the retrieval function, when doc_ids is explicitly provided, we should bypass the similarity threshold.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -1682,7 +1682,7 @@ async def retrieval_test(tenant_id):
|
||||
if not doc_ids:
|
||||
metadata_condition = req.get("metadata_condition")
|
||||
if metadata_condition:
|
||||
metas = DocMetadataService.get_meta_by_kbs(kb_ids)
|
||||
metas = DocMetadataService.get_flatted_meta_by_kbs(kb_ids)
|
||||
doc_ids = meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and"))
|
||||
# If metadata_condition has conditions but no docs match, return empty result
|
||||
if not doc_ids and metadata_condition.get("conditions"):
|
||||
|
||||
@ -438,6 +438,12 @@ class Dealer:
|
||||
|
||||
# When vector_similarity_weight is 0, similarity_threshold is not meaningful for term-only scores.
|
||||
post_threshold = 0.0 if vector_similarity_weight <= 0 else similarity_threshold
|
||||
|
||||
# When doc_ids is explicitly provided (metadata or document filtering), bypass threshold
|
||||
# User wants those specific documents regardless of their relevance score
|
||||
if doc_ids:
|
||||
post_threshold = 0.0
|
||||
|
||||
valid_idx = [int(i) for i in sorted_idx if sim_np[i] >= post_threshold]
|
||||
filtered_count = len(valid_idx)
|
||||
ranks["total"] = int(filtered_count)
|
||||
|
||||
@ -220,6 +220,11 @@ def _load_doc_module(monkeypatch):
|
||||
|
||||
tenant_llm_service_mod.TenantService = _StubTenantService
|
||||
tenant_llm_service_mod.TenantLLMService = _StubTenantLLMService
|
||||
|
||||
class _StubLLMFactoriesService:
|
||||
pass
|
||||
|
||||
tenant_llm_service_mod.LLMFactoriesService = _StubLLMFactoriesService
|
||||
monkeypatch.setitem(sys.modules, "api.db.services.tenant_llm_service", tenant_llm_service_mod)
|
||||
|
||||
# Mock LLMService
|
||||
@ -993,7 +998,7 @@ class TestDocRoutesUnit:
|
||||
"get_request_json",
|
||||
lambda: _AwaitableValue({"dataset_ids": ["ds-1"], "question": "q", "metadata_condition": {"logic": "and"}}),
|
||||
)
|
||||
monkeypatch.setattr(module.DocMetadataService, "get_meta_by_kbs", lambda _ids: [])
|
||||
monkeypatch.setattr(module.DocMetadataService, "get_flatted_meta_by_kbs", lambda _kbs: [])
|
||||
monkeypatch.setattr(module, "meta_filter", lambda *_args, **_kwargs: [])
|
||||
res = _run(module.retrieval_test.__wrapped__("tenant-1"))
|
||||
assert "code" in res
|
||||
|
||||
Reference in New Issue
Block a user