Fix retrieval function when metadata_condition is specified in retrieval API (#13473)

### What problem does this PR solve?

Fix https://github.com/infiniflow/ragflow/issues/13388

The following command returns an empty result even though a document with the matching metadata exists:
```
curl --request POST \
     --url http://localhost:9222/api/v1/retrieval \
     --header 'Content-Type: application/json' \
     --header 'Authorization: Bearer ragflow-fO3mPFePfLgUYg8-9gjBVVXbvHqrvMPLGaW0P86PvAk' \
     --data '{
          "question": "any question",
          "dataset_ids": ["9bb4f0591b8811f18a4a84ba59049aa3"],
           "metadata_condition": {
            "logic": "and",
            "conditions": [
              {
                "name": "character",
                "comparison_operator": "is",
                "value": "刘备"
              }
            ]
          }
     }'
```

When metadata_condition is specified in the retrieval API, it is
converted to doc_ids, and doc_ids is passed to the retrieval function.
In the retrieval function, when doc_ids is explicitly provided, we should
bypass the similarity threshold.


### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
qinling0210
2026-03-10 11:57:32 +08:00
committed by GitHub
parent 292a1a8566
commit 7c92f51133
3 changed files with 13 additions and 2 deletions

View File

@ -1682,7 +1682,7 @@ async def retrieval_test(tenant_id):
if not doc_ids:
metadata_condition = req.get("metadata_condition")
if metadata_condition:
metas = DocMetadataService.get_meta_by_kbs(kb_ids)
metas = DocMetadataService.get_flatted_meta_by_kbs(kb_ids)
doc_ids = meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and"))
# If metadata_condition has conditions but no docs match, return empty result
if not doc_ids and metadata_condition.get("conditions"):

View File

@ -438,6 +438,12 @@ class Dealer:
# When vector_similarity_weight is 0, similarity_threshold is not meaningful for term-only scores.
post_threshold = 0.0 if vector_similarity_weight <= 0 else similarity_threshold
# When doc_ids is explicitly provided (metadata or document filtering), bypass threshold
# User wants those specific documents regardless of their relevance score
if doc_ids:
post_threshold = 0.0
valid_idx = [int(i) for i in sorted_idx if sim_np[i] >= post_threshold]
filtered_count = len(valid_idx)
ranks["total"] = int(filtered_count)

View File

@ -220,6 +220,11 @@ def _load_doc_module(monkeypatch):
tenant_llm_service_mod.TenantService = _StubTenantService
tenant_llm_service_mod.TenantLLMService = _StubTenantLLMService
class _StubLLMFactoriesService:
pass
tenant_llm_service_mod.LLMFactoriesService = _StubLLMFactoriesService
monkeypatch.setitem(sys.modules, "api.db.services.tenant_llm_service", tenant_llm_service_mod)
# Mock LLMService
@ -993,7 +998,7 @@ class TestDocRoutesUnit:
"get_request_json",
lambda: _AwaitableValue({"dataset_ids": ["ds-1"], "question": "q", "metadata_condition": {"logic": "and"}}),
)
monkeypatch.setattr(module.DocMetadataService, "get_meta_by_kbs", lambda _ids: [])
monkeypatch.setattr(module.DocMetadataService, "get_flatted_meta_by_kbs", lambda _kbs: [])
monkeypatch.setattr(module, "meta_filter", lambda *_args, **_kwargs: [])
res = _run(module.retrieval_test.__wrapped__("tenant-1"))
assert "code" in res