fix(#14389): normalize list metadata values for `in` / `not in` filters (#14410)

## Summary
- normalize string items for list-valued metadata filters in
`meta_filter`
- fix `in` / `not in` case asymmetry when document metadata is
lowercased but filter list values are not
- add regression tests that cover the original issue scenario using
uppercase list values

## Validation
- `PYTHONPATH=external/ragflow pytest
external/ragflow/test/unit_test/common/test_metadata_filter_operators.py
-q`

## Notes
- I commented on #14389 before opening this PR to claim the issue.
- The new tests use `value=["F2", "F11"]` so they fail on the old
implementation and pass with this fix.
- Side effect: the shared helper now runs for every non-comparison
operator, so list values are lowercased for those operators too, not
only for `in` / `not in`.

Co-authored-by: copizza <copizza@users.noreply.github.com>
Co-authored-by: Wang Qi <wangq8@outlook.com>
This commit is contained in:
Shiyao Huang
2026-05-06 14:28:25 +08:00
committed by GitHub
parent e4aee25b4b
commit 406b36a452
2 changed files with 23 additions and 8 deletions

View File

@ -42,6 +42,13 @@ def convert_conditions(metadata_condition):
def meta_filter(metas: dict, filters: list[dict], logic: str = "and"):
doc_ids = set([])
def normalize_string_values(value):
    """Return *value* with its string content lowercased.

    - A ``str`` is returned lowercased.
    - A ``list`` yields a new list in which every ``str`` item is
      lowercased and every other item is kept as-is (nested containers
      are not descended into).
    - Any other value is returned unchanged.
    """
    if isinstance(value, list):
        lowered = []
        for element in value:
            if isinstance(element, str):
                lowered.append(element.lower())
            else:
                lowered.append(element)
        return lowered
    # Not a list: only plain strings need folding; everything else passes through.
    return value.lower() if isinstance(value, str) else value
def filter_out(v2docs, operator, value):
ids = []
for input, docids in v2docs.items():
@ -96,14 +103,8 @@ def meta_filter(metas: dict, filters: list[dict], logic: str = "and"):
value = value.lower()
else:
# Non-comparison operators: maintain original logic
if isinstance(input, str):
input = input.lower()
elif operator in ("in", "not in") and isinstance(input, list):
input = [x.lower() if isinstance(x, str) else x for x in input]
if isinstance(value, str):
value = value.lower()
elif operator in ("in", "not in") and isinstance(value, list):
value = [x.lower() if isinstance(x, str) else x for x in value]
input = normalize_string_values(input)
value = normalize_string_values(value)
matched = False
try:

View File

@ -33,6 +33,20 @@ def test_not_in_operator():
assert meta_filter(metas, filters) == ["doc3"]
def test_in_operator_with_list_value_is_case_insensitive():
    """`in` with an uppercase list value selects the docs keyed "F2" and "F11"."""
    product_index = {
        "product": {
            "F2": ["doc1"],
            "F11": ["doc2"],
            "G1": ["doc3"],
        }
    }
    conditions = [{"key": "product", "op": "in", "value": ["F2", "F11"]}]
    matched = meta_filter(product_index, conditions)
    # Result order is not asserted, so compare as a set.
    assert set(matched) == {"doc1", "doc2"}
def test_not_in_operator_with_list_value_is_case_insensitive():
    """`not in` with an uppercase list value leaves only the doc keyed "G1"."""
    product_index = {
        "product": {
            "F2": ["doc1"],
            "F11": ["doc2"],
            "G1": ["doc3"],
        }
    }
    conditions = [{"key": "product", "op": "not in", "value": ["F2", "F11"]}]
    remaining = meta_filter(product_index, conditions)
    assert remaining == ["doc3"]
def test_start_with():
# returns chunk where the metadata starts with the value
metas = {"name": {"prefix_value": ["doc1"], "other": ["doc2"]}}