tests: improve RAGFlow coverage based on Codecov report (#13219)

### What problem does this PR solve?

Codecov’s coverage report shows that several RAGFlow code paths are
currently untested or under-tested. This makes it easier for regressions
to slip in during refactors and feature work.
This PR adds targeted automated tests to cover the files and branches
highlighted by Codecov, improving confidence in core behavior while
keeping runtime functionality unchanged.

### Type of change

- [x] Other (please describe): Test coverage improvement (adds/extends
unit and integration tests to address Codecov-reported gaps)
This commit is contained in:
6ba3i
2026-02-26 19:03:26 +08:00
committed by GitHub
parent 1aa49a11f0
commit 22c4d72891
26 changed files with 11107 additions and 13 deletions

View File

@ -26,6 +26,8 @@ from types import ModuleType, SimpleNamespace
import pytest
# Module-level pytest mark: for every test in this module, ignore the
# UserWarning whose message matches "joblib will operate in serial mode".
pytestmark = pytest.mark.filterwarnings("ignore:.*joblib will operate in serial mode.*:UserWarning")
class _DummyManager:
def route(self, *_args, **_kwargs):
@ -169,6 +171,16 @@ def _base_update_payload(**kwargs):
return payload
@pytest.fixture(scope="session")
def auth():
    """Return a fixed placeholder auth value for these unit tests.

    NOTE(review): presumably shadows a same-named fixture from a shared
    conftest so that no real login is performed -- confirm.
    """
    return "unit-auth"
@pytest.fixture(scope="session", autouse=True)
def set_tenant_info():
    """Session-wide, auto-used fixture that does nothing and yields ``None``.

    NOTE(review): presumably overrides a same-named autouse fixture from a
    shared conftest so its setup is skipped for this module -- confirm.
    """
    return None
@pytest.mark.p2
def test_create_branches(monkeypatch):
module = _load_kb_module(monkeypatch)
@ -1046,3 +1058,236 @@ def test_unbind_task_branch_matrix(monkeypatch):
res = route()
assert res["code"] == module.RetCode.EXCEPTION_ERROR, res
assert "cannot delete task" in res["message"], res
@pytest.mark.p2
def test_check_embedding_similarity_threshold_matrix_unit(monkeypatch):
    """Run ``check_embedding`` on a low-similarity sample, then on one good chunk.

    First pass: the fake doc store reports six chunks. Offset 0 yields no
    chunk id; offsets 1-5 yield, in order, a chunk without ``q_vec``, a chunk
    whose ``q_vec`` is a dict, a chunk whose ``q_vec`` is the tab-separated
    string ``"0\\t0"``, a chunk whose text makes the patched ``re.sub`` return
    a flip-flopping bool, and an ordinary chunk. The response must be
    ``NOT_EFFECTIVE`` with 5 sampled / 2 valid results and must contain the
    ``no_stored_vector`` and ``no_text`` reasons.

    Second pass: a single chunk whose stored vector equals the vectors the
    fake model returns must produce ``SUCCESS`` with ``avg_cos_sim > 0.9``.
    """
    module = _load_kb_module(monkeypatch)
    # Call the innermost wrapped handler rather than the decorated route.
    route = inspect.unwrap(module.check_embedding)

    def _fake_get_by_id(_kb_id):
        return True, SimpleNamespace(tenant_id="tenant-1")

    def _fake_index_name(_tenant_id):
        return "idx"

    monkeypatch.setattr(module.KnowledgebaseService, "get_by_id", _fake_get_by_id)
    monkeypatch.setattr(module.search, "index_name", _fake_index_name)

    class _FlipBool:
        """Value that is truthy on its first truth test and falsy afterwards.

        NOTE(review): presumably lets the cleaned text pass an initial
        truthiness check and fail a later one, reaching the "no_text"
        branch -- confirm against the route.
        """

        def __init__(self):
            self._checks = 0

        def __bool__(self):
            is_first_check = self._checks == 0
            self._checks += 1
            return is_first_check

    def _fake_sub(_pattern, _repl, text):
        # Only the marker text is swapped for the flip-flopping value;
        # everything else passes through unchanged.
        if "TRIGGER_NO_TEXT" in str(text):
            return _FlipBool()
        return text

    monkeypatch.setattr(module.re, "sub", _fake_sub)

    # Deterministic replacement for random.sample: take the first k items.
    monkeypatch.setattr(module.random, "sample", lambda population, k: list(population)[:k])

    class _DocStore:
        """In-memory doc store answering a count query and per-offset sample queries."""

        def __init__(self, total, ids_by_offset, docs):
            self.total = total
            self.ids_by_offset = ids_by_offset
            self.docs = docs

        def search(self, select_fields, **kwargs):
            # A search with no selected fields is treated as the "total" query.
            if select_fields:
                return {"kind": "sample", "offset": kwargs["offset"]}
            return {"kind": "total"}

        def get_total(self, _res):
            return self.total

        def get_doc_ids(self, res):
            offset = res.get("offset", -1)
            return self.ids_by_offset.get(offset, [])

        def get(self, cid, _index_name, _kb_ids):
            return self.docs.get(cid, {})

    class _EmbModel:
        """Fake embedding model returning canned vector pairs keyed by title."""

        def __init__(self):
            self.calls = []

        def encode(self, pair):
            title, _txt = pair
            self.calls.append(title)
            if title == "Doc Mix":
                # title+content mix wins over content only path.
                first, second = [1.0, 0.0], [0.0, 1.0]
            elif title == "Doc High":
                first, second = [1.0, 0.0], [1.0, 0.0]
            else:
                first, second = [0.0, 1.0], [0.0, 1.0]
            return [module.np.array(first), module.np.array(second)], None

    emb_model = _EmbModel()
    monkeypatch.setattr(module, "LLMBundle", lambda *_args, **_kwargs: emb_model)

    # --- Pass 1: mixed sample that should fall below the similarity threshold.
    low_similarity_chunks = {
        "chunk-no-vec": {
            "doc_id": "doc-no-vec",
            "docnm_kwd": "Doc No Vec",
            "content_with_weight": "body-no-vec",
            "page_num_int": 1,
            "position_int": 1,
            "top_int": 1,
        },
        "chunk-bad-type": {
            "doc_id": "doc-bad-type",
            "docnm_kwd": "Doc Bad Type",
            "content_with_weight": "body-bad-type",
            "question_kwd": [],
            "q_vec": {"bad": "type"},
            "page_num_int": 1,
            "position_int": 2,
            "top_int": 2,
        },
        "chunk-low-zero": {
            "doc_id": "doc-low-zero",
            "docnm_kwd": "Doc Low Zero",
            "content_with_weight": "body-low",
            "question_kwd": [],
            "q_vec": "0\t0",
            "page_num_int": 1,
            "position_int": 3,
            "top_int": 3,
        },
        "chunk-no-text": {
            "doc_id": "doc-no-text",
            "docnm_kwd": "Doc No Text",
            "content_with_weight": "TRIGGER_NO_TEXT",
            "q_vec": [1.0, 0.0],
            "page_num_int": 1,
            "position_int": 4,
            "top_int": 4,
        },
        "chunk-mix": {
            "doc_id": "doc-mix",
            "docnm_kwd": "Doc Mix",
            "content_with_weight": "body-mix",
            "q_vec": [1.0, 0.0],
            "page_num_int": 1,
            "position_int": 5,
            "top_int": 5,
        },
    }
    low_ids_by_offset = {
        0: [],
        1: ["chunk-no-vec"],
        2: ["chunk-bad-type"],
        3: ["chunk-low-zero"],
        4: ["chunk-no-text"],
        5: ["chunk-mix"],
    }
    low_store = _DocStore(total=6, ids_by_offset=low_ids_by_offset, docs=low_similarity_chunks)
    monkeypatch.setattr(module.settings, "docStoreConn", low_store)
    _set_request_json(monkeypatch, module, {"kb_id": "kb-1", "embd_id": "emb-1", "check_num": 6})

    low_res = _run(route())
    assert low_res["code"] == module.RetCode.NOT_EFFECTIVE, low_res
    assert "average similarity" in low_res["message"], low_res

    summary = low_res["data"]["summary"]
    assert summary["sampled"] == 5, summary
    assert summary["valid"] == 2, summary

    reasons = set()
    for item in low_res["data"]["results"]:
        if "reason" in item:
            reasons.add(item.get("reason"))
    assert "no_stored_vector" in reasons, low_res
    assert "no_text" in reasons, low_res

    low_zero_scored = [
        item
        for item in low_res["data"]["results"]
        if item.get("chunk_id") == "chunk-low-zero" and "cos_sim" in item
    ]
    assert low_zero_scored, low_res
    assert summary["match_mode"] in {"content_only", "title+content"}, summary

    # --- Pass 2: a single chunk whose stored vector matches the model output.
    high_similarity_chunks = {
        "chunk-high": {
            "doc_id": "doc-high",
            "docnm_kwd": "Doc High",
            "content_with_weight": "body-high",
            "q_vec": [1.0, 0.0],
            "page_num_int": 1,
            "position_int": 1,
            "top_int": 1,
        }
    }
    high_store = _DocStore(total=1, ids_by_offset={0: ["chunk-high"]}, docs=high_similarity_chunks)
    monkeypatch.setattr(module.settings, "docStoreConn", high_store)
    _set_request_json(monkeypatch, module, {"kb_id": "kb-1", "embd_id": "emb-1", "check_num": 1})

    high_res = _run(route())
    assert high_res["code"] == module.RetCode.SUCCESS, high_res
    assert high_res["data"]["summary"]["avg_cos_sim"] > 0.9, high_res
@pytest.mark.p2
def test_check_embedding_error_and_empty_sample_paths_unit(monkeypatch):
    """Cover the embedding-failure and empty-store paths of ``check_embedding``.

    First pass: the embedding model raises ``RuntimeError("encode boom")``;
    the response must be ``DATA_ERROR`` and its message must contain both
    "Embedding failure." and the original error text.

    Second pass: the doc store reports zero chunks; calling the route is
    expected to raise ``UnboundLocalError``.
    NOTE(review): this pins what looks like a latent bug in the route rather
    than a designed outcome -- confirm, and update this test if it is fixed.
    """
    module = _load_kb_module(monkeypatch)
    # Call the innermost wrapped handler rather than the decorated route.
    route = inspect.unwrap(module.check_embedding)

    def _fake_get_by_id(_kb_id):
        return True, SimpleNamespace(tenant_id="tenant-1")

    def _fake_index_name(_tenant_id):
        return "idx"

    def _first_k(population, k):
        # Deterministic replacement for random.sample.
        return list(population)[:k]

    monkeypatch.setattr(module.KnowledgebaseService, "get_by_id", _fake_get_by_id)
    monkeypatch.setattr(module.search, "index_name", _fake_index_name)
    monkeypatch.setattr(module.random, "sample", _first_k)

    class _DocStore:
        """In-memory doc store answering a count query and per-offset sample queries."""

        def __init__(self, total, ids_by_offset, docs):
            self.total = total
            self.ids_by_offset = ids_by_offset
            self.docs = docs

        def search(self, select_fields, **kwargs):
            # A search with no selected fields is treated as the "total" query.
            if select_fields:
                return {"kind": "sample", "offset": kwargs["offset"]}
            return {"kind": "total"}

        def get_total(self, _res):
            return self.total

        def get_doc_ids(self, res):
            offset = res.get("offset", -1)
            return self.ids_by_offset.get(offset, [])

        def get(self, cid, _index_name, _kb_ids):
            return self.docs.get(cid, {})

    class _BoomEmbModel:
        """Embedding model whose ``encode`` always fails."""

        def encode(self, _pair):
            raise RuntimeError("encode boom")

    # --- Pass 1: encoding raises, the route must report a data error.
    failing_chunks = {
        "chunk-err": {
            "doc_id": "doc-err",
            "docnm_kwd": "Doc Err",
            "content_with_weight": "body-err",
            "q_vec": [1.0, 0.0],
            "page_num_int": 1,
            "position_int": 1,
            "top_int": 1,
        }
    }
    failing_store = _DocStore(total=1, ids_by_offset={0: ["chunk-err"]}, docs=failing_chunks)
    monkeypatch.setattr(module, "LLMBundle", lambda *_args, **_kwargs: _BoomEmbModel())
    monkeypatch.setattr(module.settings, "docStoreConn", failing_store)
    _set_request_json(monkeypatch, module, {"kb_id": "kb-1", "embd_id": "emb-1", "check_num": 1})

    error_res = _run(route())
    assert error_res["code"] == module.RetCode.DATA_ERROR, error_res
    assert "Embedding failure." in error_res["message"], error_res
    assert "encode boom" in error_res["message"], error_res

    class _OkEmbModel:
        """Embedding model that always returns two identical unit vectors."""

        def encode(self, _pair):
            return [module.np.array([1.0, 0.0]), module.np.array([1.0, 0.0])], None

    # --- Pass 2: empty store, nothing can be sampled.
    empty_store = _DocStore(total=0, ids_by_offset={}, docs={})
    monkeypatch.setattr(module, "LLMBundle", lambda *_args, **_kwargs: _OkEmbModel())
    monkeypatch.setattr(module.settings, "docStoreConn", empty_store)
    _set_request_json(monkeypatch, module, {"kb_id": "kb-1", "embd_id": "emb-1", "check_num": 1})

    with pytest.raises(UnboundLocalError):
        _run(route())