tests: improve RAGFlow coverage based on Codecov report (#13200)

### What problem does this PR solve?

Codecov’s coverage report shows that several RAGFlow code paths are
currently untested or under-tested. This makes it easier for regressions
to slip in during refactors and feature work.
This PR adds targeted automated tests to cover the files and branches
highlighted by Codecov, improving confidence in core behavior while
keeping runtime functionality unchanged.

### Type of change

- [x] Other (please describe): Test coverage improvement (adds/extends
unit and integration tests to address Codecov-reported gaps)
This commit is contained in:
6ba3i
2026-02-25 19:12:11 +08:00
committed by GitHub
parent 2a5ddf064d
commit 38011f2c16
56 changed files with 11453 additions and 17 deletions

View File

@ -15,10 +15,22 @@
#
import importlib.util
import sys
from pathlib import Path
from types import ModuleType, SimpleNamespace
import pytest
from common import bulk_upload_documents, delete_document, list_documents
class _DummyManager:
def route(self, *_args, **_kwargs):
def decorator(func):
return func
return decorator
@pytest.fixture(scope="function")
def add_document_func(request, WebApiAuth, add_dataset, ragflow_tmp_dir):
def cleanup():
@ -56,3 +68,49 @@ def add_documents_func(request, WebApiAuth, add_dataset_func, ragflow_tmp_dir):
dataset_id = add_dataset_func
return dataset_id, bulk_upload_documents(WebApiAuth, dataset_id, 3, ragflow_tmp_dir)
@pytest.fixture()
def document_app_module(monkeypatch):
    """Load ``api/apps/document_app.py`` as a standalone module with stubbed dependencies.

    Before the file is executed, ``sys.modules`` entries for ``common``,
    ``deepdoc`` (plus its ``parser``, ``parser.excel_parser`` and
    ``parser.html_parser`` submodules), ``xgboost`` and ``api.apps`` are
    replaced through ``monkeypatch`` so they are restored after the test.
    The module object receives a ``manager`` attribute before execution so
    that route decorators evaluated at import time resolve to a no-op manager.

    Returns:
        The executed module object, named ``test_document_app_unit``.
    """
    # The repository root is taken to be four directories above this file's folder.
    root = Path(__file__).resolve().parents[4]

    class _StubPdfParser:
        pass

    class _StubExcelParser:
        pass

    class _StubHtmlParser:
        pass

    # ``common`` becomes a namespace-like package pointing at the real directory.
    common_stub = ModuleType("common")
    common_stub.__path__ = [str(root / "common")]

    parser_stub = ModuleType("deepdoc.parser")
    parser_stub.__path__ = []
    parser_stub.PdfParser = _StubPdfParser

    deepdoc_stub = ModuleType("deepdoc")
    deepdoc_stub.parser = parser_stub

    excel_stub = ModuleType("deepdoc.parser.excel_parser")
    excel_stub.RAGFlowExcelParser = _StubExcelParser

    html_stub = ModuleType("deepdoc.parser.html_parser")
    html_stub.RAGFlowHtmlParser = _StubHtmlParser

    apps_stub = ModuleType("api.apps")
    apps_stub.current_user = SimpleNamespace(id="user-1")
    apps_stub.login_required = lambda func: func

    # Insertion order mirrors the order in which the stubs were originally installed.
    replacements = {
        "common": common_stub,
        "deepdoc": deepdoc_stub,
        "deepdoc.parser": parser_stub,
        "deepdoc.parser.excel_parser": excel_stub,
        "deepdoc.parser.html_parser": html_stub,
        "xgboost": ModuleType("xgboost"),
        "api.apps": apps_stub,
    }
    for dotted_name, stub in replacements.items():
        monkeypatch.setitem(sys.modules, dotted_name, stub)

    source = root / "api" / "apps" / "document_app.py"
    spec = importlib.util.spec_from_file_location("test_document_app_unit", source)
    loaded = importlib.util.module_from_spec(spec)
    # ``manager`` must exist before the module body runs.
    loaded.manager = _DummyManager()
    spec.loader.exec_module(loaded)
    return loaded

View File

@ -13,7 +13,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import asyncio
import string
from types import SimpleNamespace
from concurrent.futures import ThreadPoolExecutor, as_completed
import pytest
@ -21,6 +23,7 @@ from common import create_document, list_kbs
from configs import DOCUMENT_NAME_LIMIT, INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth
from utils.file_utils import create_txt_file
from api.constants import FILE_NAME_LEN_LIMIT
@pytest.mark.p1
@ -90,3 +93,130 @@ class TestDocumentCreate:
res = list_kbs(WebApiAuth, {"id": kb_id})
assert res["data"]["kbs"][0]["doc_num"] == count, res
def _run(coro):
return asyncio.run(coro)
@pytest.mark.p2
class TestDocumentCreateUnit:
    """Drive ``create.__wrapped__`` of the loaded document app module through its branches.

    Each test stubs ``get_request_json`` (and, where needed, service methods)
    on the module supplied by the ``document_app_module`` fixture, then checks
    the ``code`` and ``message``/``data`` entries of the returned mapping.
    """

    @staticmethod
    def _stub_json(module, monkeypatch, make_payload):
        """Patch ``module.get_request_json`` with a coroutine returning ``make_payload()``."""

        async def _fake_json():
            return make_payload()

        monkeypatch.setattr(module, "get_request_json", _fake_json)

    @classmethod
    def _call_create(cls, module, monkeypatch, make_payload):
        """Install the JSON stub, run ``create.__wrapped__`` and return its result."""
        cls._stub_json(module, monkeypatch, make_payload)
        return _run(module.create.__wrapped__())

    @staticmethod
    def _kb():
        """Build the knowledge-base stand-in used by the service-level tests."""
        return SimpleNamespace(
            id="kb1",
            tenant_id="tenant1",
            name="kb",
            parser_id="parser",
            pipeline_id="pipe",
            parser_config={},
        )

    def test_missing_kb_id(self, document_app_module, monkeypatch):
        """An empty ``kb_id`` produces code 101 and the missing-KB-ID message."""
        outcome = self._call_create(document_app_module, monkeypatch, lambda: {"kb_id": "", "name": "doc.txt"})
        assert outcome["code"] == 101
        assert outcome["message"] == 'Lack of "KB ID"'

    def test_filename_too_long(self, document_app_module, monkeypatch):
        """A name one character over ``FILE_NAME_LEN_LIMIT`` is rejected with code 101."""
        long_name = "a" * (FILE_NAME_LEN_LIMIT + 1)
        outcome = self._call_create(document_app_module, monkeypatch, lambda: {"kb_id": "kb1", "name": long_name})
        assert outcome["code"] == 101
        assert outcome["message"] == f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less."

    def test_filename_whitespace(self, document_app_module, monkeypatch):
        """A whitespace-only name is reported as empty with code 101."""
        outcome = self._call_create(document_app_module, monkeypatch, lambda: {"kb_id": "kb1", "name": " "})
        assert outcome["code"] == 101
        assert outcome["message"] == "File name can't be empty."

    def test_kb_not_found(self, document_app_module, monkeypatch):
        """A failed ``KnowledgebaseService.get_by_id`` lookup yields code 102."""
        app = document_app_module
        monkeypatch.setattr(app.KnowledgebaseService, "get_by_id", lambda _kb_id: (False, None))
        outcome = self._call_create(app, monkeypatch, lambda: {"kb_id": "missing", "name": "doc.txt"})
        assert outcome["code"] == 102
        assert outcome["message"] == "Can't find this dataset!"

    def test_duplicate_name(self, document_app_module, monkeypatch):
        """A non-empty ``DocumentService.query`` result is reported as a duplicate name."""
        app = document_app_module
        kb = self._kb()
        monkeypatch.setattr(app.KnowledgebaseService, "get_by_id", lambda _kb_id: (True, kb))
        monkeypatch.setattr(app.DocumentService, "query", lambda **_kwargs: [object()])
        outcome = self._call_create(app, monkeypatch, lambda: {"kb_id": "kb1", "name": "doc.txt"})
        assert outcome["code"] == 102
        assert "Duplicated document name" in outcome["message"]

    def test_root_folder_missing(self, document_app_module, monkeypatch):
        """``FileService.get_kb_folder`` returning ``None`` yields the root-folder error."""
        app = document_app_module
        kb = self._kb()
        monkeypatch.setattr(app.KnowledgebaseService, "get_by_id", lambda _kb_id: (True, kb))
        monkeypatch.setattr(app.DocumentService, "query", lambda **_kwargs: [])
        monkeypatch.setattr(app.FileService, "get_kb_folder", lambda *_args, **_kwargs: None)
        outcome = self._call_create(app, monkeypatch, lambda: {"kb_id": "kb1", "name": "doc.txt"})
        assert outcome["code"] == 102
        assert outcome["message"] == "Cannot find the root folder."

    def test_kb_folder_missing(self, document_app_module, monkeypatch):
        """``FileService.new_a_file_from_kb`` returning ``None`` yields the kb-folder error."""
        app = document_app_module
        kb = self._kb()
        monkeypatch.setattr(app.KnowledgebaseService, "get_by_id", lambda _kb_id: (True, kb))
        monkeypatch.setattr(app.DocumentService, "query", lambda **_kwargs: [])
        monkeypatch.setattr(app.FileService, "get_kb_folder", lambda *_args, **_kwargs: {"id": "root"})
        monkeypatch.setattr(app.FileService, "new_a_file_from_kb", lambda *_args, **_kwargs: None)
        outcome = self._call_create(app, monkeypatch, lambda: {"kb_id": "kb1", "name": "doc.txt"})
        assert outcome["code"] == 102
        assert outcome["message"] == "Cannot find the kb folder for this file."

    def test_success(self, document_app_module, monkeypatch):
        """With every service stubbed to succeed, the inserted document's id is returned."""
        app = document_app_module
        kb = self._kb()

        class _Doc:
            def __init__(self, doc_id):
                self.id = doc_id

            def to_json(self):
                return {"id": self.id, "name": "doc.txt", "kb_id": "kb1"}

            def to_dict(self):
                return {"id": self.id, "name": "doc.txt", "kb_id": "kb1"}

        monkeypatch.setattr(app.KnowledgebaseService, "get_by_id", lambda _kb_id: (True, kb))
        monkeypatch.setattr(app.DocumentService, "query", lambda **_kwargs: [])
        monkeypatch.setattr(app.FileService, "get_kb_folder", lambda *_args, **_kwargs: {"id": "root"})
        monkeypatch.setattr(app.FileService, "new_a_file_from_kb", lambda *_args, **_kwargs: {"id": "folder"})
        monkeypatch.setattr(app.DocumentService, "insert", lambda _doc: _Doc("doc1"))
        monkeypatch.setattr(app.FileService, "add_file_from_kb", lambda *_args, **_kwargs: None)
        outcome = self._call_create(app, monkeypatch, lambda: {"kb_id": "kb1", "name": "doc.txt"})
        assert outcome["code"] == 0
        assert outcome["data"]["id"] == "doc1"

View File

@ -13,6 +13,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import asyncio
from types import SimpleNamespace
import pytest
from common import (
document_change_status,
@ -241,3 +244,170 @@ class TestDocumentMetadataNegative:
res = document_set_meta(WebApiAuth, {"doc_id": doc_id, "meta": "[]"})
assert res["code"] == 101, res
assert "dictionary" in res["message"], res
def _run(coro):
return asyncio.run(coro)
@pytest.mark.p2
class TestDocumentMetadataUnit:
    """Call ``get_filter``, ``doc_infos``, ``metadata_summary`` and ``metadata_update`` with stubs.

    The handlers come from the module supplied by the ``document_app_module``
    fixture; request JSON and service methods are replaced per test.
    """

    @staticmethod
    def _stub_json(module, monkeypatch, make_payload):
        """Patch ``module.get_request_json`` with a coroutine returning ``make_payload()``."""

        async def _fake_json():
            return make_payload()

        monkeypatch.setattr(module, "get_request_json", _fake_json)

    @staticmethod
    def _raise_boom(*_args, **_kwargs):
        """Service replacement that always fails with ``RuntimeError("boom")``."""
        raise RuntimeError("boom")

    def _allow_kb(self, module, monkeypatch, kb_id="kb1", tenant_id="tenant1"):
        """Stub tenant and knowledge-base queries so that only ``kb_id`` is reported as found."""
        monkeypatch.setattr(
            module.UserTenantService,
            "query",
            lambda **_kwargs: [SimpleNamespace(tenant_id=tenant_id)],
        )
        monkeypatch.setattr(
            module.KnowledgebaseService,
            "query",
            lambda **_kwargs: bool(_kwargs.get("id") == kb_id),
        )

    def test_filter_missing_kb_id(self, document_app_module, monkeypatch):
        """An empty request body makes ``get_filter`` answer with code 101."""
        app = document_app_module
        self._stub_json(app, monkeypatch, lambda: {})
        outcome = _run(app.get_filter())
        assert outcome["code"] == 101
        assert "KB ID" in outcome["message"]

    def test_filter_unauthorized(self, document_app_module, monkeypatch):
        """A falsy ``KnowledgebaseService.query`` result makes ``get_filter`` answer 103."""
        app = document_app_module
        monkeypatch.setattr(app.UserTenantService, "query", lambda **_kwargs: [SimpleNamespace(tenant_id="tenant1")])
        monkeypatch.setattr(app.KnowledgebaseService, "query", lambda **_kwargs: False)
        self._stub_json(app, monkeypatch, lambda: {"kb_id": "kb1"})
        outcome = _run(app.get_filter())
        assert outcome["code"] == 103

    def test_filter_invalid_filters(self, document_app_module, monkeypatch):
        """Unknown ``run_status`` and ``types`` values are each rejected with code 102."""
        app = document_app_module
        self._allow_kb(app, monkeypatch)
        scenarios = (
            (lambda: {"kb_id": "kb1", "run_status": ["INVALID"]}, "Invalid filter run status"),
            (lambda: {"kb_id": "kb1", "types": ["INVALID"]}, "Invalid filter conditions"),
        )
        for make_payload, fragment in scenarios:
            self._stub_json(app, monkeypatch, make_payload)
            outcome = _run(app.get_filter())
            assert outcome["code"] == 102
            assert fragment in outcome["message"]

    def test_filter_keywords_suffix(self, document_app_module, monkeypatch):
        """With keywords and suffix supplied, the response data carries a ``filter`` entry."""
        app = document_app_module
        self._allow_kb(app, monkeypatch)
        monkeypatch.setattr(app.DocumentService, "get_filter_by_kb_id", lambda *_args, **_kwargs: ({"run": {}}, 1))
        self._stub_json(app, monkeypatch, lambda: {"kb_id": "kb1", "keywords": "ragflow", "suffix": ["txt"]})
        outcome = _run(app.get_filter())
        assert outcome["code"] == 0
        assert "filter" in outcome["data"]

    def test_filter_exception(self, document_app_module, monkeypatch):
        """An exception raised by ``get_filter_by_kb_id`` is reported as code 100."""
        app = document_app_module
        self._allow_kb(app, monkeypatch)
        monkeypatch.setattr(app.DocumentService, "get_filter_by_kb_id", self._raise_boom)
        self._stub_json(app, monkeypatch, lambda: {"kb_id": "kb1"})
        outcome = _run(app.get_filter())
        assert outcome["code"] == 100

    def test_infos_meta_fields(self, document_app_module, monkeypatch):
        """``doc_infos`` attaches the stubbed metadata under ``meta_fields``."""
        app = document_app_module

        class _Docs:
            def dicts(self):
                return [{"id": "doc1"}]

        monkeypatch.setattr(app.DocumentService, "accessible", lambda *_args, **_kwargs: True)
        monkeypatch.setattr(app.DocumentService, "get_by_ids", lambda _ids: _Docs())
        monkeypatch.setattr(app.DocMetadataService, "get_document_metadata", lambda _doc_id: {"author": "alice"})
        self._stub_json(app, monkeypatch, lambda: {"doc_ids": ["doc1"]})
        outcome = _run(app.doc_infos())
        assert outcome["code"] == 0
        assert outcome["data"][0]["meta_fields"]["author"] == "alice"

    def test_metadata_summary_missing_kb_id(self, document_app_module, monkeypatch):
        """A body without ``kb_id`` makes ``metadata_summary`` answer with code 101."""
        app = document_app_module
        self._stub_json(app, monkeypatch, lambda: {"doc_ids": ["doc1"]})
        outcome = _run(app.metadata_summary())
        assert outcome["code"] == 101

    def test_metadata_summary_unauthorized(self, document_app_module, monkeypatch):
        """A falsy ``KnowledgebaseService.query`` result makes ``metadata_summary`` answer 103."""
        app = document_app_module
        monkeypatch.setattr(app.UserTenantService, "query", lambda **_kwargs: [SimpleNamespace(tenant_id="tenant1")])
        monkeypatch.setattr(app.KnowledgebaseService, "query", lambda **_kwargs: False)
        self._stub_json(app, monkeypatch, lambda: {"kb_id": "kb1", "doc_ids": ["doc1"]})
        outcome = _run(app.metadata_summary())
        assert outcome["code"] == 103

    def test_metadata_summary_success_and_exception(self, document_app_module, monkeypatch):
        """``metadata_summary`` returns a summary on success and code 100 when the service raises."""
        app = document_app_module
        self._allow_kb(app, monkeypatch)
        monkeypatch.setattr(
            app.DocMetadataService,
            "get_metadata_summary",
            lambda *_args, **_kwargs: {"author": {"alice": 1}},
        )
        self._stub_json(app, monkeypatch, lambda: {"kb_id": "kb1", "doc_ids": ["doc1"]})
        outcome = _run(app.metadata_summary())
        assert outcome["code"] == 0
        assert "summary" in outcome["data"]

        # Same request again, now with the service failing.
        monkeypatch.setattr(app.DocMetadataService, "get_metadata_summary", self._raise_boom)
        outcome = _run(app.metadata_summary())
        assert outcome["code"] == 100

    def test_metadata_update_missing_kb_id(self, document_app_module, monkeypatch):
        """A body without ``kb_id`` makes ``metadata_update`` answer with code 101."""
        app = document_app_module
        self._stub_json(app, monkeypatch, lambda: {"doc_ids": ["doc1"], "updates": [], "deletes": []})
        outcome = _run(app.metadata_update.__wrapped__())
        assert outcome["code"] == 101
        assert "KB ID" in outcome["message"]

    def test_metadata_update_success(self, document_app_module, monkeypatch):
        """The stubbed ``batch_update_metadata`` count is surfaced as ``matched_docs``."""
        app = document_app_module
        monkeypatch.setattr(app.DocMetadataService, "batch_update_metadata", lambda *_args, **_kwargs: 1)
        self._stub_json(
            app,
            monkeypatch,
            lambda: {"kb_id": "kb1", "doc_ids": ["doc1"], "updates": [{"key": "author", "value": "alice"}], "deletes": []},
        )
        outcome = _run(app.metadata_update.__wrapped__())
        assert outcome["code"] == 0
        assert outcome["data"]["matched_docs"] == 1

View File

@ -13,7 +13,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import asyncio
from concurrent.futures import ThreadPoolExecutor, as_completed
from types import SimpleNamespace
import pytest
from common import list_documents
@ -178,3 +180,214 @@ class TestDocumentsList:
responses = list(as_completed(futures))
assert len(responses) == count, responses
assert all(future.result()["code"] == 0 for future in futures), responses
def _run(coro):
return asyncio.run(coro)
class _DummyArgs(dict):
def get(self, key, default=None):
return super().get(key, default)
@pytest.mark.p2
class TestDocumentsListUnit:
    """Call ``list_docs`` of the loaded document app module with stubbed request and services.

    Query-string arguments are supplied through a fake ``request`` object and
    the JSON body through a patched ``get_request_json``.
    """

    @staticmethod
    def _stub_json(module, monkeypatch, make_payload):
        """Patch ``module.get_request_json`` with a coroutine returning ``make_payload()``."""

        async def _fake_json():
            return make_payload()

        monkeypatch.setattr(module, "get_request_json", _fake_json)

    def _set_args(self, module, monkeypatch, **kwargs):
        """Replace ``module.request`` with an object whose ``args`` holds ``kwargs``."""
        fake_request = SimpleNamespace(args=_DummyArgs(kwargs))
        monkeypatch.setattr(module, "request", fake_request)

    def _allow_kb(self, module, monkeypatch, kb_id="kb1", tenant_id="tenant1"):
        """Stub tenant and knowledge-base queries so that only ``kb_id`` is reported as found."""
        monkeypatch.setattr(
            module.UserTenantService,
            "query",
            lambda **_kwargs: [SimpleNamespace(tenant_id=tenant_id)],
        )
        monkeypatch.setattr(
            module.KnowledgebaseService,
            "query",
            lambda **_kwargs: bool(_kwargs.get("id") == kb_id),
        )

    def test_missing_kb_id(self, document_app_module, monkeypatch):
        """Without a ``kb_id`` argument the handler answers with code 101."""
        app = document_app_module
        self._set_args(app, monkeypatch)
        self._stub_json(app, monkeypatch, lambda: {})
        outcome = _run(app.list_docs())
        assert outcome["code"] == 101
        assert outcome["message"] == 'Lack of "KB ID"'

    def test_unauthorized_dataset(self, document_app_module, monkeypatch):
        """A falsy ``KnowledgebaseService.query`` result yields code 103."""
        app = document_app_module
        self._set_args(app, monkeypatch, kb_id="kb1")
        monkeypatch.setattr(app.UserTenantService, "query", lambda **_kwargs: [SimpleNamespace(tenant_id="tenant1")])
        monkeypatch.setattr(app.KnowledgebaseService, "query", lambda **_kwargs: False)
        self._stub_json(app, monkeypatch, lambda: {})
        outcome = _run(app.list_docs())
        assert outcome["code"] == 103
        assert "Only owner of dataset" in outcome["message"]

    def test_return_empty_metadata_flags(self, document_app_module, monkeypatch):
        """Both ways of passing the empty-metadata flag end in a code 0 response."""
        app = document_app_module
        self._set_args(app, monkeypatch, kb_id="kb1")
        self._allow_kb(app, monkeypatch)
        monkeypatch.setattr(app.DocumentService, "get_by_kb_id", lambda *_args, **_kwargs: ([], 0))
        payload_factories = (
            lambda: {"return_empty_metadata": "true", "metadata": {"author": "alice"}},
            lambda: {"metadata": {"empty_metadata": True, "author": "alice"}},
        )
        for make_payload in payload_factories:
            self._stub_json(app, monkeypatch, make_payload)
            outcome = _run(app.list_docs())
            assert outcome["code"] == 0

    def test_invalid_filters(self, document_app_module, monkeypatch):
        """Unknown ``run_status`` and ``types`` values are each rejected with code 102."""
        app = document_app_module
        self._set_args(app, monkeypatch, kb_id="kb1")
        self._allow_kb(app, monkeypatch)
        scenarios = (
            (lambda: {"run_status": ["INVALID"]}, "Invalid filter run status"),
            (lambda: {"types": ["INVALID"]}, "Invalid filter conditions"),
        )
        for make_payload, fragment in scenarios:
            self._stub_json(app, monkeypatch, make_payload)
            outcome = _run(app.list_docs())
            assert outcome["code"] == 102
            assert fragment in outcome["message"]

    def test_invalid_metadata_types(self, document_app_module, monkeypatch):
        """Wrongly typed ``metadata_condition`` and ``metadata`` values are rejected with code 102."""
        app = document_app_module
        self._set_args(app, monkeypatch, kb_id="kb1")
        self._allow_kb(app, monkeypatch)
        scenarios = (
            (lambda: {"metadata_condition": "bad"}, "metadata_condition"),
            (lambda: {"metadata": ["not", "object"]}, "metadata must be an object"),
        )
        for make_payload, fragment in scenarios:
            self._stub_json(app, monkeypatch, make_payload)
            outcome = _run(app.list_docs())
            assert outcome["code"] == 102
            assert fragment in outcome["message"]

    def test_metadata_condition_empty_result(self, document_app_module, monkeypatch):
        """When ``meta_filter`` matches nothing, the response reports a total of 0."""
        app = document_app_module
        self._set_args(app, monkeypatch, kb_id="kb1")
        self._allow_kb(app, monkeypatch)
        monkeypatch.setattr(app.DocMetadataService, "get_flatted_meta_by_kbs", lambda *_args, **_kwargs: {})
        monkeypatch.setattr(app, "meta_filter", lambda *_args, **_kwargs: set())
        self._stub_json(
            app,
            monkeypatch,
            lambda: {"metadata_condition": {"conditions": [{"name": "author", "comparison_operator": "is", "value": "alice"}]}},
        )
        outcome = _run(app.list_docs())
        assert outcome["code"] == 0
        assert outcome["data"]["total"] == 0

    def test_metadata_values_intersection(self, document_app_module, monkeypatch):
        """Only the document matching every metadata key reaches ``get_by_kb_id``."""
        app = document_app_module
        self._set_args(app, monkeypatch, kb_id="kb1")
        self._allow_kb(app, monkeypatch)
        metas = {
            "author": {"alice": ["doc1", "doc2"]},
            "topic": {"rag": ["doc2"]},
        }
        captured = {}

        def fake_get_by_kb_id(*_args, **_kwargs):
            # The filter is read from the tenth positional argument when present.
            captured["doc_ids_filter"] = _args[9] if len(_args) >= 10 else None
            return ([{"id": "doc2", "thumbnail": "", "parser_config": {}}], 1)

        monkeypatch.setattr(app.DocMetadataService, "get_flatted_meta_by_kbs", lambda *_args, **_kwargs: metas)
        monkeypatch.setattr(app.DocumentService, "get_by_kb_id", fake_get_by_kb_id)
        self._stub_json(app, monkeypatch, lambda: {"metadata": {"author": ["alice", " ", None], "topic": "rag"}})
        outcome = _run(app.list_docs())
        assert outcome["code"] == 0
        assert captured["doc_ids_filter"] == ["doc2"]

    def test_metadata_intersection_empty(self, document_app_module, monkeypatch):
        """Metadata keys matching disjoint documents produce a total of 0."""
        app = document_app_module
        self._set_args(app, monkeypatch, kb_id="kb1")
        self._allow_kb(app, monkeypatch)
        metas = {
            "author": {"alice": ["doc1"]},
            "topic": {"rag": ["doc2"]},
        }
        monkeypatch.setattr(app.DocMetadataService, "get_flatted_meta_by_kbs", lambda *_args, **_kwargs: metas)
        self._stub_json(app, monkeypatch, lambda: {"metadata": {"author": "alice", "topic": "rag"}})
        outcome = _run(app.list_docs())
        assert outcome["code"] == 0
        assert outcome["data"]["total"] == 0

    def test_desc_time_and_schema(self, document_app_module, monkeypatch):
        """With a 150-250 create-time window one document remains and its metadata is converted."""
        app = document_app_module
        self._set_args(app, monkeypatch, kb_id="kb1", desc="false", create_time_from="150", create_time_to="250")
        self._allow_kb(app, monkeypatch)
        docs = [
            {"id": "doc1", "thumbnail": "", "parser_config": {"metadata": {"a": 1}}, "create_time": 100},
            {"id": "doc2", "thumbnail": "", "parser_config": {"metadata": {"b": 2}}, "create_time": 200},
        ]

        def fake_get_by_kb_id(*_args, **_kwargs):
            return (docs, 2)

        monkeypatch.setattr(app.DocumentService, "get_by_kb_id", fake_get_by_kb_id)
        monkeypatch.setattr(app, "turn2jsonschema", lambda _meta: {"schema": True})
        self._stub_json(app, monkeypatch, lambda: {})
        outcome = _run(app.list_docs())
        assert outcome["code"] == 0
        assert len(outcome["data"]["docs"]) == 1
        assert outcome["data"]["docs"][0]["parser_config"]["metadata"] == {"schema": True}

    def test_exception_path(self, document_app_module, monkeypatch):
        """An exception raised by ``get_by_kb_id`` is reported as code 100."""
        app = document_app_module
        self._set_args(app, monkeypatch, kb_id="kb1")
        self._allow_kb(app, monkeypatch)

        def raise_error(*_args, **_kwargs):
            raise RuntimeError("boom")

        monkeypatch.setattr(app.DocumentService, "get_by_kb_id", raise_error)
        self._stub_json(app, monkeypatch, lambda: {})
        outcome = _run(app.list_docs())
        assert outcome["code"] == 100

View File

@ -13,7 +13,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import asyncio
import string
from types import SimpleNamespace
from concurrent.futures import ThreadPoolExecutor, as_completed
import pytest
@ -21,6 +23,7 @@ from common import list_kbs, upload_documents
from configs import DOCUMENT_NAME_LIMIT, INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth
from utils.file_utils import create_txt_file
from api.constants import FILE_NAME_LEN_LIMIT
@pytest.mark.p1
@ -189,3 +192,288 @@ class TestDocumentsUpload:
res = list_kbs(WebApiAuth)
assert res["data"]["kbs"][0]["doc_num"] == count, res
class _AwaitableValue:
def __init__(self, value):
self._value = value
def __await__(self):
async def _coro():
return self._value
return _coro().__await__()
class _DummyFiles(dict):
def getlist(self, key):
value = self.get(key, [])
if isinstance(value, list):
return value
return [value]
class _DummyFile:
def __init__(self, filename):
self.filename = filename
self.closed = False
self.stream = self
def close(self):
self.closed = True
class _DummyRequest:
def __init__(self, form=None, files=None):
self._form = form or {}
self._files = files or _DummyFiles()
@property
def form(self):
return _AwaitableValue(self._form)
@property
def files(self):
return _AwaitableValue(self._files)
def _run(coro):
return asyncio.run(coro)
@pytest.mark.p2
class TestDocumentsUploadUnit:
    """Call ``upload.__wrapped__`` of the loaded document app module with a fake request.

    The request's ``form`` and ``files`` are supplied through ``_DummyRequest``;
    service lookups and the thread-pool helper are stubbed per test.
    """

    @staticmethod
    def _install_request(module, monkeypatch, kb_id, files):
        """Replace ``module.request`` with a dummy carrying ``kb_id`` and ``files``."""
        monkeypatch.setattr(module, "request", _DummyRequest(form={"kb_id": kb_id}, files=files))

    @staticmethod
    def _one_file(filename):
        """Return a dummy upload named ``filename`` and a files mapping containing it."""
        upload = _DummyFile(filename)
        return upload, _DummyFiles({"file": [upload]})

    @staticmethod
    def _kb():
        """Build the knowledge-base stand-in used by the permission and upload tests."""
        return SimpleNamespace(id="kb1", tenant_id="tenant1", name="kb", parser_id="parser", parser_config={})

    def test_missing_kb_id(self, document_app_module, monkeypatch):
        """An empty ``kb_id`` form field yields code 101."""
        app = document_app_module
        self._install_request(app, monkeypatch, "", _DummyFiles())
        outcome = _run(app.upload.__wrapped__())
        assert outcome["code"] == 101
        assert outcome["message"] == 'Lack of "KB ID"'

    def test_missing_file_part(self, document_app_module, monkeypatch):
        """A request without any files yields the no-file-part error."""
        app = document_app_module
        self._install_request(app, monkeypatch, "kb1", _DummyFiles())
        outcome = _run(app.upload.__wrapped__())
        assert outcome["code"] == 101
        assert outcome["message"] == "No file part!"

    def test_empty_filename_closes_files(self, document_app_module, monkeypatch):
        """An upload with an empty filename is rejected and the file is closed."""
        app = document_app_module
        upload, files = self._one_file("")
        self._install_request(app, monkeypatch, "kb1", files)
        outcome = _run(app.upload.__wrapped__())
        assert outcome["code"] == 101
        assert outcome["message"] == "No file selected!"
        assert upload.closed is True

    def test_filename_too_long(self, document_app_module, monkeypatch):
        """A filename one character over ``FILE_NAME_LEN_LIMIT`` is rejected with code 101."""
        app = document_app_module
        _upload, files = self._one_file("a" * (FILE_NAME_LEN_LIMIT + 1))
        self._install_request(app, monkeypatch, "kb1", files)
        outcome = _run(app.upload.__wrapped__())
        assert outcome["code"] == 101
        assert outcome["message"] == f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less."

    def test_invalid_kb_id_raises(self, document_app_module, monkeypatch):
        """A failed knowledge-base lookup surfaces as ``LookupError``."""
        app = document_app_module
        _upload, files = self._one_file("ragflow_test.txt")
        self._install_request(app, monkeypatch, "missing", files)
        monkeypatch.setattr(app.KnowledgebaseService, "get_by_id", lambda _kb_id: (False, None))
        with pytest.raises(LookupError):
            _run(app.upload.__wrapped__())

    def test_no_permission(self, document_app_module, monkeypatch):
        """A failing ``check_kb_team_permission`` yields code 109."""
        app = document_app_module
        kb = self._kb()
        monkeypatch.setattr(app.KnowledgebaseService, "get_by_id", lambda _kb_id: (True, kb))
        monkeypatch.setattr(app, "check_kb_team_permission", lambda *_args, **_kwargs: False)
        _upload, files = self._one_file("ragflow_test.txt")
        self._install_request(app, monkeypatch, "kb1", files)
        outcome = _run(app.upload.__wrapped__())
        assert outcome["code"] == 109
        assert outcome["message"] == "No authorization."

    def test_thread_pool_errors(self, document_app_module, monkeypatch):
        """Errors returned by ``thread_pool_exec`` give code 500 alongside the uploaded names."""
        app = document_app_module
        kb = self._kb()

        async def fake_thread_pool_exec(*_args, **_kwargs):
            return (["unsupported type"], [("file1", "blob")])

        monkeypatch.setattr(app.KnowledgebaseService, "get_by_id", lambda _kb_id: (True, kb))
        monkeypatch.setattr(app, "check_kb_team_permission", lambda *_args, **_kwargs: True)
        monkeypatch.setattr(app, "thread_pool_exec", fake_thread_pool_exec)
        _upload, files = self._one_file("ragflow_test.txt")
        self._install_request(app, monkeypatch, "kb1", files)
        outcome = _run(app.upload.__wrapped__())
        assert outcome["code"] == 500
        assert "unsupported type" in outcome["message"]
        assert outcome["data"] == ["file1"]

    def test_empty_upload_result(self, document_app_module, monkeypatch):
        """An empty result list from ``thread_pool_exec`` yields code 102."""
        app = document_app_module
        kb = self._kb()

        async def fake_thread_pool_exec(*_args, **_kwargs):
            return (None, [])

        monkeypatch.setattr(app.KnowledgebaseService, "get_by_id", lambda _kb_id: (True, kb))
        monkeypatch.setattr(app, "check_kb_team_permission", lambda *_args, **_kwargs: True)
        monkeypatch.setattr(app, "thread_pool_exec", fake_thread_pool_exec)
        _upload, files = self._one_file("ragflow_test.txt")
        self._install_request(app, monkeypatch, "kb1", files)
        outcome = _run(app.upload.__wrapped__())
        assert outcome["code"] == 102
        assert "file format" in outcome["message"]
@pytest.mark.p2
class TestWebCrawlUnit:
def test_missing_kb_id(self, document_app_module, monkeypatch):
    """An empty ``kb_id`` form field makes ``web_crawl`` answer with code 101."""
    form = {"kb_id": "", "name": "doc", "url": "http://example.com"}
    monkeypatch.setattr(document_app_module, "request", _DummyRequest(form=form))
    outcome = _run(document_app_module.web_crawl.__wrapped__())
    assert outcome["code"] == 101
    assert outcome["message"] == 'Lack of "KB ID"'
def test_invalid_url(self, document_app_module, monkeypatch):
    """A ``url`` value of ``not-a-url`` is rejected with code 101."""
    form = {"kb_id": "kb1", "name": "doc", "url": "not-a-url"}
    monkeypatch.setattr(document_app_module, "request", _DummyRequest(form=form))
    outcome = _run(document_app_module.web_crawl.__wrapped__())
    assert outcome["code"] == 101
    assert outcome["message"] == "The URL format is invalid"
def test_invalid_kb_id_raises(self, document_app_module, monkeypatch):
    """A failed knowledge-base lookup in ``web_crawl`` surfaces as ``LookupError``."""
    app = document_app_module
    form = {"kb_id": "missing", "name": "doc", "url": "http://example.com"}
    monkeypatch.setattr(app, "is_valid_url", lambda _url: True)
    monkeypatch.setattr(app.KnowledgebaseService, "get_by_id", lambda _kb_id: (False, None))
    monkeypatch.setattr(app, "request", _DummyRequest(form=form))
    with pytest.raises(LookupError):
        _run(app.web_crawl.__wrapped__())
def test_no_permission(self, document_app_module, monkeypatch):
    """A failing ``check_kb_team_permission`` makes ``web_crawl`` answer with code 109."""
    app = document_app_module
    kb = SimpleNamespace(id="kb1", tenant_id="tenant1", name="kb", parser_id="parser", parser_config={})
    form = {"kb_id": "kb1", "name": "doc", "url": "http://example.com"}
    monkeypatch.setattr(app, "is_valid_url", lambda _url: True)
    monkeypatch.setattr(app.KnowledgebaseService, "get_by_id", lambda _kb_id: (True, kb))
    monkeypatch.setattr(app, "check_kb_team_permission", lambda *_args, **_kwargs: False)
    monkeypatch.setattr(app, "request", _DummyRequest(form=form))
    outcome = _run(app.web_crawl.__wrapped__())
    assert outcome["code"] == 109
    assert outcome["message"] == "No authorization."
def test_download_failure(self, document_app_module, monkeypatch):
    """``html2pdf`` returning ``None`` makes ``web_crawl`` report a download failure."""
    app = document_app_module
    kb = SimpleNamespace(id="kb1", tenant_id="tenant1", name="kb", parser_id="parser", parser_config={})
    form = {"kb_id": "kb1", "name": "doc", "url": "http://example.com"}
    monkeypatch.setattr(app, "is_valid_url", lambda _url: True)
    monkeypatch.setattr(app.KnowledgebaseService, "get_by_id", lambda _kb_id: (True, kb))
    monkeypatch.setattr(app, "check_kb_team_permission", lambda *_args, **_kwargs: True)
    monkeypatch.setattr(app, "html2pdf", lambda _url: None)
    monkeypatch.setattr(app, "request", _DummyRequest(form=form))
    outcome = _run(app.web_crawl.__wrapped__())
    assert outcome["code"] == 100
    assert "Download failure" in outcome["message"]
def test_unsupported_type(self, document_app_module, monkeypatch):
    """A resolved name of ``bad.exe`` makes ``web_crawl`` answer with an unsupported-type error."""
    app = document_app_module
    kb = SimpleNamespace(id="kb1", tenant_id="tenant1", name="kb", parser_id="parser", parser_config={})
    form = {"kb_id": "kb1", "name": "doc", "url": "http://example.com"}

    monkeypatch.setattr(app, "is_valid_url", lambda _url: True)
    monkeypatch.setattr(app.KnowledgebaseService, "get_by_id", lambda _kb_id: (True, kb))
    monkeypatch.setattr(app, "check_kb_team_permission", lambda *_args, **_kwargs: True)
    monkeypatch.setattr(app, "html2pdf", lambda _url: b"%PDF-1.4")

    # Folder bookkeeping on FileService is stubbed so the handler reaches the type check.
    file_service_stubs = {
        "get_root_folder": lambda _uid: {"id": "root"},
        "init_knowledgebase_docs": lambda *_args, **_kwargs: None,
        "get_kb_folder": lambda *_args, **_kwargs: {"id": "kb_root"},
        "new_a_file_from_kb": lambda *_args, **_kwargs: {"id": "kb_folder"},
    }
    for attr_name, stub in file_service_stubs.items():
        monkeypatch.setattr(app.FileService, attr_name, stub)

    monkeypatch.setattr(app, "duplicate_name", lambda *_args, **_kwargs: "bad.exe")
    monkeypatch.setattr(app, "request", _DummyRequest(form=form))
    outcome = _run(app.web_crawl.__wrapped__())
    assert outcome["code"] == 100
    assert "supported yet" in outcome["message"]
@pytest.mark.parametrize(
    ("filename", "filetype", "expected_parser"),
    [
        ("image.png", "visual", "picture"),
        ("sound.mp3", "aural", "audio"),
        ("deck.pptx", "doc", "presentation"),
        ("mail.eml", "doc", "email"),
    ],
)
def test_success_parser_overrides(self, document_app_module, monkeypatch, filename, filetype, expected_parser):
    """Expect ``web_crawl`` to succeed and insert a doc with ``expected_parser``.

    ``duplicate_name`` and ``filename_type`` are stubbed to return the
    parametrized ``filename`` and ``filetype``. The document handed to
    ``DocumentService.insert`` and the storage ``put`` call are recorded
    so they can be asserted on afterwards.
    """
    app = document_app_module
    dataset = SimpleNamespace(
        id="kb1",
        tenant_id="tenant1",
        name="kb",
        parser_id="parser",
        parser_config={},
    )
    seen = {}

    class _FakeStorage:
        """Storage stand-in that reports no existing object and records ``put``."""

        def obj_exist(self, *_args, **_kwargs):
            return False

        def put(self, *_args, **_kwargs):
            seen["put"] = True

    def insert_doc(doc):
        # Keep the inserted document for the parser_id assertion below.
        seen["doc"] = doc

    # Stubs for names looked up on the module itself.
    module_stubs = (
        ("is_valid_url", lambda _url: True),
        ("check_kb_team_permission", lambda *_args, **_kwargs: True),
        ("html2pdf", lambda _url: b"%PDF-1.4"),
        ("duplicate_name", lambda *_args, **_kwargs: filename),
        ("filename_type", lambda _name: filetype),
        ("thumbnail", lambda *_args, **_kwargs: ""),
        ("get_uuid", lambda: "doc-1"),
    )
    for attr, stub in module_stubs:
        monkeypatch.setattr(app, attr, stub)

    # Stubs for the folder/file bookkeeping done through FileService.
    file_service_stubs = (
        ("get_root_folder", lambda _uid: {"id": "root"}),
        ("init_knowledgebase_docs", lambda *_args, **_kwargs: None),
        ("get_kb_folder", lambda *_args, **_kwargs: {"id": "kb_root"}),
        ("new_a_file_from_kb", lambda *_args, **_kwargs: {"id": "kb_folder"}),
        ("add_file_from_kb", lambda *_args, **_kwargs: None),
    )
    for attr, stub in file_service_stubs:
        monkeypatch.setattr(app.FileService, attr, stub)

    monkeypatch.setattr(
        app.KnowledgebaseService,
        "get_by_id",
        lambda _kb_id: (True, dataset),
    )
    monkeypatch.setattr(app.settings, "STORAGE_IMPL", _FakeStorage())
    monkeypatch.setattr(app.DocumentService, "insert", insert_doc)
    monkeypatch.setattr(
        app,
        "request",
        _DummyRequest(form={"kb_id": "kb1", "name": "doc", "url": "http://example.com"}),
    )

    response = _run(app.web_crawl.__wrapped__())

    assert response["code"] == 0
    assert seen["doc"]["parser_id"] == expected_parser
    assert seen["put"] is True
def test_exception_path(self, document_app_module, monkeypatch):
    """Expect code 100 from ``web_crawl`` when the document insert raises.

    Every collaborator is stubbed to succeed except
    ``DocumentService.insert``, which raises ``RuntimeError("boom")``.
    """
    module = document_app_module
    dataset = SimpleNamespace(
        id="kb1",
        tenant_id="tenant1",
        name="kb",
        parser_id="parser",
        parser_config={},
    )

    class _FakeStorage:
        """Storage stand-in that reports no existing object and accepts ``put``."""

        def obj_exist(self, *_args, **_kwargs):
            return False

        def put(self, *_args, **_kwargs):
            return None

    def insert_doc(_doc):
        raise RuntimeError("boom")

    form = {"kb_id": "kb1", "name": "doc", "url": "http://example.com"}

    # (owner, attribute, replacement) triples; the patches are independent.
    patches = (
        (module, "is_valid_url", lambda _url: True),
        (module.KnowledgebaseService, "get_by_id", lambda _kb_id: (True, dataset)),
        (module, "check_kb_team_permission", lambda *_args, **_kwargs: True),
        (module, "html2pdf", lambda _url: b"%PDF-1.4"),
        (module.FileService, "get_root_folder", lambda _uid: {"id": "root"}),
        (module.FileService, "init_knowledgebase_docs", lambda *_args, **_kwargs: None),
        (module.FileService, "get_kb_folder", lambda *_args, **_kwargs: {"id": "kb_root"}),
        (module.FileService, "new_a_file_from_kb", lambda *_args, **_kwargs: {"id": "kb_folder"}),
        (module, "duplicate_name", lambda *_args, **_kwargs: "doc.pdf"),
        (module, "filename_type", lambda _name: "pdf"),
        (module, "thumbnail", lambda *_args, **_kwargs: ""),
        (module, "get_uuid", lambda: "doc-1"),
        (module.settings, "STORAGE_IMPL", _FakeStorage()),
        (module.DocumentService, "insert", insert_doc),
        (module.FileService, "add_file_from_kb", lambda *_args, **_kwargs: None),
        (module, "request", _DummyRequest(form=form)),
    )
    for owner, attr, replacement in patches:
        monkeypatch.setattr(owner, attr, replacement)

    res = _run(module.web_crawl.__wrapped__())
    assert res["code"] == 100