Files
ragflow/test/testcases/test_web_api/test_document_app/conftest.py
buua436 f703169117 Refa: migrate document preview/download to RESTful API (#14633)
### What problem does this PR solve?

Migrate document preview/download to the RESTful API.

### Type of change
- [x] Refactoring
2026-05-08 13:26:13 +08:00

254 lines
10 KiB
Python

#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import importlib.util
import sys
from pathlib import Path
from types import ModuleType, SimpleNamespace
import pytest
from test_common import bulk_upload_documents, delete_document, list_documents
class _DummyManager:
def route(self, *_args, **_kwargs):
def decorator(func):
return func
return decorator
class _StubKBRecord(dict):
def __getattr__(self, item):
try:
return self[item]
except KeyError as exc:
raise AttributeError(item) from exc
@pytest.fixture(scope="function")
def add_document_func(request, WebApiAuth, add_dataset, ragflow_tmp_dir):
    """Upload one document into the ``add_dataset`` dataset for a single test.

    Returns:
        A ``(dataset_id, item)`` tuple, where ``item`` is the first element
        returned by ``bulk_upload_documents``.
    """
    dataset_id = add_dataset

    def _remove_uploaded_documents():
        # Teardown: list whatever the dataset currently holds and delete it one document at a time.
        listing = list_documents(WebApiAuth, {"kb_id": dataset_id})
        for entry in listing["data"]["docs"]:
            delete_document(WebApiAuth, dataset_id, {"ids": [entry["id"]]})

    # Registered before uploading so the finalizer still runs if the upload raises.
    request.addfinalizer(_remove_uploaded_documents)

    uploaded = bulk_upload_documents(WebApiAuth, dataset_id, 1, ragflow_tmp_dir)
    return dataset_id, uploaded[0]
@pytest.fixture(scope="class")
def add_documents(request, WebApiAuth, add_dataset, ragflow_tmp_dir):
    """Upload five documents into the ``add_dataset`` dataset, shared across a test class.

    Returns:
        A ``(dataset_id, uploaded)`` tuple, where ``uploaded`` is whatever
        ``bulk_upload_documents`` returns for a count of 5.
    """
    dataset_id = add_dataset

    def _remove_uploaded_documents():
        # Teardown: list whatever the dataset currently holds and delete it one document at a time.
        listing = list_documents(WebApiAuth, {"kb_id": dataset_id})
        for entry in listing["data"]["docs"]:
            delete_document(WebApiAuth, dataset_id, {"ids": [entry["id"]]})

    # Registered before uploading so the finalizer still runs if the upload raises.
    request.addfinalizer(_remove_uploaded_documents)

    uploaded = bulk_upload_documents(WebApiAuth, dataset_id, 5, ragflow_tmp_dir)
    return dataset_id, uploaded
@pytest.fixture(scope="function")
def add_documents_func(request, WebApiAuth, add_dataset_func, ragflow_tmp_dir):
    """Upload three documents into the ``add_dataset_func`` dataset for a single test.

    Returns:
        A ``(dataset_id, uploaded)`` tuple, where ``uploaded`` is whatever
        ``bulk_upload_documents`` returns for a count of 3.
    """
    dataset_id = add_dataset_func

    def _remove_uploaded_documents():
        # Teardown: list whatever the dataset currently holds and delete it one document at a time.
        listing = list_documents(WebApiAuth, {"kb_id": dataset_id})
        for entry in listing["data"]["docs"]:
            delete_document(WebApiAuth, dataset_id, {"ids": [entry["id"]]})

    # Registered before uploading so the finalizer still runs if the upload raises.
    request.addfinalizer(_remove_uploaded_documents)

    uploaded = bulk_upload_documents(WebApiAuth, dataset_id, 3, ragflow_tmp_dir)
    return dataset_id, uploaded
# (submodule of ``deepdoc.parser``, class name that submodule must expose)
_STUBBED_PARSER_MODULES = (
    ("excel_parser", "RAGFlowExcelParser"),
    ("html_parser", "RAGFlowHtmlParser"),
    ("mineru_parser", "MinerUParser"),
    ("paddleocr_parser", "PaddleOCRParser"),
)


def _register_stub_module(monkeypatch, dotted_name, **attributes):
    """Create an empty module, set ``attributes`` on it and register it in ``sys.modules``.

    Registration goes through ``monkeypatch.setitem`` so it is undone at test teardown.

    Returns:
        The newly created ``ModuleType`` instance.
    """
    stub = ModuleType(dotted_name)
    for attr_name, value in attributes.items():
        setattr(stub, attr_name, value)
    monkeypatch.setitem(sys.modules, dotted_name, stub)
    return stub


def _install_document_api_stubs(monkeypatch, repo_root):
    """Register stand-ins in ``sys.modules`` ahead of loading ``document_api.py``.

    ``common``, ``api.apps`` and ``api.apps.services`` are replaced by bare
    packages whose ``__path__`` points at the real directories under
    ``repo_root``. ``deepdoc``, its parser submodules, ``xgboost`` and
    ``api.apps.services.document_api_service`` are replaced by empty stubs
    carrying only the names set below.
    """
    _register_stub_module(monkeypatch, "common", __path__=[str(repo_root / "common")])

    # deepdoc: an empty ``deepdoc.parser`` package plus one stub class per parser submodule.
    parser_pkg = ModuleType("deepdoc.parser")
    parser_pkg.__path__ = []
    parser_pkg.PdfParser = type("_StubPdfParser", (), {})
    _register_stub_module(monkeypatch, "deepdoc", parser=parser_pkg)
    monkeypatch.setitem(sys.modules, "deepdoc.parser", parser_pkg)
    for submodule, class_name in _STUBBED_PARSER_MODULES:
        _register_stub_module(
            monkeypatch,
            f"deepdoc.parser.{submodule}",
            **{class_name: type(f"_Stub{class_name}", (), {})},
        )

    _register_stub_module(monkeypatch, "xgboost")

    # api.apps: fixed current user and a ``login_required`` that returns the function unchanged.
    apps_dir = repo_root / "api" / "apps"
    _register_stub_module(
        monkeypatch,
        "api.apps",
        __path__=[str(apps_dir)],
        current_user=SimpleNamespace(id="user-1"),
        login_required=lambda func: func,
    )
    _register_stub_module(monkeypatch, "api.apps.services", __path__=[str(apps_dir / "services")])

    def _noop(*_args, **_kwargs):
        return None

    def _map_doc_keys(doc):
        return doc.to_dict() if hasattr(doc, "to_dict") else doc

    def _map_doc_keys_with_run_status(doc, run_status="0"):
        payload = doc if isinstance(doc, dict) else doc.to_dict()
        return {**payload, "run": run_status}

    _register_stub_module(
        monkeypatch,
        "api.apps.services.document_api_service",
        validate_document_update_fields=lambda *_args, **_kwargs: (None, None),
        map_doc_keys=_map_doc_keys,
        map_doc_keys_with_run_status=_map_doc_keys_with_run_status,
        update_document_name_only=_noop,
        update_chunk_method=_noop,
        update_document_status_only=_noop,
        reset_document_for_reparse=_noop,
    )


def _load_document_api(monkeypatch, module_name):
    """Execute ``api/apps/restful_apis/document_api.py`` as a module named ``module_name``.

    The stubs from ``_install_document_api_stubs`` are installed first.

    Raises:
        ImportError: if no import spec or loader can be built for the file.
    """
    # conftest.py sits four directories below the repository root.
    repo_root = Path(__file__).resolve().parents[4]
    _install_document_api_stubs(monkeypatch, repo_root)

    module_path = repo_root / "api" / "apps" / "restful_apis" / "document_api.py"
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot build an import spec for {module_path}")
    module = importlib.util.module_from_spec(spec)
    # ``manager`` is injected before execution, presumably because the module
    # body applies ``@manager.route(...)`` decorators at import time.
    module.manager = _DummyManager()
    spec.loader.exec_module(module)
    return module


@pytest.fixture()
def document_app_module(monkeypatch):
    """Return ``document_api.py`` loaded as ``test_document_app_unit`` with its dependencies stubbed."""
    return _load_document_api(monkeypatch, "test_document_app_unit")


@pytest.fixture()
def document_rest_api_module(monkeypatch):
    """Return ``document_api.py`` loaded as ``test_document_api_unit`` with dataset access pre-approved.

    In addition to the dependency stubs, ``KnowledgebaseService.get_by_id`` is
    patched to always succeed with a canned record, and
    ``check_kb_team_permission`` is patched to always return ``True``.
    """
    module = _load_document_api(monkeypatch, "test_document_api_unit")
    monkeypatch.setattr(
        module.KnowledgebaseService,
        "get_by_id",
        lambda dataset_id: (
            True,
            _StubKBRecord(
                id=dataset_id,
                tenant_id="tenant1",
                name="kb",
                parser_id="parser",
                pipeline_id="pipe",
                parser_config={},
            ),
        ),
    )
    monkeypatch.setattr(module, "check_kb_team_permission", lambda *_args, **_kwargs: True)
    return module