From c08cd8e0908c37dfb1fcdbdf3da38902c270d4b7 Mon Sep 17 00:00:00 2001 From: Jack Date: Wed, 22 Apr 2026 20:01:31 +0800 Subject: [PATCH] Refactor: Migrate document metadata config update API (#14286) ### What problem does this PR solve? Before migration Web API: POST /v1/document/update_metadata_setting After consolidation, Restful API PUT /api/v1/datasets//documents//metadata/config ### Type of change - [x] Refactoring --- api/apps/document_app.py | 20 ----- api/apps/restful_apis/document_api.py | 88 +++++++++++++++++-- test/testcases/test_web_api/test_common.py | 4 +- .../test_document_metadata.py | 44 ++++------ .../metedata/hooks/use-manage-modal.ts | 11 ++- web/src/services/knowledge-service.ts | 22 +++-- web/src/utils/api.ts | 3 +- 7 files changed, 122 insertions(+), 70 deletions(-) diff --git a/api/apps/document_app.py b/api/apps/document_app.py index f509ccdb2..f4c3e3355 100644 --- a/api/apps/document_app.py +++ b/api/apps/document_app.py @@ -210,26 +210,6 @@ async def metadata_update(): return get_json_result(data={"updated": updated, "matched_docs": len(document_ids)}) -@manager.route("/update_metadata_setting", methods=["POST"]) # noqa: F821 -@login_required -@validate_request("doc_id", "metadata") -async def update_metadata_setting(): - req = await get_request_json() - if not DocumentService.accessible(req["doc_id"], current_user.id): - return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR) - - e, doc = DocumentService.get_by_id(req["doc_id"]) - if not e: - return get_data_error_result(message="Document not found!") - - DocumentService.update_parser_config(doc.id, {"metadata": req["metadata"]}) - e, doc = DocumentService.get_by_id(doc.id) - if not e: - return get_data_error_result(message="Document not found!") - - return get_json_result(data=doc.to_dict()) - - @manager.route("/thumbnails", methods=["GET"]) # noqa: F821 # @login_required def thumbnails(): diff --git a/api/apps/restful_apis/document_api.py b/api/apps/restful_apis/document_api.py index 9e422d0fd..56c4f56df 100644 --- a/api/apps/restful_apis/document_api.py +++ b/api/apps/restful_apis/document_api.py @@ -264,15 +264,15 @@ async def upload_document(dataset_id, tenant_id): """ from api.constants import FILE_NAME_LEN_LIMIT from api.db.services.file_service import FileService - + form = await request.form files = await request.files - + # Validation if "file" not in files: logging.error("No file part!") return get_error_data_result(message="No file part!", code=RetCode.ARGUMENT_ERROR) - + file_objs = files.getlist("file") for file_obj in file_objs: if file_obj is None or file_obj.filename is None or file_obj.filename == "": @@ -288,7 +288,7 @@ async def upload_document(dataset_id, tenant_id): if not e: logging.error(f"Can't find the dataset with ID {dataset_id}!") return get_error_data_result(message=f"Can't find the dataset with ID {dataset_id}!", code=RetCode.DATA_ERROR) - + # Permission Check if not check_kb_team_permission(kb, tenant_id): logging.error("No authorization.") @@ -308,7 +308,7 @@ async def upload_document(dataset_id, tenant_id): msg = "There seems to be an issue with your file format. please verify it is correct and not corrupted." logging.error(msg) return get_error_data_result(message=msg, code=RetCode.DATA_ERROR) - + files = [f[0] for f in files] # remove the blob # Check if we should return raw files without document key mapping @@ -580,7 +580,7 @@ def _parse_doc_id_filter_with_metadata(req, kb_id): - The metadata_condition uses operators like: =, !=, >, <, >=, <=, contains, not contains, in, not in, start with, end with, empty, not empty. - The metadata parameter performs exact matching where values are OR'd within the same key - and AND'd across different keys. + & AND'd across different keys. Examples: Simple metadata filter (exact match): @@ -758,6 +758,8 @@ async def delete_documents(tenant_id, dataset_id): except Exception as e: logging.exception(e) return get_error_data_result(message="Internal server error") + + def _aggregate_filters(docs): """Aggregate filter options from a list of documents. @@ -815,3 +817,77 @@ def _aggregate_filters(docs): "run_status": run_status_counter, "metadata": metadata_counter, } + +@manager.route("/datasets//documents//metadata/config", methods=["PUT"]) # noqa: F821 +@login_required +@add_tenant_id_to_kwargs +async def update_metadata_config(tenant_id, dataset_id, document_id): + """ + Update document metadata configuration. + --- + tags: + - Documents + security: + - ApiKeyAuth: [] + parameters: + - in: path + name: dataset_id + type: string + required: true + description: ID of the dataset. + - in: path + name: document_id + type: string + required: true + description: ID of the document. + - in: header + name: Authorization + type: string + required: true + description: Bearer token for authentication. + - in: body + name: body + description: Metadata configuration. + required: true + schema: + type: object + properties: + metadata: + type: object + description: Metadata configuration JSON. + responses: + 200: + description: Document updated successfully. + """ + # Verify ownership and existence of dataset + if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id): + return get_error_data_result(message="You don't own the dataset.") + + # Verify document exists in the dataset + doc = DocumentService.query(id=document_id, kb_id=dataset_id) + if not doc: + msg = f"Document {document_id} not found in dataset {dataset_id}" + return get_error_data_result(message=msg) + doc = doc[0] + + # Get request body + req = await get_request_json() + if "metadata" not in req: + return get_error_argument_result(message="metadata is required") + + # Update parser config with metadata + try: + DocumentService.update_parser_config(doc.id, {"metadata": req["metadata"]}) + except Exception as e: + logging.error("error when update_parser_config", exc_info=e) + return get_json_result(code=RetCode.EXCEPTION_ERROR, message=repr(e)) + + # Get updated document + try: + e, doc = DocumentService.get_by_id(doc.id) + if not e: + return get_data_error_result(message="Document not found!") + except Exception as e: + return get_json_result(code=RetCode.EXCEPTION_ERROR, message=repr(e)) + + return get_result(data=doc.to_dict()) diff --git a/test/testcases/test_web_api/test_common.py b/test/testcases/test_web_api/test_common.py index 877de3a37..06754956d 100644 --- a/test/testcases/test_web_api/test_common.py +++ b/test/testcases/test_web_api/test_common.py @@ -414,8 +414,8 @@ def document_metadata_update(auth, payload=None, *, headers=HEADERS, data=None): return res.json() -def document_update_metadata_setting(auth, payload=None, *, headers=HEADERS, data=None): - res = requests.post(url=f"{HOST_ADDRESS}{DOCUMENT_APP_URL}/update_metadata_setting", headers=headers, auth=auth, json=payload, data=data) +def document_update_metadata_setting(auth, dataset_id, doc_id, payload=None, *, headers=HEADERS, data=None): + res = requests.put(url=f"{HOST_ADDRESS}{DATASETS_URL}/{dataset_id}/documents/{doc_id}/metadata/config", headers=headers, auth=auth, json=payload, data=data) return res.json() diff --git a/test/testcases/test_web_api/test_document_app/test_document_metadata.py b/test/testcases/test_web_api/test_document_app/test_document_metadata.py index 8dacada2d..697676547 100644 --- a/test/testcases/test_web_api/test_document_app/test_document_metadata.py +++ b/test/testcases/test_web_api/test_document_app/test_document_metadata.py @@ -18,6 +18,7 @@ from types import SimpleNamespace import pytest from test_common import ( + delete_document, document_change_status, document_filter, document_infos, @@ -69,7 +70,7 @@ class TestAuthorization: @pytest.mark.p2 @pytest.mark.parametrize("invalid_auth, expected_code, expected_fragment", INVALID_AUTH_CASES) def test_update_metadata_setting_auth_invalid(self, invalid_auth, expected_code, expected_fragment): - res = document_update_metadata_setting(invalid_auth, {"doc_id": "doc_id", "metadata": {}}) + res = document_update_metadata_setting(invalid_auth, "kb_id", "doc_id", {"metadata": {}}) assert res["code"] == expected_code, res assert expected_fragment in res["message"], res @@ -188,6 +189,19 @@ class TestDocumentMetadataNegative: assert "required argument are missing" in res["message"], res assert "metadata" in res["message"], res + @pytest.mark.p2 + def test_update_metadata_setting_not_found(self, WebApiAuth, add_document_func): + """Test updating metadata setting for a non-existent document returns error.""" + dataset_id, doc_id = add_document_func + # First delete the document + delete_res = delete_document(WebApiAuth, dataset_id, {"ids": [doc_id]}) + assert delete_res["code"] == 0, delete_res + + # Now try to update metadata setting for the deleted document + res = document_update_metadata_setting(WebApiAuth, dataset_id, doc_id, {"metadata": {"author": "test"}}) + assert res["code"] == 102, res + assert f"Document {doc_id} not found in dataset {dataset_id}" in res["message"], res + @pytest.mark.p3 def test_change_status_invalid_status(self, WebApiAuth, add_document_func): _, doc_id = add_document_func @@ -265,34 +279,6 @@ class TestDocumentMetadataUnit: assert res["code"] == module.RetCode.ARGUMENT_ERROR assert "Each delete requires key." in res["message"] - def test_update_metadata_setting_authorization_and_refetch_not_found_unit(self, document_app_module, monkeypatch): - module = document_app_module - - async def fake_request_json(): - return {"doc_id": "doc1", "metadata": {"author": "alice"}} - - monkeypatch.setattr(module, "get_request_json", fake_request_json) - monkeypatch.setattr(module.DocumentService, "accessible", lambda *_args, **_kwargs: False) - res = _run(module.update_metadata_setting.__wrapped__()) - assert res["code"] == module.RetCode.AUTHENTICATION_ERROR - assert "No authorization." in res["message"] - - doc = SimpleNamespace(id="doc1", to_dict=lambda: {"id": "doc1", "parser_config": {}}) - state = {"count": 0} - - def fake_get_by_id(_doc_id): - state["count"] += 1 - if state["count"] == 1: - return True, doc - return False, None - - monkeypatch.setattr(module.DocumentService, "accessible", lambda *_args, **_kwargs: True) - monkeypatch.setattr(module.DocumentService, "get_by_id", fake_get_by_id) - monkeypatch.setattr(module.DocumentService, "update_parser_config", lambda *_args, **_kwargs: True) - res = _run(module.update_metadata_setting.__wrapped__()) - assert res["code"] == module.RetCode.DATA_ERROR - assert "Document not found!" in res["message"] - def test_thumbnails_missing_ids_rewrite_and_exception_unit(self, document_app_module, monkeypatch): module = document_app_module monkeypatch.setattr(module, "request", _DummyRequest(args={})) diff --git a/web/src/pages/dataset/components/metedata/hooks/use-manage-modal.ts b/web/src/pages/dataset/components/metedata/hooks/use-manage-modal.ts index cd9428f21..1cbb38fad 100644 --- a/web/src/pages/dataset/components/metedata/hooks/use-manage-modal.ts +++ b/web/src/pages/dataset/components/metedata/hooks/use-manage-modal.ts @@ -4,6 +4,7 @@ import { useSelectedIds } from '@/hooks/logic-hooks/use-row-selection'; import { DocumentApiAction } from '@/hooks/use-document-request'; import kbService, { getMetaDataService, + updateDocumentMetaDataConfig, updateMetaData, } from '@/services/knowledge-service'; import { useQuery, useQueryClient } from '@tanstack/react-query'; @@ -432,10 +433,14 @@ export const useManageMetaDataModal = ( const handleSaveSingleFileSettings = useCallback( async (callback: () => void) => { const data = util.tableDataToMetaDataSettingJSON(tableData); - if (otherData?.documentId) { - const { data: res } = await kbService.documentUpdateMetaData({ + // otherData contains: documentId + if (otherData?.documentId && id) { + const { data: res } = await updateDocumentMetaDataConfig({ + kb_id: id, doc_id: otherData.documentId, - metadata: data, + data: { + metadata: data, + }, }); if (res.code === 0) { message.success(t('message.operated')); diff --git a/web/src/services/knowledge-service.ts b/web/src/services/knowledge-service.ts index 3e6d57cb9..760248efd 100644 --- a/web/src/services/knowledge-service.ts +++ b/web/src/services/knowledge-service.ts @@ -41,7 +41,6 @@ const { fetchPipelineDatasetLogs, checkEmbedding, kbUpdateMetaData, - documentUpdateMetaData, } = api; const methods = { @@ -177,14 +176,6 @@ const methods = { url: kbUpdateMetaData, method: 'post', }, - documentUpdateMetaData: { - url: documentUpdateMetaData, - method: 'post', - }, - // getMetaData: { - // url: getMetaData, - // method: 'get', - // }, }; const kbService = registerServer(methods, request); @@ -289,6 +280,19 @@ export const updateMetaData = ({ data: any; }) => request.post(api.updateMetaData, { data: { kb_id, doc_ids, ...data } }); +export const updateDocumentMetaDataConfig = ({ + kb_id, + doc_id, + data, +}: { + kb_id: string; + doc_id: string; + data: any; +}) => + request.put(api.documentUpdateMetaDataConfig(kb_id, doc_id), { + data: { ...data }, + }); + export const listDataPipelineLogDocument = ( params?: IFetchKnowledgeListRequestParams, body?: IFetchDocumentListRequestBody, diff --git a/web/src/utils/api.ts b/web/src/utils/api.ts index 171ebdd46..7eb3f64f1 100644 --- a/web/src/utils/api.ts +++ b/web/src/utils/api.ts @@ -87,7 +87,8 @@ export default { `${restAPIv1}/datasets/${datasetId}/metadata/summary`, updateMetaData: `${webAPI}/document/metadata/update`, kbUpdateMetaData: `${webAPI}/kb/update_metadata_setting`, - documentUpdateMetaData: `${webAPI}/document/update_metadata_setting`, + documentUpdateMetaDataConfig: (datasetId: string, documentId: string) => + `${restAPIv1}/datasets/${datasetId}/documents/${documentId}/metadata/config`, // tags listTag: (knowledgeId: string) => `${webAPI}/kb/${knowledgeId}/tags`,