Refactor: Migrate document metadata config update API (#14286)

### What problem does this PR solve?

Before migration
Web API: POST /v1/document/update_metadata_setting

After consolidation, Restful API
PUT
/api/v1/datasets/<dataset_id>/documents/<document_id>/metadata/config

### Type of change

- [x] Refactoring
This commit is contained in:
Jack
2026-04-22 20:01:31 +08:00
committed by GitHub
parent d1c62fc19d
commit c08cd8e090
7 changed files with 122 additions and 70 deletions

View File

@ -210,26 +210,6 @@ async def metadata_update():
return get_json_result(data={"updated": updated, "matched_docs": len(document_ids)})
@manager.route("/update_metadata_setting", methods=["POST"]) # noqa: F821
@login_required
@validate_request("doc_id", "metadata")
async def update_metadata_setting():
req = await get_request_json()
if not DocumentService.accessible(req["doc_id"], current_user.id):
return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)
e, doc = DocumentService.get_by_id(req["doc_id"])
if not e:
return get_data_error_result(message="Document not found!")
DocumentService.update_parser_config(doc.id, {"metadata": req["metadata"]})
e, doc = DocumentService.get_by_id(doc.id)
if not e:
return get_data_error_result(message="Document not found!")
return get_json_result(data=doc.to_dict())
@manager.route("/thumbnails", methods=["GET"]) # noqa: F821
# @login_required
def thumbnails():

View File

@ -264,15 +264,15 @@ async def upload_document(dataset_id, tenant_id):
"""
from api.constants import FILE_NAME_LEN_LIMIT
from api.db.services.file_service import FileService
form = await request.form
files = await request.files
# Validation
if "file" not in files:
logging.error("No file part!")
return get_error_data_result(message="No file part!", code=RetCode.ARGUMENT_ERROR)
file_objs = files.getlist("file")
for file_obj in file_objs:
if file_obj is None or file_obj.filename is None or file_obj.filename == "":
@ -288,7 +288,7 @@ async def upload_document(dataset_id, tenant_id):
if not e:
logging.error(f"Can't find the dataset with ID {dataset_id}!")
return get_error_data_result(message=f"Can't find the dataset with ID {dataset_id}!", code=RetCode.DATA_ERROR)
# Permission Check
if not check_kb_team_permission(kb, tenant_id):
logging.error("No authorization.")
@ -308,7 +308,7 @@ async def upload_document(dataset_id, tenant_id):
msg = "There seems to be an issue with your file format. please verify it is correct and not corrupted."
logging.error(msg)
return get_error_data_result(message=msg, code=RetCode.DATA_ERROR)
files = [f[0] for f in files] # remove the blob
# Check if we should return raw files without document key mapping
@ -580,7 +580,7 @@ def _parse_doc_id_filter_with_metadata(req, kb_id):
- The metadata_condition uses operators like: =, !=, >, <, >=, <=, contains, not contains,
in, not in, start with, end with, empty, not empty.
- The metadata parameter performs exact matching where values are OR'd within the same key
and AND'd across different keys.
& AND'd across different keys.
Examples:
Simple metadata filter (exact match):
@ -758,6 +758,8 @@ async def delete_documents(tenant_id, dataset_id):
except Exception as e:
logging.exception(e)
return get_error_data_result(message="Internal server error")
def _aggregate_filters(docs):
"""Aggregate filter options from a list of documents.
@ -815,3 +817,77 @@ def _aggregate_filters(docs):
"run_status": run_status_counter,
"metadata": metadata_counter,
}
@manager.route("/datasets/<dataset_id>/documents/<document_id>/metadata/config", methods=["PUT"]) # noqa: F821
@login_required
@add_tenant_id_to_kwargs
async def update_metadata_config(tenant_id, dataset_id, document_id):
"""
Update document metadata configuration.
---
tags:
- Documents
security:
- ApiKeyAuth: []
parameters:
- in: path
name: dataset_id
type: string
required: true
description: ID of the dataset.
- in: path
name: document_id
type: string
required: true
description: ID of the document.
- in: header
name: Authorization
type: string
required: true
description: Bearer token for authentication.
- in: body
name: body
description: Metadata configuration.
required: true
schema:
type: object
properties:
metadata:
type: object
description: Metadata configuration JSON.
responses:
200:
description: Document updated successfully.
"""
# Verify ownership and existence of dataset
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
return get_error_data_result(message="You don't own the dataset.")
# Verify document exists in the dataset
doc = DocumentService.query(id=document_id, kb_id=dataset_id)
if not doc:
msg = f"Document {document_id} not found in dataset {dataset_id}"
return get_error_data_result(message=msg)
doc = doc[0]
# Get request body
req = await get_request_json()
if "metadata" not in req:
return get_error_argument_result(message="metadata is required")
# Update parser config with metadata
try:
DocumentService.update_parser_config(doc.id, {"metadata": req["metadata"]})
except Exception as e:
logging.error("error when update_parser_config", exc_info=e)
return get_json_result(code=RetCode.EXCEPTION_ERROR, message=repr(e))
# Get updated document
try:
e, doc = DocumentService.get_by_id(doc.id)
if not e:
return get_data_error_result(message="Document not found!")
except Exception as e:
return get_json_result(code=RetCode.EXCEPTION_ERROR, message=repr(e))
return get_result(data=doc.to_dict())

View File

@ -414,8 +414,8 @@ def document_metadata_update(auth, payload=None, *, headers=HEADERS, data=None):
return res.json()
def document_update_metadata_setting(auth, payload=None, *, headers=HEADERS, data=None):
res = requests.post(url=f"{HOST_ADDRESS}{DOCUMENT_APP_URL}/update_metadata_setting", headers=headers, auth=auth, json=payload, data=data)
def document_update_metadata_setting(auth, dataset_id, doc_id, payload=None, *, headers=HEADERS, data=None):
res = requests.put(url=f"{HOST_ADDRESS}{DATASETS_URL}/{dataset_id}/documents/{doc_id}/metadata/config", headers=headers, auth=auth, json=payload, data=data)
return res.json()

View File

@ -18,6 +18,7 @@ from types import SimpleNamespace
import pytest
from test_common import (
delete_document,
document_change_status,
document_filter,
document_infos,
@ -69,7 +70,7 @@ class TestAuthorization:
@pytest.mark.p2
@pytest.mark.parametrize("invalid_auth, expected_code, expected_fragment", INVALID_AUTH_CASES)
def test_update_metadata_setting_auth_invalid(self, invalid_auth, expected_code, expected_fragment):
res = document_update_metadata_setting(invalid_auth, {"doc_id": "doc_id", "metadata": {}})
res = document_update_metadata_setting(invalid_auth, "kb_id", "doc_id", {"metadata": {}})
assert res["code"] == expected_code, res
assert expected_fragment in res["message"], res
@ -188,6 +189,19 @@ class TestDocumentMetadataNegative:
assert "required argument are missing" in res["message"], res
assert "metadata" in res["message"], res
@pytest.mark.p2
def test_update_metadata_setting_not_found(self, WebApiAuth, add_document_func):
"""Test updating metadata setting for a non-existent document returns error."""
dataset_id, doc_id = add_document_func
# First delete the document
delete_res = delete_document(WebApiAuth, dataset_id, {"ids": [doc_id]})
assert delete_res["code"] == 0, delete_res
# Now try to update metadata setting for the deleted document
res = document_update_metadata_setting(WebApiAuth, dataset_id, doc_id, {"metadata": {"author": "test"}})
assert res["code"] == 102, res
assert f"Document {doc_id} not found in dataset {dataset_id}" in res["message"], res
@pytest.mark.p3
def test_change_status_invalid_status(self, WebApiAuth, add_document_func):
_, doc_id = add_document_func
@ -265,34 +279,6 @@ class TestDocumentMetadataUnit:
assert res["code"] == module.RetCode.ARGUMENT_ERROR
assert "Each delete requires key." in res["message"]
def test_update_metadata_setting_authorization_and_refetch_not_found_unit(self, document_app_module, monkeypatch):
module = document_app_module
async def fake_request_json():
return {"doc_id": "doc1", "metadata": {"author": "alice"}}
monkeypatch.setattr(module, "get_request_json", fake_request_json)
monkeypatch.setattr(module.DocumentService, "accessible", lambda *_args, **_kwargs: False)
res = _run(module.update_metadata_setting.__wrapped__())
assert res["code"] == module.RetCode.AUTHENTICATION_ERROR
assert "No authorization." in res["message"]
doc = SimpleNamespace(id="doc1", to_dict=lambda: {"id": "doc1", "parser_config": {}})
state = {"count": 0}
def fake_get_by_id(_doc_id):
state["count"] += 1
if state["count"] == 1:
return True, doc
return False, None
monkeypatch.setattr(module.DocumentService, "accessible", lambda *_args, **_kwargs: True)
monkeypatch.setattr(module.DocumentService, "get_by_id", fake_get_by_id)
monkeypatch.setattr(module.DocumentService, "update_parser_config", lambda *_args, **_kwargs: True)
res = _run(module.update_metadata_setting.__wrapped__())
assert res["code"] == module.RetCode.DATA_ERROR
assert "Document not found!" in res["message"]
def test_thumbnails_missing_ids_rewrite_and_exception_unit(self, document_app_module, monkeypatch):
module = document_app_module
monkeypatch.setattr(module, "request", _DummyRequest(args={}))

View File

@ -4,6 +4,7 @@ import { useSelectedIds } from '@/hooks/logic-hooks/use-row-selection';
import { DocumentApiAction } from '@/hooks/use-document-request';
import kbService, {
getMetaDataService,
updateDocumentMetaDataConfig,
updateMetaData,
} from '@/services/knowledge-service';
import { useQuery, useQueryClient } from '@tanstack/react-query';
@ -432,10 +433,14 @@ export const useManageMetaDataModal = (
const handleSaveSingleFileSettings = useCallback(
async (callback: () => void) => {
const data = util.tableDataToMetaDataSettingJSON(tableData);
if (otherData?.documentId) {
const { data: res } = await kbService.documentUpdateMetaData({
// otherData contains: documentId
if (otherData?.documentId && id) {
const { data: res } = await updateDocumentMetaDataConfig({
kb_id: id,
doc_id: otherData.documentId,
metadata: data,
data: {
metadata: data,
},
});
if (res.code === 0) {
message.success(t('message.operated'));

View File

@ -41,7 +41,6 @@ const {
fetchPipelineDatasetLogs,
checkEmbedding,
kbUpdateMetaData,
documentUpdateMetaData,
} = api;
const methods = {
@ -177,14 +176,6 @@ const methods = {
url: kbUpdateMetaData,
method: 'post',
},
documentUpdateMetaData: {
url: documentUpdateMetaData,
method: 'post',
},
// getMetaData: {
// url: getMetaData,
// method: 'get',
// },
};
const kbService = registerServer<keyof typeof methods>(methods, request);
@ -289,6 +280,19 @@ export const updateMetaData = ({
data: any;
}) => request.post(api.updateMetaData, { data: { kb_id, doc_ids, ...data } });
export const updateDocumentMetaDataConfig = ({
kb_id,
doc_id,
data,
}: {
kb_id: string;
doc_id: string;
data: any;
}) =>
request.put(api.documentUpdateMetaDataConfig(kb_id, doc_id), {
data: { ...data },
});
export const listDataPipelineLogDocument = (
params?: IFetchKnowledgeListRequestParams,
body?: IFetchDocumentListRequestBody,

View File

@ -87,7 +87,8 @@ export default {
`${restAPIv1}/datasets/${datasetId}/metadata/summary`,
updateMetaData: `${webAPI}/document/metadata/update`,
kbUpdateMetaData: `${webAPI}/kb/update_metadata_setting`,
documentUpdateMetaData: `${webAPI}/document/update_metadata_setting`,
documentUpdateMetaDataConfig: (datasetId: string, documentId: string) =>
`${restAPIv1}/datasets/${datasetId}/documents/${documentId}/metadata/config`,
// tags
listTag: (knowledgeId: string) => `${webAPI}/kb/${knowledgeId}/tags`,