mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-08 08:07:21 +08:00
Refactor: Migrate document metadata config update API (#14286)
### What problem does this PR solve? Before migration Web API: POST /v1/document/update_metadata_setting After consolidation, Restful API PUT /api/v1/datasets/<dataset_id>/documents/<document_id>/metadata/config ### Type of change - [x] Refactoring
This commit is contained in:
@ -210,26 +210,6 @@ async def metadata_update():
|
||||
return get_json_result(data={"updated": updated, "matched_docs": len(document_ids)})
|
||||
|
||||
|
||||
@manager.route("/update_metadata_setting", methods=["POST"]) # noqa: F821
|
||||
@login_required
|
||||
@validate_request("doc_id", "metadata")
|
||||
async def update_metadata_setting():
|
||||
req = await get_request_json()
|
||||
if not DocumentService.accessible(req["doc_id"], current_user.id):
|
||||
return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)
|
||||
|
||||
e, doc = DocumentService.get_by_id(req["doc_id"])
|
||||
if not e:
|
||||
return get_data_error_result(message="Document not found!")
|
||||
|
||||
DocumentService.update_parser_config(doc.id, {"metadata": req["metadata"]})
|
||||
e, doc = DocumentService.get_by_id(doc.id)
|
||||
if not e:
|
||||
return get_data_error_result(message="Document not found!")
|
||||
|
||||
return get_json_result(data=doc.to_dict())
|
||||
|
||||
|
||||
@manager.route("/thumbnails", methods=["GET"]) # noqa: F821
|
||||
# @login_required
|
||||
def thumbnails():
|
||||
|
||||
@ -264,15 +264,15 @@ async def upload_document(dataset_id, tenant_id):
|
||||
"""
|
||||
from api.constants import FILE_NAME_LEN_LIMIT
|
||||
from api.db.services.file_service import FileService
|
||||
|
||||
|
||||
form = await request.form
|
||||
files = await request.files
|
||||
|
||||
|
||||
# Validation
|
||||
if "file" not in files:
|
||||
logging.error("No file part!")
|
||||
return get_error_data_result(message="No file part!", code=RetCode.ARGUMENT_ERROR)
|
||||
|
||||
|
||||
file_objs = files.getlist("file")
|
||||
for file_obj in file_objs:
|
||||
if file_obj is None or file_obj.filename is None or file_obj.filename == "":
|
||||
@ -288,7 +288,7 @@ async def upload_document(dataset_id, tenant_id):
|
||||
if not e:
|
||||
logging.error(f"Can't find the dataset with ID {dataset_id}!")
|
||||
return get_error_data_result(message=f"Can't find the dataset with ID {dataset_id}!", code=RetCode.DATA_ERROR)
|
||||
|
||||
|
||||
# Permission Check
|
||||
if not check_kb_team_permission(kb, tenant_id):
|
||||
logging.error("No authorization.")
|
||||
@ -308,7 +308,7 @@ async def upload_document(dataset_id, tenant_id):
|
||||
msg = "There seems to be an issue with your file format. please verify it is correct and not corrupted."
|
||||
logging.error(msg)
|
||||
return get_error_data_result(message=msg, code=RetCode.DATA_ERROR)
|
||||
|
||||
|
||||
files = [f[0] for f in files] # remove the blob
|
||||
|
||||
# Check if we should return raw files without document key mapping
|
||||
@ -580,7 +580,7 @@ def _parse_doc_id_filter_with_metadata(req, kb_id):
|
||||
- The metadata_condition uses operators like: =, !=, >, <, >=, <=, contains, not contains,
|
||||
in, not in, start with, end with, empty, not empty.
|
||||
- The metadata parameter performs exact matching where values are OR'd within the same key
|
||||
and AND'd across different keys.
|
||||
& AND'd across different keys.
|
||||
|
||||
Examples:
|
||||
Simple metadata filter (exact match):
|
||||
@ -758,6 +758,8 @@ async def delete_documents(tenant_id, dataset_id):
|
||||
except Exception as e:
|
||||
logging.exception(e)
|
||||
return get_error_data_result(message="Internal server error")
|
||||
|
||||
|
||||
def _aggregate_filters(docs):
|
||||
"""Aggregate filter options from a list of documents.
|
||||
|
||||
@ -815,3 +817,77 @@ def _aggregate_filters(docs):
|
||||
"run_status": run_status_counter,
|
||||
"metadata": metadata_counter,
|
||||
}
|
||||
|
||||
@manager.route("/datasets/<dataset_id>/documents/<document_id>/metadata/config", methods=["PUT"]) # noqa: F821
|
||||
@login_required
|
||||
@add_tenant_id_to_kwargs
|
||||
async def update_metadata_config(tenant_id, dataset_id, document_id):
|
||||
"""
|
||||
Update document metadata configuration.
|
||||
---
|
||||
tags:
|
||||
- Documents
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
parameters:
|
||||
- in: path
|
||||
name: dataset_id
|
||||
type: string
|
||||
required: true
|
||||
description: ID of the dataset.
|
||||
- in: path
|
||||
name: document_id
|
||||
type: string
|
||||
required: true
|
||||
description: ID of the document.
|
||||
- in: header
|
||||
name: Authorization
|
||||
type: string
|
||||
required: true
|
||||
description: Bearer token for authentication.
|
||||
- in: body
|
||||
name: body
|
||||
description: Metadata configuration.
|
||||
required: true
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
metadata:
|
||||
type: object
|
||||
description: Metadata configuration JSON.
|
||||
responses:
|
||||
200:
|
||||
description: Document updated successfully.
|
||||
"""
|
||||
# Verify ownership and existence of dataset
|
||||
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
||||
return get_error_data_result(message="You don't own the dataset.")
|
||||
|
||||
# Verify document exists in the dataset
|
||||
doc = DocumentService.query(id=document_id, kb_id=dataset_id)
|
||||
if not doc:
|
||||
msg = f"Document {document_id} not found in dataset {dataset_id}"
|
||||
return get_error_data_result(message=msg)
|
||||
doc = doc[0]
|
||||
|
||||
# Get request body
|
||||
req = await get_request_json()
|
||||
if "metadata" not in req:
|
||||
return get_error_argument_result(message="metadata is required")
|
||||
|
||||
# Update parser config with metadata
|
||||
try:
|
||||
DocumentService.update_parser_config(doc.id, {"metadata": req["metadata"]})
|
||||
except Exception as e:
|
||||
logging.error("error when update_parser_config", exc_info=e)
|
||||
return get_json_result(code=RetCode.EXCEPTION_ERROR, message=repr(e))
|
||||
|
||||
# Get updated document
|
||||
try:
|
||||
e, doc = DocumentService.get_by_id(doc.id)
|
||||
if not e:
|
||||
return get_data_error_result(message="Document not found!")
|
||||
except Exception as e:
|
||||
return get_json_result(code=RetCode.EXCEPTION_ERROR, message=repr(e))
|
||||
|
||||
return get_result(data=doc.to_dict())
|
||||
|
||||
@ -414,8 +414,8 @@ def document_metadata_update(auth, payload=None, *, headers=HEADERS, data=None):
|
||||
return res.json()
|
||||
|
||||
|
||||
def document_update_metadata_setting(auth, payload=None, *, headers=HEADERS, data=None):
|
||||
res = requests.post(url=f"{HOST_ADDRESS}{DOCUMENT_APP_URL}/update_metadata_setting", headers=headers, auth=auth, json=payload, data=data)
|
||||
def document_update_metadata_setting(auth, dataset_id, doc_id, payload=None, *, headers=HEADERS, data=None):
|
||||
res = requests.put(url=f"{HOST_ADDRESS}{DATASETS_URL}/{dataset_id}/documents/{doc_id}/metadata/config", headers=headers, auth=auth, json=payload, data=data)
|
||||
return res.json()
|
||||
|
||||
|
||||
|
||||
@ -18,6 +18,7 @@ from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
from test_common import (
|
||||
delete_document,
|
||||
document_change_status,
|
||||
document_filter,
|
||||
document_infos,
|
||||
@ -69,7 +70,7 @@ class TestAuthorization:
|
||||
@pytest.mark.p2
|
||||
@pytest.mark.parametrize("invalid_auth, expected_code, expected_fragment", INVALID_AUTH_CASES)
|
||||
def test_update_metadata_setting_auth_invalid(self, invalid_auth, expected_code, expected_fragment):
|
||||
res = document_update_metadata_setting(invalid_auth, {"doc_id": "doc_id", "metadata": {}})
|
||||
res = document_update_metadata_setting(invalid_auth, "kb_id", "doc_id", {"metadata": {}})
|
||||
assert res["code"] == expected_code, res
|
||||
assert expected_fragment in res["message"], res
|
||||
|
||||
@ -188,6 +189,19 @@ class TestDocumentMetadataNegative:
|
||||
assert "required argument are missing" in res["message"], res
|
||||
assert "metadata" in res["message"], res
|
||||
|
||||
@pytest.mark.p2
|
||||
def test_update_metadata_setting_not_found(self, WebApiAuth, add_document_func):
|
||||
"""Test updating metadata setting for a non-existent document returns error."""
|
||||
dataset_id, doc_id = add_document_func
|
||||
# First delete the document
|
||||
delete_res = delete_document(WebApiAuth, dataset_id, {"ids": [doc_id]})
|
||||
assert delete_res["code"] == 0, delete_res
|
||||
|
||||
# Now try to update metadata setting for the deleted document
|
||||
res = document_update_metadata_setting(WebApiAuth, dataset_id, doc_id, {"metadata": {"author": "test"}})
|
||||
assert res["code"] == 102, res
|
||||
assert f"Document {doc_id} not found in dataset {dataset_id}" in res["message"], res
|
||||
|
||||
@pytest.mark.p3
|
||||
def test_change_status_invalid_status(self, WebApiAuth, add_document_func):
|
||||
_, doc_id = add_document_func
|
||||
@ -265,34 +279,6 @@ class TestDocumentMetadataUnit:
|
||||
assert res["code"] == module.RetCode.ARGUMENT_ERROR
|
||||
assert "Each delete requires key." in res["message"]
|
||||
|
||||
def test_update_metadata_setting_authorization_and_refetch_not_found_unit(self, document_app_module, monkeypatch):
|
||||
module = document_app_module
|
||||
|
||||
async def fake_request_json():
|
||||
return {"doc_id": "doc1", "metadata": {"author": "alice"}}
|
||||
|
||||
monkeypatch.setattr(module, "get_request_json", fake_request_json)
|
||||
monkeypatch.setattr(module.DocumentService, "accessible", lambda *_args, **_kwargs: False)
|
||||
res = _run(module.update_metadata_setting.__wrapped__())
|
||||
assert res["code"] == module.RetCode.AUTHENTICATION_ERROR
|
||||
assert "No authorization." in res["message"]
|
||||
|
||||
doc = SimpleNamespace(id="doc1", to_dict=lambda: {"id": "doc1", "parser_config": {}})
|
||||
state = {"count": 0}
|
||||
|
||||
def fake_get_by_id(_doc_id):
|
||||
state["count"] += 1
|
||||
if state["count"] == 1:
|
||||
return True, doc
|
||||
return False, None
|
||||
|
||||
monkeypatch.setattr(module.DocumentService, "accessible", lambda *_args, **_kwargs: True)
|
||||
monkeypatch.setattr(module.DocumentService, "get_by_id", fake_get_by_id)
|
||||
monkeypatch.setattr(module.DocumentService, "update_parser_config", lambda *_args, **_kwargs: True)
|
||||
res = _run(module.update_metadata_setting.__wrapped__())
|
||||
assert res["code"] == module.RetCode.DATA_ERROR
|
||||
assert "Document not found!" in res["message"]
|
||||
|
||||
def test_thumbnails_missing_ids_rewrite_and_exception_unit(self, document_app_module, monkeypatch):
|
||||
module = document_app_module
|
||||
monkeypatch.setattr(module, "request", _DummyRequest(args={}))
|
||||
|
||||
@ -4,6 +4,7 @@ import { useSelectedIds } from '@/hooks/logic-hooks/use-row-selection';
|
||||
import { DocumentApiAction } from '@/hooks/use-document-request';
|
||||
import kbService, {
|
||||
getMetaDataService,
|
||||
updateDocumentMetaDataConfig,
|
||||
updateMetaData,
|
||||
} from '@/services/knowledge-service';
|
||||
import { useQuery, useQueryClient } from '@tanstack/react-query';
|
||||
@ -432,10 +433,14 @@ export const useManageMetaDataModal = (
|
||||
const handleSaveSingleFileSettings = useCallback(
|
||||
async (callback: () => void) => {
|
||||
const data = util.tableDataToMetaDataSettingJSON(tableData);
|
||||
if (otherData?.documentId) {
|
||||
const { data: res } = await kbService.documentUpdateMetaData({
|
||||
// otherData contains: documentId
|
||||
if (otherData?.documentId && id) {
|
||||
const { data: res } = await updateDocumentMetaDataConfig({
|
||||
kb_id: id,
|
||||
doc_id: otherData.documentId,
|
||||
metadata: data,
|
||||
data: {
|
||||
metadata: data,
|
||||
},
|
||||
});
|
||||
if (res.code === 0) {
|
||||
message.success(t('message.operated'));
|
||||
|
||||
@ -41,7 +41,6 @@ const {
|
||||
fetchPipelineDatasetLogs,
|
||||
checkEmbedding,
|
||||
kbUpdateMetaData,
|
||||
documentUpdateMetaData,
|
||||
} = api;
|
||||
|
||||
const methods = {
|
||||
@ -177,14 +176,6 @@ const methods = {
|
||||
url: kbUpdateMetaData,
|
||||
method: 'post',
|
||||
},
|
||||
documentUpdateMetaData: {
|
||||
url: documentUpdateMetaData,
|
||||
method: 'post',
|
||||
},
|
||||
// getMetaData: {
|
||||
// url: getMetaData,
|
||||
// method: 'get',
|
||||
// },
|
||||
};
|
||||
|
||||
const kbService = registerServer<keyof typeof methods>(methods, request);
|
||||
@ -289,6 +280,19 @@ export const updateMetaData = ({
|
||||
data: any;
|
||||
}) => request.post(api.updateMetaData, { data: { kb_id, doc_ids, ...data } });
|
||||
|
||||
export const updateDocumentMetaDataConfig = ({
|
||||
kb_id,
|
||||
doc_id,
|
||||
data,
|
||||
}: {
|
||||
kb_id: string;
|
||||
doc_id: string;
|
||||
data: any;
|
||||
}) =>
|
||||
request.put(api.documentUpdateMetaDataConfig(kb_id, doc_id), {
|
||||
data: { ...data },
|
||||
});
|
||||
|
||||
export const listDataPipelineLogDocument = (
|
||||
params?: IFetchKnowledgeListRequestParams,
|
||||
body?: IFetchDocumentListRequestBody,
|
||||
|
||||
@ -87,7 +87,8 @@ export default {
|
||||
`${restAPIv1}/datasets/${datasetId}/metadata/summary`,
|
||||
updateMetaData: `${webAPI}/document/metadata/update`,
|
||||
kbUpdateMetaData: `${webAPI}/kb/update_metadata_setting`,
|
||||
documentUpdateMetaData: `${webAPI}/document/update_metadata_setting`,
|
||||
documentUpdateMetaDataConfig: (datasetId: string, documentId: string) =>
|
||||
`${restAPIv1}/datasets/${datasetId}/documents/${documentId}/metadata/config`,
|
||||
|
||||
// tags
|
||||
listTag: (knowledgeId: string) => `${webAPI}/kb/${knowledgeId}/tags`,
|
||||
|
||||
Reference in New Issue
Block a user