Refactor: Migrate document metadata config update API (#14286)

### What problem does this PR solve?

Before migration
Web API: POST /v1/document/update_metadata_setting

After consolidation, Restful API
PUT
/api/v1/datasets/<dataset_id>/documents/<document_id>/metadata/config

### Type of change

- [x] Refactoring
This commit is contained in:
Jack
2026-04-22 20:01:31 +08:00
committed by GitHub
parent d1c62fc19d
commit c08cd8e090
7 changed files with 122 additions and 70 deletions

View File

@ -210,26 +210,6 @@ async def metadata_update():
return get_json_result(data={"updated": updated, "matched_docs": len(document_ids)})
@manager.route("/update_metadata_setting", methods=["POST"]) # noqa: F821
@login_required
@validate_request("doc_id", "metadata")
async def update_metadata_setting():
req = await get_request_json()
if not DocumentService.accessible(req["doc_id"], current_user.id):
return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)
e, doc = DocumentService.get_by_id(req["doc_id"])
if not e:
return get_data_error_result(message="Document not found!")
DocumentService.update_parser_config(doc.id, {"metadata": req["metadata"]})
e, doc = DocumentService.get_by_id(doc.id)
if not e:
return get_data_error_result(message="Document not found!")
return get_json_result(data=doc.to_dict())
@manager.route("/thumbnails", methods=["GET"]) # noqa: F821
# @login_required
def thumbnails():

View File

@ -264,15 +264,15 @@ async def upload_document(dataset_id, tenant_id):
"""
from api.constants import FILE_NAME_LEN_LIMIT
from api.db.services.file_service import FileService
form = await request.form
files = await request.files
# Validation
if "file" not in files:
logging.error("No file part!")
return get_error_data_result(message="No file part!", code=RetCode.ARGUMENT_ERROR)
file_objs = files.getlist("file")
for file_obj in file_objs:
if file_obj is None or file_obj.filename is None or file_obj.filename == "":
@ -288,7 +288,7 @@ async def upload_document(dataset_id, tenant_id):
if not e:
logging.error(f"Can't find the dataset with ID {dataset_id}!")
return get_error_data_result(message=f"Can't find the dataset with ID {dataset_id}!", code=RetCode.DATA_ERROR)
# Permission Check
if not check_kb_team_permission(kb, tenant_id):
logging.error("No authorization.")
@ -308,7 +308,7 @@ async def upload_document(dataset_id, tenant_id):
msg = "There seems to be an issue with your file format. please verify it is correct and not corrupted."
logging.error(msg)
return get_error_data_result(message=msg, code=RetCode.DATA_ERROR)
files = [f[0] for f in files] # remove the blob
# Check if we should return raw files without document key mapping
@ -580,7 +580,7 @@ def _parse_doc_id_filter_with_metadata(req, kb_id):
- The metadata_condition uses operators like: =, !=, >, <, >=, <=, contains, not contains,
in, not in, start with, end with, empty, not empty.
- The metadata parameter performs exact matching where values are OR'd within the same key
and AND'd across different keys.
& AND'd across different keys.
Examples:
Simple metadata filter (exact match):
@ -758,6 +758,8 @@ async def delete_documents(tenant_id, dataset_id):
except Exception as e:
logging.exception(e)
return get_error_data_result(message="Internal server error")
def _aggregate_filters(docs):
"""Aggregate filter options from a list of documents.
@ -815,3 +817,77 @@ def _aggregate_filters(docs):
"run_status": run_status_counter,
"metadata": metadata_counter,
}
@manager.route("/datasets/<dataset_id>/documents/<document_id>/metadata/config", methods=["PUT"]) # noqa: F821
@login_required
@add_tenant_id_to_kwargs
async def update_metadata_config(tenant_id, dataset_id, document_id):
"""
Update document metadata configuration.
---
tags:
- Documents
security:
- ApiKeyAuth: []
parameters:
- in: path
name: dataset_id
type: string
required: true
description: ID of the dataset.
- in: path
name: document_id
type: string
required: true
description: ID of the document.
- in: header
name: Authorization
type: string
required: true
description: Bearer token for authentication.
- in: body
name: body
description: Metadata configuration.
required: true
schema:
type: object
properties:
metadata:
type: object
description: Metadata configuration JSON.
responses:
200:
description: Document updated successfully.
"""
# Verify ownership and existence of dataset
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
return get_error_data_result(message="You don't own the dataset.")
# Verify document exists in the dataset
doc = DocumentService.query(id=document_id, kb_id=dataset_id)
if not doc:
msg = f"Document {document_id} not found in dataset {dataset_id}"
return get_error_data_result(message=msg)
doc = doc[0]
# Get request body
req = await get_request_json()
if "metadata" not in req:
return get_error_argument_result(message="metadata is required")
# Update parser config with metadata
try:
DocumentService.update_parser_config(doc.id, {"metadata": req["metadata"]})
except Exception as e:
logging.error("error when update_parser_config", exc_info=e)
return get_json_result(code=RetCode.EXCEPTION_ERROR, message=repr(e))
# Get updated document
try:
e, doc = DocumentService.get_by_id(doc.id)
if not e:
return get_data_error_result(message="Document not found!")
except Exception as e:
return get_json_result(code=RetCode.EXCEPTION_ERROR, message=repr(e))
return get_result(data=doc.to_dict())