mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-05-06 10:17:49 +08:00
Refactor: Migrate document metadata config update API (#14286)
### What problem does this PR solve?

Before migration, Web API: `POST /v1/document/update_metadata_setting`

After consolidation, RESTful API: `PUT /api/v1/datasets/<dataset_id>/documents/<document_id>/metadata/config`

### Type of change

- [x] Refactoring
This commit is contained in:
@ -210,26 +210,6 @@ async def metadata_update():
|
||||
return get_json_result(data={"updated": updated, "matched_docs": len(document_ids)})
|
||||
|
||||
|
||||
@manager.route("/update_metadata_setting", methods=["POST"])  # noqa: F821
@login_required
@validate_request("doc_id", "metadata")
async def update_metadata_setting():
    """Store the ``metadata`` value from the request in a document's parser config.

    Reads ``doc_id`` and ``metadata`` from the JSON request body, rejects the
    call when ``DocumentService.accessible`` reports that the current user may
    not access the document, writes ``{"metadata": ...}`` through
    ``DocumentService.update_parser_config`` and responds with the re-read
    document serialized via ``to_dict()``.
    """
    payload = await get_request_json()
    doc_id = payload["doc_id"]

    # Access check comes first so nothing is looked up for unauthorized callers.
    if not DocumentService.accessible(doc_id, current_user.id):
        return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)

    found, document = DocumentService.get_by_id(doc_id)
    if not found:
        return get_data_error_result(message="Document not found!")

    DocumentService.update_parser_config(document.id, {"metadata": payload["metadata"]})

    # Fetch the document again so the response reflects the stored state.
    found, refreshed = DocumentService.get_by_id(document.id)
    if not found:
        return get_data_error_result(message="Document not found!")

    return get_json_result(data=refreshed.to_dict())
|
||||
|
||||
|
||||
@manager.route("/thumbnails", methods=["GET"]) # noqa: F821
|
||||
# @login_required
|
||||
def thumbnails():
|
||||
|
||||
@ -264,15 +264,15 @@ async def upload_document(dataset_id, tenant_id):
|
||||
"""
|
||||
from api.constants import FILE_NAME_LEN_LIMIT
|
||||
from api.db.services.file_service import FileService
|
||||
|
||||
|
||||
form = await request.form
|
||||
files = await request.files
|
||||
|
||||
|
||||
# Validation
|
||||
if "file" not in files:
|
||||
logging.error("No file part!")
|
||||
return get_error_data_result(message="No file part!", code=RetCode.ARGUMENT_ERROR)
|
||||
|
||||
|
||||
file_objs = files.getlist("file")
|
||||
for file_obj in file_objs:
|
||||
if file_obj is None or file_obj.filename is None or file_obj.filename == "":
|
||||
@ -288,7 +288,7 @@ async def upload_document(dataset_id, tenant_id):
|
||||
if not e:
|
||||
logging.error(f"Can't find the dataset with ID {dataset_id}!")
|
||||
return get_error_data_result(message=f"Can't find the dataset with ID {dataset_id}!", code=RetCode.DATA_ERROR)
|
||||
|
||||
|
||||
# Permission Check
|
||||
if not check_kb_team_permission(kb, tenant_id):
|
||||
logging.error("No authorization.")
|
||||
@ -308,7 +308,7 @@ async def upload_document(dataset_id, tenant_id):
|
||||
msg = "There seems to be an issue with your file format. please verify it is correct and not corrupted."
|
||||
logging.error(msg)
|
||||
return get_error_data_result(message=msg, code=RetCode.DATA_ERROR)
|
||||
|
||||
|
||||
files = [f[0] for f in files] # remove the blob
|
||||
|
||||
# Check if we should return raw files without document key mapping
|
||||
@ -580,7 +580,7 @@ def _parse_doc_id_filter_with_metadata(req, kb_id):
|
||||
- The metadata_condition uses operators like: =, !=, >, <, >=, <=, contains, not contains,
|
||||
in, not in, start with, end with, empty, not empty.
|
||||
- The metadata parameter performs exact matching where values are OR'd within the same key
|
||||
and AND'd across different keys.
|
||||
& AND'd across different keys.
|
||||
|
||||
Examples:
|
||||
Simple metadata filter (exact match):
|
||||
@ -758,6 +758,8 @@ async def delete_documents(tenant_id, dataset_id):
|
||||
except Exception as e:
|
||||
logging.exception(e)
|
||||
return get_error_data_result(message="Internal server error")
|
||||
|
||||
|
||||
def _aggregate_filters(docs):
|
||||
"""Aggregate filter options from a list of documents.
|
||||
|
||||
@ -815,3 +817,77 @@ def _aggregate_filters(docs):
|
||||
"run_status": run_status_counter,
|
||||
"metadata": metadata_counter,
|
||||
}
|
||||
|
||||
@manager.route("/datasets/<dataset_id>/documents/<document_id>/metadata/config", methods=["PUT"])  # noqa: F821
@login_required
@add_tenant_id_to_kwargs
async def update_metadata_config(tenant_id, dataset_id, document_id):
    """
    Update document metadata configuration.
    ---
    tags:
      - Documents
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: path
        name: document_id
        type: string
        required: true
        description: ID of the document.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
      - in: body
        name: body
        description: Metadata configuration.
        required: true
        schema:
          type: object
          required:
            - metadata
          properties:
            metadata:
              type: object
              description: Metadata configuration JSON.
    responses:
      200:
        description: Document updated successfully.
    """
    # Verify ownership and existence of dataset
    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
        return get_error_data_result(message="You don't own the dataset.")

    # Verify document exists in the dataset
    docs = DocumentService.query(id=document_id, kb_id=dataset_id)
    if not docs:
        msg = f"Document {document_id} not found in dataset {dataset_id}"
        return get_error_data_result(message=msg)
    doc = docs[0]

    # Get request body. A missing or non-object body is reported the same way
    # as a missing "metadata" key instead of failing on the membership test.
    req = await get_request_json()
    if not isinstance(req, dict) or "metadata" not in req:
        return get_error_argument_result(message="metadata is required")

    # Update parser config with metadata
    try:
        DocumentService.update_parser_config(doc.id, {"metadata": req["metadata"]})
    except Exception as e:
        logging.error("error when update_parser_config", exc_info=e)
        # NOTE(review): repr(e) is returned to the client as in the original;
        # confirm this does not expose internal details that should stay private.
        return get_json_result(code=RetCode.EXCEPTION_ERROR, message=repr(e))

    # Get updated document. Only the lookup itself is guarded, and a failure is
    # logged so it is as traceable as a failure of the update step.
    try:
        found, doc = DocumentService.get_by_id(doc.id)
    except Exception as e:
        logging.error("error when get_by_id after update_parser_config", exc_info=e)
        return get_json_result(code=RetCode.EXCEPTION_ERROR, message=repr(e))
    if not found:
        return get_data_error_result(message="Document not found!")

    return get_result(data=doc.to_dict())
|
||||
|
||||
Reference in New Issue
Block a user