From 3d10e2075ca8e1f478b0cbd6c70ea1f21196a99d Mon Sep 17 00:00:00 2001
From: Yongteng Lei
Date: Tue, 24 Mar 2026 19:24:41 +0800
Subject: [PATCH] Refa: files /file API to RESTful style (#13741)

### What problem does this PR solve?

Refactor the file-management `/file` API to a RESTful style: route handlers move to the new `api/apps/restful_apis/file_api.py`, business logic moves to a new `api/apps/services/file_api_service.py`, and the legacy SDK endpoints in `api/apps/sdk/files.py` are removed.

### Type of change

- [x] Documentation Update
- [x] Refactoring

---------

Co-authored-by: writinwaters
Co-authored-by: Liu An
---
 api/apps/document_app.py                   |   31 +-
 api/apps/file2document_app.py              |    4 +-
 api/apps/file_app.py                       |  918 ++++++-------
 api/apps/restful_apis/file_api.py          |  364 +++++
 api/apps/sdk/doc.py                        |   75 +-
 api/apps/sdk/files.py                      |  835 -----------
 api/apps/services/file_api_service.py      |  397 ++++++
 api/utils/validation_utils.py              |   39 +-
 docs/references/http_api_reference.md      |  325 +++--
 .../test_file_app/test_file_routes.py      | 1210 ++++------------
 .../test_document_app/conftest.py          |   14 +
 .../test_upload_documents.py               |    5 +-
 .../test_upload_info_unit.py               |  139 ++
 .../test_file2document_routes_unit.py      |   15 +-
 .../test_file_app/test_file_routes_unit.py | 1215 +++--------------
 web/src/components/new-document-link.tsx   |    6 +-
 .../uploaded-message-files.tsx             |    2 +-
 web/src/hooks/use-file-request.ts          |   22 +-
 web/src/pages/document-viewer/index.tsx    |   12 +-
 web/src/pages/files/action-cell.tsx        |    1 +
 web/src/services/file-manager-service.ts   |    7 +-
 web/src/utils/api.ts                       |   15 +-
 web/src/utils/file-util.ts                 |   20 +-
 23 files changed, 2118 insertions(+), 3553 deletions(-)
 create mode 100644 api/apps/restful_apis/file_api.py
 delete mode 100644 api/apps/sdk/files.py
 create mode 100644 api/apps/services/file_api_service.py
 create mode 100644 test/testcases/test_web_api/test_document_app/test_upload_info_unit.py

diff --git a/api/apps/document_app.py b/api/apps/document_app.py
index c9c20c911..dd66144ee 100644
--- a/api/apps/document_app.py
+++ b/api/apps/document_app.py
@@ -1,5 +1,5 @@
 #
-# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
+# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -765,7 +765,6 @@ async def download_attachment(attachment_id): @login_required @validate_request("doc_id") async def change_parser(): - req = await get_request_json() if not DocumentService.accessible(req["doc_id"], current_user.id): return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR) @@ -983,10 +982,34 @@ async def set_meta(): @manager.route("/upload_info", methods=["POST"]) # noqa: F821 +@login_required async def upload_info(): files = await request.files - file = files["file"] if files and files.get("file") else None + file_objs = files.getlist("file") if files and files.get("file") else [] + url = request.args.get("url") + + if file_objs and url: + return get_json_result( + data=False, + message="Provide either multipart file(s) or ?url=..., not both.", + code=RetCode.BAD_REQUEST, + ) + + if not file_objs and not url: + return get_json_result( + data=False, + message="Missing input: provide multipart file(s) or url", + code=RetCode.BAD_REQUEST, + ) + try: - return get_json_result(data=FileService.upload_info(current_user.id, file, request.args.get("url"))) + if url and not file_objs: + return get_json_result(data=FileService.upload_info(current_user.id, None, url)) + + if len(file_objs) == 1: + return get_json_result(data=FileService.upload_info(current_user.id, file_objs[0], None)) + + results = [FileService.upload_info(current_user.id, f, None) for f in file_objs] + return get_json_result(data=results) except Exception as e: return server_error_response(e) diff --git a/api/apps/file2document_app.py b/api/apps/file2document_app.py index f410e8a17..745988a97 100644 --- a/api/apps/file2document_app.py +++ b/api/apps/file2document_app.py @@ -1,5 +1,5 @@ # -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# Copyright 2026 The InfiniFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -77,7 +77,7 @@ async def convert(): doc = DocumentService.insert({ "id": get_uuid(), "kb_id": kb.id, - "parser_id": kb.parser_id, + "parser_id": FileService.get_parser(file.type, file.name, kb.parser_id), "pipeline_id": kb.pipeline_id, "parser_config": kb.parser_config, "created_by": current_user.id, diff --git a/api/apps/file_app.py b/api/apps/file_app.py index 1733d9f80..172b49ff8 100644 --- a/api/apps/file_app.py +++ b/api/apps/file_app.py @@ -1,464 +1,464 @@ +# # +# # Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# # +# # Licensed under the Apache License, Version 2.0 (the "License"); +# # you may not use this file except in compliance with the License. +# # You may obtain a copy of the License at +# # +# # http://www.apache.org/licenses/LICENSE-2.0 +# # +# # Unless required by applicable law or agreed to in writing, software +# # distributed under the License is distributed on an "AS IS" BASIS, +# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# # See the License for the specific language governing permissions and +# # limitations under the License +# # +# import logging +# import os +# import pathlib +# import re +# from quart import request, make_response +# from api.apps import login_required, current_user # -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. 
+# from api.common.check_team_permission import check_file_team_permission +# from api.db.services.document_service import DocumentService +# from api.db.services.file2document_service import File2DocumentService +# from api.utils.api_utils import server_error_response, get_data_error_result, validate_request +# from common.misc_utils import get_uuid, thread_pool_exec +# from common.constants import RetCode, FileSource +# from api.db import FileType +# from api.db.services import duplicate_name +# from api.db.services.file_service import FileService +# from api.utils.api_utils import get_json_result, get_request_json +# from api.utils.file_utils import filename_type +# from api.utils.web_utils import CONTENT_TYPE_MAP, apply_safe_file_response_headers +# from common import settings # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# @manager.route('/upload', methods=['POST']) # noqa: F821 +# @login_required +# # @validate_request("parent_id") +# async def upload(): +# form = await request.form +# pf_id = form.get("parent_id") # -# http://www.apache.org/licenses/LICENSE-2.0 +# if not pf_id: +# root_folder = FileService.get_root_folder(current_user.id) +# pf_id = root_folder["id"] # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License +# files = await request.files +# if 'file' not in files: +# return get_json_result( +# data=False, message='No file part!', code=RetCode.ARGUMENT_ERROR) +# file_objs = files.getlist('file') # -import logging -import os -import pathlib -import re -from quart import request, make_response -from api.apps import login_required, current_user - -from api.common.check_team_permission import check_file_team_permission -from api.db.services.document_service import DocumentService -from api.db.services.file2document_service import File2DocumentService -from api.utils.api_utils import server_error_response, get_data_error_result, validate_request -from common.misc_utils import get_uuid, thread_pool_exec -from common.constants import RetCode, FileSource -from api.db import FileType -from api.db.services import duplicate_name -from api.db.services.file_service import FileService -from api.utils.api_utils import get_json_result, get_request_json -from api.utils.file_utils import filename_type -from api.utils.web_utils import CONTENT_TYPE_MAP, apply_safe_file_response_headers -from common import settings - -@manager.route('/upload', methods=['POST']) # noqa: F821 -@login_required -# @validate_request("parent_id") -async def upload(): - form = await request.form - pf_id = form.get("parent_id") - - if not pf_id: - root_folder = FileService.get_root_folder(current_user.id) - pf_id = root_folder["id"] - - files = await request.files - if 'file' not in files: - return get_json_result( - data=False, message='No file part!', code=RetCode.ARGUMENT_ERROR) - file_objs = files.getlist('file') - - for file_obj in file_objs: - if file_obj.filename == '': - return get_json_result( - data=False, message='No file selected!', code=RetCode.ARGUMENT_ERROR) - file_res = [] - try: - e, pf_folder = FileService.get_by_id(pf_id) - if not e: - return get_data_error_result( message="Can't find this folder!") - 
- async def _handle_single_file(file_obj): - MAX_FILE_NUM_PER_USER: int = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0)) - if 0 < MAX_FILE_NUM_PER_USER <= await thread_pool_exec(DocumentService.get_doc_count, current_user.id): - return get_data_error_result( message="Exceed the maximum file number of a free user!") - - # split file name path - if not file_obj.filename: - file_obj_names = [pf_folder.name, file_obj.filename] - else: - full_path = '/' + file_obj.filename - file_obj_names = full_path.split('/') - file_len = len(file_obj_names) - - # get folder - file_id_list = await thread_pool_exec(FileService.get_id_list_by_id, pf_id, file_obj_names, 1, [pf_id]) - len_id_list = len(file_id_list) - - # create folder - if file_len != len_id_list: - e, file = await thread_pool_exec(FileService.get_by_id, file_id_list[len_id_list - 1]) - if not e: - return get_data_error_result(message="Folder not found!") - last_folder = await thread_pool_exec(FileService.create_folder, file, file_id_list[len_id_list - 1], file_obj_names, - len_id_list) - else: - e, file = await thread_pool_exec(FileService.get_by_id, file_id_list[len_id_list - 2]) - if not e: - return get_data_error_result(message="Folder not found!") - last_folder = await thread_pool_exec(FileService.create_folder, file, file_id_list[len_id_list - 2], file_obj_names, - len_id_list) - - # file type - filetype = filename_type(file_obj_names[file_len - 1]) - location = file_obj_names[file_len - 1] - while await thread_pool_exec(settings.STORAGE_IMPL.obj_exist, last_folder.id, location): - location += "_" - blob = await thread_pool_exec(file_obj.read) - filename = await thread_pool_exec( - duplicate_name, - FileService.query, - name=file_obj_names[file_len - 1], - parent_id=last_folder.id) - await thread_pool_exec(settings.STORAGE_IMPL.put, last_folder.id, location, blob) - file_data = { - "id": get_uuid(), - "parent_id": last_folder.id, - "tenant_id": current_user.id, - "created_by": current_user.id, - "type": filetype, - "name": filename, - "location": location, - "size": len(blob), - } - inserted = await thread_pool_exec(FileService.insert, file_data) - return inserted.to_json() - - for file_obj in file_objs: - res = await _handle_single_file(file_obj) - file_res.append(res) - - return get_json_result(data=file_res) - except Exception as e: - return server_error_response(e) - - -@manager.route('/create', methods=['POST']) # noqa: F821 -@login_required -@validate_request("name") -async def create(): - req = await get_request_json() - pf_id = req.get("parent_id") - input_file_type = req.get("type") - if not pf_id: - root_folder = FileService.get_root_folder(current_user.id) - pf_id = root_folder["id"] - - try: - if not FileService.is_parent_folder_exist(pf_id): - return get_json_result( - data=False, message="Parent Folder Doesn't Exist!", code=RetCode.OPERATING_ERROR) - if FileService.query(name=req["name"], parent_id=pf_id): - return get_data_error_result( - message="Duplicated folder name in the same folder.") - - if input_file_type == FileType.FOLDER.value: - file_type = FileType.FOLDER.value - else: - file_type = FileType.VIRTUAL.value - - file = FileService.insert({ - "id": get_uuid(), - "parent_id": pf_id, - "tenant_id": current_user.id, - "created_by": current_user.id, - "name": req["name"], - "location": "", - "size": 0, - "type": file_type - }) - - return get_json_result(data=file.to_json()) - except Exception as e: - return server_error_response(e) - - -@manager.route('/list', methods=['GET']) # noqa: F821 -@login_required -def 
list_files(): - pf_id = request.args.get("parent_id") - - keywords = request.args.get("keywords", "") - - page_number = int(request.args.get("page", 1)) - items_per_page = int(request.args.get("page_size", 15)) - orderby = request.args.get("orderby", "create_time") - desc = request.args.get("desc", True) - if not pf_id: - root_folder = FileService.get_root_folder(current_user.id) - pf_id = root_folder["id"] - FileService.init_knowledgebase_docs(pf_id, current_user.id) - try: - e, file = FileService.get_by_id(pf_id) - if not e: - return get_data_error_result(message="Folder not found!") - - files, total = FileService.get_by_pf_id( - current_user.id, pf_id, page_number, items_per_page, orderby, desc, keywords) - - parent_folder = FileService.get_parent_folder(pf_id) - if not parent_folder: - return get_json_result(message="File not found!") - - return get_json_result(data={"total": total, "files": files, "parent_folder": parent_folder.to_json()}) - except Exception as e: - return server_error_response(e) - - -@manager.route('/root_folder', methods=['GET']) # noqa: F821 -@login_required -def get_root_folder(): - try: - root_folder = FileService.get_root_folder(current_user.id) - return get_json_result(data={"root_folder": root_folder}) - except Exception as e: - return server_error_response(e) - - -@manager.route('/parent_folder', methods=['GET']) # noqa: F821 -@login_required -def get_parent_folder(): - file_id = request.args.get("file_id") - try: - e, file = FileService.get_by_id(file_id) - if not e: - return get_data_error_result(message="Folder not found!") - - parent_folder = FileService.get_parent_folder(file_id) - return get_json_result(data={"parent_folder": parent_folder.to_json()}) - except Exception as e: - return server_error_response(e) - - -@manager.route('/all_parent_folder', methods=['GET']) # noqa: F821 -@login_required -def get_all_parent_folders(): - file_id = request.args.get("file_id") - try: - e, file = FileService.get_by_id(file_id) - if not e: - return get_data_error_result(message="Folder not found!") - - parent_folders = FileService.get_all_parent_folders(file_id) - parent_folders_res = [] - for parent_folder in parent_folders: - parent_folders_res.append(parent_folder.to_json()) - return get_json_result(data={"parent_folders": parent_folders_res}) - except Exception as e: - return server_error_response(e) - - -@manager.route("/rm", methods=["POST"]) # noqa: F821 -@login_required -@validate_request("file_ids") -async def rm(): - req = await get_request_json() - file_ids = req["file_ids"] - uid = current_user.id - - try: - def _delete_single_file(file): - try: - if file.location: - settings.STORAGE_IMPL.rm(file.parent_id, file.location) - except Exception as e: - logging.exception(f"Fail to remove object: {file.parent_id}/{file.location}, error: {e}") - - informs = File2DocumentService.get_by_file_id(file.id) - for inform in informs: - doc_id = inform.document_id - e, doc = DocumentService.get_by_id(doc_id) - if e and doc: - tenant_id = DocumentService.get_tenant_id(doc_id) - if tenant_id: - DocumentService.remove_document(doc, tenant_id) - File2DocumentService.delete_by_file_id(file.id) - - FileService.delete(file) - - def _delete_folder_recursive(folder, tenant_id): - sub_files = FileService.list_all_files_by_parent_id(folder.id) - for sub_file in sub_files: - if sub_file.type == FileType.FOLDER.value: - _delete_folder_recursive(sub_file, tenant_id) - else: - _delete_single_file(sub_file) - - FileService.delete(folder) - - def _rm_sync(): - for file_id in file_ids: 
- e, file = FileService.get_by_id(file_id) - if not e or not file: - return get_data_error_result(message="File or Folder not found!") - if not file.tenant_id: - return get_data_error_result(message="Tenant not found!") - if not check_file_team_permission(file, uid): - return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR) - - if file.source_type == FileSource.KNOWLEDGEBASE: - continue - - if file.type == FileType.FOLDER.value: - _delete_folder_recursive(file, uid) - continue - - _delete_single_file(file) - - return get_json_result(data=True) - - return await thread_pool_exec(_rm_sync) - - except Exception as e: - return server_error_response(e) - - -@manager.route('/rename', methods=['POST']) # noqa: F821 -@login_required -@validate_request("file_id", "name") -async def rename(): - req = await get_request_json() - try: - e, file = FileService.get_by_id(req["file_id"]) - if not e: - return get_data_error_result(message="File not found!") - if not check_file_team_permission(file, current_user.id): - return get_json_result(data=False, message='No authorization.', code=RetCode.AUTHENTICATION_ERROR) - if file.type != FileType.FOLDER.value \ - and pathlib.Path(req["name"].lower()).suffix != pathlib.Path( - file.name.lower()).suffix: - return get_json_result( - data=False, - message="The extension of file can't be changed", - code=RetCode.ARGUMENT_ERROR) - for file in FileService.query(name=req["name"], pf_id=file.parent_id): - if file.name == req["name"]: - return get_data_error_result( - message="Duplicated file name in the same folder.") - - if not FileService.update_by_id( - req["file_id"], {"name": req["name"]}): - return get_data_error_result( - message="Database error (File rename)!") - - informs = File2DocumentService.get_by_file_id(req["file_id"]) - if informs: - if not DocumentService.update_by_id( - informs[0].document_id, {"name": req["name"]}): - return get_data_error_result( - message="Database error (Document rename)!") - - return get_json_result(data=True) - except Exception as e: - return server_error_response(e) - - -@manager.route('/get/', methods=['GET']) # noqa: F821 -@login_required -async def get(file_id): - try: - e, file = FileService.get_by_id(file_id) - if not e: - return get_data_error_result(message="Document not found!") - if not check_file_team_permission(file, current_user.id): - return get_json_result(data=False, message='No authorization.', code=RetCode.AUTHENTICATION_ERROR) - - blob = await thread_pool_exec(settings.STORAGE_IMPL.get, file.parent_id, file.location) - if not blob: - b, n = File2DocumentService.get_storage_address(file_id=file_id) - blob = await thread_pool_exec(settings.STORAGE_IMPL.get, b, n) - - response = await make_response(blob) - ext = re.search(r"\.([^.]+)$", file.name.lower()) - ext = ext.group(1) if ext else None - content_type = None - if ext: - fallback_prefix = "image" if file.type == FileType.VISUAL.value else "application" - content_type = CONTENT_TYPE_MAP.get(ext, f"{fallback_prefix}/{ext}") - apply_safe_file_response_headers(response, content_type, ext) - return response - except Exception as e: - return server_error_response(e) - - -@manager.route("/mv", methods=["POST"]) # noqa: F821 -@login_required -@validate_request("src_file_ids", "dest_file_id") -async def move(): - req = await get_request_json() - try: - file_ids = req["src_file_ids"] - dest_parent_id = req["dest_file_id"] - - ok, dest_folder = FileService.get_by_id(dest_parent_id) - if not ok or not dest_folder: - return 
get_data_error_result(message="Parent folder not found!") - - files = FileService.get_by_ids(file_ids) - if not files: - return get_data_error_result(message="Source files not found!") - - files_dict = {f.id: f for f in files} - - for file_id in file_ids: - file = files_dict.get(file_id) - if not file: - return get_data_error_result(message="File or folder not found!") - if not file.tenant_id: - return get_data_error_result(message="Tenant not found!") - if not check_file_team_permission(file, current_user.id): - return get_json_result( - data=False, - message="No authorization.", - code=RetCode.AUTHENTICATION_ERROR, - ) - - def _move_entry_recursive(source_file_entry, dest_folder): - if source_file_entry.type == FileType.FOLDER.value: - existing_folder = FileService.query(name=source_file_entry.name, parent_id=dest_folder.id) - if existing_folder: - new_folder = existing_folder[0] - else: - new_folder = FileService.insert( - { - "id": get_uuid(), - "parent_id": dest_folder.id, - "tenant_id": source_file_entry.tenant_id, - "created_by": current_user.id, - "name": source_file_entry.name, - "location": "", - "size": 0, - "type": FileType.FOLDER.value, - } - ) - - sub_files = FileService.list_all_files_by_parent_id(source_file_entry.id) - for sub_file in sub_files: - _move_entry_recursive(sub_file, new_folder) - - FileService.delete_by_id(source_file_entry.id) - return - - old_parent_id = source_file_entry.parent_id - old_location = source_file_entry.location - filename = source_file_entry.name - - new_location = filename - while settings.STORAGE_IMPL.obj_exist(dest_folder.id, new_location): - new_location += "_" - - try: - settings.STORAGE_IMPL.move(old_parent_id, old_location, dest_folder.id, new_location) - except Exception as storage_err: - raise RuntimeError(f"Move file failed at storage layer: {str(storage_err)}") - - FileService.update_by_id( - source_file_entry.id, - { - "parent_id": dest_folder.id, - "location": new_location, - }, - ) - - def _move_sync(): - for file in files: - _move_entry_recursive(file, dest_folder) - return get_json_result(data=True) - - return await thread_pool_exec(_move_sync) - - except Exception as e: - return server_error_response(e) +# for file_obj in file_objs: +# if file_obj.filename == '': +# return get_json_result( +# data=False, message='No file selected!', code=RetCode.ARGUMENT_ERROR) +# file_res = [] +# try: +# e, pf_folder = FileService.get_by_id(pf_id) +# if not e: +# return get_data_error_result( message="Can't find this folder!") +# +# async def _handle_single_file(file_obj): +# MAX_FILE_NUM_PER_USER: int = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0)) +# if 0 < MAX_FILE_NUM_PER_USER <= await thread_pool_exec(DocumentService.get_doc_count, current_user.id): +# return get_data_error_result( message="Exceed the maximum file number of a free user!") +# +# # split file name path +# if not file_obj.filename: +# file_obj_names = [pf_folder.name, file_obj.filename] +# else: +# full_path = '/' + file_obj.filename +# file_obj_names = full_path.split('/') +# file_len = len(file_obj_names) +# +# # get folder +# file_id_list = await thread_pool_exec(FileService.get_id_list_by_id, pf_id, file_obj_names, 1, [pf_id]) +# len_id_list = len(file_id_list) +# +# # create folder +# if file_len != len_id_list: +# e, file = await thread_pool_exec(FileService.get_by_id, file_id_list[len_id_list - 1]) +# if not e: +# return get_data_error_result(message="Folder not found!") +# last_folder = await thread_pool_exec(FileService.create_folder, file, 
file_id_list[len_id_list - 1], file_obj_names, +# len_id_list) +# else: +# e, file = await thread_pool_exec(FileService.get_by_id, file_id_list[len_id_list - 2]) +# if not e: +# return get_data_error_result(message="Folder not found!") +# last_folder = await thread_pool_exec(FileService.create_folder, file, file_id_list[len_id_list - 2], file_obj_names, +# len_id_list) +# +# # file type +# filetype = filename_type(file_obj_names[file_len - 1]) +# location = file_obj_names[file_len - 1] +# while await thread_pool_exec(settings.STORAGE_IMPL.obj_exist, last_folder.id, location): +# location += "_" +# blob = await thread_pool_exec(file_obj.read) +# filename = await thread_pool_exec( +# duplicate_name, +# FileService.query, +# name=file_obj_names[file_len - 1], +# parent_id=last_folder.id) +# await thread_pool_exec(settings.STORAGE_IMPL.put, last_folder.id, location, blob) +# file_data = { +# "id": get_uuid(), +# "parent_id": last_folder.id, +# "tenant_id": current_user.id, +# "created_by": current_user.id, +# "type": filetype, +# "name": filename, +# "location": location, +# "size": len(blob), +# } +# inserted = await thread_pool_exec(FileService.insert, file_data) +# return inserted.to_json() +# +# for file_obj in file_objs: +# res = await _handle_single_file(file_obj) +# file_res.append(res) +# +# return get_json_result(data=file_res) +# except Exception as e: +# return server_error_response(e) +# +# +# @manager.route('/create', methods=['POST']) # noqa: F821 +# @login_required +# @validate_request("name") +# async def create(): +# req = await get_request_json() +# pf_id = req.get("parent_id") +# input_file_type = req.get("type") +# if not pf_id: +# root_folder = FileService.get_root_folder(current_user.id) +# pf_id = root_folder["id"] +# +# try: +# if not FileService.is_parent_folder_exist(pf_id): +# return get_json_result( +# data=False, message="Parent Folder Doesn't Exist!", code=RetCode.OPERATING_ERROR) +# if FileService.query(name=req["name"], parent_id=pf_id): +# return get_data_error_result( +# message="Duplicated folder name in the same folder.") +# +# if input_file_type == FileType.FOLDER.value: +# file_type = FileType.FOLDER.value +# else: +# file_type = FileType.VIRTUAL.value +# +# file = FileService.insert({ +# "id": get_uuid(), +# "parent_id": pf_id, +# "tenant_id": current_user.id, +# "created_by": current_user.id, +# "name": req["name"], +# "location": "", +# "size": 0, +# "type": file_type +# }) +# +# return get_json_result(data=file.to_json()) +# except Exception as e: +# return server_error_response(e) +# +# +# @manager.route('/list', methods=['GET']) # noqa: F821 +# @login_required +# def list_files(): +# pf_id = request.args.get("parent_id") +# +# keywords = request.args.get("keywords", "") +# +# page_number = int(request.args.get("page", 1)) +# items_per_page = int(request.args.get("page_size", 15)) +# orderby = request.args.get("orderby", "create_time") +# desc = request.args.get("desc", True) +# if not pf_id: +# root_folder = FileService.get_root_folder(current_user.id) +# pf_id = root_folder["id"] +# FileService.init_knowledgebase_docs(pf_id, current_user.id) +# try: +# e, file = FileService.get_by_id(pf_id) +# if not e: +# return get_data_error_result(message="Folder not found!") +# +# files, total = FileService.get_by_pf_id( +# current_user.id, pf_id, page_number, items_per_page, orderby, desc, keywords) +# +# parent_folder = FileService.get_parent_folder(pf_id) +# if not parent_folder: +# return get_json_result(message="File not found!") +# +# return 
get_json_result(data={"total": total, "files": files, "parent_folder": parent_folder.to_json()}) +# except Exception as e: +# return server_error_response(e) +# +# +# @manager.route('/root_folder', methods=['GET']) # noqa: F821 +# @login_required +# def get_root_folder(): +# try: +# root_folder = FileService.get_root_folder(current_user.id) +# return get_json_result(data={"root_folder": root_folder}) +# except Exception as e: +# return server_error_response(e) +# +# +# @manager.route('/parent_folder', methods=['GET']) # noqa: F821 +# @login_required +# def get_parent_folder(): +# file_id = request.args.get("file_id") +# try: +# e, file = FileService.get_by_id(file_id) +# if not e: +# return get_data_error_result(message="Folder not found!") +# +# parent_folder = FileService.get_parent_folder(file_id) +# return get_json_result(data={"parent_folder": parent_folder.to_json()}) +# except Exception as e: +# return server_error_response(e) +# +# +# @manager.route('/all_parent_folder', methods=['GET']) # noqa: F821 +# @login_required +# def get_all_parent_folders(): +# file_id = request.args.get("file_id") +# try: +# e, file = FileService.get_by_id(file_id) +# if not e: +# return get_data_error_result(message="Folder not found!") +# +# parent_folders = FileService.get_all_parent_folders(file_id) +# parent_folders_res = [] +# for parent_folder in parent_folders: +# parent_folders_res.append(parent_folder.to_json()) +# return get_json_result(data={"parent_folders": parent_folders_res}) +# except Exception as e: +# return server_error_response(e) +# +# +# @manager.route("/rm", methods=["POST"]) # noqa: F821 +# @login_required +# @validate_request("file_ids") +# async def rm(): +# req = await get_request_json() +# file_ids = req["file_ids"] +# uid = current_user.id +# +# try: +# def _delete_single_file(file): +# try: +# if file.location: +# settings.STORAGE_IMPL.rm(file.parent_id, file.location) +# except Exception as e: +# logging.exception(f"Fail to remove object: {file.parent_id}/{file.location}, error: {e}") +# +# informs = File2DocumentService.get_by_file_id(file.id) +# for inform in informs: +# doc_id = inform.document_id +# e, doc = DocumentService.get_by_id(doc_id) +# if e and doc: +# tenant_id = DocumentService.get_tenant_id(doc_id) +# if tenant_id: +# DocumentService.remove_document(doc, tenant_id) +# File2DocumentService.delete_by_file_id(file.id) +# +# FileService.delete(file) +# +# def _delete_folder_recursive(folder, tenant_id): +# sub_files = FileService.list_all_files_by_parent_id(folder.id) +# for sub_file in sub_files: +# if sub_file.type == FileType.FOLDER.value: +# _delete_folder_recursive(sub_file, tenant_id) +# else: +# _delete_single_file(sub_file) +# +# FileService.delete(folder) +# +# def _rm_sync(): +# for file_id in file_ids: +# e, file = FileService.get_by_id(file_id) +# if not e or not file: +# return get_data_error_result(message="File or Folder not found!") +# if not file.tenant_id: +# return get_data_error_result(message="Tenant not found!") +# if not check_file_team_permission(file, uid): +# return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR) +# +# if file.source_type == FileSource.KNOWLEDGEBASE: +# continue +# +# if file.type == FileType.FOLDER.value: +# _delete_folder_recursive(file, uid) +# continue +# +# _delete_single_file(file) +# +# return get_json_result(data=True) +# +# return await thread_pool_exec(_rm_sync) +# +# except Exception as e: +# return server_error_response(e) +# +# +# @manager.route('/rename', 
methods=['POST']) # noqa: F821 +# @login_required +# @validate_request("file_id", "name") +# async def rename(): +# req = await get_request_json() +# try: +# e, file = FileService.get_by_id(req["file_id"]) +# if not e: +# return get_data_error_result(message="File not found!") +# if not check_file_team_permission(file, current_user.id): +# return get_json_result(data=False, message='No authorization.', code=RetCode.AUTHENTICATION_ERROR) +# if file.type != FileType.FOLDER.value \ +# and pathlib.Path(req["name"].lower()).suffix != pathlib.Path( +# file.name.lower()).suffix: +# return get_json_result( +# data=False, +# message="The extension of file can't be changed", +# code=RetCode.ARGUMENT_ERROR) +# for file in FileService.query(name=req["name"], pf_id=file.parent_id): +# if file.name == req["name"]: +# return get_data_error_result( +# message="Duplicated file name in the same folder.") +# +# if not FileService.update_by_id( +# req["file_id"], {"name": req["name"]}): +# return get_data_error_result( +# message="Database error (File rename)!") +# +# informs = File2DocumentService.get_by_file_id(req["file_id"]) +# if informs: +# if not DocumentService.update_by_id( +# informs[0].document_id, {"name": req["name"]}): +# return get_data_error_result( +# message="Database error (Document rename)!") +# +# return get_json_result(data=True) +# except Exception as e: +# return server_error_response(e) +# +# +# @manager.route('/get/', methods=['GET']) # noqa: F821 +# @login_required +# async def get(file_id): +# try: +# e, file = FileService.get_by_id(file_id) +# if not e: +# return get_data_error_result(message="Document not found!") +# if not check_file_team_permission(file, current_user.id): +# return get_json_result(data=False, message='No authorization.', code=RetCode.AUTHENTICATION_ERROR) +# +# blob = await thread_pool_exec(settings.STORAGE_IMPL.get, file.parent_id, file.location) +# if not blob: +# b, n = File2DocumentService.get_storage_address(file_id=file_id) +# blob = await thread_pool_exec(settings.STORAGE_IMPL.get, b, n) +# +# response = await make_response(blob) +# ext = re.search(r"\.([^.]+)$", file.name.lower()) +# ext = ext.group(1) if ext else None +# content_type = None +# if ext: +# fallback_prefix = "image" if file.type == FileType.VISUAL.value else "application" +# content_type = CONTENT_TYPE_MAP.get(ext, f"{fallback_prefix}/{ext}") +# apply_safe_file_response_headers(response, content_type, ext) +# return response +# except Exception as e: +# return server_error_response(e) +# +# +# @manager.route("/mv", methods=["POST"]) # noqa: F821 +# @login_required +# @validate_request("src_file_ids", "dest_file_id") +# async def move(): +# req = await get_request_json() +# try: +# file_ids = req["src_file_ids"] +# dest_parent_id = req["dest_file_id"] +# +# ok, dest_folder = FileService.get_by_id(dest_parent_id) +# if not ok or not dest_folder: +# return get_data_error_result(message="Parent folder not found!") +# +# files = FileService.get_by_ids(file_ids) +# if not files: +# return get_data_error_result(message="Source files not found!") +# +# files_dict = {f.id: f for f in files} +# +# for file_id in file_ids: +# file = files_dict.get(file_id) +# if not file: +# return get_data_error_result(message="File or folder not found!") +# if not file.tenant_id: +# return get_data_error_result(message="Tenant not found!") +# if not check_file_team_permission(file, current_user.id): +# return get_json_result( +# data=False, +# message="No authorization.", +# code=RetCode.AUTHENTICATION_ERROR, +# ) 
+# +# def _move_entry_recursive(source_file_entry, dest_folder): +# if source_file_entry.type == FileType.FOLDER.value: +# existing_folder = FileService.query(name=source_file_entry.name, parent_id=dest_folder.id) +# if existing_folder: +# new_folder = existing_folder[0] +# else: +# new_folder = FileService.insert( +# { +# "id": get_uuid(), +# "parent_id": dest_folder.id, +# "tenant_id": source_file_entry.tenant_id, +# "created_by": current_user.id, +# "name": source_file_entry.name, +# "location": "", +# "size": 0, +# "type": FileType.FOLDER.value, +# } +# ) +# +# sub_files = FileService.list_all_files_by_parent_id(source_file_entry.id) +# for sub_file in sub_files: +# _move_entry_recursive(sub_file, new_folder) +# +# FileService.delete_by_id(source_file_entry.id) +# return +# +# old_parent_id = source_file_entry.parent_id +# old_location = source_file_entry.location +# filename = source_file_entry.name +# +# new_location = filename +# while settings.STORAGE_IMPL.obj_exist(dest_folder.id, new_location): +# new_location += "_" +# +# try: +# settings.STORAGE_IMPL.move(old_parent_id, old_location, dest_folder.id, new_location) +# except Exception as storage_err: +# raise RuntimeError(f"Move file failed at storage layer: {str(storage_err)}") +# +# FileService.update_by_id( +# source_file_entry.id, +# { +# "parent_id": dest_folder.id, +# "location": new_location, +# }, +# ) +# +# def _move_sync(): +# for file in files: +# _move_entry_recursive(file, dest_folder) +# return get_json_result(data=True) +# +# return await thread_pool_exec(_move_sync) +# +# except Exception as e: +# return server_error_response(e) diff --git a/api/apps/restful_apis/file_api.py b/api/apps/restful_apis/file_api.py new file mode 100644 index 000000000..fbe1e39d5 --- /dev/null +++ b/api/apps/restful_apis/file_api.py @@ -0,0 +1,364 @@ +# +# Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import logging +import re + +from quart import request, make_response +from api.apps import login_required +from api.db import FileType +from api.db.services.file2document_service import File2DocumentService +from api.utils.api_utils import ( + add_tenant_id_to_kwargs, + get_error_argument_result, + get_error_data_result, + get_result, +) +from api.utils.validation_utils import ( + CreateFolderReq, + DeleteFileReq, + ListFileReq, + MoveFileReq, + validate_and_parse_json_request, + validate_and_parse_request_args, +) +from api.utils.web_utils import CONTENT_TYPE_MAP, apply_safe_file_response_headers +from common import settings +from common.misc_utils import thread_pool_exec +from api.apps.services import file_api_service + + +@manager.route("/files", methods=["POST"]) # noqa: F821 +@login_required +@add_tenant_id_to_kwargs +async def create_or_upload(tenant_id: str = None): + """ + Upload files or create a folder. 
+ --- + tags: + - Files + security: + - ApiKeyAuth: [] + parameters: + - in: header + name: Authorization + type: string + required: true + description: Bearer token for authentication. + responses: + 200: + description: Successful operation. + """ + content_type = request.content_type or "" + try: + if "multipart/form-data" in content_type: + form = await request.form + pf_id = form.get("parent_id") + files = await request.files + if 'file' not in files: + return get_error_argument_result("No file part!") + file_objs = files.getlist('file') + for file_obj in file_objs: + if file_obj.filename == '': + return get_error_argument_result("No file selected!") + + success, result = await file_api_service.upload_file(tenant_id, pf_id, file_objs) + if success: + return get_result(data=result) + else: + return get_error_data_result(message=result) + else: + req, err = await validate_and_parse_json_request(request, CreateFolderReq) + if err is not None: + return get_error_argument_result(err) + + success, result = await file_api_service.create_folder( + tenant_id, req["name"], req.get("parent_id"), req.get("type") + ) + if success: + return get_result(data=result) + else: + return get_error_data_result(message=result) + except Exception as e: + logging.exception(e) + return get_error_data_result(message="Internal server error") + + +@manager.route("/files", methods=["GET"]) # noqa: F821 +@login_required +@add_tenant_id_to_kwargs +def list_files(tenant_id: str = None): + """ + List files under a folder. + --- + tags: + - Files + security: + - ApiKeyAuth: [] + parameters: + - in: query + name: parent_id + type: string + description: Folder ID to list files from. + - in: query + name: keywords + type: string + description: Search keyword filter. + - in: query + name: page + type: integer + default: 1 + - in: query + name: page_size + type: integer + default: 15 + - in: query + name: orderby + type: string + default: "create_time" + - in: query + name: desc + type: boolean + default: true + responses: + 200: + description: Successful operation. + """ + args, err = validate_and_parse_request_args(request, ListFileReq) + if err is not None: + return get_error_argument_result(err) + + try: + success, result = file_api_service.list_files(tenant_id, args) + if success: + return get_result(data=result) + else: + return get_error_data_result(message=result) + except Exception as e: + logging.exception(e) + return get_error_data_result(message="Internal server error") + + +@manager.route("/files", methods=["DELETE"]) # noqa: F821 +@login_required +@add_tenant_id_to_kwargs +async def delete(tenant_id: str = None): + """ + Delete files. + --- + tags: + - Files + security: + - ApiKeyAuth: [] + parameters: + - in: body + name: body + required: true + schema: + type: object + required: + - ids + properties: + ids: + type: array + items: + type: string + description: List of file IDs to delete. + responses: + 200: + description: Successful operation. 
+ """ + req, err = await validate_and_parse_json_request(request, DeleteFileReq) + if err is not None: + return get_error_argument_result(err) + + try: + success, result = await file_api_service.delete_files(tenant_id, req["ids"]) + if success: + return get_result(data=result) + else: + return get_error_data_result(message=result) + except Exception as e: + logging.exception(e) + return get_error_data_result(message="Internal server error") + + + +@manager.route("/files/move", methods=["POST"]) # noqa: F821 +@login_required +@add_tenant_id_to_kwargs +async def move(tenant_id: str = None): + """ + Move and/or rename files. Follows Linux mv semantics: + at least one of dest_file_id or new_name must be provided. + - dest_file_id only: move files to a new folder (names unchanged). + - new_name only: rename a single file in place (no storage operation). + - both: move and rename simultaneously. + --- + tags: + - Files + security: + - ApiKeyAuth: [] + parameters: + - in: body + name: body + required: true + schema: + type: object + required: + - src_file_ids + properties: + src_file_ids: + type: array + items: + type: string + description: List of source file IDs. Required. + dest_file_id: + type: string + description: Destination folder ID. Optional; omit to rename in place. + new_name: + type: string + description: New file name. Optional; only valid for a single source file. + responses: + 200: + description: Successful operation. + """ + req, err = await validate_and_parse_json_request(request, MoveFileReq) + if err is not None: + return get_error_argument_result(err) + + try: + success, result = await file_api_service.move_files( + tenant_id, req["src_file_ids"], req.get("dest_file_id"), req.get("new_name") + ) + if success: + return get_result(data=result) + else: + return get_error_data_result(message=result) + except Exception as e: + logging.exception(e) + return get_error_data_result(message="Internal server error") + + +@manager.route("/files/", methods=["GET"]) # noqa: F821 +@login_required +@add_tenant_id_to_kwargs +async def download(tenant_id: str = None, file_id: str = None): + """ + Download a file. + --- + tags: + - Files + security: + - ApiKeyAuth: [] + produces: + - application/octet-stream + parameters: + - in: path + name: file_id + type: string + required: true + description: File ID to download. + responses: + 200: + description: File stream. + """ + try: + success, result = file_api_service.get_file_content(tenant_id, file_id) + if not success: + return get_error_data_result(message=result) + + file = result + blob = await thread_pool_exec(settings.STORAGE_IMPL.get, file.parent_id, file.location) + if not blob: + b, n = File2DocumentService.get_storage_address(file_id=file_id) + blob = await thread_pool_exec(settings.STORAGE_IMPL.get, b, n) + + response = await make_response(blob) + ext = re.search(r"\.([^.]+)$", file.name.lower()) + ext = ext.group(1) if ext else None + content_type = None + if ext: + fallback_prefix = "image" if file.type == FileType.VISUAL.value else "application" + content_type = CONTENT_TYPE_MAP.get(ext, f"{fallback_prefix}/{ext}") + apply_safe_file_response_headers(response, content_type, ext) + return response + except Exception as e: + logging.exception(e) + return get_error_data_result(message="Internal server error") + + +@manager.route("/files//parent", methods=["GET"]) # noqa: F821 +@login_required +@add_tenant_id_to_kwargs +def parent_folder(tenant_id: str = None, file_id: str = None): + """ + Get parent folder of a file. 
+ --- + tags: + - Files + security: + - ApiKeyAuth: [] + parameters: + - in: path + name: file_id + type: string + required: true + responses: + 200: + description: Parent folder information. + """ + try: + success, result = file_api_service.get_parent_folder(file_id) + if success: + return get_result(data=result) + else: + return get_error_data_result(message=result) + except Exception as e: + logging.exception(e) + return get_error_data_result(message="Internal server error") + + +@manager.route("/files//ancestors", methods=["GET"]) # noqa: F821 +@login_required +@add_tenant_id_to_kwargs +def ancestors(tenant_id: str = None, file_id: str = None): + """ + Get all ancestor folders of a file. + --- + tags: + - Files + security: + - ApiKeyAuth: [] + parameters: + - in: path + name: file_id + type: string + required: true + responses: + 200: + description: List of ancestor folders. + """ + try: + success, result = file_api_service.get_all_parent_folders(file_id) + if success: + return get_result(data=result) + else: + return get_error_data_result(message=result) + except Exception as e: + logging.exception(e) + return get_error_data_result(message="Internal server error") + + diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py index eb71e41bc..2da815774 100644 --- a/api/apps/sdk/doc.py +++ b/api/apps/sdk/doc.py @@ -1,5 +1,5 @@ # -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# Copyright 2026 The InfiniFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -21,34 +21,33 @@ import re from io import BytesIO import xxhash -from quart import request, send_file from peewee import OperationalError from pydantic import BaseModel, Field, validator +from quart import request, send_file from api.constants import FILE_NAME_LEN_LIMIT from api.db import FileType from api.db.db_models import APIToken, File, Task -from api.db.services.document_service import DocumentService +from api.db.joint_services.tenant_model_service import get_model_config_by_id, get_model_config_by_type_and_name, get_tenant_default_model_by_type from api.db.services.doc_metadata_service import DocMetadataService +from api.db.services.document_service import DocumentService from api.db.services.file2document_service import File2DocumentService from api.db.services.file_service import FileService from api.db.services.knowledgebase_service import KnowledgebaseService from api.db.services.llm_service import LLMBundle +from api.db.services.task_service import TaskService, cancel_all_task_of, queue_tasks from api.db.services.tenant_llm_service import TenantLLMService -from api.db.services.task_service import TaskService, queue_tasks, cancel_all_task_of -from api.db.joint_services.tenant_model_service import get_model_config_by_id, get_tenant_default_model_by_type, get_model_config_by_type_and_name -from common.metadata_utils import meta_filter, convert_conditions -from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_error_data_result, get_parser_config, get_result, server_error_response, token_required, \ - get_request_json +from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_error_data_result, get_parser_config, get_request_json, get_result, server_error_response, token_required +from api.utils.image_utils import store_chunk_image +from common import settings +from common.constants import FileSource, LLMType, ParserType, RetCode, TaskStatus +from 
common.metadata_utils import convert_conditions, meta_filter +from common.misc_utils import thread_pool_exec +from common.string_utils import remove_redundant_spaces from rag.app.qa import beAdoc, rmPrefix from rag.app.tag import label_question from rag.nlp import rag_tokenizer, search from rag.prompts.generator import cross_languages, keyword_extraction -from common.string_utils import remove_redundant_spaces -from common.misc_utils import thread_pool_exec -from common.constants import RetCode, LLMType, ParserType, TaskStatus, FileSource -from common import settings -from api.utils.image_utils import store_chunk_image MAXIMUM_OF_UPLOADING_FILES = 256 @@ -162,7 +161,7 @@ async def upload(dataset_id, tenant_id): """ e, kb = KnowledgebaseService.get_by_id(dataset_id) if not e: - raise LookupError(f"Can't find the dataset with ID {dataset_id}!") + return server_error_response(LookupError(f"Can't find the dataset with ID {dataset_id}!")) err, files = FileService.upload_document(kb, file_objs, tenant_id, parent_path=form.get("parent_path")) if err: return get_result(message="\n".join(err), code=RetCode.SERVER_ERROR) @@ -263,6 +262,8 @@ async def update_doc(tenant_id, dataset_id, document_id): return get_error_data_result(message="Failed to update metadata") if "name" in req and req["name"] != doc.name: + if not isinstance(req["name"], str): + return server_error_response(AttributeError(f"'{type(req['name']).__name__}' object has no attribute 'encode'")) if len(req["name"].encode("utf-8")) > FILE_NAME_LEN_LIMIT: return get_result( message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", @@ -426,12 +427,12 @@ async def download(tenant_id, dataset_id, document_id): async def download_doc(document_id): token = request.headers.get("Authorization").split() if len(token) != 2: - return get_error_data_result(message='Authorization is not valid!') + return get_error_data_result(message="Authorization is not valid!") token = token[1] objs = APIToken.query(beta=token) if not objs: return get_error_data_result(message='Authentication error: API key is invalid!"') - + if not document_id: return get_error_data_result(message="Specify document_id please.") doc = DocumentService.query(id=document_id) @@ -565,28 +566,28 @@ def list_docs(dataset_id, tenant_id): description: Processing status. """ if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id): - return get_error_data_result(message=f"You don't own the dataset {dataset_id}. ") + return get_error_data_result(message=f"You don't own the dataset {dataset_id}. 
") q = request.args document_id = q.get("id") - name = q.get("name") + name = q.get("name") if document_id and not DocumentService.query(id=document_id, kb_id=dataset_id): return get_error_data_result(message=f"You don't own the document {document_id}.") if name and not DocumentService.query(name=name, kb_id=dataset_id): return get_error_data_result(message=f"You don't own the document {name}.") - page = int(q.get("page", 1)) - page_size = int(q.get("page_size", 30)) - orderby = q.get("orderby", "create_time") - desc = str(q.get("desc", "true")).strip().lower() != "false" - keywords = q.get("keywords", "") + page = int(q.get("page", 1)) + page_size = int(q.get("page_size", 30)) + orderby = q.get("orderby", "create_time") + desc = str(q.get("desc", "true")).strip().lower() != "false" + keywords = q.get("keywords", "") # filters - align with OpenAPI parameter names - suffix = q.getlist("suffix") - run_status = q.getlist("run") - create_time_from = int(q.get("create_time_from", 0)) - create_time_to = int(q.get("create_time_to", 0)) + suffix = q.getlist("suffix") + run_status = q.getlist("run") + create_time_from = int(q.get("create_time_from", 0)) + create_time_to = int(q.get("create_time_to", 0)) metadata_condition_raw = q.get("metadata_condition") metadata_condition = {} if metadata_condition_raw: @@ -608,17 +609,11 @@ def list_docs(dataset_id, tenant_id): if metadata_condition.get("conditions") and not doc_ids_filter: return get_result(data={"total": 0, "docs": []}) - docs, total = DocumentService.get_list( - dataset_id, page, page_size, orderby, desc, keywords, document_id, name, suffix, run_status_converted, doc_ids_filter - ) + docs, total = DocumentService.get_list(dataset_id, page, page_size, orderby, desc, keywords, document_id, name, suffix, run_status_converted, doc_ids_filter) # time range filter (0 means no bound) if create_time_from or create_time_to: - docs = [ - d for d in docs - if (create_time_from == 0 or d.get("create_time", 0) >= create_time_from) - and (create_time_to == 0 or d.get("create_time", 0) <= create_time_to) - ] + docs = [d for d in docs if (create_time_from == 0 or d.get("create_time", 0) >= create_time_from) and (create_time_to == 0 or d.get("create_time", 0) <= create_time_to)] # rename keys + map run status back to text for output key_mapping = { @@ -682,7 +677,7 @@ async def metadata_batch_update(dataset_id, tenant_id): for d in deletes: if not isinstance(d, dict) or not d.get("key"): return get_error_data_result(message="Each delete requires key.") - + if document_ids: kb_doc_ids = KnowledgebaseService.list_documents_by_ids([dataset_id]) target_doc_ids = set(kb_doc_ids) @@ -702,6 +697,7 @@ async def metadata_batch_update(dataset_id, tenant_id): updated = DocMetadataService.batch_update_metadata(dataset_id, target_doc_ids, updates, deletes) return get_result(data={"updated": updated, "matched_docs": len(target_doc_ids)}) + @manager.route("/datasets//documents", methods=["DELETE"]) # noqa: F821 @token_required async def delete(tenant_id, dataset_id): @@ -957,7 +953,7 @@ async def stop_parsing(tenant_id, dataset_id): doc = DocumentService.query(id=id, kb_id=dataset_id) if not doc: return get_error_data_result(message=f"You don't own the document {id}.") - if doc[0].run != TaskStatus.RUNNING.value : + if doc[0].run != TaskStatus.RUNNING.value: return construct_json_result( code=RetCode.DATA_ERROR, message=DOC_STOP_PARSING_INVALID_STATE_MESSAGE, @@ -1259,6 +1255,7 @@ async def add_chunk(tenant_id, dataset_id, document_id): if "tag_feas" in req: d["tag_feas"] = 
req["tag_feas"] import base64 + image_base64 = req.get("image_base64", None) if image_base64: d["img_id"] = "{}-{}".format(dataset_id, chunk_id) @@ -1702,8 +1699,8 @@ async def retrieval_test(tenant_id): toc_enhance = req.get("toc_enhance", False) langs = req.get("cross_languages", []) if not isinstance(doc_ids, list): - return get_error_data_result("`documents` should be a list") - if doc_ids: + return get_error_data_result("`documents` should be a list") + if doc_ids: doc_ids_list = KnowledgebaseService.list_documents_by_ids(kb_ids) for doc_id in doc_ids: if doc_id not in doc_ids_list: diff --git a/api/apps/sdk/files.py b/api/apps/sdk/files.py deleted file mode 100644 index 4d762017e..000000000 --- a/api/apps/sdk/files.py +++ /dev/null @@ -1,835 +0,0 @@ -# -# Copyright 2025 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import pathlib -import re -from quart import request, make_response -from pathlib import Path - -from api.db.services.document_service import DocumentService -from api.db.services.file2document_service import File2DocumentService -from api.db.services.knowledgebase_service import KnowledgebaseService -from api.utils.api_utils import get_json_result, get_request_json, server_error_response, token_required -from common.misc_utils import get_uuid, thread_pool_exec -from api.db import FileType -from api.db.services import duplicate_name -from api.db.services.file_service import FileService -from api.utils.file_utils import filename_type -from api.utils.web_utils import CONTENT_TYPE_MAP, apply_safe_file_response_headers -from common import settings -from common.constants import RetCode - -@manager.route('/file/upload', methods=['POST']) # noqa: F821 -@token_required -async def upload(tenant_id): - """ - Upload a file to the system. - --- - tags: - - File - security: - - ApiKeyAuth: [] - parameters: - - in: formData - name: file - type: file - required: true - description: The file to upload - - in: formData - name: parent_id - type: string - description: Parent folder ID where the file will be uploaded. Optional. - responses: - 200: - description: Successfully uploaded the file. 
- schema: - type: object - properties: - data: - type: array - items: - type: object - properties: - id: - type: string - description: File ID - name: - type: string - description: File name - size: - type: integer - description: File size in bytes - type: - type: string - description: File type (e.g., document, folder) - """ - form = await request.form - files = await request.files - pf_id = form.get("parent_id") - - if not pf_id: - root_folder = FileService.get_root_folder(tenant_id) - pf_id = root_folder["id"] - - if 'file' not in files: - return get_json_result(data=False, message='No file part!', code=RetCode.BAD_REQUEST) - file_objs = files.getlist('file') - - for file_obj in file_objs: - if file_obj.filename == '': - return get_json_result(data=False, message='No selected file!', code=RetCode.BAD_REQUEST) - - file_res = [] - - try: - e, pf_folder = FileService.get_by_id(pf_id) - if not e: - return get_json_result(data=False, message="Can't find this folder!", code=RetCode.NOT_FOUND) - - for file_obj in file_objs: - # Handle file path - full_path = '/' + file_obj.filename - file_obj_names = full_path.split('/') - file_len = len(file_obj_names) - - # Get folder path ID - file_id_list = FileService.get_id_list_by_id(pf_id, file_obj_names, 1, [pf_id]) - len_id_list = len(file_id_list) - - # Crete file folder - if file_len != len_id_list: - e, file = FileService.get_by_id(file_id_list[len_id_list - 1]) - if not e: - return get_json_result(data=False, message="Folder not found!", code=RetCode.NOT_FOUND) - last_folder = FileService.create_folder(file, file_id_list[len_id_list - 1], file_obj_names, - len_id_list) - else: - e, file = FileService.get_by_id(file_id_list[len_id_list - 2]) - if not e: - return get_json_result(data=False, message="Folder not found!", code=RetCode.NOT_FOUND) - last_folder = FileService.create_folder(file, file_id_list[len_id_list - 2], file_obj_names, - len_id_list) - - filetype = filename_type(file_obj_names[file_len - 1]) - location = file_obj_names[file_len - 1] - while settings.STORAGE_IMPL.obj_exist(last_folder.id, location): - location += "_" - blob = file_obj.read() - filename = duplicate_name(FileService.query, name=file_obj_names[file_len - 1], parent_id=last_folder.id) - - file = { - "id": get_uuid(), - "parent_id": last_folder.id, - "tenant_id": tenant_id, - "created_by": tenant_id, - "type": filetype, - "name": filename, - "location": location, - "size": len(blob), - } - file = FileService.insert(file) - settings.STORAGE_IMPL.put(last_folder.id, location, blob) - file_res.append(file.to_json()) - return get_json_result(data=file_res) - except Exception as e: - return server_error_response(e) - - -@manager.route("/file/upload_info", methods=["POST"]) # noqa: F821 -@token_required -async def upload_info(tenant_id): - """ - Upload runtime file metadata for SDK chat completions. - --- - tags: - - File - security: - - ApiKeyAuth: [] - parameters: - - in: formData - name: file - type: file - required: false - description: File(s) to upload as runtime attachments. - - in: query - name: url - type: string - required: false - description: Optional URL to fetch and convert into a runtime attachment. - responses: - 200: - description: Runtime attachment descriptor(s) for the `files` field in completions requests. 
- """ - files = await request.files - file_objs = files.getlist("file") if files and files.get("file") else [] - url = request.args.get("url") - - if file_objs and url: - return get_json_result( - data=False, - message="Provide either multipart file(s) or ?url=..., not both.", - code=RetCode.BAD_REQUEST, - ) - - if not file_objs and not url: - return get_json_result( - data=False, - message="Missing input: provide multipart file(s) or url", - code=RetCode.BAD_REQUEST, - ) - - try: - if url and not file_objs: - return get_json_result(data=FileService.upload_info(tenant_id, None, url)) - - if len(file_objs) == 1: - return get_json_result(data=FileService.upload_info(tenant_id, file_objs[0], None)) - - results = [FileService.upload_info(tenant_id, f) for f in file_objs] - return get_json_result(data=results) - except Exception as e: - return server_error_response(e) - - -@manager.route('/file/create', methods=['POST']) # noqa: F821 -@token_required -async def create(tenant_id): - """ - Create a new file or folder. - --- - tags: - - File - security: - - ApiKeyAuth: [] - parameters: - - in: body - name: body - description: File creation parameters - required: true - schema: - type: object - properties: - name: - type: string - description: Name of the file/folder - parent_id: - type: string - description: Parent folder ID. Optional. - type: - type: string - enum: ["FOLDER", "VIRTUAL"] - description: Type of the file - responses: - 200: - description: File created successfully. - schema: - type: object - properties: - data: - type: object - properties: - id: - type: string - name: - type: string - type: - type: string - """ - req = await get_request_json() - pf_id = req.get("parent_id") - input_file_type = req.get("type") - if not pf_id: - root_folder = FileService.get_root_folder(tenant_id) - pf_id = root_folder["id"] - - try: - if not FileService.is_parent_folder_exist(pf_id): - return get_json_result(data=False, message="Parent Folder Doesn't Exist!", code=RetCode.BAD_REQUEST) - if FileService.query(name=req["name"], parent_id=pf_id): - return get_json_result(data=False, message="Duplicated folder name in the same folder.", - code=RetCode.CONFLICT) - - if input_file_type == FileType.FOLDER.value: - file_type = FileType.FOLDER.value - else: - file_type = FileType.VIRTUAL.value - - file = FileService.insert({ - "id": get_uuid(), - "parent_id": pf_id, - "tenant_id": tenant_id, - "created_by": tenant_id, - "name": req["name"], - "location": "", - "size": 0, - "type": file_type - }) - - return get_json_result(data=file.to_json()) - except Exception as e: - return server_error_response(e) - - -@manager.route('/file/list', methods=['GET']) # noqa: F821 -@token_required -async def list_files(tenant_id): - """ - List files under a specific folder. - --- - tags: - - File - security: - - ApiKeyAuth: [] - parameters: - - in: query - name: parent_id - type: string - description: Folder ID to list files from - - in: query - name: keywords - type: string - description: Search keyword filter - - in: query - name: page - type: integer - default: 1 - description: Page number - - in: query - name: page_size - type: integer - default: 15 - description: Number of results per page - - in: query - name: orderby - type: string - default: "create_time" - description: Sort by field - - in: query - name: desc - type: boolean - default: true - description: Descending order - responses: - 200: - description: Successfully retrieved file list. 
- schema: - type: object - properties: - total: - type: integer - files: - type: array - items: - type: object - properties: - id: - type: string - name: - type: string - type: - type: string - size: - type: integer - create_time: - type: string - format: date-time - """ - pf_id = request.args.get("parent_id") - keywords = request.args.get("keywords", "") - page_number = int(request.args.get("page", 1)) - items_per_page = int(request.args.get("page_size", 15)) - orderby = request.args.get("orderby", "create_time") - desc = request.args.get("desc", True) - - if not pf_id: - root_folder = FileService.get_root_folder(tenant_id) - pf_id = root_folder["id"] - FileService.init_knowledgebase_docs(pf_id, tenant_id) - - try: - e, file = FileService.get_by_id(pf_id) - if not e: - return get_json_result(message="Folder not found!", code=RetCode.NOT_FOUND) - - files, total = FileService.get_by_pf_id(tenant_id, pf_id, page_number, items_per_page, orderby, desc, keywords) - - parent_folder = FileService.get_parent_folder(pf_id) - if not parent_folder: - return get_json_result(message="File not found!", code=RetCode.NOT_FOUND) - - return get_json_result(data={"total": total, "files": files, "parent_folder": parent_folder.to_json()}) - except Exception as e: - return server_error_response(e) - - -@manager.route('/file/root_folder', methods=['GET']) # noqa: F821 -@token_required -async def get_root_folder(tenant_id): - """ - Get user's root folder. - --- - tags: - - File - security: - - ApiKeyAuth: [] - responses: - 200: - description: Root folder information - schema: - type: object - properties: - data: - type: object - properties: - root_folder: - type: object - properties: - id: - type: string - name: - type: string - type: - type: string - """ - try: - root_folder = FileService.get_root_folder(tenant_id) - return get_json_result(data={"root_folder": root_folder}) - except Exception as e: - return server_error_response(e) - - -@manager.route('/file/parent_folder', methods=['GET']) # noqa: F821 -@token_required -async def get_parent_folder(): - """ - Get parent folder info of a file. - --- - tags: - - File - security: - - ApiKeyAuth: [] - parameters: - - in: query - name: file_id - type: string - required: true - description: Target file ID - responses: - 200: - description: Parent folder information - schema: - type: object - properties: - data: - type: object - properties: - parent_folder: - type: object - properties: - id: - type: string - name: - type: string - """ - file_id = request.args.get("file_id") - try: - e, file = FileService.get_by_id(file_id) - if not e: - return get_json_result(message="Folder not found!", code=RetCode.NOT_FOUND) - - parent_folder = FileService.get_parent_folder(file_id) - return get_json_result(data={"parent_folder": parent_folder.to_json()}) - except Exception as e: - return server_error_response(e) - - -@manager.route('/file/all_parent_folder', methods=['GET']) # noqa: F821 -@token_required -async def get_all_parent_folders(tenant_id): - """ - Get all parent folders of a file. 
- --- - tags: - - File - security: - - ApiKeyAuth: [] - parameters: - - in: query - name: file_id - type: string - required: true - description: Target file ID - responses: - 200: - description: All parent folders of the file - schema: - type: object - properties: - data: - type: object - properties: - parent_folders: - type: array - items: - type: object - properties: - id: - type: string - name: - type: string - """ - file_id = request.args.get("file_id") - try: - e, file = FileService.get_by_id(file_id) - if not e: - return get_json_result(message="Folder not found!", code=RetCode.NOT_FOUND) - - parent_folders = FileService.get_all_parent_folders(file_id) - parent_folders_res = [folder.to_json() for folder in parent_folders] - return get_json_result(data={"parent_folders": parent_folders_res}) - except Exception as e: - return server_error_response(e) - - -@manager.route('/file/rm', methods=['POST']) # noqa: F821 -@token_required -async def rm(tenant_id): - """ - Delete one or multiple files/folders. - --- - tags: - - File - security: - - ApiKeyAuth: [] - parameters: - - in: body - name: body - description: Files to delete - required: true - schema: - type: object - properties: - file_ids: - type: array - items: - type: string - description: List of file IDs to delete - responses: - 200: - description: Successfully deleted files - schema: - type: object - properties: - data: - type: boolean - example: true - """ - req = await get_request_json() - file_ids = req["file_ids"] - try: - for file_id in file_ids: - e, file = FileService.get_by_id(file_id) - if not e: - return get_json_result(message="File or Folder not found!", code=RetCode.NOT_FOUND) - if not file.tenant_id: - return get_json_result(message="Tenant not found!", code=RetCode.NOT_FOUND) - - if file.type == FileType.FOLDER.value: - file_id_list = FileService.get_all_innermost_file_ids(file_id, []) - for inner_file_id in file_id_list: - e, file = FileService.get_by_id(inner_file_id) - if not e: - return get_json_result(message="File not found!", code=RetCode.NOT_FOUND) - settings.STORAGE_IMPL.rm(file.parent_id, file.location) - FileService.delete_folder_by_pf_id(tenant_id, file_id) - else: - settings.STORAGE_IMPL.rm(file.parent_id, file.location) - if not FileService.delete(file): - return get_json_result(message="Database error (File removal)!", code=RetCode.SERVER_ERROR) - - informs = File2DocumentService.get_by_file_id(file_id) - for inform in informs: - doc_id = inform.document_id - e, doc = DocumentService.get_by_id(doc_id) - if not e: - return get_json_result(message="Document not found!", code=RetCode.NOT_FOUND) - tenant_id = DocumentService.get_tenant_id(doc_id) - if not tenant_id: - return get_json_result(message="Tenant not found!", code=RetCode.NOT_FOUND) - if not DocumentService.remove_document(doc, tenant_id): - return get_json_result(message="Database error (Document removal)!", code=RetCode.SERVER_ERROR) - File2DocumentService.delete_by_file_id(file_id) - - return get_json_result(data=True) - except Exception as e: - return server_error_response(e) - - -@manager.route('/file/rename', methods=['POST']) # noqa: F821 -@token_required -async def rename(tenant_id): - """ - Rename a file. 
- --- - tags: - - File - security: - - ApiKeyAuth: [] - parameters: - - in: body - name: body - description: Rename file - required: true - schema: - type: object - properties: - file_id: - type: string - description: Target file ID - name: - type: string - description: New name for the file - responses: - 200: - description: File renamed successfully - schema: - type: object - properties: - data: - type: boolean - example: true - """ - req = await get_request_json() - try: - e, file = FileService.get_by_id(req["file_id"]) - if not e: - return get_json_result(message="File not found!", code=RetCode.NOT_FOUND) - - if file.type != FileType.FOLDER.value and pathlib.Path(req["name"].lower()).suffix != pathlib.Path( - file.name.lower()).suffix: - return get_json_result(data=False, message="The extension of file can't be changed", - code=RetCode.BAD_REQUEST) - - for existing_file in FileService.query(name=req["name"], pf_id=file.parent_id): - if existing_file.name == req["name"]: - return get_json_result(data=False, message="Duplicated file name in the same folder.", - code=RetCode.CONFLICT) - - if not FileService.update_by_id(req["file_id"], {"name": req["name"]}): - return get_json_result(message="Database error (File rename)!", code=RetCode.SERVER_ERROR) - - informs = File2DocumentService.get_by_file_id(req["file_id"]) - if informs: - if not DocumentService.update_by_id(informs[0].document_id, {"name": req["name"]}): - return get_json_result(message="Database error (Document rename)!", code=RetCode.SERVER_ERROR) - - return get_json_result(data=True) - except Exception as e: - return server_error_response(e) - - -@manager.route('/file/get/', methods=['GET']) # noqa: F821 -@token_required -async def get(tenant_id, file_id): - """ - Download a file. - --- - tags: - - File - security: - - ApiKeyAuth: [] - produces: - - application/octet-stream - parameters: - - in: path - name: file_id - type: string - required: true - description: File ID to download - responses: - 200: - description: File stream - schema: - type: file - RetCode.NOT_FOUND: - description: File not found - """ - try: - e, file = FileService.get_by_id(file_id) - if not e: - return get_json_result(message="Document not found!", code=RetCode.NOT_FOUND) - - blob = settings.STORAGE_IMPL.get(file.parent_id, file.location) - if not blob: - b, n = File2DocumentService.get_storage_address(file_id=file_id) - blob = settings.STORAGE_IMPL.get(b, n) - - response = await make_response(blob) - ext = re.search(r"\.([^.]+)$", file.name) - extension = ext.group(1).lower() if ext else None - content_type = None - if extension: - fallback_prefix = "image" if file.type == FileType.VISUAL.value else "application" - content_type = CONTENT_TYPE_MAP.get(extension, f"{fallback_prefix}/{extension}") - apply_safe_file_response_headers(response, content_type, extension) - return response - except Exception as e: - return server_error_response(e) - - -@manager.route("/file/download/", methods=["GET"]) # noqa: F821 -@token_required -async def download_attachment(tenant_id, attachment_id): - try: - ext = request.args.get("ext", "markdown") - data = await thread_pool_exec(settings.STORAGE_IMPL.get, tenant_id, attachment_id) - response = await make_response(data) - content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}") - apply_safe_file_response_headers(response, content_type, ext) - - return response - - except Exception as e: - return server_error_response(e) - - -@manager.route('/file/mv', methods=['POST']) # noqa: F821 -@token_required -async def 
move(tenant_id): - """ - Move one or multiple files to another folder. - --- - tags: - - File - security: - - ApiKeyAuth: [] - parameters: - - in: body - name: body - description: Move operation - required: true - schema: - type: object - properties: - src_file_ids: - type: array - items: - type: string - description: Source file IDs - dest_file_id: - type: string - description: Destination folder ID - responses: - 200: - description: Files moved successfully - schema: - type: object - properties: - data: - type: boolean - example: true - """ - req = await get_request_json() - try: - file_ids = req["src_file_ids"] - parent_id = req["dest_file_id"] - files = FileService.get_by_ids(file_ids) - files_dict = {f.id: f for f in files} - - for file_id in file_ids: - file = files_dict[file_id] - if not file: - return get_json_result(message="File or Folder not found!", code=RetCode.NOT_FOUND) - if not file.tenant_id: - return get_json_result(message="Tenant not found!", code=RetCode.NOT_FOUND) - - fe, _ = FileService.get_by_id(parent_id) - if not fe: - return get_json_result(message="Parent Folder not found!", code=RetCode.NOT_FOUND) - - FileService.move_file(file_ids, parent_id) - return get_json_result(data=True) - except Exception as e: - return server_error_response(e) - - -@manager.route('/file/convert', methods=['POST']) # noqa: F821 -@token_required -async def convert(tenant_id): - req = await get_request_json() - kb_ids = req["kb_ids"] - file_ids = req["file_ids"] - file2documents = [] - - try: - files = FileService.get_by_ids(file_ids) - files_set = dict({file.id: file for file in files}) - for file_id in file_ids: - file = files_set[file_id] - if not file: - return get_json_result(message="File not found!", code=RetCode.NOT_FOUND) - file_ids_list = [file_id] - if file.type == FileType.FOLDER.value: - file_ids_list = FileService.get_all_innermost_file_ids(file_id, []) - for id in file_ids_list: - informs = File2DocumentService.get_by_file_id(id) - # delete - for inform in informs: - doc_id = inform.document_id - e, doc = DocumentService.get_by_id(doc_id) - if not e: - return get_json_result(message="Document not found!", code=RetCode.NOT_FOUND) - tenant_id = DocumentService.get_tenant_id(doc_id) - if not tenant_id: - return get_json_result(message="Tenant not found!", code=RetCode.NOT_FOUND) - if not DocumentService.remove_document(doc, tenant_id): - return get_json_result( - message="Database error (Document removal)!", code=RetCode.NOT_FOUND) - File2DocumentService.delete_by_file_id(id) - - # insert - for kb_id in kb_ids: - e, kb = KnowledgebaseService.get_by_id(kb_id) - if not e: - return get_json_result( - message="Can't find this dataset!", code=RetCode.NOT_FOUND) - e, file = FileService.get_by_id(id) - if not e: - return get_json_result( - message="Can't find this file!", code=RetCode.NOT_FOUND) - - doc = DocumentService.insert({ - "id": get_uuid(), - "kb_id": kb.id, - "parser_id": FileService.get_parser(file.type, file.name, kb.parser_id), - "parser_config": kb.parser_config, - "created_by": tenant_id, - "type": file.type, - "name": file.name, - "suffix": Path(file.name).suffix.lstrip("."), - "location": file.location, - "size": file.size - }) - file2document = File2DocumentService.insert({ - "id": get_uuid(), - "file_id": id, - "document_id": doc.id, - }) - - file2documents.append(file2document.to_json()) - return get_json_result(data=file2documents) - except Exception as e: - return server_error_response(e) diff --git a/api/apps/services/file_api_service.py 
b/api/apps/services/file_api_service.py new file mode 100644 index 000000000..d6fe9248a --- /dev/null +++ b/api/apps/services/file_api_service.py @@ -0,0 +1,397 @@ +# +# Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import logging +import os +import pathlib + +from api.common.check_team_permission import check_file_team_permission +from api.db import FileType +from api.db.services import duplicate_name +from api.db.services.document_service import DocumentService +from api.db.services.file2document_service import File2DocumentService +from api.db.services.file_service import FileService +from api.utils.file_utils import filename_type +from common import settings +from common.constants import FileSource +from common.misc_utils import get_uuid, thread_pool_exec + + +async def upload_file(tenant_id: str, pf_id: str, file_objs: list): + """ + Upload files to a folder. + + :param tenant_id: tenant ID + :param pf_id: parent folder ID + :param file_objs: list of file objects from request + :return: (success, result_list) or (success, error_message) + """ + if not pf_id: + root_folder = FileService.get_root_folder(tenant_id) + pf_id = root_folder["id"] + + e, pf_folder = FileService.get_by_id(pf_id) + if not e: + return False, "Can't find this folder!" + + file_res = [] + for file_obj in file_objs: + MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0)) + if 0 < MAX_FILE_NUM_PER_USER <= await thread_pool_exec(DocumentService.get_doc_count, tenant_id): + return False, "Exceed the maximum file number of a free user!" + + if not file_obj.filename: + file_obj_names = [pf_folder.name, file_obj.filename] + else: + full_path = '/' + file_obj.filename + file_obj_names = full_path.split('/') + file_len = len(file_obj_names) + + file_id_list = await thread_pool_exec(FileService.get_id_list_by_id, pf_id, file_obj_names, 1, [pf_id]) + len_id_list = len(file_id_list) + + if file_len != len_id_list: + e, file = await thread_pool_exec(FileService.get_by_id, file_id_list[len_id_list - 1]) + if not e: + return False, "Folder not found!" + last_folder = await thread_pool_exec( + FileService.create_folder, file, file_id_list[len_id_list - 1], file_obj_names, len_id_list + ) + else: + e, file = await thread_pool_exec(FileService.get_by_id, file_id_list[len_id_list - 2]) + if not e: + return False, "Folder not found!" 
+ last_folder = await thread_pool_exec( + FileService.create_folder, file, file_id_list[len_id_list - 2], file_obj_names, len_id_list + ) + + filetype = filename_type(file_obj_names[file_len - 1]) + location = file_obj_names[file_len - 1] + while await thread_pool_exec(settings.STORAGE_IMPL.obj_exist, last_folder.id, location): + location += "_" + blob = await thread_pool_exec(file_obj.read) + filename = await thread_pool_exec( + duplicate_name, FileService.query, name=file_obj_names[file_len - 1], parent_id=last_folder.id + ) + await thread_pool_exec(settings.STORAGE_IMPL.put, last_folder.id, location, blob) + file_data = { + "id": get_uuid(), + "parent_id": last_folder.id, + "tenant_id": tenant_id, + "created_by": tenant_id, + "type": filetype, + "name": filename, + "location": location, + "size": len(blob), + } + inserted = await thread_pool_exec(FileService.insert, file_data) + file_res.append(inserted.to_json()) + + return True, file_res + + +async def create_folder(tenant_id: str, name: str, pf_id: str = None, file_type: str = None): + """ + Create a new folder or virtual file. + + :param tenant_id: tenant ID + :param name: folder name + :param pf_id: parent folder ID + :param file_type: file type (folder or virtual) + :return: (success, result) or (success, error_message) + """ + if not pf_id: + root_folder = FileService.get_root_folder(tenant_id) + pf_id = root_folder["id"] + + if not FileService.is_parent_folder_exist(pf_id): + return False, "Parent Folder Doesn't Exist!" + if FileService.query(name=name, parent_id=pf_id): + return False, "Duplicated folder name in the same folder." + + if file_type == FileType.FOLDER.value: + ft = FileType.FOLDER.value + else: + ft = FileType.VIRTUAL.value + + file = FileService.insert({ + "id": get_uuid(), + "parent_id": pf_id, + "tenant_id": tenant_id, + "created_by": tenant_id, + "name": name, + "location": "", + "size": 0, + "type": ft, + }) + return True, file.to_json() + + +def list_files(tenant_id: str, args: dict): + """ + List files under a folder. + + :param tenant_id: tenant ID + :param args: query arguments (parent_id, keywords, page, page_size, orderby, desc) + :return: (success, result) or (success, error_message) + """ + pf_id = args.get("parent_id") + keywords = args.get("keywords", "") + page_number = int(args.get("page", 1)) + items_per_page = int(args.get("page_size", 15)) + orderby = args.get("orderby", "create_time") + desc = args.get("desc", True) + + if not pf_id: + root_folder = FileService.get_root_folder(tenant_id) + pf_id = root_folder["id"] + FileService.init_knowledgebase_docs(pf_id, tenant_id) + + e, file = FileService.get_by_id(pf_id) + if not e: + return False, "Folder not found!" + + files, total = FileService.get_by_pf_id(tenant_id, pf_id, page_number, items_per_page, orderby, desc, keywords) + + parent_folder = FileService.get_parent_folder(pf_id) + if not parent_folder: + return False, "File not found!" + + return True, {"total": total, "files": files, "parent_folder": parent_folder.to_json()} + + + +def get_parent_folder(file_id: str): + """ + Get parent folder of a file. + + :param file_id: file ID + :return: (success, result) or (success, error_message) + """ + e, file = FileService.get_by_id(file_id) + if not e: + return False, "Folder not found!" + + parent_folder = FileService.get_parent_folder(file_id) + return True, {"parent_folder": parent_folder.to_json()} + + +def get_all_parent_folders(file_id: str): + """ + Get all ancestor folders of a file. 
+ + :param file_id: file ID + :return: (success, result) or (success, error_message) + """ + e, file = FileService.get_by_id(file_id) + if not e: + return False, "Folder not found!" + + parent_folders = FileService.get_all_parent_folders(file_id) + return True, {"parent_folders": [pf.to_json() for pf in parent_folders]} + + +async def delete_files(uid: str, file_ids: list): + """ + Delete files/folders with team permission check and recursive deletion. + + :param uid: user ID + :param file_ids: list of file IDs to delete + :return: (success, result) or (success, error_message) + """ + def _delete_single_file(file): + try: + if file.location: + settings.STORAGE_IMPL.rm(file.parent_id, file.location) + except Exception as e: + logging.exception(f"Fail to remove object: {file.parent_id}/{file.location}, error: {e}") + + informs = File2DocumentService.get_by_file_id(file.id) + for inform in informs: + doc_id = inform.document_id + e, doc = DocumentService.get_by_id(doc_id) + if e and doc: + tenant_id = DocumentService.get_tenant_id(doc_id) + if tenant_id: + DocumentService.remove_document(doc, tenant_id) + File2DocumentService.delete_by_file_id(file.id) + + FileService.delete(file) + + def _delete_folder_recursive(folder, tenant_id): + sub_files = FileService.list_all_files_by_parent_id(folder.id) + for sub_file in sub_files: + if sub_file.type == FileType.FOLDER.value: + _delete_folder_recursive(sub_file, tenant_id) + else: + _delete_single_file(sub_file) + FileService.delete(folder) + + def _rm_sync(): + for file_id in file_ids: + e, file = FileService.get_by_id(file_id) + if not e or not file: + return False, "File or Folder not found!" + if not file.tenant_id: + return False, "Tenant not found!" + if not check_file_team_permission(file, uid): + return False, "No authorization." + + if file.source_type == FileSource.KNOWLEDGEBASE: + continue + + if file.type == FileType.FOLDER.value: + _delete_folder_recursive(file, uid) + continue + + _delete_single_file(file) + + return True, True + + return await thread_pool_exec(_rm_sync) + + +async def move_files(uid: str, src_file_ids: list, dest_file_id: str = None, new_name: str = None): + """ + Move and/or rename files. Follows Linux mv semantics: + - new_name only: rename in place (no storage operation) + - dest_file_id only: move to new folder (keep names) + - both: move and rename simultaneously + + :param uid: user ID + :param src_file_ids: list of source file IDs + :param dest_file_id: destination folder ID (optional) + :param new_name: new name for the file (optional, single file only) + :return: (success, result) or (success, error_message) + """ + files = FileService.get_by_ids(src_file_ids) + if not files: + return False, "Source files not found!" + + files_dict = {f.id: f for f in files} + + for file_id in src_file_ids: + file = files_dict.get(file_id) + if not file: + return False, "File or folder not found!" + if not file.tenant_id: + return False, "Tenant not found!" + if not check_file_team_permission(file, uid): + return False, "No authorization." + + dest_folder = None + if dest_file_id: + ok, dest_folder = FileService.get_by_id(dest_file_id) + if not ok or not dest_folder: + return False, "Parent folder not found!" 
+ + if new_name: + file = files_dict[src_file_ids[0]] + if file.type != FileType.FOLDER.value and \ + pathlib.Path(new_name.lower()).suffix != pathlib.Path(file.name.lower()).suffix: + return False, "The extension of file can't be changed" + target_parent_id = dest_folder.id if dest_folder else file.parent_id + for f in FileService.query(name=new_name, parent_id=target_parent_id): + if f.name == new_name: + return False, "Duplicated file name in the same folder." + + def _move_entry_recursive(source_file_entry, dest_folder_entry, override_name=None): + effective_name = override_name or source_file_entry.name + + if source_file_entry.type == FileType.FOLDER.value: + existing_folder = FileService.query(name=effective_name, parent_id=dest_folder_entry.id) + if existing_folder: + new_folder = existing_folder[0] + else: + new_folder = FileService.insert({ + "id": get_uuid(), + "parent_id": dest_folder_entry.id, + "tenant_id": source_file_entry.tenant_id, + "created_by": source_file_entry.tenant_id, + "name": effective_name, + "location": "", + "size": 0, + "type": FileType.FOLDER.value, + }) + + sub_files = FileService.list_all_files_by_parent_id(source_file_entry.id) + for sub_file in sub_files: + _move_entry_recursive(sub_file, new_folder) + + FileService.delete_by_id(source_file_entry.id) + return + + # Non-folder file + need_storage_move = dest_folder_entry.id != source_file_entry.parent_id + updates = {} + + if need_storage_move: + new_location = effective_name + while settings.STORAGE_IMPL.obj_exist(dest_folder_entry.id, new_location): + new_location += "_" + try: + settings.STORAGE_IMPL.move( + source_file_entry.parent_id, source_file_entry.location, + dest_folder_entry.id, new_location, + ) + except Exception as storage_err: + raise RuntimeError(f"Move file failed at storage layer: {str(storage_err)}") + updates["parent_id"] = dest_folder_entry.id + updates["location"] = new_location + + if override_name: + updates["name"] = override_name + + if updates: + FileService.update_by_id(source_file_entry.id, updates) + + if override_name: + informs = File2DocumentService.get_by_file_id(source_file_entry.id) + if informs: + if not DocumentService.update_by_id(informs[0].document_id, {"name": override_name}): + raise RuntimeError("Database error (Document rename)!") + + def _move_or_rename_sync(): + if dest_folder: + for file in files: + _move_entry_recursive(file, dest_folder, override_name=new_name) + else: + # Pure rename: no storage operation needed + file = files[0] + if not FileService.update_by_id(file.id, {"name": new_name}): + return False, "Database error (File rename)!" + informs = File2DocumentService.get_by_file_id(file.id) + if informs: + if not DocumentService.update_by_id(informs[0].document_id, {"name": new_name}): + return False, "Database error (Document rename)!" + return True, True + + return await thread_pool_exec(_move_or_rename_sync) + + +def get_file_content(uid: str, file_id: str): + """ + Get file content and metadata for download. + + :param uid: user ID + :param file_id: file ID + :return: (success, (blob, file_obj)) or (success, error_message) + """ + e, file = FileService.get_by_id(file_id) + if not e: + return False, "Document not found!" + if not check_file_team_permission(file, uid): + return False, "No authorization." 
+ return True, file diff --git a/api/utils/validation_utils.py b/api/utils/validation_utils.py index 8296c79e8..fb2a04884 100644 --- a/api/utils/validation_utils.py +++ b/api/utils/validation_utils.py @@ -1,5 +1,5 @@ # -# Copyright 2025 The InfiniFlow Authors. All Rights Reserved. +# Copyright 2026 The InfiniFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -781,3 +781,40 @@ class BaseListReq(BaseModel): class ListDatasetReq(BaseListReq): include_parsing_status: Annotated[bool, Field(default=False)] ext: Annotated[dict, Field(default={})] + + +# ---- File Management Request Models ---- + +class CreateFolderReq(Base): + name: Annotated[str, StringConstraints(strip_whitespace=True, min_length=1, max_length=255), Field(...)] + parent_id: Annotated[str | None, Field(default=None)] + type: Annotated[str | None, Field(default=None)] + + +class DeleteFileReq(Base): + ids: Annotated[list[str], Field(min_length=1)] + + +class MoveFileReq(Base): + src_file_ids: Annotated[list[str], Field(min_length=1)] + dest_file_id: Annotated[str | None, Field(default=None)] + new_name: Annotated[str | None, StringConstraints(strip_whitespace=True, min_length=1, max_length=255), Field(default=None)] + + @model_validator(mode='after') + def check_operation(self): + if not self.dest_file_id and not self.new_name: + raise ValueError("At least one of dest_file_id or new_name must be provided") + if self.new_name and len(self.src_file_ids) > 1: + raise ValueError("new_name can only be used with a single file") + return self + + +class ListFileReq(BaseModel): + model_config = ConfigDict(extra="forbid") + + parent_id: Annotated[str | None, Field(default=None)] + keywords: Annotated[str, Field(default="")] + page: Annotated[int, Field(default=1, ge=1)] + page_size: Annotated[int, Field(default=15, ge=1, le=100)] + orderby: Annotated[str, Field(default="create_time")] + desc: Annotated[bool, Field(default=True)] diff --git a/docs/references/http_api_reference.md b/docs/references/http_api_reference.md index 2a9fc4010..886627369 100644 --- a/docs/references/http_api_reference.md +++ b/docs/references/http_api_reference.md @@ -6309,14 +6309,14 @@ Explanation: ### Upload file -**POST** `/api/v1/file/upload` +**POST** `/api/v1/files` Uploads one or multiple files to the system. #### Request - Method: POST -- URL: `/api/v1/file/upload` +- URL: `/api/v1/files` - Headers: - `'Content-Type: multipart/form-data'` - `'Authorization: Bearer '` @@ -6328,7 +6328,7 @@ Uploads one or multiple files to the system. ```bash curl --request POST \ - --url http://{address}/api/v1/file/upload \ + --url http://{address}/api/v1/files \ --header 'Content-Type: multipart/form-data' \ --header 'Authorization: Bearer ' \ --form 'file=@./test1.txt' \ @@ -6377,34 +6377,48 @@ Failure: ### Upload document -**POST** `/api/v1/file/upload_info` +**POST** `/v1/document/upload_info` -Uploads a file and creates the respective document +Uploads a file and creates the respective document. #### Request - Method: POST -- URL: `/api/v1/file/upload_info` +- URL: `/v1/document/upload_info` - Headers: - - `'Content-Type: multipart/form-data` + - `'Content-Type: multipart/form-data'` - `'Authorization: Bearer '` - Form: - - `'file=@{FILE_PATH}'` + - `'file=@{FILE_PATH}'` (mutually exclusive with `url`) +- Query: + - `url`: URL to crawl and convert to a runtime attachment (mutually exclusive with `file`). 
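The `file`/`url` exclusivity documented above is easy to violate from client code. Below is a minimal Python sketch (not part of this patch) of a hypothetical helper that enforces the rule before calling `/v1/document/upload_info`; the base URL and API key are placeholder assumptions. It complements the curl examples that follow.

```python
# Hypothetical client helper for the upload_info endpoint documented above.
# BASE_URL and API_KEY are assumptions; replace them with real values.
import requests

BASE_URL = "http://localhost:9380"   # assumed server address
API_KEY = "<YOUR_API_KEY>"           # assumed API key


def upload_info(file_path: str | None = None, url: str | None = None) -> dict:
    """Send exactly one of a local file or a URL, mirroring the API's rule."""
    if bool(file_path) == bool(url):
        raise ValueError("Provide exactly one of file_path or url")

    headers = {"Authorization": f"Bearer {API_KEY}"}
    endpoint = f"{BASE_URL}/v1/document/upload_info"

    if file_path:
        # Multipart upload: the runtime attachment descriptor comes back in `data`.
        with open(file_path, "rb") as fh:
            resp = requests.post(endpoint, headers=headers, files={"file": fh})
    else:
        # URL mode: the server crawls the page and stores it as an attachment.
        resp = requests.post(endpoint, headers=headers, params={"url": url})

    resp.raise_for_status()
    return resp.json()
```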
##### Request example +Upload a local file: + ```bash curl --request POST \ - --url http://{address}/api/v1/file/upload_info \ + --url http://{address}/v1/document/upload_info \ --header 'Content-Type: multipart/form-data' \ --header 'Authorization: Bearer ' \ --form 'file=@./test1.pdf' ``` +Crawl a URL: + +```bash +curl --request POST \ + --url 'http://{address}/v1/document/upload_info?url=https://example.com/page' \ + --header 'Authorization: Bearer ' +``` + ##### Request parameters -- `'file'`: (*Form parameter*), `file`, *Required* - The file to upload. +- `'file'`: (*Form parameter*), `file`, *Optional* + The file to upload. Mutually exclusive with `url`; either `file` or `url` must be provided. +- `url`: (*Query parameter*), `string`, *Optional* + A URL to crawl and store as an attachment. Mutually exclusive with `file`; either `url` or `file` must be provided. #### Response @@ -6415,7 +6429,7 @@ Success: "code": 0, "data": { "created_at": 1772451421.7924063, - "created by": "be951084066611f18f5f00155d2f98f4", + "created_by": "be951084066611f18f5f00155d2f98f4", "extension": "pdf", "id": "2143a03d162c11f1b80f00155d334d02", "mime_type": "application/pdf", @@ -6438,16 +6452,70 @@ Failure: --- +### Download attachment + +**GET** `/v1/document/download/{attachment_id}` + +Downloads a runtime attachment previously uploaded via the [Upload document](#upload-document) method. + +#### Request + +- Method: GET +- URL: `/v1/document/download/{attachment_id}` +- Headers: + - `'Authorization: Bearer '` +- Query parameter: + - `ext`: `string` (Optional) + +##### Request example + +```bash +curl --request GET \ + --url 'http://{address}/v1/document/download/{attachment_id}?ext=pdf' \ + --header 'Authorization: Bearer ' \ + --output ./downloaded_attachment.pdf +``` + +##### Request parameters + +- `attachment_id`: (*Path parameter*), `string`, *Required* + The `id` value returned by the [Upload document](#upload-document) method. +- `ext`: (*Query parameter*), `string`, *Optional* + A file extension hint specifying the response's Content-Type. Defaults to `"markdown"`. Available values: + - `"markdown"` + - `"html"` + - `"pdf"` + - `"docx"` + - `"xlsx"` + - `"csv"` + +#### Response + +Success: + +Returns the file content as a binary stream with the relevant Content-Type header. + +Failure: + +```json +{ + "code": 500, + "message": "Internal server error" +} +``` + +--- + ### Create file or folder -**POST** `/api/v1/file/create` +**POST** `/api/v1/files` Creates a new file or folder in the system. #### Request - Method: POST -- URL: `/api/v1/file/create` +- URL: `/api/v1/files` - Headers: - `'Content-Type: application/json'` - `'Authorization: Bearer '` @@ -6460,12 +6528,12 @@ Creates a new file or folder in the system. ```bash curl --request POST \ - --url http://{address}/api/v1/file/create \ + --url http://{address}/api/v1/files \ --header 'Content-Type: application/json' \ --header 'Authorization: Bearer ' \ --data '{ "name": "New Folder", - "type": "FOLDER", + "type": "folder", "parent_id": "{folder_id}" }' ``` @@ -6478,8 +6546,8 @@ curl --request POST \ The parent folder ID. If not specified, the file/folder will be created in the root folder. - `"type"`: (*Body parameter*), `string` The type of the file to create. 
Available options: - - `"FOLDER"`: Create a folder - - `"VIRTUAL"`: Create a virtual file + - `"folder"`: Create a folder + - `"virtual"`: Create a virtual file #### Response @@ -6491,7 +6559,7 @@ Success: "data": { "id": "b330ec2e91ec11efbc510242ac120004", "name": "New Folder", - "type": "FOLDER", + "type": "folder", "parent_id": "527fa74891e811ef9c650242ac120006", "size": 0, "create_time": 1729763127646 @@ -6512,14 +6580,14 @@ Failure: ### List files -**GET** `/api/v1/file/list?parent_id={parent_id}&keywords={keywords}&page={page}&page_size={page_size}&orderby={orderby}&desc={desc}` +**GET** `/api/v1/files?parent_id={parent_id}&keywords={keywords}&page={page}&page_size={page_size}&orderby={orderby}&desc={desc}` Lists files and folders under a specific folder. #### Request - Method: GET -- URL: `/api/v1/file/list?parent_id={parent_id}&keywords={keywords}&page={page}&page_size={page_size}&orderby={orderby}&desc={desc}` +- URL: `/api/v1/files?parent_id={parent_id}&keywords={keywords}&page={page}&page_size={page_size}&orderby={orderby}&desc={desc}` - Headers: - `'Authorization: Bearer '` @@ -6527,7 +6595,7 @@ Lists files and folders under a specific folder. ```bash curl --request GET \ - --url 'http://{address}/api/v1/file/list?parent_id={folder_id}&page=1&page_size=15' \ + --url 'http://{address}/api/v1/files?parent_id={folder_id}&page=1&page_size=15' \ --header 'Authorization: Bearer ' ``` @@ -6585,60 +6653,16 @@ Failure: --- -### Get root folder - -**GET** `/api/v1/file/root_folder` - -Retrieves the user's root folder information. - -#### Request - -- Method: GET -- URL: `/api/v1/file/root_folder` -- Headers: - - `'Authorization: Bearer '` - -##### Request example - -```bash -curl --request GET \ - --url http://{address}/api/v1/file/root_folder \ - --header 'Authorization: Bearer ' -``` - -##### Request parameters - -No parameters required. - -#### Response - -Success: - -```json -{ - "code": 0, - "data": { - "root_folder": { - "id": "527fa74891e811ef9c650242ac120006", - "name": "root", - "type": "FOLDER" - } - } -} -``` - ---- - ### Get parent folder -**GET** `/api/v1/file/parent_folder?file_id={file_id}` +**GET** `/api/v1/files/{file_id}/parent` Retrieves the immediate parent folder information of a specified file. #### Request - Method: GET -- URL: `/api/v1/file/parent_folder?file_id={file_id}` +- URL: `/api/v1/files/{file_id}/parent` - Headers: - `'Authorization: Bearer '` @@ -6646,13 +6670,13 @@ Retrieves the immediate parent folder information of a specified file. ```bash curl --request GET \ - --url 'http://{address}/api/v1/file/parent_folder?file_id={file_id}' \ + --url 'http://{address}/api/v1/files/{file_id}/parent' \ --header 'Authorization: Bearer ' ``` ##### Request parameters -- `file_id`: (*Filter parameter*), `string`, *Required* +- `file_id`: (*Path parameter*), `string`, *Required* The ID of the file whose immediate parent folder to retrieve. #### Response @@ -6684,14 +6708,14 @@ Failure: ### Get all parent folders -**GET** `/api/v1/file/all_parent_folder?file_id={file_id}` +**GET** `/api/v1/files/{file_id}/ancestors` Retrieves all parent folders of a specified file in the folder hierarchy. #### Request - Method: GET -- URL: `/api/v1/file/all_parent_folder?file_id={file_id}` +- URL: `/api/v1/files/{file_id}/ancestors` - Headers: - `'Authorization: Bearer '` @@ -6699,13 +6723,13 @@ Retrieves all parent folders of a specified file in the folder hierarchy. 
```bash curl --request GET \ - --url 'http://{address}/api/v1/file/all_parent_folder?file_id={file_id}' \ + --url 'http://{address}/api/v1/files/{file_id}/ancestors' \ --header 'Authorization: Bearer ' ``` ##### Request parameters -- `file_id`: (*Filter parameter*), `string`, *Required* +- `file_id`: (*Path parameter*), `string`, *Required* The ID of the file whose parent folders to retrieve. #### Response @@ -6743,35 +6767,35 @@ Failure: ### Delete files -**POST** `/api/v1/file/rm` +**DELETE** `/api/v1/files` Deletes one or multiple files or folders. #### Request -- Method: POST -- URL: `/api/v1/file/rm` +- Method: DELETE +- URL: `/api/v1/files` - Headers: - `'Content-Type: application/json'` - `'Authorization: Bearer '` - Body: - - `"file_ids"`: `list[string]` + - `"ids"`: `list[string]` ##### Request example ```bash -curl --request POST \ - --url http://{address}/api/v1/file/rm \ +curl --request DELETE \ + --url http://{address}/api/v1/files \ --header 'Content-Type: application/json' \ --header 'Authorization: Bearer ' \ --data '{ - "file_ids": ["file_id_1", "file_id_2"] + "ids": ["file_id_1", "file_id_2"] }' ``` ##### Request parameters -- `"file_ids"`: (*Body parameter*), `list[string]`, *Required* +- `"ids"`: (*Body parameter*), `list[string]`, *Required* The IDs of the files or folders to delete. #### Response @@ -6796,84 +6820,16 @@ Failure: --- -### Rename file - -**POST** `/api/v1/file/rename` - -Renames a file or folder. - -#### Request - -- Method: POST -- URL: `/api/v1/file/rename` -- Headers: - - `'Content-Type: application/json'` - - `'Authorization: Bearer '` -- Body: - - `"file_id"`: `string` - - `"name"`: `string` - -##### Request example - -```bash -curl --request POST \ - --url http://{address}/api/v1/file/rename \ - --header 'Content-Type: application/json' \ - --header 'Authorization: Bearer ' \ - --data '{ - "file_id": "{file_id}", - "name": "new_name.txt" - }' -``` - -##### Request parameters - -- `"file_id"`: (*Body parameter*), `string`, *Required* - The ID of the file or folder to rename. -- `"name"`: (*Body parameter*), `string`, *Required* - The new name for the file or folder. Note: Changing file extensions is *not* supported. - -#### Response - -Success: - -```json -{ - "code": 0, - "data": true -} -``` - -Failure: - -```json -{ - "code": 400, - "message": "The extension of file can't be changed" -} -``` - -or - -```json -{ - "code": 409, - "message": "Duplicated file name in the same folder." -} -``` - ---- - ### Download file -**GET** `/api/v1/file/get/{file_id}` +**GET** `/api/v1/files/{file_id}` Downloads a file from the system. #### Request - Method: GET -- URL: `/api/v1/file/get/{file_id}` +- URL: `/api/v1/files/{file_id}` - Headers: - `'Authorization: Bearer '` @@ -6881,7 +6837,7 @@ Downloads a file from the system. ```bash curl --request GET \ - --url http://{address}/api/v1/file/get/{file_id} \ + --url http://{address}/api/v1/files/{file_id} \ --header 'Authorization: Bearer ' \ --output ./downloaded_file.txt ``` @@ -6908,28 +6864,35 @@ Failure: --- -### Move files +### Move or rename files -**POST** `/api/v1/file/mv` +**POST** `/api/v1/files/move` -Moves one or multiple files or folders to a specified folder. +Moves and/or renames files or folders. Follows Linux `mv` semantics: at least one of `dest_file_id` or `new_name` must be provided. + +- `dest_file_id` only: move files to a new folder, names unchanged. +- `new_name` only: rename a single file or folder in place, no storage operation. +- Both: move and rename simultaneously. 
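The request examples further below cover the first two cases. As an illustration of the third, combined case, here is a hedged Python sketch; the address, API key, and IDs are placeholders, not values from this patch.

```python
# Hypothetical request for the combined case: move a single file into another
# folder and rename it in the same call. Address, key, and IDs are placeholders.
import requests

resp = requests.post(
    "http://localhost:9380/api/v1/files/move",           # assumed address
    headers={"Authorization": "Bearer <YOUR_API_KEY>"},
    json={
        "src_file_ids": ["<file_id>"],        # exactly one ID when new_name is set
        "dest_file_id": "<dest_folder_id>",   # move target
        "new_name": "renamed.txt",            # extension must match the original file
    },
)
print(resp.json())  # expected {"code": 0, "data": true} per the response documented below
```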
#### Request - Method: POST -- URL: `/api/v1/file/mv` +- URL: `/api/v1/files/move` - Headers: - `'Content-Type: application/json'` - `'Authorization: Bearer '` - Body: - - `"src_file_ids"`: `list[string]` - - `"dest_file_id"`: `string` + - `"src_file_ids"`: `list[string]`, *Required* + - `"dest_file_id"`: `string`, *Optional* + - `"new_name"`: `string`, *Optional* -##### Request example +##### Request examples + +Move files to a folder: ```bash curl --request POST \ - --url http://{address}/api/v1/file/mv \ + --url http://{address}/api/v1/files/move \ --header 'Content-Type: application/json' \ --header 'Authorization: Bearer ' \ --data '{ @@ -6938,12 +6901,27 @@ curl --request POST \ }' ``` +Rename a file in place: + +```bash +curl --request POST \ + --url http://{address}/api/v1/files/move \ + --header 'Content-Type: application/json' \ + --header 'Authorization: Bearer ' \ + --data '{ + "src_file_ids": ["{file_id}"], + "new_name": "new_name.txt" + }' +``` + ##### Request parameters -- `"src_file_ids"`: (*Body parameter*), `list[string]`, *Required* - The IDs of the files or folders to move. -- `"dest_file_id"`: (*Body parameter*), `string`, *Required* - The ID of the destination folder. +- `"src_file_ids"`: (*Body parameter*), `list[string]`, *Required* + The IDs of the files or folders to move or rename. +- `"dest_file_id"`: (*Body parameter*), `string`, *Optional* + The ID of the destination folder. Omit to rename in place. +- `"new_name"`: (*Body parameter*), `string`, *Optional* + New name for the file or folder. Only valid when `src_file_ids` contains a single entry. Note: Changing file extensions is *not* supported. #### Response @@ -6970,7 +6948,16 @@ or ```json { "code": 404, - "message": "Parent Folder not found!" + "message": "Parent folder not found!" +} +``` + +or + +```json +{ + "code": 400, + "message": "The extension of file can't be changed" } ``` @@ -6978,14 +6965,14 @@ or ### Convert files to documents and link them to datasets -**POST** `/api/v1/file/convert` +**POST** `/v1/file2document/convert` Converts files to documents and links them to specified datasets. #### Request - Method: POST -- URL: `/api/v1/file/convert` +- URL: `/v1/file2document/convert` - Headers: - `'Content-Type: application/json'` - `'Authorization: Bearer '` @@ -6997,7 +6984,7 @@ Converts files to documents and links them to specified datasets. ```bash curl --request POST \ - --url http://{address}/api/v1/file/convert \ + --url http://{address}/v1/file2document/convert \ --header 'Content-Type: application/json' \ --header 'Authorization: Bearer ' \ --data '{ diff --git a/test/testcases/test_http_api/test_file_app/test_file_routes.py b/test/testcases/test_http_api/test_file_app/test_file_routes.py index 757314210..85fa264b4 100644 --- a/test/testcases/test_http_api/test_file_app/test_file_routes.py +++ b/test/testcases/test_http_api/test_file_app/test_file_routes.py @@ -1,5 +1,5 @@ # -# Copyright 2025 The InfiniFlow Authors. All Rights Reserved. +# Copyright 2026 The InfiniFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,8 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# + import asyncio -import functools import importlib.util import sys from enum import Enum @@ -24,29 +24,70 @@ from types import ModuleType, SimpleNamespace import pytest -class _DummyManager: - def route(self, *_args, **_kwargs): - def decorator(func): - return func - return decorator +@pytest.fixture(scope="session") +def auth(): + return "unit-auth" -def _load_files_app(monkeypatch): +@pytest.fixture(scope="session", autouse=True) +def set_tenant_info(): + return None + + +class _DummyUploadFile: + def __init__(self, filename, blob=b"blob"): + self.filename = filename + self._blob = blob + + def read(self): + return self._blob + + +class _DummyFile: + def __init__( + self, + file_id, + file_type, + *, + tenant_id="tenant1", + parent_id="pf1", + location="loc1", + name="doc.txt", + source_type="user", + size=1, + ): + self.id = file_id + self.type = file_type + self.tenant_id = tenant_id + self.parent_id = parent_id + self.location = location + self.name = name + self.source_type = source_type + self.size = size + + def to_json(self): + return {"id": self.id, "name": self.name, "type": self.type} + + +def _run(coro): + return asyncio.run(coro) + + +def _load_file_api_service(monkeypatch): repo_root = Path(__file__).resolve().parents[4] api_pkg = ModuleType("api") api_pkg.__path__ = [str(repo_root / "api")] monkeypatch.setitem(sys.modules, "api", api_pkg) - apps_pkg = ModuleType("api.apps") - apps_pkg.__path__ = [str(repo_root / "api" / "apps")] - monkeypatch.setitem(sys.modules, "api.apps", apps_pkg) - api_pkg.apps = apps_pkg + common_pkg = ModuleType("api.common") + common_pkg.__path__ = [] + monkeypatch.setitem(sys.modules, "api.common", common_pkg) - sdk_pkg = ModuleType("api.apps.sdk") - sdk_pkg.__path__ = [str(repo_root / "api" / "apps" / "sdk")] - monkeypatch.setitem(sys.modules, "api.apps.sdk", sdk_pkg) - apps_pkg.sdk = sdk_pkg + permission_mod = ModuleType("api.common.check_team_permission") + permission_mod.check_file_team_permission = lambda *_args, **_kwargs: True + monkeypatch.setitem(sys.modules, "api.common.check_team_permission", permission_mod) + common_pkg.check_team_permission = permission_mod db_pkg = ModuleType("api.db") db_pkg.__path__ = [] @@ -67,192 +108,71 @@ def _load_files_app(monkeypatch): monkeypatch.setitem(sys.modules, "api.db.services", services_pkg) document_service_mod = ModuleType("api.db.services.document_service") - - class _StubDocumentService: - @staticmethod - def get_by_id(_doc_id): - return True, SimpleNamespace(id=_doc_id) - - @staticmethod - def get_tenant_id(_doc_id): - return "tenant1" - - @staticmethod - def remove_document(*_args, **_kwargs): - return True - - @staticmethod - def update_by_id(*_args, **_kwargs): - return True - - @staticmethod - def insert(_doc): - return SimpleNamespace(id="doc1") - - document_service_mod.DocumentService = _StubDocumentService + document_service_mod.DocumentService = SimpleNamespace( + get_doc_count=lambda _uid: 0, + get_by_id=lambda doc_id: (True, SimpleNamespace(id=doc_id)), + get_tenant_id=lambda _doc_id: "tenant1", + remove_document=lambda *_args, **_kwargs: True, + update_by_id=lambda *_args, **_kwargs: True, + ) monkeypatch.setitem(sys.modules, "api.db.services.document_service", document_service_mod) services_pkg.document_service = document_service_mod - file2document_service_mod = ModuleType("api.db.services.file2document_service") - - class _StubFile2DocumentService: - @staticmethod - def get_by_file_id(_file_id): - return [] - - @staticmethod - def delete_by_file_id(*_args, **_kwargs): - return 
None - - @staticmethod - def get_storage_address(**_kwargs): - return "bucket", "location" - - @staticmethod - def insert(_data): - return SimpleNamespace(to_json=lambda: {}) - - file2document_service_mod.File2DocumentService = _StubFile2DocumentService - monkeypatch.setitem(sys.modules, "api.db.services.file2document_service", file2document_service_mod) - services_pkg.file2document_service = file2document_service_mod - - knowledgebase_service_mod = ModuleType("api.db.services.knowledgebase_service") - - class _StubKnowledgebaseService: - @staticmethod - def get_by_id(_kb_id): - return False, None - - knowledgebase_service_mod.KnowledgebaseService = _StubKnowledgebaseService - monkeypatch.setitem(sys.modules, "api.db.services.knowledgebase_service", knowledgebase_service_mod) - services_pkg.knowledgebase_service = knowledgebase_service_mod + file2doc_mod = ModuleType("api.db.services.file2document_service") + file2doc_mod.File2DocumentService = SimpleNamespace( + get_by_file_id=lambda _file_id: [], + delete_by_file_id=lambda _file_id: None, + ) + monkeypatch.setitem(sys.modules, "api.db.services.file2document_service", file2doc_mod) + services_pkg.file2document_service = file2doc_mod file_service_mod = ModuleType("api.db.services.file_service") - - class _StubFileService: - @staticmethod - def get_root_folder(_tenant_id): - return {"id": "root"} - - @staticmethod - def get_by_id(_file_id): - return True, SimpleNamespace(id=_file_id, parent_id="root", location="file", tenant_id="tenant1") - - @staticmethod - def get_id_list_by_id(_pf_id, _file_obj_names, _idx, ids): - return ids - - @staticmethod - def create_folder(_file, parent_id, _file_obj_names, _len_id_list): - return SimpleNamespace(id=parent_id) - - @staticmethod - def query(**_kwargs): - return [] - - @staticmethod - def insert(data): - return SimpleNamespace(to_json=lambda: data) - - @staticmethod - def is_parent_folder_exist(_pf_id): - return True - - @staticmethod - def get_by_pf_id(*_args, **_kwargs): - return [], 0 - - @staticmethod - def get_parent_folder(_file_id): - return SimpleNamespace(to_json=lambda: {"id": "root"}) - - @staticmethod - def get_all_parent_folders(_file_id): - return [] - - @staticmethod - def get_all_innermost_file_ids(_file_id, _acc): - return [] - - @staticmethod - def delete_folder_by_pf_id(*_args, **_kwargs): - return None - - @staticmethod - def delete(_file): - return True - - @staticmethod - def update_by_id(*_args, **_kwargs): - return True - - @staticmethod - def get_by_ids(_file_ids): - return [] - - @staticmethod - def move_file(*_args, **_kwargs): - return None - - @staticmethod - def init_knowledgebase_docs(*_args, **_kwargs): - return None - - @staticmethod - def get_parser(_file_type, _file_name, parser_id): - return parser_id - - file_service_mod.FileService = _StubFileService + file_service_mod.FileService = SimpleNamespace( + get_root_folder=lambda _tenant_id: {"id": "root"}, + get_by_id=lambda file_id: (True, _DummyFile(file_id, _FileType.DOC.value)), + get_id_list_by_id=lambda _pf_id, _names, _idx, ids: ids, + create_folder=lambda _file, parent_id, _names, _len_id: SimpleNamespace(id=parent_id, name=str(parent_id)), + query=lambda **_kwargs: [], + insert=lambda data: SimpleNamespace(to_json=lambda: data, **data), + is_parent_folder_exist=lambda _pf_id: True, + get_by_pf_id=lambda *_args, **_kwargs: ([], 0), + get_parent_folder=lambda _file_id: SimpleNamespace(to_json=lambda: {"id": "root"}), + get_all_parent_folders=lambda _file_id: [], + list_all_files_by_parent_id=lambda _parent_id: 
[], + delete=lambda _file: True, + delete_by_id=lambda _file_id: True, + update_by_id=lambda *_args, **_kwargs: True, + get_by_ids=lambda file_ids: [_DummyFile(file_id, _FileType.DOC.value) for file_id in file_ids], + ) monkeypatch.setitem(sys.modules, "api.db.services.file_service", file_service_mod) services_pkg.file_service = file_service_mod - api_utils_mod = ModuleType("api.utils.api_utils") - - def get_json_result(data=None, message="", code=0): - return {"code": code, "data": data, "message": message} - - async def get_request_json(): - return {} - - def server_error_response(err): - return {"code": 100, "data": None, "message": str(err)} - - def token_required(func): - @functools.wraps(func) - async def _wrapper(*args, **kwargs): - return await func(*args, **kwargs) - - return _wrapper - - api_utils_mod.get_json_result = get_json_result - api_utils_mod.get_request_json = get_request_json - api_utils_mod.server_error_response = server_error_response - api_utils_mod.token_required = token_required - monkeypatch.setitem(sys.modules, "api.utils.api_utils", api_utils_mod) - file_utils_mod = ModuleType("api.utils.file_utils") file_utils_mod.filename_type = lambda _filename: _FileType.DOC.value monkeypatch.setitem(sys.modules, "api.utils.file_utils", file_utils_mod) - web_utils_mod = ModuleType("api.utils.web_utils") - web_utils_mod.CONTENT_TYPE_MAP = {"txt": "text/plain", "json": "application/json"} - web_utils_mod.apply_safe_file_response_headers = lambda response, *_args, **_kwargs: response - monkeypatch.setitem(sys.modules, "api.utils.web_utils", web_utils_mod) - - common_pkg = ModuleType("common") - common_pkg.__path__ = [str(repo_root / "common")] - common_pkg.settings = SimpleNamespace( + common_root_mod = ModuleType("common") + common_root_mod.__path__ = [str(repo_root / "common")] + common_root_mod.settings = SimpleNamespace( STORAGE_IMPL=SimpleNamespace( obj_exist=lambda *_args, **_kwargs: False, put=lambda *_args, **_kwargs: None, - get=lambda *_args, **_kwargs: b"", rm=lambda *_args, **_kwargs: None, + move=lambda *_args, **_kwargs: None, ) ) - monkeypatch.setitem(sys.modules, "common", common_pkg) + monkeypatch.setitem(sys.modules, "common", common_root_mod) + + constants_mod = ModuleType("common.constants") + + class _FileSource: + KNOWLEDGEBASE = "knowledgebase" + + constants_mod.FileSource = _FileSource + monkeypatch.setitem(sys.modules, "common.constants", constants_mod) misc_utils_mod = ModuleType("common.misc_utils") - misc_utils_mod.get_uuid = lambda: "uuid" + misc_utils_mod.get_uuid = lambda: "uuid-1" async def thread_pool_exec(func, *args, **kwargs): return func(*args, **kwargs) @@ -260,796 +180,172 @@ def _load_files_app(monkeypatch): misc_utils_mod.thread_pool_exec = thread_pool_exec monkeypatch.setitem(sys.modules, "common.misc_utils", misc_utils_mod) - constants_mod = ModuleType("common.constants") - - class _RetCode: - SUCCESS = 0 - BAD_REQUEST = 400 - NOT_FOUND = 404 - CONFLICT = 409 - SERVER_ERROR = 500 - - constants_mod.RetCode = _RetCode - monkeypatch.setitem(sys.modules, "common.constants", constants_mod) - - module_path = repo_root / "api" / "apps" / "sdk" / "files.py" - spec = importlib.util.spec_from_file_location("api.apps.sdk.files", module_path) + module_path = repo_root / "api" / "apps" / "services" / "file_api_service.py" + spec = importlib.util.spec_from_file_location("api.apps.services.file_api_service", module_path) module = importlib.util.module_from_spec(spec) - module.manager = _DummyManager() - monkeypatch.setitem(sys.modules, 
"api.apps.sdk.files", module) + monkeypatch.setitem(sys.modules, "api.apps.services.file_api_service", module) spec.loader.exec_module(module) return module -def _run(coro): - return asyncio.run(coro) +@pytest.mark.p2 +def test_upload_file_requires_existing_folder(monkeypatch): + module = _load_file_api_service(monkeypatch) + monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (False, None)) - -class _DummyFile: - def __init__(self, file_id, file_type, name="doc.txt", tenant_id="tenant1", parent_id="parent1", location=None): - self.id = file_id - self.type = file_type - self.name = name - self.location = location or name - self.size = 1 - self.tenant_id = tenant_id - self.parent_id = parent_id - - def to_json(self): - return {"id": self.id, "name": self.name, "type": self.type} - - -class _FalsyFile(_DummyFile): - def __bool__(self): - return False - - -class _AwaitableValue: - def __init__(self, value): - self._value = value - - def __await__(self): - async def _co(): - return self._value - - return _co().__await__() - - -class _Args(dict): - def get(self, key, default=None, type=None): - value = super().get(key, default) - if value is None or type is None: - return value - try: - return type(value) - except (TypeError, ValueError): - return default - - -class _DummyRequest: - def __init__(self, *, args=None, form=None, files=None): - self.args = _Args(args or {}) - self.form = _AwaitableValue(form or {}) - self.files = _AwaitableValue(files if files is not None else _DummyFiles()) - - -class _DummyUploadFile: - def __init__(self, filename, blob=b"file-bytes"): - self.filename = filename - self._blob = blob - - def read(self): - return self._blob - - -class _DummyFiles(dict): - def __init__(self, file_objs=None): - super().__init__() - self._file_objs = file_objs or [] - if file_objs is not None: - self["file"] = self._file_objs - - def getlist(self, key): - if key == "file": - return list(self._file_objs) - return [] - - -class _DummyResponse: - def __init__(self, data): - self.data = data - self.headers = {} + ok, message = _run(module.upload_file("tenant1", "pf1", [_DummyUploadFile("a.txt")])) + assert ok is False + assert message == "Can't find this folder!" 
@pytest.mark.p2 -class TestFileMoveUnit: - def test_move_success_and_invalid_parent(self, monkeypatch): - module = _load_files_app(monkeypatch) - file_id = "file1" - parent_id = "parent1" +def test_upload_file_respects_user_limit(monkeypatch): + module = _load_file_api_service(monkeypatch) + monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (True, SimpleNamespace(id="pf1", name="pf1"))) + monkeypatch.setattr(module.DocumentService, "get_doc_count", lambda _uid: 1) + monkeypatch.setenv("MAX_FILE_NUM_PER_USER", "1") - async def fake_request_json(): - return {"src_file_ids": [file_id], "dest_file_id": parent_id} - - monkeypatch.setattr(module, "get_request_json", fake_request_json) - monkeypatch.setattr(module.FileService, "get_by_ids", lambda _ids: [_DummyFile(file_id, module.FileType.DOC.value)]) - monkeypatch.setattr(module.FileService, "get_by_id", lambda _pid: (True, _DummyFile(parent_id, module.FileType.FOLDER.value))) - monkeypatch.setattr(module.FileService, "move_file", lambda *_args, **_kwargs: None) - - res = _run(module.move.__wrapped__("tenant1")) - assert res["code"] == 0 - assert res["data"] is True - - monkeypatch.setattr(module.FileService, "get_by_id", lambda _pid: (False, None)) - res = _run(module.move.__wrapped__("tenant1")) - assert res["code"] == 404 - assert res["message"] == "Parent Folder not found!" - - def test_move_missing_payload(self, monkeypatch): - module = _load_files_app(monkeypatch) - - async def fake_request_json(): - return {} - - monkeypatch.setattr(module, "get_request_json", fake_request_json) - res = _run(module.move.__wrapped__("tenant1")) - assert res["code"] == 100 - - def test_move_missing_source_branch(self, monkeypatch): - module = _load_files_app(monkeypatch) - - async def fake_request_json(): - return {"src_file_ids": ["file1"], "dest_file_id": "parent1"} - - monkeypatch.setattr(module, "get_request_json", fake_request_json) - monkeypatch.setattr(module.FileService, "get_by_ids", lambda _ids: [_FalsyFile("file1", module.FileType.DOC.value)]) - res = _run(module.move.__wrapped__("tenant1")) - assert res["code"] == 404 - assert res["message"] == "File or Folder not found!" + ok, message = _run(module.upload_file("tenant1", "pf1", [_DummyUploadFile("a.txt")])) + assert ok is False + assert message == "Exceed the maximum file number of a free user!" 
+ monkeypatch.delenv("MAX_FILE_NUM_PER_USER", raising=False) @pytest.mark.p2 -class TestFileConvertUnit: - def test_convert_success_and_delete(self, monkeypatch): - module = _load_files_app(monkeypatch) - file_id = "file1" - kb_id = "kb1" +def test_upload_file_success_uses_new_service_layer(monkeypatch): + module = _load_file_api_service(monkeypatch) + storage_puts = [] - async def fake_request_json(): - return {"kb_ids": [kb_id], "file_ids": [file_id]} + monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (True, SimpleNamespace(id="pf1", name="pf1"))) + monkeypatch.setattr(module.FileService, "get_id_list_by_id", lambda *_args, **_kwargs: ["pf1"]) + monkeypatch.setattr( + module.FileService, + "create_folder", + lambda _file, parent_id, _names, _len_id: SimpleNamespace(id=parent_id), + ) + monkeypatch.setattr(module.settings, "STORAGE_IMPL", SimpleNamespace( + obj_exist=lambda *_args, **_kwargs: False, + put=lambda bucket, location, blob: storage_puts.append((bucket, location, blob)), + rm=lambda *_args, **_kwargs: None, + move=lambda *_args, **_kwargs: None, + )) - monkeypatch.setattr(module, "get_request_json", fake_request_json) - monkeypatch.setattr(module.FileService, "get_by_ids", lambda _ids: [_DummyFile(file_id, module.FileType.DOC.value)]) - - class _Inform: - document_id = "doc1" - - monkeypatch.setattr(module.File2DocumentService, "get_by_file_id", lambda _id: [_Inform()]) - monkeypatch.setattr(module.DocumentService, "get_by_id", lambda _doc_id: (True, _DummyFile("doc1", module.FileType.DOC.value))) - monkeypatch.setattr(module.DocumentService, "get_tenant_id", lambda _doc_id: "tenant1") - monkeypatch.setattr(module.DocumentService, "remove_document", lambda *_args, **_kwargs: True) - monkeypatch.setattr(module.File2DocumentService, "delete_by_file_id", lambda *_args, **_kwargs: None) - - class _Kb: - id = kb_id - parser_id = "parser" - parser_config = {} - - monkeypatch.setattr(module.KnowledgebaseService, "get_by_id", lambda _kb_id: (True, _Kb())) - monkeypatch.setattr(module.FileService, "get_by_id", lambda _id: (True, _DummyFile(file_id, module.FileType.DOC.value))) - - class _Doc: - def __init__(self, doc_id): - self.id = doc_id - - monkeypatch.setattr(module.DocumentService, "insert", lambda _doc: _Doc("newdoc")) - - class _File2Doc: - def to_json(self): - return {"file_id": file_id, "document_id": "newdoc"} - - monkeypatch.setattr(module.File2DocumentService, "insert", lambda _data: _File2Doc()) - - res = _run(module.convert.__wrapped__("tenant1")) - assert res["code"] == 0 - assert res["data"] - - def test_convert_folder(self, monkeypatch): - module = _load_files_app(monkeypatch) - kb_id = "kb1" - - async def fake_request_json(): - return {"kb_ids": [kb_id], "file_ids": ["folder1"]} - - monkeypatch.setattr(module, "get_request_json", fake_request_json) - monkeypatch.setattr(module.FileService, "get_by_ids", lambda _ids: [_DummyFile("folder1", module.FileType.FOLDER.value, name="folder")]) - monkeypatch.setattr(module.FileService, "get_all_innermost_file_ids", lambda *_args, **_kwargs: ["inner1"]) - monkeypatch.setattr(module.File2DocumentService, "get_by_file_id", lambda _id: []) - monkeypatch.setattr(module.File2DocumentService, "delete_by_file_id", lambda *_args, **_kwargs: None) - - class _Kb: - id = kb_id - parser_id = "parser" - parser_config = {} - - monkeypatch.setattr(module.KnowledgebaseService, "get_by_id", lambda _kb_id: (True, _Kb())) - monkeypatch.setattr(module.FileService, "get_by_id", lambda _id: (True, _DummyFile("inner1", 
module.FileType.DOC.value))) - monkeypatch.setattr(module.DocumentService, "insert", lambda _doc: _DummyFile("doc1", module.FileType.DOC.value)) - monkeypatch.setattr(module.File2DocumentService, "insert", lambda _data: SimpleNamespace(to_json=lambda: {"file_id": "inner1"})) - - res = _run(module.convert.__wrapped__("tenant1")) - assert res["code"] == 0 - assert res["data"] - - def test_convert_invalid_file_id(self, monkeypatch): - module = _load_files_app(monkeypatch) - - async def fake_request_json(): - return {"kb_ids": ["kb1"], "file_ids": ["missing"]} - - monkeypatch.setattr(module, "get_request_json", fake_request_json) - monkeypatch.setattr(module.FileService, "get_by_ids", lambda _ids: [_FalsyFile("missing", module.FileType.DOC.value)]) - res = _run(module.convert.__wrapped__("tenant1")) - assert res["code"] == 404 - assert res["message"] == "File not found!" - - def test_convert_invalid_kb_id(self, monkeypatch): - module = _load_files_app(monkeypatch) - - async def fake_request_json(): - return {"kb_ids": ["missing"], "file_ids": ["file1"]} - - monkeypatch.setattr(module, "get_request_json", fake_request_json) - monkeypatch.setattr(module.FileService, "get_by_ids", lambda _ids: [_DummyFile("file1", module.FileType.DOC.value)]) - monkeypatch.setattr(module.File2DocumentService, "get_by_file_id", lambda _id: []) - monkeypatch.setattr(module.File2DocumentService, "delete_by_file_id", lambda *_args, **_kwargs: None) - monkeypatch.setattr(module.KnowledgebaseService, "get_by_id", lambda _kb_id: (False, None)) - res = _run(module.convert.__wrapped__("tenant1")) - assert res["code"] == 404 - assert res["message"] == "Can't find this dataset!" - - def test_convert_file_missing_second_lookup(self, monkeypatch): - module = _load_files_app(monkeypatch) - - async def fake_request_json(): - return {"kb_ids": ["kb1"], "file_ids": ["file1"]} - - monkeypatch.setattr(module, "get_request_json", fake_request_json) - monkeypatch.setattr(module.FileService, "get_by_ids", lambda _ids: [_DummyFile("file1", module.FileType.DOC.value)]) - monkeypatch.setattr(module.File2DocumentService, "get_by_file_id", lambda _id: []) - monkeypatch.setattr(module.File2DocumentService, "delete_by_file_id", lambda *_args, **_kwargs: None) - - class _Kb: - id = "kb1" - parser_id = "parser" - parser_config = {} - - monkeypatch.setattr(module.KnowledgebaseService, "get_by_id", lambda _kb_id: (True, _Kb())) - monkeypatch.setattr(module.FileService, "get_by_id", lambda _id: (False, None)) - res = _run(module.convert.__wrapped__("tenant1")) - assert res["code"] == 404 - assert res["message"] == "Can't find this file!" - - def test_convert_missing_payload(self, monkeypatch): - module = _load_files_app(monkeypatch) - - async def fake_request_json(): - return {} - - monkeypatch.setattr(module, "get_request_json", fake_request_json) - with pytest.raises(KeyError): - _run(module.convert.__wrapped__("tenant1")) + ok, data = _run(module.upload_file("tenant1", "pf1", [_DummyUploadFile("a.txt", b"hello")])) + assert ok is True + assert data[0]["name"] == "a.txt" + assert storage_puts == [("pf1", "a.txt", b"hello")] @pytest.mark.p2 -class TestFileRouteBranchUnit: - def test_upload_branch_matrix(self, monkeypatch): - module = _load_files_app(monkeypatch) - monkeypatch.setattr(module.FileService, "get_root_folder", lambda _tenant_id: {"id": "root"}) - - # Missing file part. 
- monkeypatch.setattr(module, "request", _DummyRequest(form={}, files=_DummyFiles())) - res = _run(module.upload.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.BAD_REQUEST - assert res["message"] == "No file part!" - - # Empty filename. - monkeypatch.setattr( - module, - "request", - _DummyRequest(form={"parent_id": "pf1"}, files=_DummyFiles([_DummyUploadFile("")])), - ) - res = _run(module.upload.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.BAD_REQUEST - assert res["message"] == "No selected file!" - - # Parent folder missing. - monkeypatch.setattr( - module, - "request", - _DummyRequest(form={"parent_id": "pf1"}, files=_DummyFiles([_DummyUploadFile("a.txt")])), - ) - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (False, None)) - res = _run(module.upload.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.NOT_FOUND - assert res["message"] == "Can't find this folder!" - - # Missing folder in branch: file_len != len_id_list. - monkeypatch.setattr( - module, - "request", - _DummyRequest(form={"parent_id": "pf1"}, files=_DummyFiles([_DummyUploadFile("dir/a.txt")])), - ) - monkeypatch.setattr(module.FileService, "get_id_list_by_id", lambda *_args, **_kwargs: ["pf1", "missing-child"]) - - def get_by_id_missing_child(file_id): - if file_id == "missing-child": - return False, None - return True, SimpleNamespace(id="pf1") - - monkeypatch.setattr(module.FileService, "get_by_id", get_by_id_missing_child) - res = _run(module.upload.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.NOT_FOUND - assert res["message"] == "Folder not found!" - - # Missing folder in branch: file_len == len_id_list. - monkeypatch.setattr( - module, - "request", - _DummyRequest(form={"parent_id": "pf1"}, files=_DummyFiles([_DummyUploadFile("b.txt")])), - ) - monkeypatch.setattr(module.FileService, "get_id_list_by_id", lambda *_args, **_kwargs: ["pf1", "leaf"]) - pf1_calls = {"count": 0} - - def get_by_id_missing_parent_in_else(file_id): - if file_id == "pf1": - pf1_calls["count"] += 1 - if pf1_calls["count"] == 1: - return True, SimpleNamespace(id="pf1") - return False, None - return True, SimpleNamespace(id=file_id) - - monkeypatch.setattr(module.FileService, "get_by_id", get_by_id_missing_parent_in_else) - res = _run(module.upload.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.NOT_FOUND - assert res["message"] == "Folder not found!" 
- - class _Storage: - def __init__(self): - self.obj_calls = 0 - self.put_calls = [] - - def obj_exist(self, _bucket, _location): - self.obj_calls += 1 - return self.obj_calls == 1 - - def put(self, bucket, location, blob): - self.put_calls.append((bucket, location, blob)) - - storage = _Storage() - monkeypatch.setattr(module.settings, "STORAGE_IMPL", storage) - monkeypatch.setattr( - module, - "request", - _DummyRequest( - form={"parent_id": "pf1"}, - files=_DummyFiles([_DummyUploadFile("dir/a.txt", b"a"), _DummyUploadFile("b.txt", b"b")]), - ), - ) - - def fake_get_by_id(file_id): - if file_id == "mid-id": - return True, SimpleNamespace(id="mid-id") - return True, SimpleNamespace(id="pf1") - - def fake_get_id_list_by_id(_pf_id, file_obj_names, _idx, _ids): - if file_obj_names[-1] == "a.txt": - return ["pf1", "mid-id"] - return ["pf1", "leaf-id"] - - def fake_create_folder(_file, parent_id, _file_obj_names, _len_id_list): - return SimpleNamespace(id=f"{parent_id}-folder") - - monkeypatch.setattr(module.FileService, "get_by_id", fake_get_by_id) - monkeypatch.setattr(module.FileService, "get_id_list_by_id", fake_get_id_list_by_id) - monkeypatch.setattr(module.FileService, "create_folder", fake_create_folder) - monkeypatch.setattr(module, "filename_type", lambda _name: module.FileType.DOC.value) - monkeypatch.setattr(module, "duplicate_name", lambda _query, **kwargs: kwargs["name"]) - monkeypatch.setattr(module, "get_uuid", lambda: "file-id") - monkeypatch.setattr(module.FileService, "query", lambda **_kwargs: []) - monkeypatch.setattr(module.FileService, "insert", lambda data: SimpleNamespace(to_json=lambda: {"id": data["id"]})) - res = _run(module.upload.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.SUCCESS - assert len(res["data"]) == 2 - assert storage.put_calls - - # Exception path. - monkeypatch.setattr( - module, - "request", - _DummyRequest(form={"parent_id": "pf1"}, files=_DummyFiles([_DummyUploadFile("boom.txt")])), - ) - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (_ for _ in ()).throw(RuntimeError("upload boom"))) - monkeypatch.setattr(module, "server_error_response", lambda e: {"code": 500, "message": str(e)}) - res = _run(module.upload.__wrapped__("tenant1")) - assert res["code"] == 500 - assert "upload boom" in res["message"] - - def test_create_branch_matrix(self, monkeypatch): - module = _load_files_app(monkeypatch) - state = {"req": {"name": "file1"}} - - async def fake_request_json(): - return state["req"] - - monkeypatch.setattr(module, "get_request_json", fake_request_json) - monkeypatch.setattr(module.FileService, "get_root_folder", lambda _tenant_id: {"id": "root"}) - monkeypatch.setattr(module.FileService, "is_parent_folder_exist", lambda _pf_id: False) - res = _run(module.create.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.BAD_REQUEST - assert "Parent Folder Doesn't Exist!" 
in res["message"] - - state["req"] = {"name": "dup", "parent_id": "pf1"} - monkeypatch.setattr(module.FileService, "is_parent_folder_exist", lambda _pf_id: True) - monkeypatch.setattr(module.FileService, "query", lambda **_kwargs: [object()]) - res = _run(module.create.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.CONFLICT - assert "Duplicated folder name" in res["message"] - - inserted = {} - - def fake_insert(data): - inserted["payload"] = data - return SimpleNamespace(to_json=lambda: data) - - monkeypatch.setattr(module.FileService, "query", lambda **_kwargs: []) - monkeypatch.setattr(module, "get_uuid", lambda: "uuid-folder") - monkeypatch.setattr(module.FileService, "insert", fake_insert) - - state["req"] = {"name": "folder", "parent_id": "pf1", "type": module.FileType.FOLDER.value} - res = _run(module.create.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.SUCCESS - assert inserted["payload"]["type"] == module.FileType.FOLDER.value - - state["req"] = {"name": "virtual", "parent_id": "pf1", "type": "UNKNOWN"} - res = _run(module.create.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.SUCCESS - assert inserted["payload"]["type"] == module.FileType.VIRTUAL.value - - monkeypatch.setattr(module.FileService, "is_parent_folder_exist", lambda _pf_id: (_ for _ in ()).throw(RuntimeError("create boom"))) - monkeypatch.setattr(module, "server_error_response", lambda e: {"code": 500, "message": str(e)}) - res = _run(module.create.__wrapped__("tenant1")) - assert res["code"] == 500 - assert "create boom" in res["message"] - - def test_list_files_branch_matrix(self, monkeypatch): - module = _load_files_app(monkeypatch) - calls = {"init": 0} - - monkeypatch.setattr(module.FileService, "get_root_folder", lambda _tenant_id: {"id": "root"}) - monkeypatch.setattr( - module.FileService, - "init_knowledgebase_docs", - lambda _pf_id, _tenant_id: calls.__setitem__("init", calls["init"] + 1), - ) - monkeypatch.setattr(module, "request", _DummyRequest(args={})) - monkeypatch.setattr(module.FileService, "get_by_id", lambda _pf_id: (False, None)) - res = _run(module.list_files.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.NOT_FOUND - assert res["message"] == "Folder not found!" - assert calls["init"] == 1 - - monkeypatch.setattr( - module, - "request", - _DummyRequest(args={"parent_id": "p1", "keywords": "k", "page": "2", "page_size": "10", "orderby": "name", "desc": "False"}), - ) - monkeypatch.setattr(module.FileService, "get_by_id", lambda _pf_id: (True, SimpleNamespace(id="p1"))) - monkeypatch.setattr(module.FileService, "get_by_pf_id", lambda *_args, **_kwargs: ([{"id": "f1"}], 1)) - monkeypatch.setattr(module.FileService, "get_parent_folder", lambda _pf_id: None) - res = _run(module.list_files.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.NOT_FOUND - assert res["message"] == "File not found!" 
- - monkeypatch.setattr(module.FileService, "get_parent_folder", lambda _pf_id: SimpleNamespace(to_json=lambda: {"id": "p0"})) - res = _run(module.list_files.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.SUCCESS - assert res["data"]["total"] == 1 - assert res["data"]["parent_folder"]["id"] == "p0" - - monkeypatch.setattr(module.FileService, "get_by_id", lambda _pf_id: (_ for _ in ()).throw(RuntimeError("list boom"))) - monkeypatch.setattr(module, "server_error_response", lambda e: {"code": 500, "message": str(e)}) - res = _run(module.list_files.__wrapped__("tenant1")) - assert res["code"] == 500 - assert "list boom" in res["message"] - - def test_get_root_folder_branch_matrix(self, monkeypatch): - module = _load_files_app(monkeypatch) - monkeypatch.setattr(module.FileService, "get_root_folder", lambda _tenant_id: {"id": "root"}) - res = _run(module.get_root_folder.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.SUCCESS - assert res["data"]["root_folder"]["id"] == "root" - - monkeypatch.setattr(module.FileService, "get_root_folder", lambda _tenant_id: (_ for _ in ()).throw(RuntimeError("root boom"))) - monkeypatch.setattr(module, "server_error_response", lambda e: {"code": 500, "message": str(e)}) - res = _run(module.get_root_folder.__wrapped__("tenant1")) - assert res["code"] == 500 - assert "root boom" in res["message"] - - def test_get_parent_folder_branch_matrix(self, monkeypatch): - module = _load_files_app(monkeypatch) - monkeypatch.setattr(module, "request", _DummyRequest(args={"file_id": "missing"})) - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (False, None)) - res = _run(module.get_parent_folder.__wrapped__()) - assert res["code"] == module.RetCode.NOT_FOUND - assert res["message"] == "Folder not found!" - - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (True, SimpleNamespace(id="f1"))) - monkeypatch.setattr(module.FileService, "get_parent_folder", lambda _file_id: SimpleNamespace(to_json=lambda: {"id": "p1"})) - res = _run(module.get_parent_folder.__wrapped__()) - assert res["code"] == module.RetCode.SUCCESS - assert res["data"]["parent_folder"]["id"] == "p1" - - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (_ for _ in ()).throw(RuntimeError("parent boom"))) - monkeypatch.setattr(module, "server_error_response", lambda e: {"code": 500, "message": str(e)}) - res = _run(module.get_parent_folder.__wrapped__()) - assert res["code"] == 500 - assert "parent boom" in res["message"] - - def test_get_all_parent_folders_branch_matrix(self, monkeypatch): - module = _load_files_app(monkeypatch) - monkeypatch.setattr(module, "request", _DummyRequest(args={"file_id": "missing"})) - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (False, None)) - res = _run(module.get_all_parent_folders.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.NOT_FOUND - assert res["message"] == "Folder not found!" 
- - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (True, SimpleNamespace(id="f1"))) - monkeypatch.setattr( - module.FileService, - "get_all_parent_folders", - lambda _file_id: [SimpleNamespace(to_json=lambda: {"id": "p1"})], - ) - res = _run(module.get_all_parent_folders.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.SUCCESS - assert res["data"]["parent_folders"] == [{"id": "p1"}] - - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (_ for _ in ()).throw(RuntimeError("all parent boom"))) - monkeypatch.setattr(module, "server_error_response", lambda e: {"code": 500, "message": str(e)}) - res = _run(module.get_all_parent_folders.__wrapped__("tenant1")) - assert res["code"] == 500 - assert "all parent boom" in res["message"] - - def test_rm_branch_matrix(self, monkeypatch): - module = _load_files_app(monkeypatch) - req_state = {"file_ids": ["f1"]} - - async def fake_request_json(): - return req_state - - monkeypatch.setattr(module, "get_request_json", fake_request_json) - monkeypatch.setattr(module.settings, "STORAGE_IMPL", SimpleNamespace(rm=lambda *_args, **_kwargs: None)) - - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (False, None)) - res = _run(module.rm.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.NOT_FOUND - assert res["message"] == "File or Folder not found!" - - monkeypatch.setattr( - module.FileService, - "get_by_id", - lambda _file_id: (True, _DummyFile(_file_id, module.FileType.DOC.value, tenant_id=None)), - ) - res = _run(module.rm.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.NOT_FOUND - assert res["message"] == "Tenant not found!" - - req_state["file_ids"] = ["folder1"] - - def folder_missing_inner(file_id): - if file_id == "folder1": - return True, _DummyFile("folder1", module.FileType.FOLDER.value, parent_id="pf1") - if file_id == "inner1": - return False, None - return False, None - - monkeypatch.setattr(module.FileService, "get_by_id", folder_missing_inner) - monkeypatch.setattr(module.FileService, "get_all_innermost_file_ids", lambda _file_id, _acc: ["inner1"]) - res = _run(module.rm.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.NOT_FOUND - assert res["message"] == "File not found!" - - req_state["file_ids"] = ["doc1"] - monkeypatch.setattr( - module.FileService, - "get_by_id", - lambda _file_id: (True, _DummyFile("doc1", module.FileType.DOC.value, parent_id="pf1")), - ) - monkeypatch.setattr(module.FileService, "delete", lambda _file: False) - res = _run(module.rm.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.SERVER_ERROR - assert "Database error (File removal)!" in res["message"] - - class _Inform: - document_id = "doc1" - - monkeypatch.setattr(module.FileService, "delete", lambda _file: True) - monkeypatch.setattr(module.File2DocumentService, "get_by_file_id", lambda _file_id: [_Inform()]) - monkeypatch.setattr(module.DocumentService, "get_by_id", lambda _doc_id: (False, None)) - res = _run(module.rm.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.NOT_FOUND - assert res["message"] == "Document not found!" - - monkeypatch.setattr(module.DocumentService, "get_by_id", lambda _doc_id: (True, SimpleNamespace(id=_doc_id))) - monkeypatch.setattr(module.DocumentService, "get_tenant_id", lambda _doc_id: None) - res = _run(module.rm.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.NOT_FOUND - assert res["message"] == "Tenant not found!" 
- - monkeypatch.setattr(module.DocumentService, "get_tenant_id", lambda _doc_id: "tenant1") - monkeypatch.setattr(module.DocumentService, "remove_document", lambda *_args, **_kwargs: False) - res = _run(module.rm.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.SERVER_ERROR - assert "Database error (Document removal)!" in res["message"] - - req_state["file_ids"] = ["folder-ok"] - deleted = {"folder": 0, "link": 0} - - def folder_success(file_id): - if file_id == "folder-ok": - return True, _DummyFile("folder-ok", module.FileType.FOLDER.value, parent_id="pf1") - if file_id == "inner-ok": - return True, _DummyFile("inner-ok", module.FileType.DOC.value, parent_id="pf1", location="inner.bin") - return False, None - - monkeypatch.setattr(module.FileService, "get_by_id", folder_success) - monkeypatch.setattr(module.FileService, "get_all_innermost_file_ids", lambda _file_id, _acc: ["inner-ok"]) - monkeypatch.setattr( - module.FileService, - "delete_folder_by_pf_id", - lambda _tenant_id, _file_id: deleted.__setitem__("folder", deleted["folder"] + 1), - ) - monkeypatch.setattr(module.File2DocumentService, "get_by_file_id", lambda _file_id: []) - monkeypatch.setattr( - module.File2DocumentService, - "delete_by_file_id", - lambda _file_id: deleted.__setitem__("link", deleted["link"] + 1), - ) - res = _run(module.rm.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.SUCCESS - assert res["data"] is True - assert deleted == {"folder": 1, "link": 1} - - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (_ for _ in ()).throw(RuntimeError("rm boom"))) - monkeypatch.setattr(module, "server_error_response", lambda e: {"code": 500, "message": str(e)}) - req_state["file_ids"] = ["boom"] - res = _run(module.rm.__wrapped__("tenant1")) - assert res["code"] == 500 - assert "rm boom" in res["message"] - - def test_rename_branch_matrix(self, monkeypatch): - module = _load_files_app(monkeypatch) - req_state = {"file_id": "f1", "name": "new.txt"} - - async def fake_request_json(): - return req_state - - monkeypatch.setattr(module, "get_request_json", fake_request_json) - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (False, None)) - res = _run(module.rename.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.NOT_FOUND - assert res["message"] == "File not found!" - - monkeypatch.setattr( - module.FileService, - "get_by_id", - lambda _file_id: (True, _DummyFile("f1", module.FileType.DOC.value, name="origin.txt")), - ) - req_state["name"] = "new.pdf" - res = _run(module.rename.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.BAD_REQUEST - assert "extension of file can't be changed" in res["message"] - - req_state["name"] = "new.txt" - monkeypatch.setattr(module.FileService, "query", lambda **_kwargs: [SimpleNamespace(name="new.txt")]) - res = _run(module.rename.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.CONFLICT - assert "Duplicated file name in the same folder." in res["message"] - - monkeypatch.setattr(module.FileService, "query", lambda **_kwargs: []) - monkeypatch.setattr(module.FileService, "update_by_id", lambda *_args, **_kwargs: False) - res = _run(module.rename.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.SERVER_ERROR - assert "Database error (File rename)!" 
in res["message"] - - monkeypatch.setattr(module.FileService, "update_by_id", lambda *_args, **_kwargs: True) - monkeypatch.setattr(module.File2DocumentService, "get_by_file_id", lambda _file_id: [SimpleNamespace(document_id="doc1")]) - monkeypatch.setattr(module.DocumentService, "update_by_id", lambda *_args, **_kwargs: False) - res = _run(module.rename.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.SERVER_ERROR - assert "Database error (Document rename)!" in res["message"] - - monkeypatch.setattr(module.File2DocumentService, "get_by_file_id", lambda _file_id: []) - res = _run(module.rename.__wrapped__("tenant1")) - assert res["code"] == module.RetCode.SUCCESS - assert res["data"] is True - - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (_ for _ in ()).throw(RuntimeError("rename boom"))) - monkeypatch.setattr(module, "server_error_response", lambda e: {"code": 500, "message": str(e)}) - res = _run(module.rename.__wrapped__("tenant1")) - assert res["code"] == 500 - assert "rename boom" in res["message"] - - def test_get_file_branch_matrix(self, monkeypatch): - module = _load_files_app(monkeypatch) - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (False, None)) - res = _run(module.get.__wrapped__("tenant1", "missing")) - assert res["code"] == module.RetCode.NOT_FOUND - assert res["message"] == "Document not found!" - - class _Storage: - def __init__(self): - self.calls = 0 - - def get(self, _bucket, _location): - self.calls += 1 - if self.calls == 1: - return None - return b"blob-data" - - storage = _Storage() - monkeypatch.setattr(module.settings, "STORAGE_IMPL", storage) - monkeypatch.setattr( - module.FileService, - "get_by_id", - lambda _file_id: (True, _DummyFile("f1", module.FileType.VISUAL.value, name="image.abc", parent_id="pf1", location="loc1")), - ) - monkeypatch.setattr(module.File2DocumentService, "get_storage_address", lambda **_kwargs: ("pf2", "loc2")) - - async def fake_make_response(data): - return _DummyResponse(data) - - monkeypatch.setattr(module, "make_response", fake_make_response) - monkeypatch.setattr( - module, - "apply_safe_file_response_headers", - lambda response, content_type, extension: response.headers.update( - {"content_type": content_type, "extension": extension} - ), - ) - res = _run(module.get.__wrapped__("tenant1", "f1")) - assert isinstance(res, _DummyResponse) - assert res.data == b"blob-data" - assert res.headers["extension"] == "abc" - assert res.headers["content_type"] == "image/abc" - - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (_ for _ in ()).throw(RuntimeError("get boom"))) - monkeypatch.setattr(module, "server_error_response", lambda e: {"code": 500, "message": str(e)}) - res = _run(module.get.__wrapped__("tenant1", "f1")) - assert res["code"] == 500 - assert "get boom" in res["message"] - - def test_download_attachment_branch_matrix(self, monkeypatch): - module = _load_files_app(monkeypatch) - monkeypatch.setattr(module, "request", _DummyRequest(args={"ext": "abc"})) - - async def fake_thread_pool_exec(_fn, _tenant_id, _attachment_id): - return b"attachment" - - async def fake_make_response(data): - return _DummyResponse(data) - - monkeypatch.setattr(module, "thread_pool_exec", fake_thread_pool_exec) - monkeypatch.setattr(module, "make_response", fake_make_response) - monkeypatch.setattr( - module, - "apply_safe_file_response_headers", - lambda response, content_type, extension: response.headers.update( - {"content_type": content_type, "extension": 
extension} - ), - ) - res = _run(module.download_attachment.__wrapped__("tenant1", "att1")) - assert isinstance(res, _DummyResponse) - assert res.data == b"attachment" - assert res.headers["extension"] == "abc" - assert res.headers["content_type"] == "application/abc" +def test_create_folder_rejects_duplicate_name(monkeypatch): + module = _load_file_api_service(monkeypatch) + monkeypatch.setattr(module.FileService, "query", lambda **_kwargs: [SimpleNamespace(id="existing")]) + + ok, message = _run(module.create_folder("tenant1", "dup", "pf1", module.FileType.FOLDER.value)) + assert ok is False + assert message == "Duplicated folder name in the same folder." + + +@pytest.mark.p2 +def test_delete_files_checks_team_permission(monkeypatch): + module = _load_file_api_service(monkeypatch) + monkeypatch.setattr( + module.FileService, + "get_by_id", + lambda _file_id: (True, _DummyFile("file1", module.FileType.DOC.value)), + ) + monkeypatch.setattr(module, "check_file_team_permission", lambda *_args, **_kwargs: False) + + ok, message = _run(module.delete_files("tenant1", ["file1"])) + assert ok is False + assert message == "No authorization." + + +@pytest.mark.p2 +def test_move_files_rejects_extension_change_in_new_name(monkeypatch): + module = _load_file_api_service(monkeypatch) + monkeypatch.setattr( + module.FileService, + "get_by_ids", + lambda _ids: [_DummyFile("file1", module.FileType.DOC.value, name="a.txt")], + ) + + ok, message = _run(module.move_files("tenant1", ["file1"], new_name="a.pdf")) + assert ok is False + assert message == "The extension of file can't be changed" + + +@pytest.mark.p2 +def test_move_files_handles_dest_and_storage_move(monkeypatch): + module = _load_file_api_service(monkeypatch) + moved = [] + updated = [] + + monkeypatch.setattr( + module.FileService, + "get_by_id", + lambda file_id: (False, None) if file_id == "missing" else (True, _DummyFile(file_id, module.FileType.FOLDER.value, name="dest")), + ) + monkeypatch.setattr( + module.FileService, + "get_by_ids", + lambda _ids: [_DummyFile("file1", module.FileType.DOC.value, parent_id="src", location="old", name="a.txt")], + ) + monkeypatch.setattr(module.settings, "STORAGE_IMPL", SimpleNamespace( + obj_exist=lambda *_args, **_kwargs: False, + put=lambda *_args, **_kwargs: None, + rm=lambda *_args, **_kwargs: None, + move=lambda old_bucket, old_loc, new_bucket, new_loc: moved.append((old_bucket, old_loc, new_bucket, new_loc)), + )) + monkeypatch.setattr(module.FileService, "update_by_id", lambda file_id, data: updated.append((file_id, data)) or True) + + ok, message = _run(module.move_files("tenant1", ["file1"], "missing")) + assert ok is False + assert message == "Parent folder not found!" 
+ + ok, data = _run(module.move_files("tenant1", ["file1"], "dest")) + assert ok is True + assert data is True + assert moved == [("src", "old", "dest", "a.txt")] + assert updated == [("file1", {"parent_id": "dest", "location": "a.txt"})] + + +@pytest.mark.p2 +def test_move_files_renames_in_place_without_storage_move(monkeypatch): + module = _load_file_api_service(monkeypatch) + db_updates = [] + doc_updates = [] + + monkeypatch.setattr( + module.FileService, + "get_by_ids", + lambda _ids: [_DummyFile("file1", module.FileType.DOC.value, parent_id="pf1", name="a.txt")], + ) + monkeypatch.setattr(module.FileService, "update_by_id", lambda file_id, data: db_updates.append((file_id, data)) or True) + monkeypatch.setattr( + module.File2DocumentService, + "get_by_file_id", + lambda _file_id: [SimpleNamespace(document_id="doc1")], + ) + monkeypatch.setattr(module.DocumentService, "update_by_id", lambda doc_id, data: doc_updates.append((doc_id, data)) or True) + + ok, data = _run(module.move_files("tenant1", ["file1"], new_name="b.txt")) + assert ok is True + assert data is True + assert db_updates == [("file1", {"name": "b.txt"})] + assert doc_updates == [("doc1", {"name": "b.txt"})] + + +@pytest.mark.p2 +def test_get_file_content_checks_permission(monkeypatch): + module = _load_file_api_service(monkeypatch) + monkeypatch.setattr(module, "check_file_team_permission", lambda *_args, **_kwargs: False) + + ok, message = module.get_file_content("tenant1", "file1") + assert ok is False + assert message == "No authorization." + + monkeypatch.setattr(module, "check_file_team_permission", lambda *_args, **_kwargs: True) + ok, file = module.get_file_content("tenant1", "file1") + assert ok is True + assert file.id == "file1" diff --git a/test/testcases/test_web_api/test_document_app/conftest.py b/test/testcases/test_web_api/test_document_app/conftest.py index 107e45951..b5470939b 100644 --- a/test/testcases/test_web_api/test_document_app/conftest.py +++ b/test/testcases/test_web_api/test_document_app/conftest.py @@ -101,6 +101,20 @@ def document_app_module(monkeypatch): deepdoc_html_module.RAGFlowHtmlParser = _StubHtmlParser monkeypatch.setitem(sys.modules, "deepdoc.parser.html_parser", deepdoc_html_module) + deepdoc_mineru_module = ModuleType("deepdoc.parser.mineru_parser") + + class _StubMinerUParser: + pass + + deepdoc_mineru_module.MinerUParser = _StubMinerUParser + monkeypatch.setitem(sys.modules, "deepdoc.parser.mineru_parser", deepdoc_mineru_module) + deepdoc_paddleocr_module = ModuleType("deepdoc.parser.paddleocr_parser") + + class _StubPaddleOCRParser: + pass + + deepdoc_paddleocr_module.PaddleOCRParser = _StubPaddleOCRParser + monkeypatch.setitem(sys.modules, "deepdoc.parser.paddleocr_parser", deepdoc_paddleocr_module) monkeypatch.setitem(sys.modules, "xgboost", ModuleType("xgboost")) stub_apps = ModuleType("api.apps") diff --git a/test/testcases/test_web_api/test_document_app/test_upload_documents.py b/test/testcases/test_web_api/test_document_app/test_upload_documents.py index c8b82774e..642abd678 100644 --- a/test/testcases/test_web_api/test_document_app/test_upload_documents.py +++ b/test/testcases/test_web_api/test_document_app/test_upload_documents.py @@ -1,5 +1,5 @@ # -# Copyright 2025 The InfiniFlow Authors. All Rights Reserved. +# Copyright 2026 The InfiniFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -225,9 +225,10 @@ class _DummyFile: class _DummyRequest: - def __init__(self, form=None, files=None): + def __init__(self, form=None, files=None, args=None): self._form = form or {} self._files = files or _DummyFiles() + self.args = args or {} @property def form(self): diff --git a/test/testcases/test_web_api/test_document_app/test_upload_info_unit.py b/test/testcases/test_web_api/test_document_app/test_upload_info_unit.py new file mode 100644 index 000000000..0e5511039 --- /dev/null +++ b/test/testcases/test_web_api/test_document_app/test_upload_info_unit.py @@ -0,0 +1,139 @@ +# +# Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import asyncio +from pathlib import Path +import importlib.util +import sys +from types import ModuleType + +import pytest + + +class _AwaitableValue: + def __init__(self, value): + self._value = value + + def __await__(self): + async def _co(): + return self._value + + return _co().__await__() + + +class _DummyFiles(dict): + def getlist(self, key): + value = self.get(key, []) + if isinstance(value, list): + return value + return [value] + + +class _DummyFile: + def __init__(self, filename): + self.filename = filename + + +class _DummyRequest: + def __init__(self, *, files=None, args=None): + self._files = files or _DummyFiles() + self.args = args or {} + + @property + def files(self): + return _AwaitableValue(self._files) + + +def _run(coro): + return asyncio.run(coro) + + +def _load_document_app_module(monkeypatch): + repo_root = Path(__file__).resolve().parents[4] + common_mod = ModuleType("common") + common_mod.bulk_upload_documents = lambda *_args, **_kwargs: [] + common_mod.delete_document = lambda *_args, **_kwargs: None + common_mod.list_documents = lambda *_args, **_kwargs: {"data": {"docs": []}} + monkeypatch.setitem(sys.modules, "common", common_mod) + module_path = repo_root / "test" / "testcases" / "test_web_api" / "test_document_app" / "conftest.py" + spec = importlib.util.spec_from_file_location("test_document_app_unit_conftest", module_path) + module = importlib.util.module_from_spec(spec) + sys.modules["test_document_app_unit_conftest"] = module + spec.loader.exec_module(module) + return module.document_app_module.__wrapped__(monkeypatch) + + +@pytest.mark.p2 +def test_upload_info_rejects_mixed_inputs(monkeypatch): + module = _load_document_app_module(monkeypatch) + files = _DummyFiles({"file": [_DummyFile("a.txt")]}) + monkeypatch.setattr(module, "request", _DummyRequest(files=files, args={"url": "https://example.com/a.txt"})) + + res = _run(module.upload_info()) + assert res["code"] == module.RetCode.BAD_REQUEST + assert "not both" in res["message"] + + +@pytest.mark.p2 +def test_upload_info_requires_file_or_url(monkeypatch): + module = _load_document_app_module(monkeypatch) + monkeypatch.setattr(module, "request", _DummyRequest(files=_DummyFiles())) + + res = _run(module.upload_info()) + assert res["code"] == module.RetCode.BAD_REQUEST + assert "Missing input" in res["message"] + 
+ +@pytest.mark.p2 +def test_upload_info_supports_url_single_and_multiple_files(monkeypatch): + module = _load_document_app_module(monkeypatch) + captured = [] + + def fake_upload_info(user_id, file_obj, url=None): + captured.append((user_id, getattr(file_obj, "filename", None), url)) + if url is not None: + return {"kind": "url", "value": url} + return {"kind": "file", "value": file_obj.filename} + + monkeypatch.setattr(module.FileService, "upload_info", fake_upload_info) + + monkeypatch.setattr(module, "request", _DummyRequest(files=_DummyFiles(), args={"url": "https://example.com/a.txt"})) + res = _run(module.upload_info()) + assert res["code"] == 0 + assert res["data"] == {"kind": "url", "value": "https://example.com/a.txt"} + + monkeypatch.setattr(module, "request", _DummyRequest(files=_DummyFiles({"file": _DummyFile("single.txt")}))) + res = _run(module.upload_info()) + assert res["code"] == 0 + assert res["data"] == {"kind": "file", "value": "single.txt"} + + monkeypatch.setattr( + module, + "request", + _DummyRequest(files=_DummyFiles({"file": [_DummyFile("a.txt"), _DummyFile("b.txt")]})), + ) + res = _run(module.upload_info()) + assert res["code"] == 0 + assert res["data"] == [ + {"kind": "file", "value": "a.txt"}, + {"kind": "file", "value": "b.txt"}, + ] + assert captured == [ + ("user-1", None, "https://example.com/a.txt"), + ("user-1", "single.txt", None), + ("user-1", "a.txt", None), + ("user-1", "b.txt", None), + ] diff --git a/test/testcases/test_web_api/test_file_app/test_file2document_routes_unit.py b/test/testcases/test_web_api/test_file_app/test_file2document_routes_unit.py index df1294f79..fdcbc59f0 100644 --- a/test/testcases/test_web_api/test_file_app/test_file2document_routes_unit.py +++ b/test/testcases/test_web_api/test_file_app/test_file2document_routes_unit.py @@ -143,6 +143,10 @@ def _load_file2document_module(monkeypatch): def get_by_id(_file_id): return True, _DummyFile(_file_id, _FileType.DOC.value) + @staticmethod + def get_parser(_file_type, _file_name, parser_id): + return parser_id + file_service_mod.FileService = _StubFileService monkeypatch.setitem(sys.modules, "api.db.services.file_service", file_service_mod) services_pkg.file_service = file_service_mod @@ -284,7 +288,14 @@ def test_convert_branch_matrix_unit(monkeypatch): "get_by_id", lambda _file_id: (True, _DummyFile("inner-1", module.FileType.DOC.value, name="inner.txt", location="inner.loc", size=2)), ) - monkeypatch.setattr(module.DocumentService, "insert", lambda _payload: SimpleNamespace(id="doc-new")) + inserted = {} + + def _insert(payload): + inserted.update(payload) + return SimpleNamespace(id="doc-new") + + monkeypatch.setattr(module.DocumentService, "insert", _insert) + monkeypatch.setattr(module.FileService, "get_parser", lambda _ft, _name, _parser_id: "picked-parser") monkeypatch.setattr( module.File2DocumentService, "insert", @@ -293,6 +304,8 @@ def test_convert_branch_matrix_unit(monkeypatch): res = _run(module.convert()) assert res["code"] == 0 assert res["data"] == [{"file_id": "inner-1", "document_id": "doc-new"}] + assert inserted["parser_id"] == "picked-parser" + assert inserted["pipeline_id"] == "p1" req_state["file_ids"] = ["f1"] monkeypatch.setattr( diff --git a/test/testcases/test_web_api/test_file_app/test_file_routes_unit.py b/test/testcases/test_web_api/test_file_app/test_file_routes_unit.py index 85ccc36d7..17af3cbd9 100644 --- a/test/testcases/test_web_api/test_file_app/test_file_routes_unit.py +++ b/test/testcases/test_web_api/test_file_app/test_file_routes_unit.py @@ 
-17,7 +17,6 @@ import asyncio import importlib.util import sys -from copy import deepcopy from enum import Enum from pathlib import Path from types import ModuleType, SimpleNamespace @@ -44,26 +43,6 @@ class _AwaitableValue: return _co().__await__() -class _Args(dict): - def get(self, key, default=None, type=None): - value = super().get(key, default) - if value is None or type is None: - return value - try: - return type(value) - except (TypeError, ValueError): - return default - - -class _DummyUploadFile: - def __init__(self, filename, blob=b"blob"): - self.filename = filename - self._blob = blob - - def read(self): - return self._blob - - class _DummyFiles(dict): def __init__(self, file_objs=None): super().__init__() @@ -77,25 +56,21 @@ class _DummyFiles(dict): return [] +class _DummyUploadFile: + def __init__(self, filename, blob=b"blob"): + self.filename = filename + self._blob = blob + + def read(self): + return self._blob + + class _DummyRequest: - def __init__( - self, - *, - args=None, - form=None, - files=None, - json_data=None, - headers=None, - method="POST", - content_length=0, - ): - self.args = _Args(args or {}) + def __init__(self, *, content_type="", form=None, files=None, args=None): + self.content_type = content_type self.form = _AwaitableValue(form or {}) self.files = _AwaitableValue(files if files is not None else _DummyFiles()) - self.json = _AwaitableValue(json_data or {}) - self.headers = headers or {} - self.method = method - self.content_length = content_length + self.args = args or {} class _DummyResponse: @@ -104,79 +79,11 @@ class _DummyResponse: self.headers = {} -class _DummyFile: - def __init__( - self, - file_id, - file_type, - *, - tenant_id="tenant1", - parent_id="pf1", - location="file.bin", - name="file.txt", - source_type="user", - ): - self.id = file_id - self.type = file_type - self.tenant_id = tenant_id - self.parent_id = parent_id - self.location = location - self.name = name - self.source_type = source_type - - def to_json(self): - return {"id": self.id, "name": self.name, "type": self.type} - - def _run(coro): return asyncio.run(coro) -def _set_request( - monkeypatch, - module, - *, - args=None, - form=None, - files=None, - json_data=None, - headers=None, - method="POST", - content_length=0, -): - monkeypatch.setattr( - module, - "request", - _DummyRequest( - args=args, - form=form, - files=files, - json_data=json_data, - headers=headers, - method=method, - content_length=content_length, - ), - ) - - -def _set_request_json(monkeypatch, module, payload_state): - async def _req_json(): - return deepcopy(payload_state) - - monkeypatch.setattr(module, "get_request_json", _req_json) - - -@pytest.fixture(scope="session") -def auth(): - return "unit-auth" - - -@pytest.fixture(scope="session", autouse=True) -def set_tenant_info(): - return None - - -def _load_file_app_module(monkeypatch): +def _load_file_api_module(monkeypatch): repo_root = Path(__file__).resolve().parents[4] quart_mod = ModuleType("quart") @@ -192,28 +99,49 @@ def _load_file_app_module(monkeypatch): api_pkg.__path__ = [str(repo_root / "api")] monkeypatch.setitem(sys.modules, "api", api_pkg) - apps_mod = ModuleType("api.apps") - apps_mod.__path__ = [str(repo_root / "api" / "apps")] - apps_mod.current_user = SimpleNamespace(id="tenant1", tenant_id="tenant1") - apps_mod.login_required = lambda func: func - monkeypatch.setitem(sys.modules, "api.apps", apps_mod) - api_pkg.apps = apps_mod + apps_pkg = ModuleType("api.apps") + apps_pkg.__path__ = [str(repo_root / "api" / "apps")] + 
apps_pkg.login_required = lambda func: func + monkeypatch.setitem(sys.modules, "api.apps", apps_pkg) + api_pkg.apps = apps_pkg - api_common_pkg = ModuleType("api.common") - api_common_pkg.__path__ = [] - monkeypatch.setitem(sys.modules, "api.common", api_common_pkg) + services_pkg = ModuleType("api.apps.services") + services_pkg.__path__ = [str(repo_root / "api" / "apps" / "services")] + monkeypatch.setitem(sys.modules, "api.apps.services", services_pkg) + apps_pkg.services = services_pkg - permission_mod = ModuleType("api.common.check_team_permission") - permission_mod.check_file_team_permission = lambda *_args, **_kwargs: True - monkeypatch.setitem(sys.modules, "api.common.check_team_permission", permission_mod) - api_common_pkg.check_team_permission = permission_mod + file_api_service_mod = ModuleType("api.apps.services.file_api_service") + + async def _upload_file(_tenant_id, _pf_id, _file_objs): + return True, [{"id": "f1"}] + + async def _create_folder(_tenant_id, _name, _parent_id=None, _file_type=None): + return True, {"id": "folder1"} + + async def _delete_files(_tenant_id, _ids): + return True, True + + async def _move_files(_tenant_id, _src_file_ids, _dest_file_id=None, _new_name=None): + return True, True + + file_api_service_mod.upload_file = _upload_file + file_api_service_mod.create_folder = _create_folder + file_api_service_mod.list_files = lambda _tenant_id, _args: (True, {"files": [], "total": 0}) + file_api_service_mod.delete_files = _delete_files + file_api_service_mod.move_files = _move_files + file_api_service_mod.get_file_content = lambda _tenant_id, _file_id: ( + True, + SimpleNamespace(parent_id="bucket1", location="path1", name="doc.txt", type="doc"), + ) + file_api_service_mod.get_parent_folder = lambda _file_id: (True, {"parent_folder": {"id": "parent1"}}) + file_api_service_mod.get_all_parent_folders = lambda _file_id: (True, {"parent_folders": [{"id": "root"}]}) + monkeypatch.setitem(sys.modules, "api.apps.services.file_api_service", file_api_service_mod) + services_pkg.file_api_service = file_api_service_mod db_pkg = ModuleType("api.db") db_pkg.__path__ = [] class _FileType(Enum): - FOLDER = "folder" - VIRTUAL = "virtual" DOC = "doc" VISUAL = "visual" @@ -221,197 +149,45 @@ def _load_file_app_module(monkeypatch): monkeypatch.setitem(sys.modules, "api.db", db_pkg) api_pkg.db = db_pkg - services_pkg = ModuleType("api.db.services") - services_pkg.__path__ = [] - services_pkg.duplicate_name = lambda _query, **kwargs: kwargs.get("name", "") - monkeypatch.setitem(sys.modules, "api.db.services", services_pkg) - - document_service_mod = ModuleType("api.db.services.document_service") - - class _StubDocumentService: - @staticmethod - def get_doc_count(_uid): - return 0 - - @staticmethod - def get_by_id(doc_id): - return True, SimpleNamespace(id=doc_id) - - @staticmethod - def get_tenant_id(_doc_id): - return "tenant1" - - @staticmethod - def remove_document(*_args, **_kwargs): - return True - - @staticmethod - def update_by_id(*_args, **_kwargs): - return True - - document_service_mod.DocumentService = _StubDocumentService - monkeypatch.setitem(sys.modules, "api.db.services.document_service", document_service_mod) - services_pkg.document_service = document_service_mod - file2doc_mod = ModuleType("api.db.services.file2document_service") - - class _StubFile2DocumentService: - @staticmethod - def get_by_file_id(_file_id): - return [] - - @staticmethod - def delete_by_file_id(*_args, **_kwargs): - return None - - @staticmethod - def get_storage_address(**_kwargs): - return 
"bucket2", "location2" - - file2doc_mod.File2DocumentService = _StubFile2DocumentService + file2doc_mod.File2DocumentService = SimpleNamespace(get_storage_address=lambda **_kwargs: ("bucket2", "path2")) monkeypatch.setitem(sys.modules, "api.db.services.file2document_service", file2doc_mod) - services_pkg.file2document_service = file2doc_mod - - file_service_mod = ModuleType("api.db.services.file_service") - - class _StubFileService: - @staticmethod - def get_root_folder(_tenant_id): - return {"id": "root"} - - @staticmethod - def get_by_id(file_id): - return True, _DummyFile(file_id, _FileType.DOC.value, name="file.txt") - - @staticmethod - def get_id_list_by_id(_pf_id, _names, _index, ids): - return ids - - @staticmethod - def create_folder(_file, parent_id, _names, _len_id): - return SimpleNamespace(id=parent_id, name=str(parent_id)) - - @staticmethod - def query(**_kwargs): - return [] - - @staticmethod - def insert(data): - return SimpleNamespace(to_json=lambda: data) - - @staticmethod - def is_parent_folder_exist(_pf_id): - return True - - @staticmethod - def get_by_pf_id(*_args, **_kwargs): - return [], 0 - - @staticmethod - def get_parent_folder(_file_id): - return SimpleNamespace(to_json=lambda: {"id": "root"}) - - @staticmethod - def get_all_parent_folders(_file_id): - return [] - - @staticmethod - def init_knowledgebase_docs(*_args, **_kwargs): - return None - - @staticmethod - def list_all_files_by_parent_id(_parent_id): - return [] - - @staticmethod - def delete(_file): - return True - - @staticmethod - def update_by_id(*_args, **_kwargs): - return True - - @staticmethod - def get_by_ids(_file_ids): - return [] - - @staticmethod - def delete_by_id(_file_id): - return True - - file_service_mod.FileService = _StubFileService - monkeypatch.setitem(sys.modules, "api.db.services.file_service", file_service_mod) - services_pkg.file_service = file_service_mod api_utils_mod = ModuleType("api.utils.api_utils") - - class _RetCode: - SUCCESS = 0 - ARGUMENT_ERROR = 101 - AUTHENTICATION_ERROR = 401 - OPERATING_ERROR = 103 - - def get_json_result(data=None, message="", code=_RetCode.SUCCESS): - return {"code": code, "data": data, "message": message} - - async def get_request_json(): - return {} - - def get_data_error_result(message=""): - return {"code": _RetCode.OPERATING_ERROR, "data": None, "message": message} - - def server_error_response(err): - return {"code": 500, "data": None, "message": str(err)} - - def validate_request(*_required): - def _decorator(func): - return func - - return _decorator - - api_utils_mod.get_json_result = get_json_result - api_utils_mod.get_request_json = get_request_json - api_utils_mod.get_data_error_result = get_data_error_result - api_utils_mod.server_error_response = server_error_response - api_utils_mod.validate_request = validate_request + api_utils_mod.add_tenant_id_to_kwargs = lambda func: func + api_utils_mod.get_error_argument_result = lambda message: {"code": 400, "data": None, "message": message} + api_utils_mod.get_error_data_result = lambda message: {"code": 500, "data": None, "message": message} + api_utils_mod.get_result = lambda data=None: {"code": 0, "data": data, "message": ""} monkeypatch.setitem(sys.modules, "api.utils.api_utils", api_utils_mod) - file_utils_mod = ModuleType("api.utils.file_utils") - file_utils_mod.filename_type = lambda _name: _FileType.DOC.value - monkeypatch.setitem(sys.modules, "api.utils.file_utils", file_utils_mod) + validation_mod = ModuleType("api.utils.validation_utils") + validation_mod.CreateFolderReq = object 
+ validation_mod.DeleteFileReq = object + validation_mod.ListFileReq = object + validation_mod.MoveFileReq = object + + async def _validate_json_request(_request, _schema): + return {}, None + + validation_mod.validate_and_parse_json_request = _validate_json_request + validation_mod.validate_and_parse_request_args = lambda _request, _schema: ({}, None) + monkeypatch.setitem(sys.modules, "api.utils.validation_utils", validation_mod) web_utils_mod = ModuleType("api.utils.web_utils") - web_utils_mod.CONTENT_TYPE_MAP = {"txt": "text/plain", "json": "application/json"} - web_utils_mod.apply_safe_file_response_headers = ( - lambda response, content_type, ext: response.headers.update({"content_type": content_type, "extension": ext}) - ) + web_utils_mod.CONTENT_TYPE_MAP = {"txt": "text/plain"} + web_utils_mod.apply_safe_file_response_headers = lambda response, content_type, ext: response.headers.update({"content_type": content_type, "ext": ext}) monkeypatch.setitem(sys.modules, "api.utils.web_utils", web_utils_mod) common_pkg = ModuleType("common") common_pkg.__path__ = [str(repo_root / "common")] + common_pkg.settings = SimpleNamespace( + STORAGE_IMPL=SimpleNamespace( + get=lambda *_args, **_kwargs: b"blob", + ) + ) monkeypatch.setitem(sys.modules, "common", common_pkg) - settings_mod = ModuleType("common.settings") - settings_mod.STORAGE_IMPL = SimpleNamespace( - obj_exist=lambda *_args, **_kwargs: False, - put=lambda *_args, **_kwargs: None, - rm=lambda *_args, **_kwargs: None, - get=lambda *_args, **_kwargs: b"", - ) - common_pkg.settings = settings_mod - monkeypatch.setitem(sys.modules, "common.settings", settings_mod) - - constants_mod = ModuleType("common.constants") - - class _FileSource: - KNOWLEDGEBASE = "knowledgebase" - - constants_mod.RetCode = _RetCode - constants_mod.FileSource = _FileSource - monkeypatch.setitem(sys.modules, "common.constants", constants_mod) - misc_utils_mod = ModuleType("common.misc_utils") - misc_utils_mod.get_uuid = lambda: "uuid-1" async def thread_pool_exec(func, *args, **kwargs): return func(*args, **kwargs) @@ -419,808 +195,147 @@ def _load_file_app_module(monkeypatch): misc_utils_mod.thread_pool_exec = thread_pool_exec monkeypatch.setitem(sys.modules, "common.misc_utils", misc_utils_mod) - module_name = "test_file_app_routes_unit_module" - module_path = repo_root / "api" / "apps" / "file_app.py" - spec = importlib.util.spec_from_file_location(module_name, module_path) + module_path = repo_root / "api" / "apps" / "restful_apis" / "file_api.py" + spec = importlib.util.spec_from_file_location("api.apps.restful_apis.file_api", module_path) module = importlib.util.module_from_spec(spec) module.manager = _DummyManager() - monkeypatch.setitem(sys.modules, module_name, module) + monkeypatch.setitem(sys.modules, "api.apps.restful_apis.file_api", module) spec.loader.exec_module(module) return module @pytest.mark.p2 -def test_upload_branch_matrix_unit(monkeypatch): - module = _load_file_app_module(monkeypatch) - monkeypatch.setattr(module.FileService, "get_root_folder", lambda _uid: {"id": "root"}) +def test_create_or_upload_multipart_requires_file(monkeypatch): + module = _load_file_api_module(monkeypatch) + monkeypatch.setattr(module, "request", _DummyRequest(content_type="multipart/form-data", form={}, files=_DummyFiles())) - _set_request(monkeypatch, module, form={}, files=_DummyFiles()) - res = _run(module.upload()) - assert res["code"] == module.RetCode.ARGUMENT_ERROR + res = _run(module.create_or_upload("tenant1")) + assert res["code"] == 400 assert 
res["message"] == "No file part!" - _set_request( - monkeypatch, - module, - form={"parent_id": "pf1"}, - files=_DummyFiles([_DummyUploadFile("")]), - ) - res = _run(module.upload()) - assert res["code"] == module.RetCode.ARGUMENT_ERROR - assert res["message"] == "No file selected!" - _set_request( - monkeypatch, - module, - form={"parent_id": "pf1"}, - files=_DummyFiles([_DummyUploadFile("a.txt")]), - ) - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (False, None)) - res = _run(module.upload()) - assert res["code"] == module.RetCode.OPERATING_ERROR - assert res["message"] == "Can't find this folder!" +@pytest.mark.p2 +def test_create_or_upload_uploads_via_new_service(monkeypatch): + module = _load_file_api_module(monkeypatch) + files = _DummyFiles([_DummyUploadFile("a.txt")]) + monkeypatch.setattr(module, "request", _DummyRequest(content_type="multipart/form-data", form={"parent_id": "pf1"}, files=files)) - monkeypatch.setenv("MAX_FILE_NUM_PER_USER", "1") - _set_request( - monkeypatch, - module, - form={"parent_id": "pf1"}, - files=_DummyFiles([_DummyUploadFile("cap.txt")]), - ) - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (True, SimpleNamespace(id="pf1", name="pf1"))) - monkeypatch.setattr(module.DocumentService, "get_doc_count", lambda _uid: 1) - res = _run(module.upload()) - assert res["code"] == module.RetCode.SUCCESS - assert "Exceed the maximum file number of a free user!" in res["data"][0]["message"] - monkeypatch.delenv("MAX_FILE_NUM_PER_USER", raising=False) + seen = {} - class _StorageNoCollision: - def __init__(self): - self.put_calls = [] + async def _upload_file(tenant_id, pf_id, file_objs): + seen["args"] = (tenant_id, pf_id, [f.filename for f in file_objs]) + return True, [{"id": "f1"}] - def obj_exist(self, _bucket, _location): - return False + monkeypatch.setattr(module.file_api_service, "upload_file", _upload_file) + res = _run(module.create_or_upload("tenant1")) - def put(self, bucket, location, blob): - self.put_calls.append((bucket, location, blob)) - - storage_no_collision = _StorageNoCollision() - monkeypatch.setattr(module.settings, "STORAGE_IMPL", storage_no_collision) - _set_request( - monkeypatch, - module, - form={"parent_id": "pf1"}, - files=_DummyFiles([_DummyUploadFile(None, b"none-blob")]), - ) - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (True, SimpleNamespace(id="pf1", name="pf1"))) - monkeypatch.setattr(module.FileService, "get_id_list_by_id", lambda *_args, **_kwargs: ["pf1"]) - monkeypatch.setattr( - module.FileService, - "create_folder", - lambda _file, parent_id, _names, _len_id: SimpleNamespace(id=f"{parent_id}-folder"), - ) - monkeypatch.setattr(module, "filename_type", lambda _name: module.FileType.DOC.value) - monkeypatch.setattr(module, "duplicate_name", lambda _query, **kwargs: kwargs.get("name")) - monkeypatch.setattr(module, "get_uuid", lambda: "uuid-none") - monkeypatch.setattr(module.FileService, "insert", lambda data: SimpleNamespace(to_json=lambda: {"id": data["id"]})) - res = _run(module.upload()) - assert res["code"] == module.RetCode.SUCCESS - assert len(res["data"]) == 1 - assert storage_no_collision.put_calls == [("pf1-folder", None, b"none-blob")] - - _set_request( - monkeypatch, - module, - form={"parent_id": "pf1"}, - files=_DummyFiles([_DummyUploadFile("dir/a.txt")]), - ) - monkeypatch.setattr(module.FileService, "get_id_list_by_id", lambda *_args, **_kwargs: ["pf1", "missing-child"]) - - def _get_by_id_missing_child(file_id): - if file_id == 
"missing-child": - return False, None - return True, SimpleNamespace(id=file_id, name=file_id) - - monkeypatch.setattr(module.FileService, "get_by_id", _get_by_id_missing_child) - res = _run(module.upload()) - assert res["code"] == module.RetCode.SUCCESS - assert res["data"][0]["message"] == "Folder not found!" - - _set_request( - monkeypatch, - module, - form={"parent_id": "pf1"}, - files=_DummyFiles([_DummyUploadFile("b.txt")]), - ) - monkeypatch.setattr(module.FileService, "get_id_list_by_id", lambda *_args, **_kwargs: ["pf1", "leaf"]) - pf1_calls = {"count": 0} - - def _get_by_id_missing_parent_else(file_id): - if file_id == "pf1": - pf1_calls["count"] += 1 - if pf1_calls["count"] == 1: - return True, SimpleNamespace(id="pf1", name="pf1") - return False, None - return True, SimpleNamespace(id=file_id, name=file_id) - - monkeypatch.setattr(module.FileService, "get_by_id", _get_by_id_missing_parent_else) - res = _run(module.upload()) - assert res["code"] == module.RetCode.SUCCESS - assert res["data"][0]["message"] == "Folder not found!" - - class _StorageCollision: - def __init__(self): - self.obj_calls = 0 - self.put_calls = [] - - def obj_exist(self, _bucket, _location): - self.obj_calls += 1 - return self.obj_calls == 1 - - def put(self, bucket, location, blob): - self.put_calls.append((bucket, location, blob)) - - storage_collision = _StorageCollision() - monkeypatch.setattr(module.settings, "STORAGE_IMPL", storage_collision) - _set_request( - monkeypatch, - module, - form={"parent_id": "pf1"}, - files=_DummyFiles([_DummyUploadFile("dir/a.txt", b"a"), _DummyUploadFile("b.txt", b"b")]), - ) - - def _get_by_id_ok(file_id): - return True, SimpleNamespace(id=file_id, name=file_id) - - def _get_id_list(_pf_id, file_obj_names, _idx, _ids): - if file_obj_names[-1] == "a.txt": - return ["pf1", "mid-id"] - return ["pf1", "leaf-id"] - - def _create_folder(_file, parent_id, _names, _len_id): - return SimpleNamespace(id=f"{parent_id}-folder") - - inserted_payloads = [] - monkeypatch.setattr(module.FileService, "get_by_id", _get_by_id_ok) - monkeypatch.setattr(module.FileService, "get_id_list_by_id", _get_id_list) - monkeypatch.setattr(module.FileService, "create_folder", _create_folder) - monkeypatch.setattr(module, "filename_type", lambda _name: module.FileType.DOC.value) - monkeypatch.setattr(module, "duplicate_name", lambda _query, **kwargs: kwargs["name"]) - monkeypatch.setattr(module, "get_uuid", lambda: "file-id") - monkeypatch.setattr(module.FileService, "query", lambda **_kwargs: []) - - def _insert(data): - inserted_payloads.append(data) - return SimpleNamespace(to_json=lambda: {"id": data["id"], "location": data["location"]}) - - monkeypatch.setattr(module.FileService, "insert", _insert) - res = _run(module.upload()) - assert res["code"] == module.RetCode.SUCCESS - assert len(res["data"]) == 2 - assert len(storage_collision.put_calls) == 2 - assert any(location.endswith("_") for _, location, _ in storage_collision.put_calls) - assert len(inserted_payloads) == 2 - - _set_request( - monkeypatch, - module, - form={"parent_id": "pf1"}, - files=_DummyFiles([_DummyUploadFile("boom.txt")]), - ) - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (_ for _ in ()).throw(RuntimeError("upload boom"))) - monkeypatch.setattr(module, "server_error_response", lambda err: {"code": 500, "message": str(err)}) - res = _run(module.upload()) - assert res["code"] == 500 - assert "upload boom" in res["message"] + assert seen["args"] == ("tenant1", "pf1", ["a.txt"]) + assert res["code"] == 
0 + assert res["data"] == [{"id": "f1"}] @pytest.mark.p2 -def test_create_and_list_branch_matrix_unit(monkeypatch): - module = _load_file_app_module(monkeypatch) - req_state = {"name": "file1"} - _set_request_json(monkeypatch, module, req_state) - monkeypatch.setattr(module.FileService, "get_root_folder", lambda _uid: {"id": "root"}) +def test_create_or_upload_creates_folder_from_json(monkeypatch): + module = _load_file_api_module(monkeypatch) + monkeypatch.setattr(module, "request", _DummyRequest(content_type="application/json")) - monkeypatch.setattr(module.FileService, "is_parent_folder_exist", lambda _pf_id: False) - res = _run(module.create()) - assert res["code"] == module.RetCode.OPERATING_ERROR - assert "Parent Folder Doesn't Exist!" in res["message"] + async def _validate(_request, _schema): + return {"name": "folder-a", "parent_id": "pf1", "type": "folder"}, None - req_state.update({"name": "dup", "parent_id": "pf1"}) - monkeypatch.setattr(module.FileService, "is_parent_folder_exist", lambda _pf_id: True) - monkeypatch.setattr(module.FileService, "query", lambda **_kwargs: [object()]) - res = _run(module.create()) - assert "Duplicated folder name" in res["message"] + async def _create_folder(tenant_id, name, parent_id=None, file_type=None): + return True, {"tenant_id": tenant_id, "name": name, "parent_id": parent_id, "type": file_type} - inserted = {} + monkeypatch.setattr(module, "validate_and_parse_json_request", _validate) + monkeypatch.setattr(module.file_api_service, "create_folder", _create_folder) - def _insert(data): - inserted["payload"] = data - return SimpleNamespace(to_json=lambda: data) - - monkeypatch.setattr(module.FileService, "query", lambda **_kwargs: []) - monkeypatch.setattr(module, "get_uuid", lambda: "uuid-folder") - monkeypatch.setattr(module.FileService, "insert", _insert) - - req_state.update({"name": "folder", "parent_id": "pf1", "type": module.FileType.FOLDER.value}) - res = _run(module.create()) - assert res["code"] == module.RetCode.SUCCESS - assert inserted["payload"]["type"] == module.FileType.FOLDER.value - - req_state.update({"name": "virtual", "parent_id": "pf1", "type": "UNKNOWN"}) - res = _run(module.create()) - assert res["code"] == module.RetCode.SUCCESS - assert inserted["payload"]["type"] == module.FileType.VIRTUAL.value - - monkeypatch.setattr( - module.FileService, - "is_parent_folder_exist", - lambda _pf_id: (_ for _ in ()).throw(RuntimeError("create boom")), - ) - monkeypatch.setattr(module, "server_error_response", lambda err: {"code": 500, "message": str(err)}) - res = _run(module.create()) - assert res["code"] == 500 - assert "create boom" in res["message"] - - list_calls = {"init": 0} - monkeypatch.setattr(module.FileService, "get_root_folder", lambda _uid: {"id": "root"}) - monkeypatch.setattr( - module.FileService, - "init_knowledgebase_docs", - lambda _pf_id, _uid: list_calls.__setitem__("init", list_calls["init"] + 1), - ) - _set_request(monkeypatch, module, args={}) - monkeypatch.setattr(module.FileService, "get_by_id", lambda _pf_id: (False, None)) - res = module.list_files() - assert res["message"] == "Folder not found!" 
- assert list_calls["init"] == 1 - - _set_request( - monkeypatch, - module, - args={ - "parent_id": "p1", - "keywords": "k", - "page": "2", - "page_size": "10", - "orderby": "name", - "desc": "False", - }, - ) - monkeypatch.setattr(module.FileService, "get_by_id", lambda _pf_id: (True, SimpleNamespace(id="p1"))) - monkeypatch.setattr(module.FileService, "get_by_pf_id", lambda *_args, **_kwargs: ([{"id": "f1"}], 1)) - monkeypatch.setattr(module.FileService, "get_parent_folder", lambda _pf_id: None) - res = module.list_files() - assert res["message"] == "File not found!" - - monkeypatch.setattr(module.FileService, "get_parent_folder", lambda _pf_id: SimpleNamespace(to_json=lambda: {"id": "p0"})) - res = module.list_files() - assert res["code"] == module.RetCode.SUCCESS - assert res["data"]["total"] == 1 - assert res["data"]["parent_folder"]["id"] == "p0" - - monkeypatch.setattr(module.FileService, "get_by_id", lambda _pf_id: (_ for _ in ()).throw(RuntimeError("list boom"))) - monkeypatch.setattr(module, "server_error_response", lambda err: {"code": 500, "message": str(err)}) - res = module.list_files() - assert res["code"] == 500 - assert "list boom" in res["message"] + res = _run(module.create_or_upload("tenant1")) + assert res["code"] == 0 + assert res["data"]["tenant_id"] == "tenant1" + assert res["data"]["name"] == "folder-a" @pytest.mark.p2 -def test_folder_lookup_routes_branch_matrix_unit(monkeypatch): - module = _load_file_app_module(monkeypatch) +def test_list_files_validation_error(monkeypatch): + module = _load_file_api_module(monkeypatch) + monkeypatch.setattr(module, "validate_and_parse_request_args", lambda _request, _schema: (None, "bad args")) - monkeypatch.setattr(module.FileService, "get_root_folder", lambda _uid: {"id": "root"}) - res = module.get_root_folder() - assert res["code"] == module.RetCode.SUCCESS - assert res["data"]["root_folder"]["id"] == "root" - - monkeypatch.setattr(module.FileService, "get_root_folder", lambda _uid: (_ for _ in ()).throw(RuntimeError("root boom"))) - monkeypatch.setattr(module, "server_error_response", lambda err: {"code": 500, "message": str(err)}) - res = module.get_root_folder() - assert res["code"] == 500 - assert "root boom" in res["message"] - - _set_request(monkeypatch, module, args={"file_id": "missing"}) - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (False, None)) - res = module.get_parent_folder() - assert res["message"] == "Folder not found!" - - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (True, SimpleNamespace(id="f1"))) - monkeypatch.setattr(module.FileService, "get_parent_folder", lambda _file_id: SimpleNamespace(to_json=lambda: {"id": "p1"})) - res = module.get_parent_folder() - assert res["code"] == module.RetCode.SUCCESS - assert res["data"]["parent_folder"]["id"] == "p1" - - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (_ for _ in ()).throw(RuntimeError("parent boom"))) - monkeypatch.setattr(module, "server_error_response", lambda err: {"code": 500, "message": str(err)}) - res = module.get_parent_folder() - assert res["code"] == 500 - assert "parent boom" in res["message"] - - _set_request(monkeypatch, module, args={"file_id": "missing"}) - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (False, None)) - res = module.get_all_parent_folders() - assert res["message"] == "Folder not found!" 
- - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (True, SimpleNamespace(id="f1"))) - monkeypatch.setattr( - module.FileService, - "get_all_parent_folders", - lambda _file_id: [SimpleNamespace(to_json=lambda: {"id": "p1"}), SimpleNamespace(to_json=lambda: {"id": "p2"})], - ) - res = module.get_all_parent_folders() - assert res["code"] == module.RetCode.SUCCESS - assert res["data"]["parent_folders"] == [{"id": "p1"}, {"id": "p2"}] - - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (_ for _ in ()).throw(RuntimeError("all-parent boom"))) - monkeypatch.setattr(module, "server_error_response", lambda err: {"code": 500, "message": str(err)}) - res = module.get_all_parent_folders() - assert res["code"] == 500 - assert "all-parent boom" in res["message"] + res = module.list_files("tenant1") + assert res["code"] == 400 + assert res["message"] == "bad args" @pytest.mark.p2 -def test_rm_branch_matrix_unit(monkeypatch): - module = _load_file_app_module(monkeypatch) - req_state = {"file_ids": ["missing"]} - _set_request_json(monkeypatch, module, req_state) +def test_move_uses_new_payload_shape(monkeypatch): + module = _load_file_api_module(monkeypatch) - allow = {"value": True} - monkeypatch.setattr(module, "check_file_team_permission", lambda _file, _uid: allow["value"]) + async def _validate(_request, _schema): + return {"src_file_ids": ["f1"], "dest_file_id": "pf2"}, None - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (False, None)) - res = _run(module.rm()) - assert res["message"] == "File or Folder not found!" + seen = {} - req_state["file_ids"] = ["tenant-missing"] - monkeypatch.setattr( - module.FileService, - "get_by_id", - lambda _file_id: (True, _DummyFile(_file_id, module.FileType.DOC.value, tenant_id=None)), - ) - res = _run(module.rm()) - assert res["message"] == "Tenant not found!" + async def _move_files(tenant_id, src_file_ids, dest_file_id=None, new_name=None): + seen["args"] = (tenant_id, src_file_ids, dest_file_id, new_name) + return True, True - req_state["file_ids"] = ["deny"] - allow["value"] = False - monkeypatch.setattr( - module.FileService, - "get_by_id", - lambda _file_id: (True, _DummyFile(_file_id, module.FileType.DOC.value)), - ) - res = _run(module.rm()) - assert res["code"] == module.RetCode.AUTHENTICATION_ERROR - assert res["message"] == "No authorization." 
- allow["value"] = True + monkeypatch.setattr(module, "validate_and_parse_json_request", _validate) + monkeypatch.setattr(module.file_api_service, "move_files", _move_files) - req_state["file_ids"] = ["kb"] - monkeypatch.setattr( - module.FileService, - "get_by_id", - lambda _file_id: ( - True, - _DummyFile( - _file_id, - module.FileType.DOC.value, - source_type=module.FileSource.KNOWLEDGEBASE, - ), - ), - ) - res = _run(module.rm()) - assert res["code"] == module.RetCode.SUCCESS + res = _run(module.move("tenant1")) + assert seen["args"] == ("tenant1", ["f1"], "pf2", None) + assert res["code"] == 0 assert res["data"] is True - events = { - "rm_calls": [], - "deleted_files": [], - "deleted_links": [], - "removed_docs": [], - } - - class _Storage: - def rm(self, bucket, location): - events["rm_calls"].append((bucket, location)) - raise RuntimeError("storage rm boom") - - monkeypatch.setattr(module.settings, "STORAGE_IMPL", _Storage()) - monkeypatch.setattr(module.File2DocumentService, "get_by_file_id", lambda file_id: [SimpleNamespace(document_id=f"doc-{file_id}")]) - monkeypatch.setattr(module.DocumentService, "get_by_id", lambda doc_id: (True, SimpleNamespace(id=doc_id))) - monkeypatch.setattr(module.DocumentService, "get_tenant_id", lambda _doc_id: "tenant1") - monkeypatch.setattr( - module.DocumentService, - "remove_document", - lambda doc, tenant: events["removed_docs"].append((doc.id, tenant)), - ) - monkeypatch.setattr( - module.File2DocumentService, - "delete_by_file_id", - lambda file_id: events["deleted_links"].append(file_id), - ) - monkeypatch.setattr(module.FileService, "delete", lambda file: events["deleted_files"].append(file.id)) - - req_state["file_ids"] = ["doc-top"] - monkeypatch.setattr( - module.FileService, - "get_by_id", - lambda _file_id: (True, _DummyFile("doc-top", module.FileType.DOC.value, location="top.bin")), - ) - res = _run(module.rm()) - assert res["code"] == module.RetCode.SUCCESS - - req_state["file_ids"] = ["folder1"] - folder1 = _DummyFile("folder1", module.FileType.FOLDER.value, location="") - nested_folder = _DummyFile("nested-folder", module.FileType.FOLDER.value, parent_id="folder1", location="") - doc1 = _DummyFile("doc1", module.FileType.DOC.value, parent_id="folder1", location="doc1.bin") - doc2 = _DummyFile("doc2", module.FileType.DOC.value, parent_id="nested-folder", location="doc2.bin") - - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (True, folder1)) - - def _list_all(parent_id): - if parent_id == "folder1": - return [nested_folder, doc1] - if parent_id == "nested-folder": - return [doc2] - return [] - - monkeypatch.setattr(module.FileService, "list_all_files_by_parent_id", _list_all) - res = _run(module.rm()) - assert res["code"] == module.RetCode.SUCCESS - assert res["data"] is True - assert ("pf1", "top.bin") in events["rm_calls"] - assert ("folder1", "doc1.bin") in events["rm_calls"] - assert ("nested-folder", "doc2.bin") in events["rm_calls"] - assert {"doc-top", "doc1", "doc2", "nested-folder", "folder1"}.issubset(set(events["deleted_files"])) - assert {"doc-top", "doc1", "doc2"}.issubset(set(events["deleted_links"])) - assert len(events["removed_docs"]) >= 3 - - async def _thread_pool_boom(_func, *_args, **_kwargs): - raise RuntimeError("rm route boom") - - monkeypatch.setattr(module, "thread_pool_exec", _thread_pool_boom) - req_state["file_ids"] = ["boom"] - res = _run(module.rm()) - assert res["code"] == 500 - assert "rm route boom" in res["message"] - @pytest.mark.p2 -def 
test_rename_branch_matrix_unit(monkeypatch): - module = _load_file_app_module(monkeypatch) - req_state = {"file_id": "f1", "name": "new.txt"} - _set_request_json(monkeypatch, module, req_state) +def test_rename_via_move_route(monkeypatch): + module = _load_file_api_module(monkeypatch) - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (False, None)) - res = _run(module.rename()) - assert res["message"] == "File not found!" + async def _validate(_request, _schema): + return {"src_file_ids": ["file1"], "new_name": "renamed.txt"}, None - monkeypatch.setattr( - module.FileService, - "get_by_id", - lambda _file_id: (True, _DummyFile("f1", module.FileType.DOC.value, name="origin.txt")), - ) - monkeypatch.setattr(module, "check_file_team_permission", lambda _file, _uid: False) - res = _run(module.rename()) - assert res["code"] == module.RetCode.AUTHENTICATION_ERROR - assert res["message"] == "No authorization." + seen = {} - monkeypatch.setattr(module, "check_file_team_permission", lambda _file, _uid: True) - req_state["name"] = "new.pdf" - res = _run(module.rename()) - assert res["code"] == module.RetCode.ARGUMENT_ERROR - assert "extension of file can't be changed" in res["message"] + async def _move_files(tenant_id, src_file_ids, dest_file_id=None, new_name=None): + seen["args"] = (tenant_id, src_file_ids, dest_file_id, new_name) + return True, True - req_state["name"] = "new.txt" - monkeypatch.setattr(module.FileService, "query", lambda **_kwargs: [SimpleNamespace(name="new.txt")]) - res = _run(module.rename()) - assert "Duplicated file name in the same folder." in res["message"] + monkeypatch.setattr(module, "validate_and_parse_json_request", _validate) + monkeypatch.setattr(module.file_api_service, "move_files", _move_files) - monkeypatch.setattr(module.FileService, "query", lambda **_kwargs: []) - monkeypatch.setattr(module.FileService, "update_by_id", lambda *_args, **_kwargs: False) - res = _run(module.rename()) - assert "Database error (File rename)!" in res["message"] - - monkeypatch.setattr(module.FileService, "update_by_id", lambda *_args, **_kwargs: True) - monkeypatch.setattr(module.File2DocumentService, "get_by_file_id", lambda _file_id: [SimpleNamespace(document_id="doc1")]) - monkeypatch.setattr(module.DocumentService, "update_by_id", lambda *_args, **_kwargs: False) - res = _run(module.rename()) - assert "Database error (Document rename)!" 
in res["message"] - - monkeypatch.setattr(module.File2DocumentService, "get_by_file_id", lambda _file_id: []) - res = _run(module.rename()) - assert res["code"] == module.RetCode.SUCCESS + res = _run(module.move("tenant1")) + assert seen["args"] == ("tenant1", ["file1"], None, "renamed.txt") + assert res["code"] == 0 assert res["data"] is True - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (_ for _ in ()).throw(RuntimeError("rename boom"))) - monkeypatch.setattr(module, "server_error_response", lambda err: {"code": 500, "message": str(err)}) - res = _run(module.rename()) - assert res["code"] == 500 - assert "rename boom" in res["message"] + +@pytest.mark.p2 +def test_download_falls_back_to_document_storage(monkeypatch): + module = _load_file_api_module(monkeypatch) + storage_calls = [] + + def _get(bucket, location): + storage_calls.append((bucket, location)) + return b"" if len(storage_calls) == 1 else b"fallback-blob" + + monkeypatch.setattr(module.settings, "STORAGE_IMPL", SimpleNamespace(get=_get)) + res = _run(module.download("tenant1", "file1")) + + assert storage_calls == [("bucket1", "path1"), ("bucket2", "path2")] + assert res.data == b"fallback-blob" + assert res.headers["content_type"] == "text/plain" + assert res.headers["ext"] == "txt" @pytest.mark.p2 -def test_get_file_branch_matrix_unit(monkeypatch): - module = _load_file_app_module(monkeypatch) +def test_parent_and_ancestors_use_new_routes(monkeypatch): + module = _load_file_api_module(monkeypatch) - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (False, None)) - res = _run(module.get("missing")) - assert res["message"] == "Document not found!" + parent_res = module.parent_folder("tenant1", "file1") + ancestors_res = module.ancestors("tenant1", "file1") - monkeypatch.setattr( - module.FileService, - "get_by_id", - lambda _file_id: (True, _DummyFile("f1", module.FileType.DOC.value, name="a.txt")), - ) - monkeypatch.setattr(module, "check_file_team_permission", lambda _file, _uid: False) - res = _run(module.get("f1")) - assert res["code"] == module.RetCode.AUTHENTICATION_ERROR - assert res["message"] == "No authorization." 
- - class _Storage: - def __init__(self): - self.calls = [] - - def get(self, bucket, location): - self.calls.append((bucket, location)) - if len(self.calls) == 1: - return None - return b"blob-data" - - storage = _Storage() - monkeypatch.setattr(module.settings, "STORAGE_IMPL", storage) - monkeypatch.setattr( - module.FileService, - "get_by_id", - lambda _file_id: ( - True, - _DummyFile( - "f1", - module.FileType.VISUAL.value, - parent_id="pf1", - location="loc1", - name="image.abc", - ), - ), - ) - monkeypatch.setattr(module, "check_file_team_permission", lambda _file, _uid: True) - monkeypatch.setattr(module.File2DocumentService, "get_storage_address", lambda **_kwargs: ("pf2", "loc2")) - - async def _make_response(data): - return _DummyResponse(data) - - monkeypatch.setattr(module, "make_response", _make_response) - monkeypatch.setattr( - module, - "apply_safe_file_response_headers", - lambda response, content_type, ext: response.headers.update( - {"content_type": content_type, "extension": ext} - ), - ) - res = _run(module.get("f1")) - assert isinstance(res, _DummyResponse) - assert res.data == b"blob-data" - assert storage.calls == [("pf1", "loc1"), ("pf2", "loc2")] - assert res.headers["extension"] == "abc" - assert res.headers["content_type"] == "image/abc" + assert parent_res["code"] == 0 + assert parent_res["data"]["parent_folder"]["id"] == "parent1" + assert ancestors_res["code"] == 0 + assert ancestors_res["data"]["parent_folders"][0]["id"] == "root" -@pytest.mark.p2 -def test_get_file_content_type_and_error_paths_unit(monkeypatch): - module = _load_file_app_module(monkeypatch) - monkeypatch.setattr(module, "check_file_team_permission", lambda _file, _uid: True) - - class _Storage: - @staticmethod - def get(_bucket, _location): - return b"blob-data" - - monkeypatch.setattr(module.settings, "STORAGE_IMPL", _Storage()) - monkeypatch.setattr(module.File2DocumentService, "get_storage_address", lambda **_kwargs: ("pf2", "loc2")) - - async def _make_response(data): - return _DummyResponse(data) - - headers_calls = [] - - def _apply_headers(response, content_type, ext): - headers_calls.append((content_type, ext)) - response.headers["content_type"] = content_type - response.headers["extension"] = ext - - monkeypatch.setattr(module, "make_response", _make_response) - monkeypatch.setattr(module, "apply_safe_file_response_headers", _apply_headers) - - monkeypatch.setattr( - module.FileService, - "get_by_id", - lambda _file_id: ( - True, - _DummyFile("img", module.FileType.VISUAL.value, parent_id="pf1", location="loc1", name="image.abc"), - ), - ) - res = _run(module.get("img")) - assert isinstance(res, _DummyResponse) - assert res.headers["content_type"] == "image/abc" - assert res.headers["extension"] == "abc" - - monkeypatch.setattr( - module.FileService, - "get_by_id", - lambda _file_id: ( - True, - _DummyFile("noext", module.FileType.DOC.value, parent_id="pf1", location="loc1", name="README"), - ), - ) - res = _run(module.get("noext")) - assert isinstance(res, _DummyResponse) - assert res.headers["content_type"] is None - assert res.headers["extension"] is None - assert headers_calls == [("image/abc", "abc"), (None, None)] - - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (_ for _ in ()).throw(RuntimeError("get crash"))) - monkeypatch.setattr(module, "server_error_response", lambda err: {"code": 500, "message": str(err)}) - res = _run(module.get("boom")) - assert res["code"] == 500 - assert "get crash" in res["message"] - - -@pytest.mark.p2 -def 
test_move_recursive_branch_matrix_unit(monkeypatch): - module = _load_file_app_module(monkeypatch) - req_state = {"src_file_ids": ["f1"], "dest_file_id": "dest"} - _set_request_json(monkeypatch, module, req_state) - - async def _thread_pool_exec(fn, *args, **kwargs): - return fn(*args, **kwargs) - - monkeypatch.setattr(module, "thread_pool_exec", _thread_pool_exec) - monkeypatch.setattr(module, "check_file_team_permission", lambda _file, _uid: True) - - dest_folder = SimpleNamespace(id="dest") - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (False, None)) - res = _run(module.move()) - assert res["message"] == "Parent folder not found!" - - monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (True, dest_folder)) - monkeypatch.setattr(module.FileService, "get_by_ids", lambda _file_ids: []) - res = _run(module.move()) - assert res["message"] == "Source files not found!" - - req_state["src_file_ids"] = ["f1", "f2"] - monkeypatch.setattr(module.FileService, "get_by_ids", lambda _file_ids: [_DummyFile("f1", module.FileType.DOC.value)]) - res = _run(module.move()) - assert res["message"] == "File or folder not found!" - - req_state["src_file_ids"] = ["tenant-missing"] - monkeypatch.setattr( - module.FileService, - "get_by_ids", - lambda _file_ids: [_DummyFile("tenant-missing", module.FileType.DOC.value, tenant_id=None)], - ) - res = _run(module.move()) - assert res["message"] == "Tenant not found!" - - req_state["src_file_ids"] = ["deny"] - monkeypatch.setattr(module.FileService, "get_by_ids", lambda _file_ids: [_DummyFile("deny", module.FileType.DOC.value)]) - monkeypatch.setattr(module, "check_file_team_permission", lambda _file, _uid: False) - res = _run(module.move()) - assert res["code"] == module.RetCode.AUTHENTICATION_ERROR - assert res["message"] == "No authorization." 
- - monkeypatch.setattr(module, "check_file_team_permission", lambda _file, _uid: True) - - req_state["src_file_ids"] = ["folder_existing", "folder_new", "doc_main"] - folder_existing = _DummyFile( - "folder_existing", - module.FileType.FOLDER.value, - tenant_id="tenant1", - parent_id="old_bucket", - location="", - name="existing-folder", - ) - folder_new = _DummyFile( - "folder_new", - module.FileType.FOLDER.value, - tenant_id="tenant1", - parent_id="old_bucket", - location="", - name="new-folder", - ) - doc_main = _DummyFile( - "doc_main", - module.FileType.DOC.value, - tenant_id="tenant1", - parent_id="old_bucket", - location="doc.bin", - name="doc.bin", - ) - sub_doc = _DummyFile( - "sub_doc", - module.FileType.DOC.value, - tenant_id="tenant1", - parent_id="folder_existing", - location="sub.txt", - name="sub.txt", - ) - - monkeypatch.setattr(module.FileService, "get_by_ids", lambda _file_ids: [folder_existing, folder_new, doc_main]) - - inserted = [] - deleted = [] - updated = [] - existing_dest = SimpleNamespace(id="dest-existing") - new_dest = SimpleNamespace(id="dest-new") - - def _query(**kwargs): - if kwargs.get("name") == "existing-folder": - return [existing_dest] - if kwargs.get("name") == "new-folder": - return [] - return [] - - def _insert(payload): - inserted.append(payload) - return new_dest - - def _list_subfiles(parent_id): - if parent_id == "folder_existing": - return [sub_doc] - if parent_id == "folder_new": - return [] - return [] - - class _Storage: - def __init__(self): - self.move_calls = [] - self._collision = 0 - - def obj_exist(self, _bucket, location): - if location == "doc.bin" and self._collision == 0: - self._collision += 1 - return True - return False - - def move(self, old_parent, old_location, new_parent, new_location): - self.move_calls.append((old_parent, old_location, new_parent, new_location)) - - storage = _Storage() - monkeypatch.setattr(module.settings, "STORAGE_IMPL", storage) - monkeypatch.setattr(module.FileService, "query", _query) - monkeypatch.setattr(module.FileService, "insert", _insert) - monkeypatch.setattr(module.FileService, "list_all_files_by_parent_id", _list_subfiles) - monkeypatch.setattr(module.FileService, "delete_by_id", lambda file_id: deleted.append(file_id)) - monkeypatch.setattr(module.FileService, "update_by_id", lambda file_id, payload: updated.append((file_id, payload)) or True) - - res = _run(module.move()) - assert res["code"] == module.RetCode.SUCCESS - assert res["data"] is True - assert inserted and inserted[0]["name"] == "new-folder" - assert set(deleted) == {"folder_existing", "folder_new"} - assert ("old_bucket", "doc.bin", "dest", "doc.bin_") in storage.move_calls - assert ("folder_existing", "sub.txt", "dest-existing", "sub.txt") in storage.move_calls - assert ("doc_main", {"parent_id": "dest", "location": "doc.bin_"}) in updated - assert ("sub_doc", {"parent_id": "dest-existing", "location": "sub.txt"}) in updated - - req_state["src_file_ids"] = ["boom_doc"] - monkeypatch.setattr( - module.FileService, - "get_by_ids", - lambda _file_ids: [ - _DummyFile("boom_doc", module.FileType.DOC.value, tenant_id="tenant1", parent_id="old_bucket", location="boom", name="boom") - ], - ) - - class _StorageBoom: - @staticmethod - def obj_exist(_bucket, _location): - return False - - @staticmethod - def move(*_args, **_kwargs): - raise RuntimeError("storage down") - - monkeypatch.setattr(module.settings, "STORAGE_IMPL", _StorageBoom()) - monkeypatch.setattr(module, "server_error_response", lambda err: {"code": 500, "message": 
str(err)}) - res = _run(module.move()) - assert res["code"] == 500 - assert "Move file failed at storage layer: storage down" in res["message"] diff --git a/web/src/components/new-document-link.tsx b/web/src/components/new-document-link.tsx index 85480d145..e2c65a62a 100644 --- a/web/src/components/new-document-link.tsx +++ b/web/src/components/new-document-link.tsx @@ -10,7 +10,7 @@ interface IProps extends React.PropsWithChildren { color?: string; documentName: string; documentId?: string; - prefix?: string; + resource?: 'document' | 'files'; className?: string; } @@ -21,13 +21,13 @@ const NewDocumentLink = ({ color = 'rgb(15, 79, 170)', documentId, documentName, - prefix = 'file', + resource = 'document', className, }: IProps) => { let nextLink = link; const extension = getExtension(documentName); if (!link) { - nextLink = `/document/${documentId}?ext=${extension}&prefix=${prefix}`; + nextLink = `/document/${documentId}?ext=${extension}&resource=${resource}`; } return ( diff --git a/web/src/components/next-message-item/uploaded-message-files.tsx b/web/src/components/next-message-item/uploaded-message-files.tsx index bf1894626..badeeb160 100644 --- a/web/src/components/next-message-item/uploaded-message-files.tsx +++ b/web/src/components/next-message-item/uploaded-message-files.tsx @@ -19,7 +19,7 @@ function NameWidget({ name, size }: NameWidgetType) { return (
{/* {id ? ( - + {name} ) : ( diff --git a/web/src/hooks/use-file-request.ts b/web/src/hooks/use-file-request.ts index 9e1bf88d3..abd2972ea 100644 --- a/web/src/hooks/use-file-request.ts +++ b/web/src/hooks/use-file-request.ts @@ -78,7 +78,8 @@ export const useUploadFile = () => { export interface IMoveFileBody { src_file_ids: string[]; - dest_file_id: string; // target folder id + dest_file_id?: string; + new_name?: string; } export const useMoveFile = () => { @@ -119,7 +120,8 @@ export const useCreateFolder = () => { mutationKey: [FileApiAction.CreateFolder], mutationFn: async (params: { parentId: string; name: string }) => { const { data } = await fileManagerService.createFolder({ - ...params, + name: params.name, + parent_id: params.parentId, type: 'folder', }); if (data.code === 0) { @@ -143,9 +145,10 @@ export const useFetchParentFolderList = () => { initialData: [], enabled: !!id, queryFn: async () => { - const { data } = await fileManagerService.getAllParentFolder({ - fileId: id, - }); + const { data } = await fileManagerService.getAllParentFolder( + {}, + `${id}/ancestors`, + ); return data?.data?.parent_folders?.toReversed() ?? []; }, @@ -221,7 +224,9 @@ export const useDeleteFile = () => { } = useMutation({ mutationKey: [FileApiAction.DeleteFile], mutationFn: async (params: { fileIds: string[]; parentId: string }) => { - const { data } = await fileManagerService.removeFile(params); + const { data } = await fileManagerService.removeFile({ + ids: params.fileIds, + }); if (data.code === 0) { message.success(t('message.deleted')); setPaginationParams(1); // TODO: There should be a better way to paginate the request list @@ -262,7 +267,10 @@ export const useRenameFile = () => { } = useMutation({ mutationKey: [FileApiAction.RenameFile], mutationFn: async (params: { fileId: string; name: string }) => { - const { data } = await fileManagerService.renameFile(params); + const { data } = await fileManagerService.moveFile({ + src_file_ids: [params.fileId], + new_name: params.name, + }); if (data.code === 0) { message.success(t('message.renamed')); queryClient.invalidateQueries({ diff --git a/web/src/pages/document-viewer/index.tsx b/web/src/pages/document-viewer/index.tsx index 9bcd60c0f..5a66a1402 100644 --- a/web/src/pages/document-viewer/index.tsx +++ b/web/src/pages/document-viewer/index.tsx @@ -1,5 +1,5 @@ import { Images } from '@/constants/common'; -import { api_host } from '@/utils/api'; +import { ExternalApi, api_host } from '@/utils/api'; import { useParams, useSearchParams } from 'react-router'; // import Docx from './docx'; // import Excel from './excel'; @@ -24,12 +24,16 @@ const DocumentViewer = () => { const { id: documentId } = useParams(); const [currentQueryParameters] = useSearchParams(); const ext = currentQueryParameters.get('ext'); - const prefix = currentQueryParameters.get('prefix'); - const api = `${api_host}/${prefix || 'file'}/get/${documentId}`; + const resource = + currentQueryParameters.get('resource') === 'files' ? 'files' : 'document'; + const api = + resource === 'files' + ? `${ExternalApi}${api_host}/files/${documentId}` + : `${api_host}/document/get/${documentId}`; // request.head if (ext === 'html' && documentId) { - previewHtmlFile(documentId); + previewHtmlFile(documentId, resource); return; } diff --git a/web/src/pages/files/action-cell.tsx b/web/src/pages/files/action-cell.tsx index b70531dee..9e5285b60 100644 --- a/web/src/pages/files/action-cell.tsx +++ b/web/src/pages/files/action-cell.tsx @@ -124,6 +124,7 @@ export function ActionCell({