Refactor: files /file API to RESTful style (#13741)

### What problem does this PR solve?

Files /file API to RESTful style.

### Type of change

- [x] Documentation Update
- [x] Refactoring

---------

Co-authored-by: writinwaters <cai.keith@gmail.com>
Co-authored-by: Liu An <asiro@qq.com>
This commit is contained in:
Yongteng Lei
2026-03-24 19:24:41 +08:00
committed by GitHub
parent 10a36d6443
commit 3d10e2075c
23 changed files with 2118 additions and 3553 deletions

View File

@ -1,5 +1,5 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -765,7 +765,6 @@ async def download_attachment(attachment_id):
@login_required
@validate_request("doc_id")
async def change_parser():
req = await get_request_json()
if not DocumentService.accessible(req["doc_id"], current_user.id):
return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)
@ -983,10 +982,34 @@ async def set_meta():
@manager.route("/upload_info", methods=["POST"]) # noqa: F821
@login_required
async def upload_info():
    """Return upload metadata for multipart file(s) XOR a remote ``url``.

    Exactly one input source must be supplied: either one or more multipart
    form parts named ``file``, or the ``url`` query parameter — never both.
    Responds with a single result object for one file (or a url), and a list
    of result objects when several files are uploaded.
    """
    files = await request.files
    # getlist() yields every multipart part named "file"; empty list if absent.
    file_objs = files.getlist("file") if files and files.get("file") else []
    url = request.args.get("url")
    if file_objs and url:
        return get_json_result(
            data=False,
            message="Provide either multipart file(s) or ?url=..., not both.",
            code=RetCode.BAD_REQUEST,
        )
    if not file_objs and not url:
        return get_json_result(
            data=False,
            message="Missing input: provide multipart file(s) or url",
            code=RetCode.BAD_REQUEST,
        )
    try:
        # Fix: the pre-refactor code left an unconditional `return` here that
        # short-circuited the branching below and referenced a stale `file`
        # variable; only the branched logic remains.
        if url and not file_objs:
            return get_json_result(data=FileService.upload_info(current_user.id, None, url))
        if len(file_objs) == 1:
            # Keep the single-file response shape backward compatible
            # (a dict, not a one-element list).
            return get_json_result(data=FileService.upload_info(current_user.id, file_objs[0], None))
        results = [FileService.upload_info(current_user.id, f, None) for f in file_objs]
        return get_json_result(data=results)
    except Exception as e:
        return server_error_response(e)

View File

@ -1,5 +1,5 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -77,7 +77,7 @@ async def convert():
doc = DocumentService.insert({
"id": get_uuid(),
"kb_id": kb.id,
"parser_id": kb.parser_id,
"parser_id": FileService.get_parser(file.type, file.name, kb.parser_id),
"pipeline_id": kb.pipeline_id,
"parser_config": kb.parser_config,
"created_by": current_user.id,

View File

@ -1,464 +1,464 @@
# #
# # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
# #
# # Licensed under the Apache License, Version 2.0 (the "License");
# # you may not use this file except in compliance with the License.
# # You may obtain a copy of the License at
# #
# # http://www.apache.org/licenses/LICENSE-2.0
# #
# # Unless required by applicable law or agreed to in writing, software
# # distributed under the License is distributed on an "AS IS" BASIS,
# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# # See the License for the specific language governing permissions and
# # limitations under the License
# #
# import logging
# import os
# import pathlib
# import re
# from quart import request, make_response
# from api.apps import login_required, current_user
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
# from api.common.check_team_permission import check_file_team_permission
# from api.db.services.document_service import DocumentService
# from api.db.services.file2document_service import File2DocumentService
# from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
# from common.misc_utils import get_uuid, thread_pool_exec
# from common.constants import RetCode, FileSource
# from api.db import FileType
# from api.db.services import duplicate_name
# from api.db.services.file_service import FileService
# from api.utils.api_utils import get_json_result, get_request_json
# from api.utils.file_utils import filename_type
# from api.utils.web_utils import CONTENT_TYPE_MAP, apply_safe_file_response_headers
# from common import settings
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# @manager.route('/upload', methods=['POST']) # noqa: F821
# @login_required
# # @validate_request("parent_id")
# async def upload():
# form = await request.form
# pf_id = form.get("parent_id")
#
# http://www.apache.org/licenses/LICENSE-2.0
# if not pf_id:
# root_folder = FileService.get_root_folder(current_user.id)
# pf_id = root_folder["id"]
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License
# files = await request.files
# if 'file' not in files:
# return get_json_result(
# data=False, message='No file part!', code=RetCode.ARGUMENT_ERROR)
# file_objs = files.getlist('file')
#
import logging
import os
import pathlib
import re
from quart import request, make_response
from api.apps import login_required, current_user
from api.common.check_team_permission import check_file_team_permission
from api.db.services.document_service import DocumentService
from api.db.services.file2document_service import File2DocumentService
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
from common.misc_utils import get_uuid, thread_pool_exec
from common.constants import RetCode, FileSource
from api.db import FileType
from api.db.services import duplicate_name
from api.db.services.file_service import FileService
from api.utils.api_utils import get_json_result, get_request_json
from api.utils.file_utils import filename_type
from api.utils.web_utils import CONTENT_TYPE_MAP, apply_safe_file_response_headers
from common import settings
@manager.route('/upload', methods=['POST']) # noqa: F821
@login_required
# @validate_request("parent_id")
async def upload():
    """Upload one or more multipart files under a folder.

    Form field ``parent_id`` selects the destination folder (defaults to the
    caller's root folder). Filenames containing ``/`` create intermediate
    folders. Returns a list with one inserted-file JSON record per upload.
    """
    form = await request.form
    pf_id = form.get("parent_id")
    if not pf_id:
        root_folder = FileService.get_root_folder(current_user.id)
        pf_id = root_folder["id"]
    files = await request.files
    if 'file' not in files:
        return get_json_result(
            data=False, message='No file part!', code=RetCode.ARGUMENT_ERROR)
    file_objs = files.getlist('file')
    # Reject the whole batch if any part has an empty filename.
    for file_obj in file_objs:
        if file_obj.filename == '':
            return get_json_result(
                data=False, message='No file selected!', code=RetCode.ARGUMENT_ERROR)
    file_res = []
    try:
        e, pf_folder = FileService.get_by_id(pf_id)
        if not e:
            return get_data_error_result( message="Can't find this folder!")
        async def _handle_single_file(file_obj):
            # Per-file pipeline: quota check, folder-path resolution/creation,
            # blob storage, then DB insert. Blocking calls are offloaded via
            # thread_pool_exec to keep the event loop responsive.
            MAX_FILE_NUM_PER_USER: int = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
            if 0 < MAX_FILE_NUM_PER_USER <= await thread_pool_exec(DocumentService.get_doc_count, current_user.id):
                return get_data_error_result( message="Exceed the maximum file number of a free user!")
            # split file name path
            if not file_obj.filename:
                file_obj_names = [pf_folder.name, file_obj.filename]
            else:
                full_path = '/' + file_obj.filename
                file_obj_names = full_path.split('/')
            file_len = len(file_obj_names)
            # get folder
            file_id_list = await thread_pool_exec(FileService.get_id_list_by_id, pf_id, file_obj_names, 1, [pf_id])
            len_id_list = len(file_id_list)
            # create folder
            # NOTE(review): the two branches differ only in the index offset
            # (-1 vs -2) depending on whether every path segment resolved —
            # confirm against FileService.get_id_list_by_id's contract.
            if file_len != len_id_list:
                e, file = await thread_pool_exec(FileService.get_by_id, file_id_list[len_id_list - 1])
                if not e:
                    return get_data_error_result(message="Folder not found!")
                last_folder = await thread_pool_exec(FileService.create_folder, file, file_id_list[len_id_list - 1], file_obj_names,
                                                     len_id_list)
            else:
                e, file = await thread_pool_exec(FileService.get_by_id, file_id_list[len_id_list - 2])
                if not e:
                    return get_data_error_result(message="Folder not found!")
                last_folder = await thread_pool_exec(FileService.create_folder, file, file_id_list[len_id_list - 2], file_obj_names,
                                                     len_id_list)
            # file type
            filetype = filename_type(file_obj_names[file_len - 1])
            location = file_obj_names[file_len - 1]
            # Suffix "_" until the storage key is unique within the folder.
            while await thread_pool_exec(settings.STORAGE_IMPL.obj_exist, last_folder.id, location):
                location += "_"
            blob = await thread_pool_exec(file_obj.read)
            # De-duplicate the display name against siblings in the folder.
            filename = await thread_pool_exec(
                duplicate_name,
                FileService.query,
                name=file_obj_names[file_len - 1],
                parent_id=last_folder.id)
            await thread_pool_exec(settings.STORAGE_IMPL.put, last_folder.id, location, blob)
            file_data = {
                "id": get_uuid(),
                "parent_id": last_folder.id,
                "tenant_id": current_user.id,
                "created_by": current_user.id,
                "type": filetype,
                "name": filename,
                "location": location,
                "size": len(blob),
            }
            inserted = await thread_pool_exec(FileService.insert, file_data)
            return inserted.to_json()
        for file_obj in file_objs:
            res = await _handle_single_file(file_obj)
            file_res.append(res)
        return get_json_result(data=file_res)
    except Exception as e:
        return server_error_response(e)
@manager.route('/create', methods=['POST']) # noqa: F821
@login_required
@validate_request("name")
async def create():
    """Create a folder (or virtual file) under the given parent folder.

    JSON body: ``name`` (required), optional ``parent_id`` (defaults to the
    caller's root folder) and ``type``. Returns the inserted record as JSON.
    """
    payload = await get_request_json()
    parent_id = payload.get("parent_id")
    if not parent_id:
        parent_id = FileService.get_root_folder(current_user.id)["id"]
    try:
        if not FileService.is_parent_folder_exist(parent_id):
            return get_json_result(
                data=False, message="Parent Folder Doesn't Exist!", code=RetCode.OPERATING_ERROR)
        if FileService.query(name=payload["name"], parent_id=parent_id):
            return get_data_error_result(
                message="Duplicated folder name in the same folder.")
        # Anything other than an explicit FOLDER request becomes VIRTUAL.
        requested = payload.get("type")
        entry_type = (
            FileType.FOLDER.value
            if requested == FileType.FOLDER.value
            else FileType.VIRTUAL.value
        )
        created = FileService.insert({
            "id": get_uuid(),
            "parent_id": parent_id,
            "tenant_id": current_user.id,
            "created_by": current_user.id,
            "name": payload["name"],
            "location": "",
            "size": 0,
            "type": entry_type
        })
        return get_json_result(data=created.to_json())
    except Exception as e:
        return server_error_response(e)
@manager.route('/list', methods=['GET']) # noqa: F821
@login_required
def list_files():
    """Paginated listing of a folder's entries.

    Query args: ``parent_id`` (defaults to the caller's root folder),
    ``keywords``, ``page``, ``page_size``, ``orderby``, ``desc``.
    """
    pf_id = request.args.get("parent_id")
    keywords = request.args.get("keywords", "")
    page_number = int(request.args.get("page", 1))
    items_per_page = int(request.args.get("page_size", 15))
    orderby = request.args.get("orderby", "create_time")
    # NOTE(review): when ?desc=... is supplied this is a *string*, otherwise
    # the bool True — confirm get_by_pf_id handles both representations.
    desc = request.args.get("desc", True)
    if not pf_id:
        root_folder = FileService.get_root_folder(current_user.id)
        pf_id = root_folder["id"]
        # Mirror knowledge-base documents under the root folder on first list.
        FileService.init_knowledgebase_docs(pf_id, current_user.id)
    try:
        e, file = FileService.get_by_id(pf_id)
        if not e:
            return get_data_error_result(message="Folder not found!")
        files, total = FileService.get_by_pf_id(
            current_user.id, pf_id, page_number, items_per_page, orderby, desc, keywords)
        parent_folder = FileService.get_parent_folder(pf_id)
        if not parent_folder:
            return get_json_result(message="File not found!")
        return get_json_result(data={"total": total, "files": files, "parent_folder": parent_folder.to_json()})
    except Exception as e:
        return server_error_response(e)
@manager.route('/root_folder', methods=['GET']) # noqa: F821
@login_required
def get_root_folder():
    """Return the caller's root folder record."""
    try:
        folder = FileService.get_root_folder(current_user.id)
        payload = {"root_folder": folder}
        return get_json_result(data=payload)
    except Exception as e:
        return server_error_response(e)
@manager.route('/parent_folder', methods=['GET']) # noqa: F821
@login_required
def get_parent_folder():
    """Return the immediate parent folder of the file given by ``file_id``."""
    file_id = request.args.get("file_id")
    try:
        e, file = FileService.get_by_id(file_id)
        if not e:
            return get_data_error_result(message="Folder not found!")
        parent_folder = FileService.get_parent_folder(file_id)
        # Fix: guard a missing parent (list_files checks the same call);
        # calling .to_json() on None raised and surfaced as a 500.
        if not parent_folder:
            return get_data_error_result(message="Folder not found!")
        return get_json_result(data={"parent_folder": parent_folder.to_json()})
    except Exception as e:
        return server_error_response(e)
@manager.route('/all_parent_folder', methods=['GET']) # noqa: F821
@login_required
def get_all_parent_folders():
    """Return every ancestor folder of the file given by ``file_id``."""
    file_id = request.args.get("file_id")
    try:
        found, _entry = FileService.get_by_id(file_id)
        if not found:
            return get_data_error_result(message="Folder not found!")
        ancestors = [
            folder.to_json()
            for folder in FileService.get_all_parent_folders(file_id)
        ]
        return get_json_result(data={"parent_folders": ancestors})
    except Exception as e:
        return server_error_response(e)
@manager.route("/rm", methods=["POST"]) # noqa: F821
@login_required
@validate_request("file_ids")
async def rm():
    """Delete files and/or folders (folders recursively).

    JSON body: ``{"file_ids": [...]}``. Knowledge-base-sourced entries are
    skipped rather than deleted; documents linked to a file are removed
    alongside it.
    """
    req = await get_request_json()
    file_ids = req["file_ids"]
    uid = current_user.id
    try:
        def _delete_single_file(file):
            # Best-effort blob removal: a storage failure is logged but does
            # not abort the database cleanup below.
            try:
                if file.location:
                    settings.STORAGE_IMPL.rm(file.parent_id, file.location)
            except Exception as e:
                logging.exception(f"Fail to remove object: {file.parent_id}/{file.location}, error: {e}")
            # Cascade-delete documents linked to this file.
            informs = File2DocumentService.get_by_file_id(file.id)
            for inform in informs:
                doc_id = inform.document_id
                e, doc = DocumentService.get_by_id(doc_id)
                if e and doc:
                    tenant_id = DocumentService.get_tenant_id(doc_id)
                    if tenant_id:
                        DocumentService.remove_document(doc, tenant_id)
            File2DocumentService.delete_by_file_id(file.id)
            FileService.delete(file)
        def _delete_folder_recursive(folder, tenant_id):
            # Depth-first: delete children before the folder itself.
            sub_files = FileService.list_all_files_by_parent_id(folder.id)
            for sub_file in sub_files:
                if sub_file.type == FileType.FOLDER.value:
                    _delete_folder_recursive(sub_file, tenant_id)
                else:
                    _delete_single_file(sub_file)
            FileService.delete(folder)
        def _rm_sync():
            # Runs on a worker thread (thread_pool_exec below) because the
            # service/storage calls are blocking.
            for file_id in file_ids:
                e, file = FileService.get_by_id(file_id)
                if not e or not file:
                    return get_data_error_result(message="File or Folder not found!")
                if not file.tenant_id:
                    return get_data_error_result(message="Tenant not found!")
                if not check_file_team_permission(file, uid):
                    return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)
                if file.source_type == FileSource.KNOWLEDGEBASE:
                    continue
                if file.type == FileType.FOLDER.value:
                    _delete_folder_recursive(file, uid)
                    continue
                _delete_single_file(file)
            return get_json_result(data=True)
        return await thread_pool_exec(_rm_sync)
    except Exception as e:
        return server_error_response(e)
@manager.route('/rename', methods=['POST']) # noqa: F821
@login_required
@validate_request("file_id", "name")
async def rename():
    """Rename a file or folder; files must keep their original extension.

    Also renames the first linked document, if any, so names stay in sync.
    """
    req = await get_request_json()
    try:
        e, file = FileService.get_by_id(req["file_id"])
        if not e:
            return get_data_error_result(message="File not found!")
        if not check_file_team_permission(file, current_user.id):
            return get_json_result(data=False, message='No authorization.', code=RetCode.AUTHENTICATION_ERROR)
        # Changing a file's extension is disallowed (folders are exempt).
        if file.type != FileType.FOLDER.value \
                and pathlib.Path(req["name"].lower()).suffix != pathlib.Path(
                file.name.lower()).suffix:
            return get_json_result(
                data=False,
                message="The extension of file can't be changed",
                code=RetCode.ARGUMENT_ERROR)
        # NOTE(review): this loop rebinds `file`, and the query keyword is
        # `pf_id` while sibling handlers pass `parent_id` — confirm both are
        # intentional against FileService.query's signature.
        for file in FileService.query(name=req["name"], pf_id=file.parent_id):
            if file.name == req["name"]:
                return get_data_error_result(
                    message="Duplicated file name in the same folder.")
        if not FileService.update_by_id(
                req["file_id"], {"name": req["name"]}):
            return get_data_error_result(
                message="Database error (File rename)!")
        # Keep the linked document's name consistent with the file.
        informs = File2DocumentService.get_by_file_id(req["file_id"])
        if informs:
            if not DocumentService.update_by_id(
                    informs[0].document_id, {"name": req["name"]}):
                return get_data_error_result(
                    message="Database error (Document rename)!")
        return get_json_result(data=True)
    except Exception as e:
        return server_error_response(e)
@manager.route('/get/<file_id>', methods=['GET']) # noqa: F821
@login_required
async def get(file_id):
    """Stream a file's raw content with safe, type-appropriate headers."""
    try:
        e, file = FileService.get_by_id(file_id)
        if not e:
            return get_data_error_result(message="Document not found!")
        if not check_file_team_permission(file, current_user.id):
            return get_json_result(data=False, message='No authorization.', code=RetCode.AUTHENTICATION_ERROR)
        blob = await thread_pool_exec(settings.STORAGE_IMPL.get, file.parent_id, file.location)
        if not blob:
            # Fall back to the linked document's storage address when nothing
            # is stored at the file's own location.
            b, n = File2DocumentService.get_storage_address(file_id=file_id)
            blob = await thread_pool_exec(settings.STORAGE_IMPL.get, b, n)
        response = await make_response(blob)
        # Pick a Content-Type from the filename extension; unknown extensions
        # fall back to image/<ext> for visual files, else application/<ext>.
        ext = re.search(r"\.([^.]+)$", file.name.lower())
        ext = ext.group(1) if ext else None
        content_type = None
        if ext:
            fallback_prefix = "image" if file.type == FileType.VISUAL.value else "application"
            content_type = CONTENT_TYPE_MAP.get(ext, f"{fallback_prefix}/{ext}")
        apply_safe_file_response_headers(response, content_type, ext)
        return response
    except Exception as e:
        return server_error_response(e)
@manager.route("/mv", methods=["POST"]) # noqa: F821
@login_required
@validate_request("src_file_ids", "dest_file_id")
async def move():
    """Move files/folders into another folder, merging same-named folders.

    JSON body: ``{"src_file_ids": [...], "dest_file_id": "..."}``. Folder
    moves are recursive; file blobs are relocated in storage and the DB
    records are re-parented.
    """
    req = await get_request_json()
    try:
        file_ids = req["src_file_ids"]
        dest_parent_id = req["dest_file_id"]
        ok, dest_folder = FileService.get_by_id(dest_parent_id)
        if not ok or not dest_folder:
            return get_data_error_result(message="Parent folder not found!")
        files = FileService.get_by_ids(file_ids)
        if not files:
            return get_data_error_result(message="Source files not found!")
        files_dict = {f.id: f for f in files}
        # Validate existence, tenancy and team permission for every source
        # entry up front, before any storage mutation happens.
        for file_id in file_ids:
            file = files_dict.get(file_id)
            if not file:
                return get_data_error_result(message="File or folder not found!")
            if not file.tenant_id:
                return get_data_error_result(message="Tenant not found!")
            if not check_file_team_permission(file, current_user.id):
                return get_json_result(
                    data=False,
                    message="No authorization.",
                    code=RetCode.AUTHENTICATION_ERROR,
                )
        def _move_entry_recursive(source_file_entry, dest_folder):
            if source_file_entry.type == FileType.FOLDER.value:
                # Merge into an existing same-named folder or create a new
                # one, move the children, then drop the emptied source folder.
                existing_folder = FileService.query(name=source_file_entry.name, parent_id=dest_folder.id)
                if existing_folder:
                    new_folder = existing_folder[0]
                else:
                    new_folder = FileService.insert(
                        {
                            "id": get_uuid(),
                            "parent_id": dest_folder.id,
                            "tenant_id": source_file_entry.tenant_id,
                            "created_by": current_user.id,
                            "name": source_file_entry.name,
                            "location": "",
                            "size": 0,
                            "type": FileType.FOLDER.value,
                        }
                    )
                sub_files = FileService.list_all_files_by_parent_id(source_file_entry.id)
                for sub_file in sub_files:
                    _move_entry_recursive(sub_file, new_folder)
                FileService.delete_by_id(source_file_entry.id)
                return
            old_parent_id = source_file_entry.parent_id
            old_location = source_file_entry.location
            filename = source_file_entry.name
            # Avoid clobbering an existing object: suffix "_" until unique.
            new_location = filename
            while settings.STORAGE_IMPL.obj_exist(dest_folder.id, new_location):
                new_location += "_"
            try:
                settings.STORAGE_IMPL.move(old_parent_id, old_location, dest_folder.id, new_location)
            except Exception as storage_err:
                raise RuntimeError(f"Move file failed at storage layer: {str(storage_err)}")
            # Re-parent the DB record only after the blob move succeeded.
            FileService.update_by_id(
                source_file_entry.id,
                {
                    "parent_id": dest_folder.id,
                    "location": new_location,
                },
            )
        def _move_sync():
            # Blocking service/storage calls run on a worker thread.
            for file in files:
                _move_entry_recursive(file, dest_folder)
            return get_json_result(data=True)
        return await thread_pool_exec(_move_sync)
    except Exception as e:
        return server_error_response(e)
# for file_obj in file_objs:
# if file_obj.filename == '':
# return get_json_result(
# data=False, message='No file selected!', code=RetCode.ARGUMENT_ERROR)
# file_res = []
# try:
# e, pf_folder = FileService.get_by_id(pf_id)
# if not e:
# return get_data_error_result( message="Can't find this folder!")
#
# async def _handle_single_file(file_obj):
# MAX_FILE_NUM_PER_USER: int = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
# if 0 < MAX_FILE_NUM_PER_USER <= await thread_pool_exec(DocumentService.get_doc_count, current_user.id):
# return get_data_error_result( message="Exceed the maximum file number of a free user!")
#
# # split file name path
# if not file_obj.filename:
# file_obj_names = [pf_folder.name, file_obj.filename]
# else:
# full_path = '/' + file_obj.filename
# file_obj_names = full_path.split('/')
# file_len = len(file_obj_names)
#
# # get folder
# file_id_list = await thread_pool_exec(FileService.get_id_list_by_id, pf_id, file_obj_names, 1, [pf_id])
# len_id_list = len(file_id_list)
#
# # create folder
# if file_len != len_id_list:
# e, file = await thread_pool_exec(FileService.get_by_id, file_id_list[len_id_list - 1])
# if not e:
# return get_data_error_result(message="Folder not found!")
# last_folder = await thread_pool_exec(FileService.create_folder, file, file_id_list[len_id_list - 1], file_obj_names,
# len_id_list)
# else:
# e, file = await thread_pool_exec(FileService.get_by_id, file_id_list[len_id_list - 2])
# if not e:
# return get_data_error_result(message="Folder not found!")
# last_folder = await thread_pool_exec(FileService.create_folder, file, file_id_list[len_id_list - 2], file_obj_names,
# len_id_list)
#
# # file type
# filetype = filename_type(file_obj_names[file_len - 1])
# location = file_obj_names[file_len - 1]
# while await thread_pool_exec(settings.STORAGE_IMPL.obj_exist, last_folder.id, location):
# location += "_"
# blob = await thread_pool_exec(file_obj.read)
# filename = await thread_pool_exec(
# duplicate_name,
# FileService.query,
# name=file_obj_names[file_len - 1],
# parent_id=last_folder.id)
# await thread_pool_exec(settings.STORAGE_IMPL.put, last_folder.id, location, blob)
# file_data = {
# "id": get_uuid(),
# "parent_id": last_folder.id,
# "tenant_id": current_user.id,
# "created_by": current_user.id,
# "type": filetype,
# "name": filename,
# "location": location,
# "size": len(blob),
# }
# inserted = await thread_pool_exec(FileService.insert, file_data)
# return inserted.to_json()
#
# for file_obj in file_objs:
# res = await _handle_single_file(file_obj)
# file_res.append(res)
#
# return get_json_result(data=file_res)
# except Exception as e:
# return server_error_response(e)
#
#
# @manager.route('/create', methods=['POST']) # noqa: F821
# @login_required
# @validate_request("name")
# async def create():
# req = await get_request_json()
# pf_id = req.get("parent_id")
# input_file_type = req.get("type")
# if not pf_id:
# root_folder = FileService.get_root_folder(current_user.id)
# pf_id = root_folder["id"]
#
# try:
# if not FileService.is_parent_folder_exist(pf_id):
# return get_json_result(
# data=False, message="Parent Folder Doesn't Exist!", code=RetCode.OPERATING_ERROR)
# if FileService.query(name=req["name"], parent_id=pf_id):
# return get_data_error_result(
# message="Duplicated folder name in the same folder.")
#
# if input_file_type == FileType.FOLDER.value:
# file_type = FileType.FOLDER.value
# else:
# file_type = FileType.VIRTUAL.value
#
# file = FileService.insert({
# "id": get_uuid(),
# "parent_id": pf_id,
# "tenant_id": current_user.id,
# "created_by": current_user.id,
# "name": req["name"],
# "location": "",
# "size": 0,
# "type": file_type
# })
#
# return get_json_result(data=file.to_json())
# except Exception as e:
# return server_error_response(e)
#
#
# @manager.route('/list', methods=['GET']) # noqa: F821
# @login_required
# def list_files():
# pf_id = request.args.get("parent_id")
#
# keywords = request.args.get("keywords", "")
#
# page_number = int(request.args.get("page", 1))
# items_per_page = int(request.args.get("page_size", 15))
# orderby = request.args.get("orderby", "create_time")
# desc = request.args.get("desc", True)
# if not pf_id:
# root_folder = FileService.get_root_folder(current_user.id)
# pf_id = root_folder["id"]
# FileService.init_knowledgebase_docs(pf_id, current_user.id)
# try:
# e, file = FileService.get_by_id(pf_id)
# if not e:
# return get_data_error_result(message="Folder not found!")
#
# files, total = FileService.get_by_pf_id(
# current_user.id, pf_id, page_number, items_per_page, orderby, desc, keywords)
#
# parent_folder = FileService.get_parent_folder(pf_id)
# if not parent_folder:
# return get_json_result(message="File not found!")
#
# return get_json_result(data={"total": total, "files": files, "parent_folder": parent_folder.to_json()})
# except Exception as e:
# return server_error_response(e)
#
#
# @manager.route('/root_folder', methods=['GET']) # noqa: F821
# @login_required
# def get_root_folder():
# try:
# root_folder = FileService.get_root_folder(current_user.id)
# return get_json_result(data={"root_folder": root_folder})
# except Exception as e:
# return server_error_response(e)
#
#
# @manager.route('/parent_folder', methods=['GET']) # noqa: F821
# @login_required
# def get_parent_folder():
# file_id = request.args.get("file_id")
# try:
# e, file = FileService.get_by_id(file_id)
# if not e:
# return get_data_error_result(message="Folder not found!")
#
# parent_folder = FileService.get_parent_folder(file_id)
# return get_json_result(data={"parent_folder": parent_folder.to_json()})
# except Exception as e:
# return server_error_response(e)
#
#
# @manager.route('/all_parent_folder', methods=['GET']) # noqa: F821
# @login_required
# def get_all_parent_folders():
# file_id = request.args.get("file_id")
# try:
# e, file = FileService.get_by_id(file_id)
# if not e:
# return get_data_error_result(message="Folder not found!")
#
# parent_folders = FileService.get_all_parent_folders(file_id)
# parent_folders_res = []
# for parent_folder in parent_folders:
# parent_folders_res.append(parent_folder.to_json())
# return get_json_result(data={"parent_folders": parent_folders_res})
# except Exception as e:
# return server_error_response(e)
#
#
# @manager.route("/rm", methods=["POST"]) # noqa: F821
# @login_required
# @validate_request("file_ids")
# async def rm():
# req = await get_request_json()
# file_ids = req["file_ids"]
# uid = current_user.id
#
# try:
# def _delete_single_file(file):
# try:
# if file.location:
# settings.STORAGE_IMPL.rm(file.parent_id, file.location)
# except Exception as e:
# logging.exception(f"Fail to remove object: {file.parent_id}/{file.location}, error: {e}")
#
# informs = File2DocumentService.get_by_file_id(file.id)
# for inform in informs:
# doc_id = inform.document_id
# e, doc = DocumentService.get_by_id(doc_id)
# if e and doc:
# tenant_id = DocumentService.get_tenant_id(doc_id)
# if tenant_id:
# DocumentService.remove_document(doc, tenant_id)
# File2DocumentService.delete_by_file_id(file.id)
#
# FileService.delete(file)
#
# def _delete_folder_recursive(folder, tenant_id):
# sub_files = FileService.list_all_files_by_parent_id(folder.id)
# for sub_file in sub_files:
# if sub_file.type == FileType.FOLDER.value:
# _delete_folder_recursive(sub_file, tenant_id)
# else:
# _delete_single_file(sub_file)
#
# FileService.delete(folder)
#
# def _rm_sync():
# for file_id in file_ids:
# e, file = FileService.get_by_id(file_id)
# if not e or not file:
# return get_data_error_result(message="File or Folder not found!")
# if not file.tenant_id:
# return get_data_error_result(message="Tenant not found!")
# if not check_file_team_permission(file, uid):
# return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)
#
# if file.source_type == FileSource.KNOWLEDGEBASE:
# continue
#
# if file.type == FileType.FOLDER.value:
# _delete_folder_recursive(file, uid)
# continue
#
# _delete_single_file(file)
#
# return get_json_result(data=True)
#
# return await thread_pool_exec(_rm_sync)
#
# except Exception as e:
# return server_error_response(e)
#
#
# @manager.route('/rename', methods=['POST']) # noqa: F821
# @login_required
# @validate_request("file_id", "name")
# async def rename():
# req = await get_request_json()
# try:
# e, file = FileService.get_by_id(req["file_id"])
# if not e:
# return get_data_error_result(message="File not found!")
# if not check_file_team_permission(file, current_user.id):
# return get_json_result(data=False, message='No authorization.', code=RetCode.AUTHENTICATION_ERROR)
# if file.type != FileType.FOLDER.value \
# and pathlib.Path(req["name"].lower()).suffix != pathlib.Path(
# file.name.lower()).suffix:
# return get_json_result(
# data=False,
# message="The extension of file can't be changed",
# code=RetCode.ARGUMENT_ERROR)
# for file in FileService.query(name=req["name"], pf_id=file.parent_id):
# if file.name == req["name"]:
# return get_data_error_result(
# message="Duplicated file name in the same folder.")
#
# if not FileService.update_by_id(
# req["file_id"], {"name": req["name"]}):
# return get_data_error_result(
# message="Database error (File rename)!")
#
# informs = File2DocumentService.get_by_file_id(req["file_id"])
# if informs:
# if not DocumentService.update_by_id(
# informs[0].document_id, {"name": req["name"]}):
# return get_data_error_result(
# message="Database error (Document rename)!")
#
# return get_json_result(data=True)
# except Exception as e:
# return server_error_response(e)
#
#
# @manager.route('/get/<file_id>', methods=['GET']) # noqa: F821
# @login_required
# async def get(file_id):
# try:
# e, file = FileService.get_by_id(file_id)
# if not e:
# return get_data_error_result(message="Document not found!")
# if not check_file_team_permission(file, current_user.id):
# return get_json_result(data=False, message='No authorization.', code=RetCode.AUTHENTICATION_ERROR)
#
# blob = await thread_pool_exec(settings.STORAGE_IMPL.get, file.parent_id, file.location)
# if not blob:
# b, n = File2DocumentService.get_storage_address(file_id=file_id)
# blob = await thread_pool_exec(settings.STORAGE_IMPL.get, b, n)
#
# response = await make_response(blob)
# ext = re.search(r"\.([^.]+)$", file.name.lower())
# ext = ext.group(1) if ext else None
# content_type = None
# if ext:
# fallback_prefix = "image" if file.type == FileType.VISUAL.value else "application"
# content_type = CONTENT_TYPE_MAP.get(ext, f"{fallback_prefix}/{ext}")
# apply_safe_file_response_headers(response, content_type, ext)
# return response
# except Exception as e:
# return server_error_response(e)
#
#
# @manager.route("/mv", methods=["POST"]) # noqa: F821
# @login_required
# @validate_request("src_file_ids", "dest_file_id")
# async def move():
# req = await get_request_json()
# try:
# file_ids = req["src_file_ids"]
# dest_parent_id = req["dest_file_id"]
#
# ok, dest_folder = FileService.get_by_id(dest_parent_id)
# if not ok or not dest_folder:
# return get_data_error_result(message="Parent folder not found!")
#
# files = FileService.get_by_ids(file_ids)
# if not files:
# return get_data_error_result(message="Source files not found!")
#
# files_dict = {f.id: f for f in files}
#
# for file_id in file_ids:
# file = files_dict.get(file_id)
# if not file:
# return get_data_error_result(message="File or folder not found!")
# if not file.tenant_id:
# return get_data_error_result(message="Tenant not found!")
# if not check_file_team_permission(file, current_user.id):
# return get_json_result(
# data=False,
# message="No authorization.",
# code=RetCode.AUTHENTICATION_ERROR,
# )
#
# def _move_entry_recursive(source_file_entry, dest_folder):
# if source_file_entry.type == FileType.FOLDER.value:
# existing_folder = FileService.query(name=source_file_entry.name, parent_id=dest_folder.id)
# if existing_folder:
# new_folder = existing_folder[0]
# else:
# new_folder = FileService.insert(
# {
# "id": get_uuid(),
# "parent_id": dest_folder.id,
# "tenant_id": source_file_entry.tenant_id,
# "created_by": current_user.id,
# "name": source_file_entry.name,
# "location": "",
# "size": 0,
# "type": FileType.FOLDER.value,
# }
# )
#
# sub_files = FileService.list_all_files_by_parent_id(source_file_entry.id)
# for sub_file in sub_files:
# _move_entry_recursive(sub_file, new_folder)
#
# FileService.delete_by_id(source_file_entry.id)
# return
#
# old_parent_id = source_file_entry.parent_id
# old_location = source_file_entry.location
# filename = source_file_entry.name
#
# new_location = filename
# while settings.STORAGE_IMPL.obj_exist(dest_folder.id, new_location):
# new_location += "_"
#
# try:
# settings.STORAGE_IMPL.move(old_parent_id, old_location, dest_folder.id, new_location)
# except Exception as storage_err:
# raise RuntimeError(f"Move file failed at storage layer: {str(storage_err)}")
#
# FileService.update_by_id(
# source_file_entry.id,
# {
# "parent_id": dest_folder.id,
# "location": new_location,
# },
# )
#
# def _move_sync():
# for file in files:
# _move_entry_recursive(file, dest_folder)
# return get_json_result(data=True)
#
# return await thread_pool_exec(_move_sync)
#
# except Exception as e:
# return server_error_response(e)

View File

@ -0,0 +1,364 @@
#
# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
import re
from quart import request, make_response
from api.apps import login_required
from api.db import FileType
from api.db.services.file2document_service import File2DocumentService
from api.utils.api_utils import (
add_tenant_id_to_kwargs,
get_error_argument_result,
get_error_data_result,
get_result,
)
from api.utils.validation_utils import (
CreateFolderReq,
DeleteFileReq,
ListFileReq,
MoveFileReq,
validate_and_parse_json_request,
validate_and_parse_request_args,
)
from api.utils.web_utils import CONTENT_TYPE_MAP, apply_safe_file_response_headers
from common import settings
from common.misc_utils import thread_pool_exec
from api.apps.services import file_api_service
@manager.route("/files", methods=["POST"])  # noqa: F821
@login_required
@add_tenant_id_to_kwargs
async def create_or_upload(tenant_id: str = None):
    """
    Upload files or create a folder.
    ---
    tags:
      - Files
    security:
      - ApiKeyAuth: []
    parameters:
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Successful operation.
    """
    # Dispatch on the request body type: multipart means file upload,
    # anything else is treated as a JSON folder-creation request.
    is_multipart = "multipart/form-data" in (request.content_type or "")
    try:
        if not is_multipart:
            req, err = await validate_and_parse_json_request(request, CreateFolderReq)
            if err is not None:
                return get_error_argument_result(err)
            ok, payload = await file_api_service.create_folder(
                tenant_id, req["name"], req.get("parent_id"), req.get("type")
            )
            return get_result(data=payload) if ok else get_error_data_result(message=payload)

        form = await request.form
        uploads = await request.files
        if 'file' not in uploads:
            return get_error_argument_result("No file part!")
        file_objs = uploads.getlist('file')
        if any(f.filename == '' for f in file_objs):
            return get_error_argument_result("No file selected!")
        ok, payload = await file_api_service.upload_file(tenant_id, form.get("parent_id"), file_objs)
        return get_result(data=payload) if ok else get_error_data_result(message=payload)
    except Exception as e:
        logging.exception(e)
        return get_error_data_result(message="Internal server error")
@manager.route("/files", methods=["GET"])  # noqa: F821
@login_required
@add_tenant_id_to_kwargs
def list_files(tenant_id: str = None):
    """
    List files under a folder.
    ---
    tags:
      - Files
    security:
      - ApiKeyAuth: []
    parameters:
      - in: query
        name: parent_id
        type: string
        description: Folder ID to list files from.
      - in: query
        name: keywords
        type: string
        description: Search keyword filter.
      - in: query
        name: page
        type: integer
        default: 1
      - in: query
        name: page_size
        type: integer
        default: 15
      - in: query
        name: orderby
        type: string
        default: "create_time"
      - in: query
        name: desc
        type: boolean
        default: true
    responses:
      200:
        description: Successful operation.
    """
    # Query-string arguments are validated up front; the service layer does the rest.
    args, err = validate_and_parse_request_args(request, ListFileReq)
    if err is not None:
        return get_error_argument_result(err)
    try:
        ok, payload = file_api_service.list_files(tenant_id, args)
    except Exception as e:
        logging.exception(e)
        return get_error_data_result(message="Internal server error")
    return get_result(data=payload) if ok else get_error_data_result(message=payload)
@manager.route("/files", methods=["DELETE"])  # noqa: F821
@login_required
@add_tenant_id_to_kwargs
async def delete(tenant_id: str = None):
    """
    Delete files.
    ---
    tags:
      - Files
    security:
      - ApiKeyAuth: []
    parameters:
      - in: body
        name: body
        required: true
        schema:
          type: object
          required:
            - ids
          properties:
            ids:
              type: array
              items:
                type: string
              description: List of file IDs to delete.
    responses:
      200:
        description: Successful operation.
    """
    # Validate the JSON body first; deletion itself is delegated to the service layer.
    req, err = await validate_and_parse_json_request(request, DeleteFileReq)
    if err is not None:
        return get_error_argument_result(err)
    try:
        ok, payload = await file_api_service.delete_files(tenant_id, req["ids"])
    except Exception as e:
        logging.exception(e)
        return get_error_data_result(message="Internal server error")
    return get_result(data=payload) if ok else get_error_data_result(message=payload)
@manager.route("/files/move", methods=["POST"])  # noqa: F821
@login_required
@add_tenant_id_to_kwargs
async def move(tenant_id: str = None):
    """
    Move and/or rename files. Follows Linux mv semantics:
    at least one of dest_file_id or new_name must be provided.
    - dest_file_id only: move files to a new folder (names unchanged).
    - new_name only: rename a single file in place (no storage operation).
    - both: move and rename simultaneously.
    ---
    tags:
      - Files
    security:
      - ApiKeyAuth: []
    parameters:
      - in: body
        name: body
        required: true
        schema:
          type: object
          required:
            - src_file_ids
          properties:
            src_file_ids:
              type: array
              items:
                type: string
              description: List of source file IDs. Required.
            dest_file_id:
              type: string
              description: Destination folder ID. Optional; omit to rename in place.
            new_name:
              type: string
              description: New file name. Optional; only valid for a single source file.
    responses:
      200:
        description: Successful operation.
    """
    req, err = await validate_and_parse_json_request(request, MoveFileReq)
    if err is not None:
        return get_error_argument_result(err)
    try:
        # dest_file_id / new_name are both optional; the service enforces mv semantics.
        ok, payload = await file_api_service.move_files(
            tenant_id, req["src_file_ids"], req.get("dest_file_id"), req.get("new_name")
        )
    except Exception as e:
        logging.exception(e)
        return get_error_data_result(message="Internal server error")
    return get_result(data=payload) if ok else get_error_data_result(message=payload)
@manager.route("/files/<file_id>", methods=["GET"])  # noqa: F821
@login_required
@add_tenant_id_to_kwargs
async def download(tenant_id: str = None, file_id: str = None):
    """
    Download a file.
    ---
    tags:
      - Files
    security:
      - ApiKeyAuth: []
    produces:
      - application/octet-stream
    parameters:
      - in: path
        name: file_id
        type: string
        required: true
        description: File ID to download.
    responses:
      200:
        description: File stream.
    """
    try:
        ok, meta = file_api_service.get_file_content(tenant_id, file_id)
        if not ok:
            return get_error_data_result(message=meta)
        # Primary location first; fall back to the document storage address
        # when the blob is not under the file's own folder.
        blob = await thread_pool_exec(settings.STORAGE_IMPL.get, meta.parent_id, meta.location)
        if not blob:
            bucket, obj_name = File2DocumentService.get_storage_address(file_id=file_id)
            blob = await thread_pool_exec(settings.STORAGE_IMPL.get, bucket, obj_name)
        response = await make_response(blob)
        matched = re.search(r"\.([^.]+)$", meta.name.lower())
        suffix = matched.group(1) if matched else None
        mime = None
        if suffix:
            # Visual files default to image/*, everything else to application/*.
            prefix = "image" if meta.type == FileType.VISUAL.value else "application"
            mime = CONTENT_TYPE_MAP.get(suffix, f"{prefix}/{suffix}")
        apply_safe_file_response_headers(response, mime, suffix)
        return response
    except Exception as e:
        logging.exception(e)
        return get_error_data_result(message="Internal server error")
@manager.route("/files/<file_id>/parent", methods=["GET"])  # noqa: F821
@login_required
@add_tenant_id_to_kwargs
def parent_folder(tenant_id: str = None, file_id: str = None):
    """
    Get parent folder of a file.
    ---
    tags:
      - Files
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: file_id
        type: string
        required: true
    responses:
      200:
        description: Parent folder information.
    """
    try:
        ok, payload = file_api_service.get_parent_folder(file_id)
    except Exception as e:
        logging.exception(e)
        return get_error_data_result(message="Internal server error")
    return get_result(data=payload) if ok else get_error_data_result(message=payload)
@manager.route("/files/<file_id>/ancestors", methods=["GET"])  # noqa: F821
@login_required
@add_tenant_id_to_kwargs
def ancestors(tenant_id: str = None, file_id: str = None):
    """
    Get all ancestor folders of a file.
    ---
    tags:
      - Files
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: file_id
        type: string
        required: true
    responses:
      200:
        description: List of ancestor folders.
    """
    try:
        ok, payload = file_api_service.get_all_parent_folders(file_id)
    except Exception as e:
        logging.exception(e)
        return get_error_data_result(message="Internal server error")
    return get_result(data=payload) if ok else get_error_data_result(message=payload)

View File

@ -1,5 +1,5 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -21,34 +21,33 @@ import re
from io import BytesIO
import xxhash
from quart import request, send_file
from peewee import OperationalError
from pydantic import BaseModel, Field, validator
from quart import request, send_file
from api.constants import FILE_NAME_LEN_LIMIT
from api.db import FileType
from api.db.db_models import APIToken, File, Task
from api.db.services.document_service import DocumentService
from api.db.joint_services.tenant_model_service import get_model_config_by_id, get_model_config_by_type_and_name, get_tenant_default_model_by_type
from api.db.services.doc_metadata_service import DocMetadataService
from api.db.services.document_service import DocumentService
from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMBundle
from api.db.services.task_service import TaskService, cancel_all_task_of, queue_tasks
from api.db.services.tenant_llm_service import TenantLLMService
from api.db.services.task_service import TaskService, queue_tasks, cancel_all_task_of
from api.db.joint_services.tenant_model_service import get_model_config_by_id, get_tenant_default_model_by_type, get_model_config_by_type_and_name
from common.metadata_utils import meta_filter, convert_conditions
from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_error_data_result, get_parser_config, get_result, server_error_response, token_required, \
get_request_json
from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_error_data_result, get_parser_config, get_request_json, get_result, server_error_response, token_required
from api.utils.image_utils import store_chunk_image
from common import settings
from common.constants import FileSource, LLMType, ParserType, RetCode, TaskStatus
from common.metadata_utils import convert_conditions, meta_filter
from common.misc_utils import thread_pool_exec
from common.string_utils import remove_redundant_spaces
from rag.app.qa import beAdoc, rmPrefix
from rag.app.tag import label_question
from rag.nlp import rag_tokenizer, search
from rag.prompts.generator import cross_languages, keyword_extraction
from common.string_utils import remove_redundant_spaces
from common.misc_utils import thread_pool_exec
from common.constants import RetCode, LLMType, ParserType, TaskStatus, FileSource
from common import settings
from api.utils.image_utils import store_chunk_image
MAXIMUM_OF_UPLOADING_FILES = 256
@ -162,7 +161,7 @@ async def upload(dataset_id, tenant_id):
"""
e, kb = KnowledgebaseService.get_by_id(dataset_id)
if not e:
raise LookupError(f"Can't find the dataset with ID {dataset_id}!")
return server_error_response(LookupError(f"Can't find the dataset with ID {dataset_id}!"))
err, files = FileService.upload_document(kb, file_objs, tenant_id, parent_path=form.get("parent_path"))
if err:
return get_result(message="\n".join(err), code=RetCode.SERVER_ERROR)
@ -263,6 +262,8 @@ async def update_doc(tenant_id, dataset_id, document_id):
return get_error_data_result(message="Failed to update metadata")
if "name" in req and req["name"] != doc.name:
if not isinstance(req["name"], str):
return server_error_response(AttributeError(f"'{type(req['name']).__name__}' object has no attribute 'encode'"))
if len(req["name"].encode("utf-8")) > FILE_NAME_LEN_LIMIT:
return get_result(
message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.",
@ -426,12 +427,12 @@ async def download(tenant_id, dataset_id, document_id):
async def download_doc(document_id):
token = request.headers.get("Authorization").split()
if len(token) != 2:
return get_error_data_result(message='Authorization is not valid!')
return get_error_data_result(message="Authorization is not valid!")
token = token[1]
objs = APIToken.query(beta=token)
if not objs:
return get_error_data_result(message='Authentication error: API key is invalid!"')
if not document_id:
return get_error_data_result(message="Specify document_id please.")
doc = DocumentService.query(id=document_id)
@ -565,28 +566,28 @@ def list_docs(dataset_id, tenant_id):
description: Processing status.
"""
if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
return get_error_data_result(message=f"You don't own the dataset {dataset_id}. ")
return get_error_data_result(message=f"You don't own the dataset {dataset_id}. ")
q = request.args
document_id = q.get("id")
name = q.get("name")
name = q.get("name")
if document_id and not DocumentService.query(id=document_id, kb_id=dataset_id):
return get_error_data_result(message=f"You don't own the document {document_id}.")
if name and not DocumentService.query(name=name, kb_id=dataset_id):
return get_error_data_result(message=f"You don't own the document {name}.")
page = int(q.get("page", 1))
page_size = int(q.get("page_size", 30))
orderby = q.get("orderby", "create_time")
desc = str(q.get("desc", "true")).strip().lower() != "false"
keywords = q.get("keywords", "")
page = int(q.get("page", 1))
page_size = int(q.get("page_size", 30))
orderby = q.get("orderby", "create_time")
desc = str(q.get("desc", "true")).strip().lower() != "false"
keywords = q.get("keywords", "")
# filters - align with OpenAPI parameter names
suffix = q.getlist("suffix")
run_status = q.getlist("run")
create_time_from = int(q.get("create_time_from", 0))
create_time_to = int(q.get("create_time_to", 0))
suffix = q.getlist("suffix")
run_status = q.getlist("run")
create_time_from = int(q.get("create_time_from", 0))
create_time_to = int(q.get("create_time_to", 0))
metadata_condition_raw = q.get("metadata_condition")
metadata_condition = {}
if metadata_condition_raw:
@ -608,17 +609,11 @@ def list_docs(dataset_id, tenant_id):
if metadata_condition.get("conditions") and not doc_ids_filter:
return get_result(data={"total": 0, "docs": []})
docs, total = DocumentService.get_list(
dataset_id, page, page_size, orderby, desc, keywords, document_id, name, suffix, run_status_converted, doc_ids_filter
)
docs, total = DocumentService.get_list(dataset_id, page, page_size, orderby, desc, keywords, document_id, name, suffix, run_status_converted, doc_ids_filter)
# time range filter (0 means no bound)
if create_time_from or create_time_to:
docs = [
d for d in docs
if (create_time_from == 0 or d.get("create_time", 0) >= create_time_from)
and (create_time_to == 0 or d.get("create_time", 0) <= create_time_to)
]
docs = [d for d in docs if (create_time_from == 0 or d.get("create_time", 0) >= create_time_from) and (create_time_to == 0 or d.get("create_time", 0) <= create_time_to)]
# rename keys + map run status back to text for output
key_mapping = {
@ -682,7 +677,7 @@ async def metadata_batch_update(dataset_id, tenant_id):
for d in deletes:
if not isinstance(d, dict) or not d.get("key"):
return get_error_data_result(message="Each delete requires key.")
if document_ids:
kb_doc_ids = KnowledgebaseService.list_documents_by_ids([dataset_id])
target_doc_ids = set(kb_doc_ids)
@ -702,6 +697,7 @@ async def metadata_batch_update(dataset_id, tenant_id):
updated = DocMetadataService.batch_update_metadata(dataset_id, target_doc_ids, updates, deletes)
return get_result(data={"updated": updated, "matched_docs": len(target_doc_ids)})
@manager.route("/datasets/<dataset_id>/documents", methods=["DELETE"]) # noqa: F821
@token_required
async def delete(tenant_id, dataset_id):
@ -957,7 +953,7 @@ async def stop_parsing(tenant_id, dataset_id):
doc = DocumentService.query(id=id, kb_id=dataset_id)
if not doc:
return get_error_data_result(message=f"You don't own the document {id}.")
if doc[0].run != TaskStatus.RUNNING.value :
if doc[0].run != TaskStatus.RUNNING.value:
return construct_json_result(
code=RetCode.DATA_ERROR,
message=DOC_STOP_PARSING_INVALID_STATE_MESSAGE,
@ -1259,6 +1255,7 @@ async def add_chunk(tenant_id, dataset_id, document_id):
if "tag_feas" in req:
d["tag_feas"] = req["tag_feas"]
import base64
image_base64 = req.get("image_base64", None)
if image_base64:
d["img_id"] = "{}-{}".format(dataset_id, chunk_id)
@ -1702,8 +1699,8 @@ async def retrieval_test(tenant_id):
toc_enhance = req.get("toc_enhance", False)
langs = req.get("cross_languages", [])
if not isinstance(doc_ids, list):
return get_error_data_result("`documents` should be a list")
if doc_ids:
return get_error_data_result("`documents` should be a list")
if doc_ids:
doc_ids_list = KnowledgebaseService.list_documents_by_ids(kb_ids)
for doc_id in doc_ids:
if doc_id not in doc_ids_list:

View File

@ -1,835 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import pathlib
import re
from quart import request, make_response
from pathlib import Path
from api.db.services.document_service import DocumentService
from api.db.services.file2document_service import File2DocumentService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.utils.api_utils import get_json_result, get_request_json, server_error_response, token_required
from common.misc_utils import get_uuid, thread_pool_exec
from api.db import FileType
from api.db.services import duplicate_name
from api.db.services.file_service import FileService
from api.utils.file_utils import filename_type
from api.utils.web_utils import CONTENT_TYPE_MAP, apply_safe_file_response_headers
from common import settings
from common.constants import RetCode
@manager.route('/file/upload', methods=['POST'])  # noqa: F821
@token_required
async def upload(tenant_id):
    """
    Upload a file to the system.
    ---
    tags:
      - File
    security:
      - ApiKeyAuth: []
    parameters:
      - in: formData
        name: file
        type: file
        required: true
        description: The file to upload
      - in: formData
        name: parent_id
        type: string
        description: Parent folder ID where the file will be uploaded. Optional.
    responses:
      200:
        description: Successfully uploaded the file.
        schema:
          type: object
          properties:
            data:
              type: array
              items:
                type: object
                properties:
                  id:
                    type: string
                    description: File ID
                  name:
                    type: string
                    description: File name
                  size:
                    type: integer
                    description: File size in bytes
                  type:
                    type: string
                    description: File type (e.g., document, folder)
    """
    form = await request.form
    files = await request.files
    pf_id = form.get("parent_id")
    # Default to the tenant's root folder when no destination is given.
    if not pf_id:
        root_folder = FileService.get_root_folder(tenant_id)
        pf_id = root_folder["id"]
    if 'file' not in files:
        return get_json_result(data=False, message='No file part!', code=RetCode.BAD_REQUEST)
    file_objs = files.getlist('file')
    for file_obj in file_objs:
        if file_obj.filename == '':
            return get_json_result(data=False, message='No selected file!', code=RetCode.BAD_REQUEST)
    file_res = []
    try:
        e, pf_folder = FileService.get_by_id(pf_id)
        if not e:
            return get_json_result(data=False, message="Can't find this folder!", code=RetCode.NOT_FOUND)
        for file_obj in file_objs:
            # Handle file path: a filename may contain '/' separators, in which
            # case each path segment becomes a nested folder under pf_id.
            full_path = '/' + file_obj.filename
            file_obj_names = full_path.split('/')
            file_len = len(file_obj_names)
            # Get folder path ID: IDs of the already-existing folders along the path.
            file_id_list = FileService.get_id_list_by_id(pf_id, file_obj_names, 1, [pf_id])
            len_id_list = len(file_id_list)
            # Create file folder: when part of the path is missing, build the
            # remaining folders starting from the deepest existing one.
            # NOTE(review): the index differs by one between the two branches
            # (last existing folder vs. its parent) — presumably because a fully
            # resolved path includes the leaf file's own slot; confirm against
            # FileService.get_id_list_by_id semantics.
            if file_len != len_id_list:
                e, file = FileService.get_by_id(file_id_list[len_id_list - 1])
                if not e:
                    return get_json_result(data=False, message="Folder not found!", code=RetCode.NOT_FOUND)
                last_folder = FileService.create_folder(file, file_id_list[len_id_list - 1], file_obj_names,
                                                        len_id_list)
            else:
                e, file = FileService.get_by_id(file_id_list[len_id_list - 2])
                if not e:
                    return get_json_result(data=False, message="Folder not found!", code=RetCode.NOT_FOUND)
                last_folder = FileService.create_folder(file, file_id_list[len_id_list - 2], file_obj_names,
                                                        len_id_list)
            filetype = filename_type(file_obj_names[file_len - 1])
            location = file_obj_names[file_len - 1]
            # Avoid clobbering an existing storage object: suffix '_' until unique.
            while settings.STORAGE_IMPL.obj_exist(last_folder.id, location):
                location += "_"
            blob = file_obj.read()
            # De-duplicate the display name within the destination folder.
            filename = duplicate_name(FileService.query, name=file_obj_names[file_len - 1], parent_id=last_folder.id)
            file = {
                "id": get_uuid(),
                "parent_id": last_folder.id,
                "tenant_id": tenant_id,
                "created_by": tenant_id,
                "type": filetype,
                "name": filename,
                "location": location,
                "size": len(blob),
            }
            # DB row first, then the blob; the row records where the blob lives.
            file = FileService.insert(file)
            settings.STORAGE_IMPL.put(last_folder.id, location, blob)
            file_res.append(file.to_json())
        return get_json_result(data=file_res)
    except Exception as e:
        return server_error_response(e)
@manager.route("/file/upload_info", methods=["POST"])  # noqa: F821
@token_required
async def upload_info(tenant_id):
    """
    Upload runtime file metadata for SDK chat completions.
    ---
    tags:
      - File
    security:
      - ApiKeyAuth: []
    parameters:
      - in: formData
        name: file
        type: file
        required: false
        description: File(s) to upload as runtime attachments.
      - in: query
        name: url
        type: string
        required: false
        description: Optional URL to fetch and convert into a runtime attachment.
    responses:
      200:
        description: Runtime attachment descriptor(s) for the `files` field in completions requests.
    """
    files = await request.files
    file_objs = files.getlist("file") if files and files.get("file") else []
    url = request.args.get("url")
    # Exactly one input source is allowed: multipart file(s) XOR a URL.
    if file_objs and url:
        return get_json_result(
            data=False,
            message="Provide either multipart file(s) or ?url=..., not both.",
            code=RetCode.BAD_REQUEST,
        )
    if not file_objs and not url:
        return get_json_result(
            data=False,
            message="Missing input: provide multipart file(s) or url",
            code=RetCode.BAD_REQUEST,
        )
    try:
        if url and not file_objs:
            return get_json_result(data=FileService.upload_info(tenant_id, None, url))
        if len(file_objs) == 1:
            return get_json_result(data=FileService.upload_info(tenant_id, file_objs[0], None))
        # Consistency fix: pass the url argument (None) explicitly, matching the
        # single-file and URL branches above — the previous code omitted it here,
        # which breaks if FileService.upload_info has no default for that parameter.
        results = [FileService.upload_info(tenant_id, f, None) for f in file_objs]
        return get_json_result(data=results)
    except Exception as e:
        return server_error_response(e)
@manager.route('/file/create', methods=['POST'])  # noqa: F821
@token_required
async def create(tenant_id):
    """
    Create a new file or folder.
    ---
    tags:
      - File
    security:
      - ApiKeyAuth: []
    parameters:
      - in: body
        name: body
        description: File creation parameters
        required: true
        schema:
          type: object
          properties:
            name:
              type: string
              description: Name of the file/folder
            parent_id:
              type: string
              description: Parent folder ID. Optional.
            type:
              type: string
              enum: ["FOLDER", "VIRTUAL"]
              description: Type of the file
    responses:
      200:
        description: File created successfully.
        schema:
          type: object
          properties:
            data:
              type: object
              properties:
                id:
                  type: string
                name:
                  type: string
                type:
                  type: string
    """
    body = await get_request_json()
    folder_id = body.get("parent_id")
    # Fall back to the tenant's root folder when no parent is specified.
    if not folder_id:
        folder_id = FileService.get_root_folder(tenant_id)["id"]
    try:
        if not FileService.is_parent_folder_exist(folder_id):
            return get_json_result(data=False, message="Parent Folder Doesn't Exist!", code=RetCode.BAD_REQUEST)
        if FileService.query(name=body["name"], parent_id=folder_id):
            return get_json_result(data=False, message="Duplicated folder name in the same folder.",
                                   code=RetCode.CONFLICT)
        # Anything that is not explicitly a FOLDER is stored as VIRTUAL.
        resolved_type = FileType.FOLDER.value if body.get("type") == FileType.FOLDER.value else FileType.VIRTUAL.value
        created = FileService.insert({
            "id": get_uuid(),
            "parent_id": folder_id,
            "tenant_id": tenant_id,
            "created_by": tenant_id,
            "name": body["name"],
            "location": "",
            "size": 0,
            "type": resolved_type,
        })
        return get_json_result(data=created.to_json())
    except Exception as e:
        return server_error_response(e)
@manager.route('/file/list', methods=['GET'])  # noqa: F821
@token_required
async def list_files(tenant_id):
    """
    List files under a specific folder.
    ---
    tags:
      - File
    security:
      - ApiKeyAuth: []
    parameters:
      - in: query
        name: parent_id
        type: string
        description: Folder ID to list files from
      - in: query
        name: keywords
        type: string
        description: Search keyword filter
      - in: query
        name: page
        type: integer
        default: 1
        description: Page number
      - in: query
        name: page_size
        type: integer
        default: 15
        description: Number of results per page
      - in: query
        name: orderby
        type: string
        default: "create_time"
        description: Sort by field
      - in: query
        name: desc
        type: boolean
        default: true
        description: Descending order
    responses:
      200:
        description: Successfully retrieved file list.
        schema:
          type: object
          properties:
            total:
              type: integer
            files:
              type: array
              items:
                type: object
                properties:
                  id:
                    type: string
                  name:
                    type: string
                  type:
                    type: string
                  size:
                    type: integer
                  create_time:
                    type: string
                    format: date-time
    """
    folder_id = request.args.get("parent_id")
    keyword_filter = request.args.get("keywords", "")
    page = int(request.args.get("page", 1))
    page_size = int(request.args.get("page_size", 15))
    order_field = request.args.get("orderby", "create_time")
    descending = request.args.get("desc", True)
    if not folder_id:
        # No folder given: list the root folder and make sure the virtual
        # knowledgebase entries exist under it.
        folder_id = FileService.get_root_folder(tenant_id)["id"]
        FileService.init_knowledgebase_docs(folder_id, tenant_id)
    try:
        exists, _ = FileService.get_by_id(folder_id)
        if not exists:
            return get_json_result(message="Folder not found!", code=RetCode.NOT_FOUND)
        files, total = FileService.get_by_pf_id(tenant_id, folder_id, page, page_size, order_field, descending, keyword_filter)
        parent = FileService.get_parent_folder(folder_id)
        if not parent:
            return get_json_result(message="File not found!", code=RetCode.NOT_FOUND)
        return get_json_result(data={"total": total, "files": files, "parent_folder": parent.to_json()})
    except Exception as e:
        return server_error_response(e)
@manager.route('/file/root_folder', methods=['GET'])  # noqa: F821
@token_required
async def get_root_folder(tenant_id):
    """
    Get user's root folder.
    ---
    tags:
      - File
    security:
      - ApiKeyAuth: []
    responses:
      200:
        description: Root folder information
        schema:
          type: object
          properties:
            data:
              type: object
              properties:
                root_folder:
                  type: object
                  properties:
                    id:
                      type: string
                    name:
                      type: string
                    type:
                      type: string
    """
    try:
        # The service lazily creates the root folder if the tenant has none yet.
        return get_json_result(data={"root_folder": FileService.get_root_folder(tenant_id)})
    except Exception as e:
        return server_error_response(e)
@manager.route('/file/parent_folder', methods=['GET'])  # noqa: F821
@token_required
# Consistency fix: every other @token_required handler in this file accepts the
# injected `tenant_id`; this one declared no parameters, so the decorator's
# keyword argument would raise TypeError at call time. Accept it with a default
# so the change is backward-compatible. (Presumably token_required passes
# tenant_id — confirm against the decorator implementation.)
async def get_parent_folder(tenant_id=None):
    """
    Get parent folder info of a file.
    ---
    tags:
      - File
    security:
      - ApiKeyAuth: []
    parameters:
      - in: query
        name: file_id
        type: string
        required: true
        description: Target file ID
    responses:
      200:
        description: Parent folder information
        schema:
          type: object
          properties:
            data:
              type: object
              properties:
                parent_folder:
                  type: object
                  properties:
                    id:
                      type: string
                    name:
                      type: string
    """
    file_id = request.args.get("file_id")
    try:
        # Existence check first so a missing file yields NOT_FOUND, not a crash.
        e, file = FileService.get_by_id(file_id)
        if not e:
            return get_json_result(message="Folder not found!", code=RetCode.NOT_FOUND)
        parent_folder = FileService.get_parent_folder(file_id)
        return get_json_result(data={"parent_folder": parent_folder.to_json()})
    except Exception as e:
        return server_error_response(e)
@manager.route('/file/all_parent_folder', methods=['GET'])  # noqa: F821
@token_required
async def get_all_parent_folders(tenant_id):
    """
    Get all parent folders of a file.
    ---
    tags:
      - File
    security:
      - ApiKeyAuth: []
    parameters:
      - in: query
        name: file_id
        type: string
        required: true
        description: Target file ID
    responses:
      200:
        description: All parent folders of the file
        schema:
          type: object
          properties:
            data:
              type: object
              properties:
                parent_folders:
                  type: array
                  items:
                    type: object
                    properties:
                      id:
                        type: string
                      name:
                        type: string
    """
    file_id = request.args.get("file_id")
    try:
        exists, _ = FileService.get_by_id(file_id)
        if not exists:
            return get_json_result(message="Folder not found!", code=RetCode.NOT_FOUND)
        # Serialize the whole ancestor chain for the response payload.
        chain = FileService.get_all_parent_folders(file_id)
        return get_json_result(data={"parent_folders": [folder.to_json() for folder in chain]})
    except Exception as e:
        return server_error_response(e)
@manager.route('/file/rm', methods=['POST'])  # noqa: F821
@token_required
async def rm(tenant_id):
    """
    Delete one or multiple files/folders.
    ---
    tags:
      - File
    security:
      - ApiKeyAuth: []
    parameters:
      - in: body
        name: body
        description: Files to delete
        required: true
        schema:
          type: object
          properties:
            file_ids:
              type: array
              items:
                type: string
              description: List of file IDs to delete
    responses:
      200:
        description: Successfully deleted files
        schema:
          type: object
          properties:
            data:
              type: boolean
              example: true
    """
    req = await get_request_json()
    file_ids = req["file_ids"]
    try:
        for file_id in file_ids:
            e, file = FileService.get_by_id(file_id)
            if not e:
                return get_json_result(message="File or Folder not found!", code=RetCode.NOT_FOUND)
            if not file.tenant_id:
                return get_json_result(message="Tenant not found!", code=RetCode.NOT_FOUND)
            if file.type == FileType.FOLDER.value:
                # Delete every leaf file's blob first, then drop the folder subtree rows.
                # Dedicated loop variables (inner_ok / inner_file) so the outer
                # `file` binding is not clobbered, unlike the previous version.
                file_id_list = FileService.get_all_innermost_file_ids(file_id, [])
                for inner_file_id in file_id_list:
                    inner_ok, inner_file = FileService.get_by_id(inner_file_id)
                    if not inner_ok:
                        return get_json_result(message="File not found!", code=RetCode.NOT_FOUND)
                    settings.STORAGE_IMPL.rm(inner_file.parent_id, inner_file.location)
                FileService.delete_folder_by_pf_id(tenant_id, file_id)
            else:
                settings.STORAGE_IMPL.rm(file.parent_id, file.location)
                if not FileService.delete(file):
                    return get_json_result(message="Database error (File removal)!", code=RetCode.SERVER_ERROR)
            # Cascade: remove any documents linked to this file.
            informs = File2DocumentService.get_by_file_id(file_id)
            for inform in informs:
                doc_id = inform.document_id
                e, doc = DocumentService.get_by_id(doc_id)
                if not e:
                    return get_json_result(message="Document not found!", code=RetCode.NOT_FOUND)
                # Bug fix: the previous code reassigned the `tenant_id` parameter
                # here, so later iterations of the outer loop called
                # delete_folder_by_pf_id with the wrong tenant. Use a local name.
                doc_tenant_id = DocumentService.get_tenant_id(doc_id)
                if not doc_tenant_id:
                    return get_json_result(message="Tenant not found!", code=RetCode.NOT_FOUND)
                if not DocumentService.remove_document(doc, doc_tenant_id):
                    return get_json_result(message="Database error (Document removal)!", code=RetCode.SERVER_ERROR)
            File2DocumentService.delete_by_file_id(file_id)
        return get_json_result(data=True)
    except Exception as e:
        return server_error_response(e)
@manager.route('/file/rename', methods=['POST'])  # noqa: F821
@token_required
async def rename(tenant_id):
    """
    Rename a file.
    ---
    tags:
      - File
    security:
      - ApiKeyAuth: []
    parameters:
      - in: body
        name: body
        description: Rename file
        required: true
        schema:
          type: object
          properties:
            file_id:
              type: string
              description: Target file ID
            name:
              type: string
              description: New name for the file
    responses:
      200:
        description: File renamed successfully
        schema:
          type: object
          properties:
            data:
              type: boolean
              example: true
    """
    payload = await get_request_json()
    try:
        found, target = FileService.get_by_id(payload["file_id"])
        if not found:
            return get_json_result(message="File not found!", code=RetCode.NOT_FOUND)
        new_name = payload["name"]
        # Non-folder renames must keep the original extension.
        if target.type != FileType.FOLDER.value and pathlib.Path(new_name.lower()).suffix != pathlib.Path(target.name.lower()).suffix:
            return get_json_result(data=False, message="The extension of file can't be changed",
                                   code=RetCode.BAD_REQUEST)
        # Reject an exact-name collision inside the same parent folder.
        if any(sibling.name == new_name for sibling in FileService.query(name=new_name, pf_id=target.parent_id)):
            return get_json_result(data=False, message="Duplicated file name in the same folder.",
                                   code=RetCode.CONFLICT)
        if not FileService.update_by_id(payload["file_id"], {"name": new_name}):
            return get_json_result(message="Database error (File rename)!", code=RetCode.SERVER_ERROR)
        # Keep the linked document's name in sync, if one exists.
        linked = File2DocumentService.get_by_file_id(payload["file_id"])
        if linked and not DocumentService.update_by_id(linked[0].document_id, {"name": new_name}):
            return get_json_result(message="Database error (Document rename)!", code=RetCode.SERVER_ERROR)
        return get_json_result(data=True)
    except Exception as e:
        return server_error_response(e)
@manager.route('/file/get/<file_id>', methods=['GET'])  # noqa: F821
@token_required
async def get(tenant_id, file_id):
    """
    Download a file.
    ---
    tags:
      - File
    security:
      - ApiKeyAuth: []
    produces:
      - application/octet-stream
    parameters:
      - in: path
        name: file_id
        type: string
        required: true
        description: File ID to download
    responses:
      200:
        description: File stream
        schema:
          type: file
      404:
        description: File not found
    """
    try:
        e, file = FileService.get_by_id(file_id)
        if not e:
            return get_json_result(message="Document not found!", code=RetCode.NOT_FOUND)
        # Primary location: blob stored under the file's own parent folder.
        blob = settings.STORAGE_IMPL.get(file.parent_id, file.location)
        if not blob:
            # Fallback: the blob may live at the linked document's storage address.
            b, n = File2DocumentService.get_storage_address(file_id=file_id)
            blob = settings.STORAGE_IMPL.get(b, n)
        response = await make_response(blob)
        # Derive Content-Type from the filename extension; visual files fall
        # back to image/<ext>, everything else to application/<ext>.
        ext = re.search(r"\.([^.]+)$", file.name)
        extension = ext.group(1).lower() if ext else None
        content_type = None
        if extension:
            fallback_prefix = "image" if file.type == FileType.VISUAL.value else "application"
            content_type = CONTENT_TYPE_MAP.get(extension, f"{fallback_prefix}/{extension}")
        apply_safe_file_response_headers(response, content_type, extension)
        return response
    except Exception as e:
        return server_error_response(e)
@manager.route("/file/download/<attachment_id>", methods=["GET"])  # noqa: F821
@token_required
async def download_attachment(tenant_id, attachment_id):
    """Stream a stored attachment back to the caller, typed by the `ext` query hint."""
    try:
        ext_hint = request.args.get("ext", "markdown")
        mime = CONTENT_TYPE_MAP.get(ext_hint, f"application/{ext_hint}")
        payload = await thread_pool_exec(settings.STORAGE_IMPL.get, tenant_id, attachment_id)
        resp = await make_response(payload)
        apply_safe_file_response_headers(resp, mime, ext_hint)
        return resp
    except Exception as e:
        return server_error_response(e)
@manager.route('/file/mv', methods=['POST'])  # noqa: F821
@token_required
async def move(tenant_id):
    """
    Move one or multiple files to another folder.
    ---
    tags:
      - File
    security:
      - ApiKeyAuth: []
    parameters:
      - in: body
        name: body
        description: Move operation
        required: true
        schema:
          type: object
          properties:
            src_file_ids:
              type: array
              items:
                type: string
              description: Source file IDs
            dest_file_id:
              type: string
              description: Destination folder ID
    responses:
      200:
        description: Files moved successfully
        schema:
          type: object
          properties:
            data:
              type: boolean
              example: true
    """
    req = await get_request_json()
    try:
        file_ids = req["src_file_ids"]
        parent_id = req["dest_file_id"]
        files = FileService.get_by_ids(file_ids)
        files_dict = {f.id: f for f in files}
        for file_id in file_ids:
            # Use .get(): a requested ID missing from the lookup must produce
            # the 404 below, not an uncaught KeyError (which surfaced as a 500).
            file = files_dict.get(file_id)
            if not file:
                return get_json_result(message="File or Folder not found!", code=RetCode.NOT_FOUND)
            if not file.tenant_id:
                return get_json_result(message="Tenant not found!", code=RetCode.NOT_FOUND)
            # Destination folder must exist before any move is attempted.
            fe, _ = FileService.get_by_id(parent_id)
            if not fe:
                return get_json_result(message="Parent Folder not found!", code=RetCode.NOT_FOUND)
        FileService.move_file(file_ids, parent_id)
        return get_json_result(data=True)
    except Exception as e:
        return server_error_response(e)
@manager.route('/file/convert', methods=['POST'])  # noqa: F821
@token_required
async def convert(tenant_id):
    """
    Convert files into documents and link them to datasets.

    For every requested file (folders are expanded to their innermost files),
    any previously linked documents are removed first; then a fresh document
    is inserted into each target dataset and linked back to the file.
    Returns the list of created file-to-document link records.
    """
    req = await get_request_json()
    kb_ids = req["kb_ids"]
    file_ids = req["file_ids"]
    file2documents = []
    try:
        files = FileService.get_by_ids(file_ids)
        files_set = {file.id: file for file in files}
        for file_id in file_ids:
            # Use .get(): a missing ID must yield the 404 below, not a KeyError.
            file = files_set.get(file_id)
            if not file:
                return get_json_result(message="File not found!", code=RetCode.NOT_FOUND)
            # A folder converts every innermost file it contains.
            file_ids_list = [file_id]
            if file.type == FileType.FOLDER.value:
                file_ids_list = FileService.get_all_innermost_file_ids(file_id, [])
            for id in file_ids_list:
                informs = File2DocumentService.get_by_file_id(id)
                # delete: drop documents already linked to this file
                for inform in informs:
                    doc_id = inform.document_id
                    e, doc = DocumentService.get_by_id(doc_id)
                    if not e:
                        return get_json_result(message="Document not found!", code=RetCode.NOT_FOUND)
                    # NOTE(review): this rebinds the route's `tenant_id` parameter to the
                    # document owner's tenant; the inserts below then use it as
                    # `created_by` — confirm that is intentional.
                    tenant_id = DocumentService.get_tenant_id(doc_id)
                    if not tenant_id:
                        return get_json_result(message="Tenant not found!", code=RetCode.NOT_FOUND)
                    if not DocumentService.remove_document(doc, tenant_id):
                        # A failed removal is a server-side database error, not a
                        # missing resource (matches the sibling handler's code).
                        return get_json_result(
                            message="Database error (Document removal)!", code=RetCode.SERVER_ERROR)
                File2DocumentService.delete_by_file_id(id)
                # insert: create one document per target dataset and link it
                for kb_id in kb_ids:
                    e, kb = KnowledgebaseService.get_by_id(kb_id)
                    if not e:
                        return get_json_result(
                            message="Can't find this dataset!", code=RetCode.NOT_FOUND)
                    e, file = FileService.get_by_id(id)
                    if not e:
                        return get_json_result(
                            message="Can't find this file!", code=RetCode.NOT_FOUND)
                    doc = DocumentService.insert({
                        "id": get_uuid(),
                        "kb_id": kb.id,
                        "parser_id": FileService.get_parser(file.type, file.name, kb.parser_id),
                        "parser_config": kb.parser_config,
                        "created_by": tenant_id,
                        "type": file.type,
                        "name": file.name,
                        "suffix": Path(file.name).suffix.lstrip("."),
                        "location": file.location,
                        "size": file.size
                    })
                    file2document = File2DocumentService.insert({
                        "id": get_uuid(),
                        "file_id": id,
                        "document_id": doc.id,
                    })
                    file2documents.append(file2document.to_json())
        return get_json_result(data=file2documents)
    except Exception as e:
        return server_error_response(e)

View File

@ -0,0 +1,397 @@
#
# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
import os
import pathlib
from api.common.check_team_permission import check_file_team_permission
from api.db import FileType
from api.db.services import duplicate_name
from api.db.services.document_service import DocumentService
from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService
from api.utils.file_utils import filename_type
from common import settings
from common.constants import FileSource
from common.misc_utils import get_uuid, thread_pool_exec
async def upload_file(tenant_id: str, pf_id: str, file_objs: list):
    """
    Upload files to a folder.

    :param tenant_id: tenant ID
    :param pf_id: parent folder ID; falls back to the tenant's root folder when falsy
    :param file_objs: list of file objects from request
    :return: (success, result_list) or (success, error_message)
    """
    # Default to the tenant's root folder when no parent folder is supplied.
    if not pf_id:
        root_folder = FileService.get_root_folder(tenant_id)
        pf_id = root_folder["id"]
    e, pf_folder = FileService.get_by_id(pf_id)
    if not e:
        return False, "Can't find this folder!"
    file_res = []
    for file_obj in file_objs:
        # Per-user quota check; 0 (the default) disables the limit.
        MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
        if 0 < MAX_FILE_NUM_PER_USER <= await thread_pool_exec(DocumentService.get_doc_count, tenant_id):
            return False, "Exceed the maximum file number of a free user!"
        # NOTE(review): this branch looks inverted — when the filename is
        # missing it still appends the (empty) filename after the folder
        # name; confirm the intended condition.
        if not file_obj.filename:
            file_obj_names = [pf_folder.name, file_obj.filename]
        else:
            # Filenames may contain '/' separators; each segment becomes a folder level.
            full_path = '/' + file_obj.filename
            file_obj_names = full_path.split('/')
        file_len = len(file_obj_names)
        # Resolve the existing folder chain for the path segments.
        file_id_list = await thread_pool_exec(FileService.get_id_list_by_id, pf_id, file_obj_names, 1, [pf_id])
        len_id_list = len(file_id_list)
        if file_len != len_id_list:
            # Part of the chain is missing: extend from the deepest existing folder.
            e, file = await thread_pool_exec(FileService.get_by_id, file_id_list[len_id_list - 1])
            if not e:
                return False, "Folder not found!"
            last_folder = await thread_pool_exec(
                FileService.create_folder, file, file_id_list[len_id_list - 1], file_obj_names, len_id_list
            )
        else:
            # Full chain resolved: the last entry is the file slot, so anchor on its parent.
            e, file = await thread_pool_exec(FileService.get_by_id, file_id_list[len_id_list - 2])
            if not e:
                return False, "Folder not found!"
            last_folder = await thread_pool_exec(
                FileService.create_folder, file, file_id_list[len_id_list - 2], file_obj_names, len_id_list
            )
        filetype = filename_type(file_obj_names[file_len - 1])
        location = file_obj_names[file_len - 1]
        # Suffix underscores until the storage key is free within the folder.
        while await thread_pool_exec(settings.STORAGE_IMPL.obj_exist, last_folder.id, location):
            location += "_"
        blob = await thread_pool_exec(file_obj.read)
        # De-duplicate the display name within the destination folder.
        filename = await thread_pool_exec(
            duplicate_name, FileService.query, name=file_obj_names[file_len - 1], parent_id=last_folder.id
        )
        await thread_pool_exec(settings.STORAGE_IMPL.put, last_folder.id, location, blob)
        file_data = {
            "id": get_uuid(),
            "parent_id": last_folder.id,
            "tenant_id": tenant_id,
            "created_by": tenant_id,
            "type": filetype,
            "name": filename,
            "location": location,
            "size": len(blob),
        }
        inserted = await thread_pool_exec(FileService.insert, file_data)
        file_res.append(inserted.to_json())
    return True, file_res
async def create_folder(tenant_id: str, name: str, pf_id: str = None, file_type: str = None):
    """
    Create a new folder or virtual file entry.

    :param tenant_id: tenant ID
    :param name: name of the new entry
    :param pf_id: parent folder ID; defaults to the tenant's root folder
    :param file_type: "folder" creates a folder; anything else a virtual file
    :return: (True, entry_json) on success, or (False, error_message)
    """
    parent_id = pf_id or FileService.get_root_folder(tenant_id)["id"]

    if not FileService.is_parent_folder_exist(parent_id):
        return False, "Parent Folder Doesn't Exist!"
    if FileService.query(name=name, parent_id=parent_id):
        return False, "Duplicated folder name in the same folder."

    # Anything that is not explicitly a folder becomes a virtual file.
    entry_type = FileType.FOLDER.value if file_type == FileType.FOLDER.value else FileType.VIRTUAL.value

    created = FileService.insert({
        "id": get_uuid(),
        "parent_id": parent_id,
        "tenant_id": tenant_id,
        "created_by": tenant_id,
        "name": name,
        "location": "",
        "size": 0,
        "type": entry_type,
    })
    return True, created.to_json()
def list_files(tenant_id: str, args: dict):
    """
    List files under a folder.

    :param tenant_id: tenant ID
    :param args: query arguments (parent_id, keywords, page, page_size, orderby, desc)
    :return: (True, result_dict) on success, or (False, error_message)
    """
    folder_id = args.get("parent_id")
    search_terms = args.get("keywords", "")
    page = int(args.get("page", 1))
    per_page = int(args.get("page_size", 15))
    order_field = args.get("orderby", "create_time")
    descending = args.get("desc", True)

    # No parent supplied: list the tenant's root folder.
    if not folder_id:
        folder_id = FileService.get_root_folder(tenant_id)["id"]
        FileService.init_knowledgebase_docs(folder_id, tenant_id)

    found, _folder = FileService.get_by_id(folder_id)
    if not found:
        return False, "Folder not found!"

    entries, count = FileService.get_by_pf_id(tenant_id, folder_id, page, per_page, order_field, descending, search_terms)
    parent = FileService.get_parent_folder(folder_id)
    if not parent:
        return False, "File not found!"
    return True, {"total": count, "files": entries, "parent_folder": parent.to_json()}
def get_parent_folder(file_id: str):
    """
    Get the immediate parent folder of a file.

    :param file_id: file ID
    :return: (True, {"parent_folder": ...}) on success, or (False, error_message)
    """
    exists, _entry = FileService.get_by_id(file_id)
    if not exists:
        return False, "Folder not found!"
    parent = FileService.get_parent_folder(file_id)
    return True, {"parent_folder": parent.to_json()}
def get_all_parent_folders(file_id: str):
    """
    Get all ancestor folders of a file.

    :param file_id: file ID
    :return: (True, {"parent_folders": [...]}) on success, or (False, error_message)
    """
    exists, _entry = FileService.get_by_id(file_id)
    if not exists:
        return False, "Folder not found!"
    ancestors = FileService.get_all_parent_folders(file_id)
    return True, {"parent_folders": [folder.to_json() for folder in ancestors]}
async def delete_files(uid: str, file_ids: list):
    """
    Delete files/folders with team permission check and recursive deletion.

    Knowledge-base-sourced entries are skipped (managed elsewhere); folders
    are removed depth-first together with their contents.

    :param uid: user ID
    :param file_ids: list of file IDs to delete
    :return: (success, result) or (success, error_message)
    """
    def _delete_single_file(file):
        # Best-effort blob removal: a storage failure is logged but does not
        # abort the metadata cleanup below.
        try:
            if file.location:
                settings.STORAGE_IMPL.rm(file.parent_id, file.location)
        except Exception as e:
            logging.exception(f"Fail to remove object: {file.parent_id}/{file.location}, error: {e}")
        # Remove any documents linked to this file, then the link rows, then the file row.
        informs = File2DocumentService.get_by_file_id(file.id)
        for inform in informs:
            doc_id = inform.document_id
            e, doc = DocumentService.get_by_id(doc_id)
            if e and doc:
                tenant_id = DocumentService.get_tenant_id(doc_id)
                if tenant_id:
                    DocumentService.remove_document(doc, tenant_id)
        File2DocumentService.delete_by_file_id(file.id)
        FileService.delete(file)

    def _delete_folder_recursive(folder, tenant_id):
        # Depth-first removal of a folder subtree.
        # NOTE(review): `tenant_id` is never read here, and the caller passes
        # the user id — confirm the parameter is intentional.
        sub_files = FileService.list_all_files_by_parent_id(folder.id)
        for sub_file in sub_files:
            if sub_file.type == FileType.FOLDER.value:
                _delete_folder_recursive(sub_file, tenant_id)
            else:
                _delete_single_file(sub_file)
        FileService.delete(folder)

    def _rm_sync():
        for file_id in file_ids:
            e, file = FileService.get_by_id(file_id)
            if not e or not file:
                return False, "File or Folder not found!"
            if not file.tenant_id:
                return False, "Tenant not found!"
            if not check_file_team_permission(file, uid):
                return False, "No authorization."
            # Entries sourced from a knowledge base are not deletable here.
            if file.source_type == FileSource.KNOWLEDGEBASE:
                continue
            if file.type == FileType.FOLDER.value:
                _delete_folder_recursive(file, uid)
                continue
            _delete_single_file(file)
        return True, True

    # DB/storage work is synchronous; run it off the event loop.
    return await thread_pool_exec(_rm_sync)
async def move_files(uid: str, src_file_ids: list, dest_file_id: str = None, new_name: str = None):
    """
    Move and/or rename files. Follows Linux mv semantics:
    - new_name only: rename in place (no storage operation)
    - dest_file_id only: move to new folder (keep names)
    - both: move and rename simultaneously

    :param uid: user ID
    :param src_file_ids: list of source file IDs
    :param dest_file_id: destination folder ID (optional)
    :param new_name: new name for the file (optional, single file only)
    :return: (success, result) or (success, error_message)
    """
    files = FileService.get_by_ids(src_file_ids)
    if not files:
        return False, "Source files not found!"
    files_dict = {f.id: f for f in files}
    # Validate every source entry before performing any mutation.
    for file_id in src_file_ids:
        file = files_dict.get(file_id)
        if not file:
            return False, "File or folder not found!"
        if not file.tenant_id:
            return False, "Tenant not found!"
        if not check_file_team_permission(file, uid):
            return False, "No authorization."
    dest_folder = None
    if dest_file_id:
        ok, dest_folder = FileService.get_by_id(dest_file_id)
        if not ok or not dest_folder:
            return False, "Parent folder not found!"
    if new_name:
        # Renaming is restricted to a single source entry (enforced upstream
        # by the request validator), so the first ID is the target.
        file = files_dict[src_file_ids[0]]
        # A regular file's extension must not change; folders have no extension.
        if file.type != FileType.FOLDER.value and \
                pathlib.Path(new_name.lower()).suffix != pathlib.Path(file.name.lower()).suffix:
            return False, "The extension of file can't be changed"
        # Check for a name clash in the folder the file will end up in.
        target_parent_id = dest_folder.id if dest_folder else file.parent_id
        for f in FileService.query(name=new_name, parent_id=target_parent_id):
            if f.name == new_name:
                return False, "Duplicated file name in the same folder."

    def _move_entry_recursive(source_file_entry, dest_folder_entry, override_name=None):
        # Move one entry into dest_folder_entry. Folders are merged into an
        # existing same-named folder (or recreated) and their contents moved
        # recursively; plain files are relocated at the storage layer.
        effective_name = override_name or source_file_entry.name
        if source_file_entry.type == FileType.FOLDER.value:
            existing_folder = FileService.query(name=effective_name, parent_id=dest_folder_entry.id)
            if existing_folder:
                new_folder = existing_folder[0]
            else:
                new_folder = FileService.insert({
                    "id": get_uuid(),
                    "parent_id": dest_folder_entry.id,
                    "tenant_id": source_file_entry.tenant_id,
                    "created_by": source_file_entry.tenant_id,
                    "name": effective_name,
                    "location": "",
                    "size": 0,
                    "type": FileType.FOLDER.value,
                })
            sub_files = FileService.list_all_files_by_parent_id(source_file_entry.id)
            for sub_file in sub_files:
                _move_entry_recursive(sub_file, new_folder)
            # The source folder row has been emptied; drop it.
            FileService.delete_by_id(source_file_entry.id)
            return
        # Non-folder file
        need_storage_move = dest_folder_entry.id != source_file_entry.parent_id
        updates = {}
        if need_storage_move:
            # Suffix underscores until the storage key is free in the destination.
            new_location = effective_name
            while settings.STORAGE_IMPL.obj_exist(dest_folder_entry.id, new_location):
                new_location += "_"
            try:
                settings.STORAGE_IMPL.move(
                    source_file_entry.parent_id, source_file_entry.location,
                    dest_folder_entry.id, new_location,
                )
            except Exception as storage_err:
                raise RuntimeError(f"Move file failed at storage layer: {str(storage_err)}")
            updates["parent_id"] = dest_folder_entry.id
            updates["location"] = new_location
        if override_name:
            updates["name"] = override_name
        if updates:
            FileService.update_by_id(source_file_entry.id, updates)
        if override_name:
            # Keep any linked document's name in sync with the file's new name.
            informs = File2DocumentService.get_by_file_id(source_file_entry.id)
            if informs:
                if not DocumentService.update_by_id(informs[0].document_id, {"name": override_name}):
                    raise RuntimeError("Database error (Document rename)!")

    def _move_or_rename_sync():
        if dest_folder:
            for file in files:
                _move_entry_recursive(file, dest_folder, override_name=new_name)
        else:
            # Pure rename: no storage operation needed
            file = files[0]
            if not FileService.update_by_id(file.id, {"name": new_name}):
                return False, "Database error (File rename)!"
            informs = File2DocumentService.get_by_file_id(file.id)
            if informs:
                if not DocumentService.update_by_id(informs[0].document_id, {"name": new_name}):
                    return False, "Database error (Document rename)!"
        return True, True

    # DB/storage work is synchronous; run it off the event loop.
    return await thread_pool_exec(_move_or_rename_sync)
def get_file_content(uid: str, file_id: str):
    """
    Look up a file entry for download, enforcing team permission.

    Note: despite the name, this returns the file record itself (the caller
    fetches the blob from storage), not the raw content.

    :param uid: user ID
    :param file_id: file ID
    :return: (True, file_obj) on success, or (False, error_message)
    """
    e, file = FileService.get_by_id(file_id)
    if not e:
        return False, "Document not found!"
    if not check_file_team_permission(file, uid):
        return False, "No authorization."
    return True, file

View File

@ -1,5 +1,5 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -781,3 +781,40 @@ class BaseListReq(BaseModel):
class ListDatasetReq(BaseListReq):
include_parsing_status: Annotated[bool, Field(default=False)]
ext: Annotated[dict, Field(default={})]
# ---- File Management Request Models ----
class CreateFolderReq(Base):
    """Request body for creating a file-system entry (folder or virtual file)."""

    # Entry name; surrounding whitespace is stripped, 1-255 characters.
    name: Annotated[str, StringConstraints(strip_whitespace=True, min_length=1, max_length=255), Field(...)]
    # Parent folder ID; when omitted the entry is created under the root folder.
    parent_id: Annotated[str | None, Field(default=None)]
    # Entry type, e.g. "folder"; non-folder values produce a virtual file.
    type: Annotated[str | None, Field(default=None)]
class DeleteFileReq(Base):
    """Request body for deleting files/folders."""

    # IDs of the files or folders to delete; at least one is required.
    ids: Annotated[list[str], Field(min_length=1)]
class MoveFileReq(Base):
    """Request body for moving and/or renaming files (Linux ``mv`` semantics)."""

    # Source file/folder IDs; at least one is required.
    src_file_ids: Annotated[list[str], Field(min_length=1)]
    # Destination folder ID; omit to rename in place.
    dest_file_id: Annotated[str | None, Field(default=None)]
    # New name (single source only). Constrain the str member first, then make
    # the field optional: pydantic v2 rejects StringConstraints applied
    # directly to a union/Optional annotation.
    new_name: Annotated[str, StringConstraints(strip_whitespace=True, min_length=1, max_length=255)] | None = None

    @model_validator(mode='after')
    def check_operation(self):
        """Require at least one operation, and restrict renaming to one file."""
        if not self.dest_file_id and not self.new_name:
            raise ValueError("At least one of dest_file_id or new_name must be provided")
        if self.new_name and len(self.src_file_ids) > 1:
            raise ValueError("new_name can only be used with a single file")
        return self
class ListFileReq(BaseModel):
    """Query parameters for listing files under a folder."""

    # Reject unrecognized query parameters.
    model_config = ConfigDict(extra="forbid")

    # Folder to list; defaults to the tenant's root folder when omitted.
    parent_id: Annotated[str | None, Field(default=None)]
    # Substring filter on file names.
    keywords: Annotated[str, Field(default="")]
    page: Annotated[int, Field(default=1, ge=1)]
    page_size: Annotated[int, Field(default=15, ge=1, le=100)]
    # Column to sort by.
    orderby: Annotated[str, Field(default="create_time")]
    # True for descending order.
    desc: Annotated[bool, Field(default=True)]

View File

@ -6309,14 +6309,14 @@ Explanation:
### Upload file
**POST** `/api/v1/file/upload`
**POST** `/api/v1/files`
Uploads one or multiple files to the system.
#### Request
- Method: POST
- URL: `/api/v1/file/upload`
- URL: `/api/v1/files`
- Headers:
- `'Content-Type: multipart/form-data'`
- `'Authorization: Bearer <YOUR_API_KEY>'`
@ -6328,7 +6328,7 @@ Uploads one or multiple files to the system.
```bash
curl --request POST \
--url http://{address}/api/v1/file/upload \
--url http://{address}/api/v1/files \
--header 'Content-Type: multipart/form-data' \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--form 'file=@./test1.txt' \
@ -6377,34 +6377,48 @@ Failure:
### Upload document
**POST** `/api/v1/file/upload_info`
**POST** `/v1/document/upload_info`
Uploads a file and creates the respective document
Uploads a file and creates the respective document.
#### Request
- Method: POST
- URL: `/api/v1/file/upload_info`
- URL: `/v1/document/upload_info`
- Headers:
- `'Content-Type: multipart/form-data`
- `'Content-Type: multipart/form-data'`
- `'Authorization: Bearer <YOUR_API_KEY>'`
- Form:
- `'file=@{FILE_PATH}'`
- `'file=@{FILE_PATH}'` (mutually exclusive with `url`)
- Query:
- `url`: URL to crawl and convert to a runtime attachment (mutually exclusive with `file`).
##### Request example
Upload a local file:
```bash
curl --request POST \
--url http://{address}/api/v1/file/upload_info \
--url http://{address}/v1/document/upload_info \
--header 'Content-Type: multipart/form-data' \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--form 'file=@./test1.pdf'
```
Crawl a URL:
```bash
curl --request POST \
--url 'http://{address}/v1/document/upload_info?url=https://example.com/page' \
--header 'Authorization: Bearer <YOUR_API_KEY>'
```
##### Request parameters
- `'file'`: (*Form parameter*), `file`, *Required*
The file to upload.
- `'file'`: (*Form parameter*), `file`, *Optional*
The file to upload. Mutually exclusive with `url`; either `file` or `url` must be provided.
- `url`: (*Query parameter*), `string`, *Optional*
A URL to crawl and store as an attachment. Mutually exclusive with `file`; either `url` or `file` must be provided.
#### Response
@ -6415,7 +6429,7 @@ Success:
"code": 0,
"data": {
"created_at": 1772451421.7924063,
"created by": "be951084066611f18f5f00155d2f98f4",
"created_by": "be951084066611f18f5f00155d2f98f4",
"extension": "pdf",
"id": "2143a03d162c11f1b80f00155d334d02",
"mime_type": "application/pdf",
@ -6438,16 +6452,70 @@ Failure:
---
### Download attachment
**GET** `/v1/document/download/{attachment_id}`
Downloads a runtime attachment previously uploaded via the [Upload document](#upload-document) method.
#### Request
- Method: GET
- URL: `/v1/document/download/{attachment_id}`
- Headers:
- `'Authorization: Bearer <YOUR_API_KEY>'`
- Query parameter:
- `ext`: `string` (Optional)
##### Request example
```bash
curl --request GET \
--url 'http://{address}/v1/document/download/{attachment_id}?ext=pdf' \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--output ./downloaded_attachment.pdf
```
##### Request parameters
- `attachment_id`: (*Path parameter*), `string`, *Required*
The `id` value returned by the [Upload document](#upload-document) method.
- `ext`: (*Query parameter*), `string`, *Optional*
A file extension hint specifying the response's Content-Type. Defaults to `"markdown"`. Available values:
- `"markdown"`
- `"html"`
- `"pdf"`
- `"docx"`
- `"xlsx"`
- `"csv"`
#### Response
Success:
Returns the file content as a binary stream with the relevant Content-Type header.
Failure:
```json
{
"code": 500,
"message": "Internal server error"
}
```
---
### Create file or folder
**POST** `/api/v1/file/create`
**POST** `/api/v1/files`
Creates a new file or folder in the system.
#### Request
- Method: POST
- URL: `/api/v1/file/create`
- URL: `/api/v1/files`
- Headers:
- `'Content-Type: application/json'`
- `'Authorization: Bearer <YOUR_API_KEY>'`
@ -6460,12 +6528,12 @@ Creates a new file or folder in the system.
```bash
curl --request POST \
--url http://{address}/api/v1/file/create \
--url http://{address}/api/v1/files \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--data '{
"name": "New Folder",
"type": "FOLDER",
"type": "folder",
"parent_id": "{folder_id}"
}'
```
@ -6478,8 +6546,8 @@ curl --request POST \
The parent folder ID. If not specified, the file/folder will be created in the root folder.
- `"type"`: (*Body parameter*), `string`
The type of the file to create. Available options:
- `"FOLDER"`: Create a folder
- `"VIRTUAL"`: Create a virtual file
- `"folder"`: Create a folder
- `"virtual"`: Create a virtual file
#### Response
@ -6491,7 +6559,7 @@ Success:
"data": {
"id": "b330ec2e91ec11efbc510242ac120004",
"name": "New Folder",
"type": "FOLDER",
"type": "folder",
"parent_id": "527fa74891e811ef9c650242ac120006",
"size": 0,
"create_time": 1729763127646
@ -6512,14 +6580,14 @@ Failure:
### List files
**GET** `/api/v1/file/list?parent_id={parent_id}&keywords={keywords}&page={page}&page_size={page_size}&orderby={orderby}&desc={desc}`
**GET** `/api/v1/files?parent_id={parent_id}&keywords={keywords}&page={page}&page_size={page_size}&orderby={orderby}&desc={desc}`
Lists files and folders under a specific folder.
#### Request
- Method: GET
- URL: `/api/v1/file/list?parent_id={parent_id}&keywords={keywords}&page={page}&page_size={page_size}&orderby={orderby}&desc={desc}`
- URL: `/api/v1/files?parent_id={parent_id}&keywords={keywords}&page={page}&page_size={page_size}&orderby={orderby}&desc={desc}`
- Headers:
- `'Authorization: Bearer <YOUR_API_KEY>'`
@ -6527,7 +6595,7 @@ Lists files and folders under a specific folder.
```bash
curl --request GET \
--url 'http://{address}/api/v1/file/list?parent_id={folder_id}&page=1&page_size=15' \
--url 'http://{address}/api/v1/files?parent_id={folder_id}&page=1&page_size=15' \
--header 'Authorization: Bearer <YOUR_API_KEY>'
```
@ -6585,60 +6653,16 @@ Failure:
---
### Get root folder
**GET** `/api/v1/file/root_folder`
Retrieves the user's root folder information.
#### Request
- Method: GET
- URL: `/api/v1/file/root_folder`
- Headers:
- `'Authorization: Bearer <YOUR_API_KEY>'`
##### Request example
```bash
curl --request GET \
--url http://{address}/api/v1/file/root_folder \
--header 'Authorization: Bearer <YOUR_API_KEY>'
```
##### Request parameters
No parameters required.
#### Response
Success:
```json
{
"code": 0,
"data": {
"root_folder": {
"id": "527fa74891e811ef9c650242ac120006",
"name": "root",
"type": "FOLDER"
}
}
}
```
---
### Get parent folder
**GET** `/api/v1/file/parent_folder?file_id={file_id}`
**GET** `/api/v1/files/{file_id}/parent`
Retrieves the immediate parent folder information of a specified file.
#### Request
- Method: GET
- URL: `/api/v1/file/parent_folder?file_id={file_id}`
- URL: `/api/v1/files/{file_id}/parent`
- Headers:
- `'Authorization: Bearer <YOUR_API_KEY>'`
@ -6646,13 +6670,13 @@ Retrieves the immediate parent folder information of a specified file.
```bash
curl --request GET \
--url 'http://{address}/api/v1/file/parent_folder?file_id={file_id}' \
--url 'http://{address}/api/v1/files/{file_id}/parent' \
--header 'Authorization: Bearer <YOUR_API_KEY>'
```
##### Request parameters
- `file_id`: (*Filter parameter*), `string`, *Required*
- `file_id`: (*Path parameter*), `string`, *Required*
The ID of the file whose immediate parent folder to retrieve.
#### Response
@ -6684,14 +6708,14 @@ Failure:
### Get all parent folders
**GET** `/api/v1/file/all_parent_folder?file_id={file_id}`
**GET** `/api/v1/files/{file_id}/ancestors`
Retrieves all parent folders of a specified file in the folder hierarchy.
#### Request
- Method: GET
- URL: `/api/v1/file/all_parent_folder?file_id={file_id}`
- URL: `/api/v1/files/{file_id}/ancestors`
- Headers:
- `'Authorization: Bearer <YOUR_API_KEY>'`
@ -6699,13 +6723,13 @@ Retrieves all parent folders of a specified file in the folder hierarchy.
```bash
curl --request GET \
--url 'http://{address}/api/v1/file/all_parent_folder?file_id={file_id}' \
--url 'http://{address}/api/v1/files/{file_id}/ancestors' \
--header 'Authorization: Bearer <YOUR_API_KEY>'
```
##### Request parameters
- `file_id`: (*Filter parameter*), `string`, *Required*
- `file_id`: (*Path parameter*), `string`, *Required*
The ID of the file whose parent folders to retrieve.
#### Response
@ -6743,35 +6767,35 @@ Failure:
### Delete files
**POST** `/api/v1/file/rm`
**DELETE** `/api/v1/files`
Deletes one or multiple files or folders.
#### Request
- Method: POST
- URL: `/api/v1/file/rm`
- Method: DELETE
- URL: `/api/v1/files`
- Headers:
- `'Content-Type: application/json'`
- `'Authorization: Bearer <YOUR_API_KEY>'`
- Body:
- `"file_ids"`: `list[string]`
- `"ids"`: `list[string]`
##### Request example
```bash
curl --request POST \
--url http://{address}/api/v1/file/rm \
curl --request DELETE \
--url http://{address}/api/v1/files \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--data '{
"file_ids": ["file_id_1", "file_id_2"]
"ids": ["file_id_1", "file_id_2"]
}'
```
##### Request parameters
- `"file_ids"`: (*Body parameter*), `list[string]`, *Required*
- `"ids"`: (*Body parameter*), `list[string]`, *Required*
The IDs of the files or folders to delete.
#### Response
@ -6796,84 +6820,16 @@ Failure:
---
### Rename file
**POST** `/api/v1/file/rename`
Renames a file or folder.
#### Request
- Method: POST
- URL: `/api/v1/file/rename`
- Headers:
- `'Content-Type: application/json'`
- `'Authorization: Bearer <YOUR_API_KEY>'`
- Body:
- `"file_id"`: `string`
- `"name"`: `string`
##### Request example
```bash
curl --request POST \
--url http://{address}/api/v1/file/rename \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--data '{
"file_id": "{file_id}",
"name": "new_name.txt"
}'
```
##### Request parameters
- `"file_id"`: (*Body parameter*), `string`, *Required*
The ID of the file or folder to rename.
- `"name"`: (*Body parameter*), `string`, *Required*
The new name for the file or folder. Note: Changing file extensions is *not* supported.
#### Response
Success:
```json
{
"code": 0,
"data": true
}
```
Failure:
```json
{
"code": 400,
"message": "The extension of file can't be changed"
}
```
or
```json
{
"code": 409,
"message": "Duplicated file name in the same folder."
}
```
---
### Download file
**GET** `/api/v1/file/get/{file_id}`
**GET** `/api/v1/files/{file_id}`
Downloads a file from the system.
#### Request
- Method: GET
- URL: `/api/v1/file/get/{file_id}`
- URL: `/api/v1/files/{file_id}`
- Headers:
- `'Authorization: Bearer <YOUR_API_KEY>'`
@ -6881,7 +6837,7 @@ Downloads a file from the system.
```bash
curl --request GET \
--url http://{address}/api/v1/file/get/{file_id} \
--url http://{address}/api/v1/files/{file_id} \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--output ./downloaded_file.txt
```
@ -6908,28 +6864,35 @@ Failure:
---
### Move files
### Move or rename files
**POST** `/api/v1/file/mv`
**POST** `/api/v1/files/move`
Moves one or multiple files or folders to a specified folder.
Moves and/or renames files or folders. Follows Linux `mv` semantics: at least one of `dest_file_id` or `new_name` must be provided.
- `dest_file_id` only: move files to a new folder, names unchanged.
- `new_name` only: rename a single file or folder in place, no storage operation.
- Both: move and rename simultaneously.
#### Request
- Method: POST
- URL: `/api/v1/file/mv`
- URL: `/api/v1/files/move`
- Headers:
- `'Content-Type: application/json'`
- `'Authorization: Bearer <YOUR_API_KEY>'`
- Body:
- `"src_file_ids"`: `list[string]`
- `"dest_file_id"`: `string`
- `"src_file_ids"`: `list[string]`, *Required*
- `"dest_file_id"`: `string`, *Optional*
- `"new_name"`: `string`, *Optional*
##### Request example
##### Request examples
Move files to a folder:
```bash
curl --request POST \
--url http://{address}/api/v1/file/mv \
--url http://{address}/api/v1/files/move \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--data '{
@ -6938,12 +6901,27 @@ curl --request POST \
}'
```
Rename a file in place:
```bash
curl --request POST \
--url http://{address}/api/v1/files/move \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--data '{
"src_file_ids": ["{file_id}"],
"new_name": "new_name.txt"
}'
```
##### Request parameters
- `"src_file_ids"`: (*Body parameter*), `list[string]`, *Required*
The IDs of the files or folders to move.
- `"dest_file_id"`: (*Body parameter*), `string`, *Required*
The ID of the destination folder.
- `"src_file_ids"`: (*Body parameter*), `list[string]`, *Required*
The IDs of the files or folders to move or rename.
- `"dest_file_id"`: (*Body parameter*), `string`, *Optional*
The ID of the destination folder. Omit to rename in place.
- `"new_name"`: (*Body parameter*), `string`, *Optional*
New name for the file or folder. Only valid when `src_file_ids` contains a single entry. Note: Changing file extensions is *not* supported.
#### Response
@ -6970,7 +6948,16 @@ or
```json
{
"code": 404,
"message": "Parent Folder not found!"
"message": "Parent folder not found!"
}
```
or
```json
{
"code": 400,
"message": "The extension of file can't be changed"
}
```
@ -6978,14 +6965,14 @@ or
### Convert files to documents and link them to datasets
**POST** `/api/v1/file/convert`
**POST** `/v1/file2document/convert`
Converts files to documents and links them to specified datasets.
#### Request
- Method: POST
- URL: `/api/v1/file/convert`
- URL: `/v1/file2document/convert`
- Headers:
- `'Content-Type: application/json'`
- `'Authorization: Bearer <YOUR_API_KEY>'`
@ -6997,7 +6984,7 @@ Converts files to documents and links them to specified datasets.
```bash
curl --request POST \
--url http://{address}/api/v1/file/convert \
--url http://{address}/v1/file2document/convert \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--data '{

File diff suppressed because it is too large Load Diff

View File

@ -101,6 +101,20 @@ def document_app_module(monkeypatch):
deepdoc_html_module.RAGFlowHtmlParser = _StubHtmlParser
monkeypatch.setitem(sys.modules, "deepdoc.parser.html_parser", deepdoc_html_module)
deepdoc_mineru_module = ModuleType("deepdoc.parser.mineru_parser")
class _StubMinerUParser:
pass
deepdoc_mineru_module.MinerUParser = _StubMinerUParser
monkeypatch.setitem(sys.modules, "deepdoc.parser.mineru_parser", deepdoc_mineru_module)
deepdoc_paddleocr_module = ModuleType("deepdoc.parser.paddleocr_parser")
class _StubPaddleOCRParser:
pass
deepdoc_paddleocr_module.PaddleOCRParser = _StubPaddleOCRParser
monkeypatch.setitem(sys.modules, "deepdoc.parser.paddleocr_parser", deepdoc_paddleocr_module)
monkeypatch.setitem(sys.modules, "xgboost", ModuleType("xgboost"))
stub_apps = ModuleType("api.apps")

View File

@ -1,5 +1,5 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -225,9 +225,10 @@ class _DummyFile:
class _DummyRequest:
def __init__(self, form=None, files=None):
def __init__(self, form=None, files=None, args=None):
self._form = form or {}
self._files = files or _DummyFiles()
self.args = args or {}
@property
def form(self):

View File

@ -0,0 +1,139 @@
#
# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import asyncio
from pathlib import Path
import importlib.util
import sys
from types import ModuleType
import pytest
class _AwaitableValue:
def __init__(self, value):
self._value = value
def __await__(self):
async def _co():
return self._value
return _co().__await__()
class _DummyFiles(dict):
def getlist(self, key):
value = self.get(key, [])
if isinstance(value, list):
return value
return [value]
class _DummyFile:
def __init__(self, filename):
self.filename = filename
class _DummyRequest:
def __init__(self, *, files=None, args=None):
self._files = files or _DummyFiles()
self.args = args or {}
@property
def files(self):
return _AwaitableValue(self._files)
def _run(coro):
return asyncio.run(coro)
def _load_document_app_module(monkeypatch):
    """Load the document_app conftest from disk and return the patched app module.

    Stubs the shared ``common`` test helpers before executing the conftest so
    its module-level imports resolve, then invokes the ``document_app_module``
    fixture's raw (unwrapped) function directly with *monkeypatch*.
    """
    repo_root = Path(__file__).resolve().parents[4]

    # The conftest imports `common` at module scope; provide no-op stand-ins.
    common_mod = ModuleType("common")
    common_mod.bulk_upload_documents = lambda *_args, **_kwargs: []
    common_mod.delete_document = lambda *_args, **_kwargs: None
    common_mod.list_documents = lambda *_args, **_kwargs: {"data": {"docs": []}}
    monkeypatch.setitem(sys.modules, "common", common_mod)

    conftest_path = repo_root / "test" / "testcases" / "test_web_api" / "test_document_app" / "conftest.py"
    spec = importlib.util.spec_from_file_location("test_document_app_unit_conftest", conftest_path)
    loaded = importlib.util.module_from_spec(spec)
    # Register before exec so intra-module lookups during execution succeed.
    sys.modules["test_document_app_unit_conftest"] = loaded
    spec.loader.exec_module(loaded)

    # Bypass pytest's fixture wrapper and call the underlying function.
    return loaded.document_app_module.__wrapped__(monkeypatch)
@pytest.mark.p2
def test_upload_info_rejects_mixed_inputs(monkeypatch):
    """Supplying both multipart file(s) and ?url= must be rejected as ambiguous."""
    module = _load_document_app_module(monkeypatch)

    dummy_files = _DummyFiles({"file": [_DummyFile("a.txt")]})
    request_stub = _DummyRequest(files=dummy_files, args={"url": "https://example.com/a.txt"})
    monkeypatch.setattr(module, "request", request_stub)

    res = _run(module.upload_info())

    assert res["code"] == module.RetCode.BAD_REQUEST
    assert "not both" in res["message"]
@pytest.mark.p2
def test_upload_info_requires_file_or_url(monkeypatch):
    """A request with neither a multipart file nor a url must be rejected."""
    module = _load_document_app_module(monkeypatch)

    monkeypatch.setattr(module, "request", _DummyRequest(files=_DummyFiles()))

    res = _run(module.upload_info())

    assert res["code"] == module.RetCode.BAD_REQUEST
    assert "Missing input" in res["message"]
@pytest.mark.p2
def test_upload_info_supports_url_single_and_multiple_files(monkeypatch):
    """upload_info dispatches correctly for url-only, single-file, and multi-file input."""
    module = _load_document_app_module(monkeypatch)

    calls = []

    def fake_upload_info(user_id, file_obj, url=None):
        # Record each invocation so the dispatch order can be verified below.
        calls.append((user_id, getattr(file_obj, "filename", None), url))
        if url is not None:
            return {"kind": "url", "value": url}
        return {"kind": "file", "value": file_obj.filename}

    monkeypatch.setattr(module.FileService, "upload_info", fake_upload_info)

    # URL-only input returns a single url result.
    monkeypatch.setattr(module, "request", _DummyRequest(files=_DummyFiles(), args={"url": "https://example.com/a.txt"}))
    res = _run(module.upload_info())
    assert res["code"] == 0
    assert res["data"] == {"kind": "url", "value": "https://example.com/a.txt"}

    # A single multipart file returns one file result.
    monkeypatch.setattr(module, "request", _DummyRequest(files=_DummyFiles({"file": _DummyFile("single.txt")})))
    res = _run(module.upload_info())
    assert res["code"] == 0
    assert res["data"] == {"kind": "file", "value": "single.txt"}

    # Multiple multipart files return a list of per-file results.
    multi_files = _DummyFiles({"file": [_DummyFile("a.txt"), _DummyFile("b.txt")]})
    monkeypatch.setattr(module, "request", _DummyRequest(files=multi_files))
    res = _run(module.upload_info())
    assert res["code"] == 0
    assert res["data"] == [
        {"kind": "file", "value": "a.txt"},
        {"kind": "file", "value": "b.txt"},
    ]

    # Every branch delegated to FileService.upload_info with the expected arguments.
    assert calls == [
        ("user-1", None, "https://example.com/a.txt"),
        ("user-1", "single.txt", None),
        ("user-1", "a.txt", None),
        ("user-1", "b.txt", None),
    ]

View File

@ -143,6 +143,10 @@ def _load_file2document_module(monkeypatch):
def get_by_id(_file_id):
return True, _DummyFile(_file_id, _FileType.DOC.value)
@staticmethod
def get_parser(_file_type, _file_name, parser_id):
return parser_id
file_service_mod.FileService = _StubFileService
monkeypatch.setitem(sys.modules, "api.db.services.file_service", file_service_mod)
services_pkg.file_service = file_service_mod
@ -284,7 +288,14 @@ def test_convert_branch_matrix_unit(monkeypatch):
"get_by_id",
lambda _file_id: (True, _DummyFile("inner-1", module.FileType.DOC.value, name="inner.txt", location="inner.loc", size=2)),
)
monkeypatch.setattr(module.DocumentService, "insert", lambda _payload: SimpleNamespace(id="doc-new"))
inserted = {}
def _insert(payload):
inserted.update(payload)
return SimpleNamespace(id="doc-new")
monkeypatch.setattr(module.DocumentService, "insert", _insert)
monkeypatch.setattr(module.FileService, "get_parser", lambda _ft, _name, _parser_id: "picked-parser")
monkeypatch.setattr(
module.File2DocumentService,
"insert",
@ -293,6 +304,8 @@ def test_convert_branch_matrix_unit(monkeypatch):
res = _run(module.convert())
assert res["code"] == 0
assert res["data"] == [{"file_id": "inner-1", "document_id": "doc-new"}]
assert inserted["parser_id"] == "picked-parser"
assert inserted["pipeline_id"] == "p1"
req_state["file_ids"] = ["f1"]
monkeypatch.setattr(

View File

@ -10,7 +10,7 @@ interface IProps extends React.PropsWithChildren {
color?: string;
documentName: string;
documentId?: string;
prefix?: string;
resource?: 'document' | 'files';
className?: string;
}
@ -21,13 +21,13 @@ const NewDocumentLink = ({
color = 'rgb(15, 79, 170)',
documentId,
documentName,
prefix = 'file',
resource = 'document',
className,
}: IProps) => {
let nextLink = link;
const extension = getExtension(documentName);
if (!link) {
nextLink = `/document/${documentId}?ext=${extension}&prefix=${prefix}`;
nextLink = `/document/${documentId}?ext=${extension}&resource=${resource}`;
}
return (

View File

@ -19,7 +19,7 @@ function NameWidget({ name, size }: NameWidgetType) {
return (
<div className="text-xs max-w-20">
{/* {id ? (
<NewDocumentLink documentId={id} documentName={name} prefix="document">
<NewDocumentLink documentId={id} documentName={name} resource="document">
{name}
</NewDocumentLink>
) : (

View File

@ -78,7 +78,8 @@ export const useUploadFile = () => {
export interface IMoveFileBody {
src_file_ids: string[];
dest_file_id: string; // target folder id
dest_file_id?: string;
new_name?: string;
}
export const useMoveFile = () => {
@ -119,7 +120,8 @@ export const useCreateFolder = () => {
mutationKey: [FileApiAction.CreateFolder],
mutationFn: async (params: { parentId: string; name: string }) => {
const { data } = await fileManagerService.createFolder({
...params,
name: params.name,
parent_id: params.parentId,
type: 'folder',
});
if (data.code === 0) {
@ -143,9 +145,10 @@ export const useFetchParentFolderList = () => {
initialData: [],
enabled: !!id,
queryFn: async () => {
const { data } = await fileManagerService.getAllParentFolder({
fileId: id,
});
const { data } = await fileManagerService.getAllParentFolder(
{},
`${id}/ancestors`,
);
return data?.data?.parent_folders?.toReversed() ?? [];
},
@ -221,7 +224,9 @@ export const useDeleteFile = () => {
} = useMutation({
mutationKey: [FileApiAction.DeleteFile],
mutationFn: async (params: { fileIds: string[]; parentId: string }) => {
const { data } = await fileManagerService.removeFile(params);
const { data } = await fileManagerService.removeFile({
ids: params.fileIds,
});
if (data.code === 0) {
message.success(t('message.deleted'));
setPaginationParams(1); // TODO: There should be a better way to paginate the request list
@ -262,7 +267,10 @@ export const useRenameFile = () => {
} = useMutation({
mutationKey: [FileApiAction.RenameFile],
mutationFn: async (params: { fileId: string; name: string }) => {
const { data } = await fileManagerService.renameFile(params);
const { data } = await fileManagerService.moveFile({
src_file_ids: [params.fileId],
new_name: params.name,
});
if (data.code === 0) {
message.success(t('message.renamed'));
queryClient.invalidateQueries({

View File

@ -1,5 +1,5 @@
import { Images } from '@/constants/common';
import { api_host } from '@/utils/api';
import { ExternalApi, api_host } from '@/utils/api';
import { useParams, useSearchParams } from 'react-router';
// import Docx from './docx';
// import Excel from './excel';
@ -24,12 +24,16 @@ const DocumentViewer = () => {
const { id: documentId } = useParams();
const [currentQueryParameters] = useSearchParams();
const ext = currentQueryParameters.get('ext');
const prefix = currentQueryParameters.get('prefix');
const api = `${api_host}/${prefix || 'file'}/get/${documentId}`;
const resource =
currentQueryParameters.get('resource') === 'files' ? 'files' : 'document';
const api =
resource === 'files'
? `${ExternalApi}${api_host}/files/${documentId}`
: `${api_host}/document/get/${documentId}`;
// request.head
if (ext === 'html' && documentId) {
previewHtmlFile(documentId);
previewHtmlFile(documentId, resource);
return;
}

View File

@ -124,6 +124,7 @@ export function ActionCell({
<NewDocumentLink
documentId={documentId}
documentName={record.name}
resource="files"
className="text-text-sub-title-invert"
>
<Button

View File

@ -6,7 +6,6 @@ const {
listFile,
removeFile,
uploadFile,
renameFile,
getAllParentFolder,
createFolder,
connectFileToKnowledge,
@ -23,16 +22,12 @@ const methods = {
},
removeFile: {
url: removeFile,
method: 'post',
method: 'delete',
},
uploadFile: {
url: uploadFile,
method: 'post',
},
renameFile: {
url: renameFile,
method: 'post',
},
getAllParentFolder: {
url: getAllParentFolder,
method: 'get',

View File

@ -161,15 +161,14 @@ export default {
`${ExternalApi}${api_host}/chatbots/${id}/info`,
// file manager
listFile: `${api_host}/file/list`,
uploadFile: `${api_host}/file/upload`,
removeFile: `${api_host}/file/rm`,
renameFile: `${api_host}/file/rename`,
getAllParentFolder: `${api_host}/file/all_parent_folder`,
createFolder: `${api_host}/file/create`,
listFile: `${ExternalApi}${api_host}/files`,
uploadFile: `${ExternalApi}${api_host}/files`,
removeFile: `${ExternalApi}${api_host}/files`,
getAllParentFolder: `${ExternalApi}${api_host}/files`,
createFolder: `${ExternalApi}${api_host}/files`,
connectFileToKnowledge: `${api_host}/file2document/convert`,
getFile: `${api_host}/file/get`,
moveFile: `${api_host}/file/mv`,
getFile: `${ExternalApi}${api_host}/files`,
moveFile: `${ExternalApi}${api_host}/files/move`,
// system
getSystemVersion: `${api_host}/system/version`,

View File

@ -101,8 +101,15 @@ export const getBase64FromUploadFileList = async (fileList?: UploadFile[]) => {
return '';
};
async function fetchDocumentBlob(id: string, mimeType?: FileMimeType) {
const response = await fileManagerService.getDocumentFile({}, id);
async function fetchPreviewBlob(
id: string,
resource: 'document' | 'files',
mimeType?: FileMimeType,
) {
const response =
resource === 'files'
? await fileManagerService.getFile({}, id)
: await fileManagerService.getDocumentFile({}, id);
const blob = new Blob([response.data], {
type: mimeType || response.data.type,
});
@ -110,8 +117,11 @@ async function fetchDocumentBlob(id: string, mimeType?: FileMimeType) {
return blob;
}
export async function previewHtmlFile(id: string) {
const blob = await fetchDocumentBlob(id, FileMimeType.Html);
export async function previewHtmlFile(
id: string,
resource: 'document' | 'files' = 'document',
) {
const blob = await fetchPreviewBlob(id, resource, FileMimeType.Html);
const url = URL.createObjectURL(blob);
const link = document.createElement('a');
link.href = url;
@ -137,7 +147,7 @@ export const downloadDocument = async ({
id: string;
filename?: string;
}) => {
const blob = await fetchDocumentBlob(id);
const blob = await fetchPreviewBlob(id, 'document');
downloadFileFromBlob(blob, filename);
};