Files
ragflow/api/utils/tenant_utils.py
Syed Shahmeer Ali ff92b5575b Fix: /file2document/convert blocks event loop on large folders causing 504 timeout (#13784)
Problem

The /file2document/convert endpoint ran all file lookups, document
deletions, and insertions synchronously inside the
request cycle. Linking a large folder (~1.7GB with many files) caused
504 Gateway Timeout because the blocking DB loop
  held the HTTP connection open for too long.

  Fix

- Extracted the heavy DB work into a plain sync function _convert_files
- Inputs are validated and folder file IDs expanded upfront (fast path)
- The blocking work is dispatched to a thread pool via
get_running_loop().run_in_executor() and the endpoint returns 200
  immediately
- Frontend only checks data.code === 0 so the response change
(file2documents list → True) has no impact

  Fixes #13781

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-26 16:45:10 +08:00

38 lines
1.5 KiB
Python

#
# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from common.constants import LLMType
from api.db.services.tenant_llm_service import TenantLLMService
_KEY_TO_MODEL_TYPE = {
"llm_id": LLMType.CHAT,
"embd_id": LLMType.EMBEDDING,
"asr_id": LLMType.SPEECH2TEXT,
"img2txt_id": LLMType.IMAGE2TEXT,
"rerank_id": LLMType.RERANK,
"tts_id": LLMType.TTS,
}
def ensure_tenant_model_id_for_params(tenant_id: str, param_dict: dict) -> dict:
for key in ["llm_id", "embd_id", "asr_id", "img2txt_id", "rerank_id", "tts_id"]:
if param_dict.get(key) and not param_dict.get(f"tenant_{key}"):
model_type = _KEY_TO_MODEL_TYPE.get(key)
tenant_model = TenantLLMService.get_api_key(tenant_id, param_dict[key], model_type)
if tenant_model:
param_dict.update({f"tenant_{key}": tenant_model.id})
else:
param_dict.update({f"tenant_{key}": 0})
return param_dict