Files
ragflow/api/db/joint_services/tenant_model_service.py
VincentLambert c44dc85143 Fix: IMAGE2TEXT→CHAT fallback with model_type normalization in tenant_model_service (#14704)
## Summary

- When a model is registered as `chat` in `tenant_llm` but has the
`IMAGE2TEXT` tag in `llm_factories.json`, requesting it as `image2text`
(e.g. PDF parser) fails with `Tenant Model with name <model> and type
image2text not found`.
- After resolution via the new fallback, the returned
`config_dict["model_type"]` was still `"chat"`, causing
`tenant_llm_service.model_instance()` to instantiate `ChatModel` instead
of `CvModel` — breaking `describe_with_prompt` at ingestion time.

## What problem does this PR solve?

RAGFlow already has a `CHAT→IMAGE2TEXT` fallback: when a chat model is
not found, it retries with `image2text`. The symmetric fallback
(`IMAGE2TEXT→CHAT`) was missing.

This matters for multimodal models declared as `model_type: "chat"` with
an `IMAGE2TEXT` tag in `llm_factories.json` (e.g. models added after
tenant creation, or providers where a single model serves both
purposes). The frontend PDF parser selector correctly surfaces these
models via the `IMAGE2TEXT` tag, but the backend fails to resolve them
at runtime.

## Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

## Changes

**`api/db/joint_services/tenant_model_service.py`**

1. Add `IMAGE2TEXT→CHAT` fallback in
`get_model_config_by_type_and_name`: when an `image2text` model is not
found in `tenant_llm`, retry with `chat` — but only if the `llm` table
confirms `IMAGE2TEXT` capability via the `tags` field. This mirrors the
philosophy of the existing `CHAT→IMAGE2TEXT` fallback: substitution is
only allowed when the model has declared the required capability.

2. Normalize `config_dict["model_type"]` to `image2text` after the
fallback, so the caller (`model_instance`) correctly routes to `CvModel`
instead of `ChatModel`.

3. Extend the type validation guard to allow `(requested=image2text,
found=chat)` alongside the existing `(requested=chat, found=image2text)`
exception.

## Test plan

- [ ] Add a model with `model_type=chat` and `tags` containing
`IMAGE2TEXT` to a tenant
- [ ] Select it as PDF parser in a knowledge base
- [ ] Verify ingestion succeeds without `image2text not found` or
`describe_with_prompt` errors
- [ ] Verify the same model still works correctly in chat context

🤖 Generated with [Claude Code](https://claude.ai/claude-code)

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-09 10:40:58 +08:00

152 lines
7.4 KiB
Python

#
# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
import os
import enum
from common import settings
from common.constants import LLMType
from api.db.services.llm_service import LLMService
from api.db.services.tenant_llm_service import TenantLLMService, TenantService
logger = logging.getLogger(__name__)
def get_model_config_by_id(tenant_model_id: int) -> dict:
    """Load a tenant model record by id and return it as a plain config dict.

    The record's stored ``api_key`` field is run through
    ``TenantLLMService._decode_api_key_config`` and replaced by the decoded
    key. When the decoder also yields a payload and/or an ``is_tools`` flag
    (i.e. a value that is not ``None``), they are stored under
    ``api_key_payload`` / ``is_tools``. If ``is_tools`` is still absent after
    that, it is copied from the first ``llm`` table row whose ``llm_name``
    matches, when such a row exists.

    Args:
        tenant_model_id: Id of the tenant model record to load.

    Returns:
        The record's ``to_dict()`` output with the adjustments described above.

    Raises:
        LookupError: If no tenant model with that id exists.
    """
    found, model_config = TenantLLMService.get_by_id(tenant_model_id)
    if not found:
        raise LookupError(f"Tenant Model with id {tenant_model_id} not found")

    config_dict = model_config.to_dict()
    api_key, is_tools, api_key_payload = TenantLLMService._decode_api_key_config(config_dict.get("api_key", ""))
    config_dict["api_key"] = api_key
    if api_key_payload is not None:
        config_dict["api_key_payload"] = api_key_payload
    if is_tools is not None:
        config_dict["is_tools"] = is_tools

    # Consult the llm table only when the flag is still unknown; the result of
    # the lookup is never used otherwise, so skipping it saves a query.
    # NOTE(review): the lookup matches on llm_name only (no factory filter) --
    # confirm that same-named models from different factories agree on is_tools.
    if "is_tools" not in config_dict:
        llm = LLMService.query(llm_name=config_dict["llm_name"])
        if llm:
            config_dict["is_tools"] = llm[0].is_tools
    return config_dict
def _chat_config_tagged_image2text(tenant_id, pure_model_name):
    """Return the tenant's CHAT record for *pure_model_name* if the llm table tags it IMAGE2TEXT.

    Returns ``None`` when the tenant has no such chat record, when the llm
    table has no row for the record's (factory, name), or when that row's
    comma-separated ``tags`` do not contain ``IMAGE2TEXT``.
    """
    chat_config = TenantLLMService.get_api_key(tenant_id, pure_model_name, LLMType.CHAT.value)
    logger.debug("IMAGE2TEXT config not found for %s; chat_config found: %s", pure_model_name, chat_config is not None)
    if not chat_config:
        return None

    llm_entry = LLMService.query(fid=chat_config.llm_factory, llm_name=chat_config.llm_name)
    tags = [t.strip() for t in (llm_entry[0].tags or "").split(",")] if llm_entry else []
    logger.debug("LLM tags for %s/%s: %s", chat_config.llm_factory, chat_config.llm_name, tags)
    if "IMAGE2TEXT" not in tags:
        return None

    logger.debug("Promoting chat config to IMAGE2TEXT for %s", pure_model_name)
    return chat_config


def _resolve_config_by_pure_name(tenant_id, model_type_val, model_name):
    """Second-chance lookup, used when *model_name* did not match verbatim.

    Splits ``name@factory`` into its parts and retries with the bare name,
    applying the type-specific fallbacks. Returns a config dict or raises
    ``LookupError`` when nothing matches.
    """
    pure_model_name, fid = TenantLLMService.split_model_name_and_factory(model_name)

    compose_profiles = os.getenv("COMPOSE_PROFILES", "")
    is_tei_builtin_embedding = (
        model_type_val == LLMType.EMBEDDING.value
        and "tei-" in compose_profiles
        and pure_model_name == os.getenv("TEI_MODEL", "")
        and (fid == "Builtin" or fid is None)
    )
    if is_tei_builtin_embedding:
        # Configured local embedding model: build the config from settings
        # instead of a tenant record.
        embedding_cfg = settings.EMBEDDING_CFG
        return {
            "llm_factory": "Builtin",
            "api_key": embedding_cfg["api_key"],
            "llm_name": pure_model_name,
            "api_base": embedding_cfg["base_url"],
            "model_type": LLMType.EMBEDDING.value,
        }

    if model_type_val == LLMType.CHAT.value:
        # Retry as CHAT first; then fall back to a multimodal model
        # registered under IMAGE2TEXT.
        model_config = (
            TenantLLMService.get_api_key(tenant_id, pure_model_name, LLMType.CHAT.value)
            or TenantLLMService.get_api_key(tenant_id, pure_model_name, LLMType.IMAGE2TEXT.value)
        )
    elif model_type_val == LLMType.IMAGE2TEXT.value:
        model_config = TenantLLMService.get_api_key(tenant_id, pure_model_name, LLMType.IMAGE2TEXT.value)
        if not model_config:
            # Fall back to a chat model only if it has declared IMAGE2TEXT
            # capability (tag check via the llm table).
            model_config = _chat_config_tagged_image2text(tenant_id, pure_model_name)
    else:
        model_config = TenantLLMService.get_api_key(tenant_id, pure_model_name, model_type_val)

    if not model_config:
        raise LookupError(f"Tenant Model with name {model_name} and type {model_type_val} not found")

    config_dict = model_config.to_dict()
    if model_type_val == LLMType.IMAGE2TEXT.value:
        # A promoted chat record must still be reported as image2text to the caller.
        config_dict["model_type"] = LLMType.IMAGE2TEXT.value
    return config_dict


def get_model_config_by_type_and_name(tenant_id: str, model_type: str, model_name: str):
    """Resolve a tenant's model config from a model type and a model name.

    The name is first looked up verbatim. If that fails it is split as
    ``name@factory`` and the bare name is retried, with these fallbacks:

    * an EMBEDDING request for the TEI model named by the ``TEI_MODEL``
      environment variable (with a ``tei-`` entry in ``COMPOSE_PROFILES`` and
      factory ``Builtin`` or none) is served from ``settings.EMBEDDING_CFG``;
    * a CHAT request may resolve to a record registered as IMAGE2TEXT;
    * an IMAGE2TEXT request may resolve to a CHAT record, but only when the
      ``llm`` table tags that model ``IMAGE2TEXT``.

    Whenever an IMAGE2TEXT request ends up with a record typed ``chat`` --
    including when the verbatim lookup returned one -- the returned
    ``model_type`` is rewritten to ``image2text`` so the result always carries
    the type the caller asked for in that case.

    The stored ``api_key`` is decoded with
    ``TenantLLMService._decode_api_key_config``; ``api_key_payload`` and
    ``is_tools`` are added when the decoder provides them, and a missing
    ``is_tools`` is filled from the first ``llm`` table row with the same
    ``llm_name``, when one exists.

    Args:
        tenant_id: Id of the tenant owning the model.
        model_type: Requested type, as a string or an object exposing ``.value``.
        model_name: Model name, optionally in ``name@factory`` form.

    Returns:
        The resolved config dict.

    Raises:
        Exception: If ``model_name`` is empty.
        LookupError: If no matching model is found, or the record found has a
            type other than the requested one (CHAT/IMAGE2TEXT are
            interchangeable for this check).
    """
    if not model_name:
        raise Exception("Model Name is required")

    model_type_val = model_type.value if hasattr(model_type, "value") else model_type
    model_config = TenantLLMService.get_api_key(tenant_id, model_name, model_type_val)
    if model_config:
        # Verbatim name matched (model_name without @factory).
        config_dict = model_config.to_dict()
    else:
        # model_name in format 'name@factory': split it and try again.
        config_dict = _resolve_config_by_pure_name(tenant_id, model_type_val, model_name)

    api_key, is_tools, api_key_payload = TenantLLMService._decode_api_key_config(config_dict.get("api_key", ""))
    config_dict["api_key"] = api_key
    if api_key_payload is not None:
        config_dict["api_key_payload"] = api_key_payload
    if is_tools is not None:
        config_dict["is_tools"] = is_tools

    config_model_type = config_dict.get("model_type")
    config_model_type = config_model_type.value if hasattr(config_model_type, "value") else config_model_type
    chat_type = LLMType.CHAT.value
    image2text_type = LLMType.IMAGE2TEXT.value
    if config_model_type != model_type_val:
        interchangeable = (
            (model_type_val == chat_type and config_model_type == image2text_type)
            or (model_type_val == image2text_type and config_model_type == chat_type)
        )
        if not interchangeable:
            raise LookupError(
                f"Tenant Model with name {model_name} has type {config_model_type}, expected {model_type_val}"
            )
        if model_type_val == image2text_type:
            # The verbatim lookup returned a chat-typed record for an
            # image2text request. Report it as image2text, exactly as the
            # split-name fallback does, so the caller does not receive a
            # chat-typed config for an image2text request.
            config_dict["model_type"] = image2text_type

    # Consult the llm table only when the flag is still unknown; the result of
    # the lookup is never used otherwise, so skipping it saves a query.
    # NOTE(review): this lookup matches on llm_name only, while the IMAGE2TEXT
    # tag check also filters by factory -- confirm name-only matching is intended.
    if "is_tools" not in config_dict:
        llm = LLMService.query(llm_name=config_dict["llm_name"])
        if llm:
            config_dict["is_tools"] = llm[0].is_tools
    return config_dict
def get_tenant_default_model_by_type(tenant_id: str, model_type: str|enum.Enum):
    """Return the config of the tenant's default model for *model_type*.

    The default model name is read from the tenant record: ``embd_id`` for
    EMBEDDING, ``asr_id`` for SPEECH2TEXT, ``img2txt_id`` for IMAGE2TEXT,
    ``llm_id`` for CHAT, ``rerank_id`` for RERANK and ``tts_id`` for TTS.
    The name is then resolved with ``get_model_config_by_type_and_name``.

    Args:
        tenant_id: Id of the tenant whose default is wanted.
        model_type: Model type as a string, or an enum member whose
            ``.value`` is the type string.

    Returns:
        Whatever ``get_model_config_by_type_and_name`` returns for the
        tenant's default model of that type.

    Raises:
        LookupError: If the tenant does not exist.
        Exception: For the OCR type (no tenant default is read for it), for
            an unknown type, or when the tenant has no default set for the type.
    """
    tenant_exists, tenant_record = TenantService.get_by_id(tenant_id)
    if not tenant_exists:
        raise LookupError("Tenant not found")

    type_value = model_type if isinstance(model_type, str) else model_type.value

    # Pick the tenant attribute holding the default model name for this type.
    if type_value == LLMType.EMBEDDING.value:
        default_name = tenant_record.embd_id
    elif type_value == LLMType.SPEECH2TEXT.value:
        default_name = tenant_record.asr_id
    elif type_value == LLMType.IMAGE2TEXT.value:
        default_name = tenant_record.img2txt_id
    elif type_value == LLMType.CHAT.value:
        default_name = tenant_record.llm_id
    elif type_value == LLMType.RERANK.value:
        default_name = tenant_record.rerank_id
    elif type_value == LLMType.TTS.value:
        default_name = tenant_record.tts_id
    elif type_value == LLMType.OCR.value:
        # No tenant-level default is consulted for OCR; the name must be supplied.
        raise Exception("OCR model name is required")
    else:
        raise Exception(f"Unknown model type {model_type}")

    if not default_name:
        raise Exception(f"No default {model_type} model is set.")
    return get_model_config_by_type_and_name(tenant_id, model_type, default_name)