feat: chatflow support multimodal (#31293)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
wangxiaolei
2026-01-27 00:24:48 +08:00
committed by GitHub
parent 5eaf0c733a
commit e48419937b
14 changed files with 1051 additions and 133 deletions

View File

@@ -236,4 +236,7 @@ class AgentChatAppRunner(AppRunner):
queue_manager=queue_manager,
stream=application_generate_entity.stream,
agent=True,
message_id=message.id,
user_id=application_generate_entity.user_id,
tenant_id=app_config.tenant_id,
)

View File

@@ -1,6 +1,8 @@
import base64
import logging
import time
from collections.abc import Generator, Mapping, Sequence
from mimetypes import guess_extension
from typing import TYPE_CHECKING, Any, Union
from core.app.app_config.entities import ExternalDataVariableEntity, PromptTemplateEntity
@@ -11,10 +13,16 @@ from core.app.entities.app_invoke_entities import (
InvokeFrom,
ModelConfigWithCredentialsEntity,
)
from core.app.entities.queue_entities import QueueAgentMessageEvent, QueueLLMChunkEvent, QueueMessageEndEvent
from core.app.entities.queue_entities import (
QueueAgentMessageEvent,
QueueLLMChunkEvent,
QueueMessageEndEvent,
QueueMessageFileEvent,
)
from core.app.features.annotation_reply.annotation_reply import AnnotationReplyFeature
from core.app.features.hosting_moderation.hosting_moderation import HostingModerationFeature
from core.external_data_tool.external_data_fetch import ExternalDataFetch
from core.file.enums import FileTransferMethod, FileType
from core.memory.token_buffer_memory import TokenBufferMemory
from core.model_manager import ModelInstance
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
@@ -22,6 +30,7 @@ from core.model_runtime.entities.message_entities import (
AssistantPromptMessage,
ImagePromptMessageContent,
PromptMessage,
TextPromptMessageContent,
)
from core.model_runtime.entities.model_entities import ModelPropertyKey
from core.model_runtime.errors.invoke import InvokeBadRequestError
@@ -29,7 +38,10 @@ from core.moderation.input_moderation import InputModeration
from core.prompt.advanced_prompt_transform import AdvancedPromptTransform
from core.prompt.entities.advanced_prompt_entities import ChatModelMessage, CompletionModelPromptTemplate, MemoryConfig
from core.prompt.simple_prompt_transform import ModelMode, SimplePromptTransform
from models.model import App, AppMode, Message, MessageAnnotation
from core.tools.tool_file_manager import ToolFileManager
from extensions.ext_database import db
from models.enums import CreatorUserRole
from models.model import App, AppMode, Message, MessageAnnotation, MessageFile
if TYPE_CHECKING:
from core.file.models import File
@@ -203,6 +215,9 @@ class AppRunner:
queue_manager: AppQueueManager,
stream: bool,
agent: bool = False,
message_id: str | None = None,
user_id: str | None = None,
tenant_id: str | None = None,
):
"""
Handle invoke result
@@ -210,21 +225,41 @@
:param queue_manager: application queue manager
:param stream: stream
:param agent: agent
:param message_id: message id for multimodal output
:param user_id: user id for multimodal output
:param tenant_id: tenant id for multimodal output
:return:
"""
if not stream and isinstance(invoke_result, LLMResult):
self._handle_invoke_result_direct(invoke_result=invoke_result, queue_manager=queue_manager, agent=agent)
self._handle_invoke_result_direct(
invoke_result=invoke_result,
queue_manager=queue_manager,
)
elif stream and isinstance(invoke_result, Generator):
self._handle_invoke_result_stream(invoke_result=invoke_result, queue_manager=queue_manager, agent=agent)
self._handle_invoke_result_stream(
invoke_result=invoke_result,
queue_manager=queue_manager,
agent=agent,
message_id=message_id,
user_id=user_id,
tenant_id=tenant_id,
)
else:
raise NotImplementedError(f"unsupported invoke result type: {type(invoke_result)}")
def _handle_invoke_result_direct(self, invoke_result: LLMResult, queue_manager: AppQueueManager, agent: bool):
def _handle_invoke_result_direct(
self,
invoke_result: LLMResult,
queue_manager: AppQueueManager,
):
"""
Handle invoke result direct
:param invoke_result: invoke result
:param queue_manager: application queue manager
:return:
"""
queue_manager.publish(
@@ -235,13 +270,22 @@
)
def _handle_invoke_result_stream(
self, invoke_result: Generator[LLMResultChunk, None, None], queue_manager: AppQueueManager, agent: bool
self,
invoke_result: Generator[LLMResultChunk, None, None],
queue_manager: AppQueueManager,
agent: bool,
message_id: str | None = None,
user_id: str | None = None,
tenant_id: str | None = None,
):
"""
Handle invoke result
:param invoke_result: invoke result
:param queue_manager: application queue manager
:param agent: agent
:param message_id: message id for multimodal output
:param user_id: user id for multimodal output
:param tenant_id: tenant id for multimodal output
:return:
"""
model: str = ""
@@ -259,12 +303,26 @@
text += message.content
elif isinstance(message.content, list):
for content in message.content:
if not isinstance(content, str):
# TODO(QuantumGhost): Add multimodal output support for easy ui.
_logger.warning("received multimodal output, type=%s", type(content))
if isinstance(content, str):
text += content
elif isinstance(content, TextPromptMessageContent):
text += content.data
elif isinstance(content, ImagePromptMessageContent):
if message_id and user_id and tenant_id:
try:
self._handle_multimodal_image_content(
content=content,
message_id=message_id,
user_id=user_id,
tenant_id=tenant_id,
queue_manager=queue_manager,
)
except Exception:
_logger.exception("Failed to handle multimodal image output")
else:
_logger.warning("Received multimodal output but missing required parameters")
else:
text += content  # fallback to str
text += content.data if hasattr(content, "data") else str(content)
if not model:
model = result.model
@@ -289,6 +347,101 @@
PublishFrom.APPLICATION_MANAGER,
)
def _handle_multimodal_image_content(
self,
content: ImagePromptMessageContent,
message_id: str,
user_id: str,
tenant_id: str,
queue_manager: AppQueueManager,
):
"""
Handle multimodal image content from LLM response.
Save the image and create a MessageFile record.
:param content: ImagePromptMessageContent instance
:param message_id: message id
:param user_id: user id
:param tenant_id: tenant id
:param queue_manager: queue manager
:return:
"""
_logger.info("Handling multimodal image content for message %s", message_id)
image_url = content.url
base64_data = content.base64_data
_logger.info("Image URL: %s, Base64 data present: %s", image_url, base64_data)
if not image_url and not base64_data:
_logger.warning("Image content has neither URL nor base64 data")
return
tool_file_manager = ToolFileManager()
# Save the image file
try:
if image_url:
# Download image from URL
_logger.info("Downloading image from URL: %s", image_url)
tool_file = tool_file_manager.create_file_by_url(
user_id=user_id,
tenant_id=tenant_id,
file_url=image_url,
conversation_id=None,
)
_logger.info("Image saved successfully, tool_file_id: %s", tool_file.id)
elif base64_data:
if base64_data.startswith("data:"):
base64_data = base64_data.split(",", 1)[1]
image_binary = base64.b64decode(base64_data)
mimetype = content.mime_type or "image/png"
extension = guess_extension(mimetype) or ".png"
tool_file = tool_file_manager.create_file_by_raw(
user_id=user_id,
tenant_id=tenant_id,
conversation_id=None,
file_binary=image_binary,
mimetype=mimetype,
filename=f"generated_image{extension}",
)
_logger.info("Image saved successfully, tool_file_id: %s", tool_file.id)
else:
return
except Exception:
_logger.exception("Failed to save image file")
return
# Create MessageFile record
message_file = MessageFile(
message_id=message_id,
type=FileType.IMAGE,
transfer_method=FileTransferMethod.TOOL_FILE,
belongs_to="assistant",
url=f"/files/tools/{tool_file.id}",
upload_file_id=tool_file.id,
created_by_role=(
CreatorUserRole.ACCOUNT
if queue_manager.invoke_from in {InvokeFrom.DEBUGGER, InvokeFrom.EXPLORE}
else CreatorUserRole.END_USER
),
created_by=user_id,
)
db.session.add(message_file)
db.session.commit()
db.session.refresh(message_file)
# Publish QueueMessageFileEvent
queue_manager.publish(
QueueMessageFileEvent(message_file_id=message_file.id),
PublishFrom.APPLICATION_MANAGER,
)
_logger.info("QueueMessageFileEvent published for message_file_id: %s", message_file.id)
def moderation_for_inputs(
self,
*,

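For context, the base64 branch of _handle_multimodal_image_content boils down to a few stdlib calls before the bytes are handed to ToolFileManager. A minimal standalone sketch of that step (the decode_data_uri helper and the sample data URI are illustrative, not part of the commit):

import base64
from mimetypes import guess_extension

def decode_data_uri(data: str, mime_type: str | None = None) -> tuple[bytes, str]:
    # Strip an optional "data:image/png;base64," prefix, as the handler does.
    if data.startswith("data:"):
        data = data.split(",", 1)[1]
    image_binary = base64.b64decode(data)
    # The handler falls back to image/png when the model reports no MIME type.
    mimetype = mime_type or "image/png"
    extension = guess_extension(mimetype) or ".png"
    return image_binary, f"generated_image{extension}"

# Illustrative input: a shortened PNG header encoded on the fly.
sample = "data:image/png;base64," + base64.b64encode(b"\x89PNG\r\n\x1a\n").decode()
binary, filename = decode_data_uri(sample)  # -> (b"\x89PNG...", "generated_image.png")

The real handler then passes the decoded bytes to ToolFileManager.create_file_by_raw, records a MessageFile row, and publishes a QueueMessageFileEvent, as shown in the hunk above.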
View File

@@ -226,5 +226,10 @@ class ChatAppRunner(AppRunner):
# handle invoke result
self._handle_invoke_result(
invoke_result=invoke_result, queue_manager=queue_manager, stream=application_generate_entity.stream
invoke_result=invoke_result,
queue_manager=queue_manager,
stream=application_generate_entity.stream,
message_id=message.id,
user_id=application_generate_entity.user_id,
tenant_id=app_config.tenant_id,
)

View File

@@ -184,5 +184,10 @@ class CompletionAppRunner(AppRunner):
# handle invoke result
self._handle_invoke_result(
invoke_result=invoke_result, queue_manager=queue_manager, stream=application_generate_entity.stream
invoke_result=invoke_result,
queue_manager=queue_manager,
stream=application_generate_entity.stream,
message_id=message.id,
user_id=application_generate_entity.user_id,
tenant_id=app_config.tenant_id,
)

View File

@@ -39,6 +39,7 @@ from core.app.entities.task_entities import (
MessageAudioEndStreamResponse,
MessageAudioStreamResponse,
MessageEndStreamResponse,
StreamEvent,
StreamResponse,
)
from core.app.task_pipeline.based_generate_task_pipeline import BasedGenerateTaskPipeline
@@ -70,6 +71,7 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline):
_task_state: EasyUITaskState
_application_generate_entity: Union[ChatAppGenerateEntity, CompletionAppGenerateEntity, AgentChatAppGenerateEntity]
_precomputed_event_type: StreamEvent | None = None
def __init__(
self,
@@ -342,11 +344,15 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline):
self._task_state.llm_result.message.content = current_content
if isinstance(event, QueueLLMChunkEvent):
event_type = self._message_cycle_manager.get_message_event_type(message_id=self._message_id)
# Determine the event type once, on first LLM chunk, and reuse for subsequent chunks
if not hasattr(self, "_precomputed_event_type") or self._precomputed_event_type is None:
self._precomputed_event_type = self._message_cycle_manager.get_message_event_type(
message_id=self._message_id
)
yield self._message_cycle_manager.message_to_stream_response(
answer=cast(str, delta_text),
message_id=self._message_id,
event_type=event_type,
event_type=self._precomputed_event_type,
)
else:
yield self._agent_message_to_stream_response(

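The pipeline change above replaces a get_message_event_type call on every chunk with a value resolved once, on the first QueueLLMChunkEvent, and reused for the rest of the stream. The caching pattern in isolation (class and method names here are illustrative only):

class ChunkStreamer:
    _cached_event_type: str | None = None

    def event_type_for(self, message_id: str) -> str:
        # Resolve the stream event type on the first chunk only; later chunks reuse it.
        if self._cached_event_type is None:
            self._cached_event_type = self._lookup_event_type(message_id)
        return self._cached_event_type

    def _lookup_event_type(self, message_id: str) -> str:
        # Stand-in for MessageCycleManager.get_message_event_type, which checks MessageFile rows.
        return "message"

Since get_message_event_type can hit the database, this keeps the lookup cost to a single query per streamed message rather than one per token chunk.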
View File

@@ -5,7 +5,7 @@ from threading import Thread
from typing import Union
from flask import Flask, current_app
from sqlalchemy import exists, select
from sqlalchemy import select
from sqlalchemy.orm import Session
from configs import dify_config
@@ -30,6 +30,7 @@ from core.app.entities.task_entities import (
StreamEvent,
WorkflowTaskState,
)
from core.db.session_factory import session_factory
from core.llm_generator.llm_generator import LLMGenerator
from core.tools.signature import sign_tool_file
from extensions.ext_database import db
@@ -57,13 +58,15 @@ class MessageCycleManager:
self._message_has_file: set[str] = set()
def get_message_event_type(self, message_id: str) -> StreamEvent:
# Fast path: cached determination from prior QueueMessageFileEvent
if message_id in self._message_has_file:
return StreamEvent.MESSAGE_FILE
with Session(db.engine, expire_on_commit=False) as session:
has_file = session.query(exists().where(MessageFile.message_id == message_id)).scalar()
# Use SQLAlchemy 2.x style session.scalar(select(...))
with session_factory.create_session() as session:
message_file = session.scalar(select(MessageFile).where(MessageFile.message_id == message_id))
if has_file:
if message_file:
self._message_has_file.add(message_id)
return StreamEvent.MESSAGE_FILE
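The rewritten check loads a MessageFile row just to test for existence. For comparison, the removed exists() query can also be expressed in 2.x style without materialising a row; a sketch of that alternative (not part of the commit):

from sqlalchemy import exists, select
from models.model import MessageFile

def message_has_file(session, message_id: str) -> bool:
    # Boolean EXISTS query; no MessageFile instance is loaded.
    return bool(session.scalar(select(exists().where(MessageFile.message_id == message_id))))

Either way, the result is cached in _message_has_file, so subsequent chunks for the same message skip the query entirely.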
@@ -199,6 +202,8 @@ class MessageCycleManager:
message_file = session.scalar(select(MessageFile).where(MessageFile.id == event.message_file_id))
if message_file and message_file.url is not None:
self._message_has_file.add(message_file.message_id)
# get tool file id
tool_file_id = message_file.url.split("/")[-1]
# trim extension