Merge branch 'feat/agent-node-v2' into feat/support-agent-sandbox

This commit is contained in:
Novice
2026-01-09 14:19:27 +08:00
294 changed files with 14621 additions and 3601 deletions

View File

@@ -20,6 +20,8 @@ from core.app.entities.queue_entities import (
QueueTextChunkEvent,
)
from core.app.features.annotation_reply.annotation_reply import AnnotationReplyFeature
from core.app.layers.conversation_variable_persist_layer import ConversationVariablePersistenceLayer
from core.db.session_factory import session_factory
from core.moderation.base import ModerationError
from core.moderation.input_moderation import InputModeration
from core.variables.variables import VariableUnion
@@ -40,6 +42,7 @@ from models import Workflow
from models.enums import UserFrom
from models.model import App, Conversation, Message, MessageAnnotation
from models.workflow import ConversationVariable
from services.conversation_variable_updater import ConversationVariableUpdater
logger = logging.getLogger(__name__)
@@ -200,6 +203,10 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner):
)
workflow_entry.graph_engine.layer(persistence_layer)
conversation_variable_layer = ConversationVariablePersistenceLayer(
ConversationVariableUpdater(session_factory.get_session_maker())
)
workflow_entry.graph_engine.layer(conversation_variable_layer)
for layer in self._graph_engine_layers:
workflow_entry.graph_engine.layer(layer)

View File

@@ -4,6 +4,7 @@ import re
import time
from collections.abc import Callable, Generator, Mapping
from contextlib import contextmanager
from dataclasses import dataclass, field
from threading import Thread
from typing import Any, Union
@@ -19,6 +20,7 @@ from core.app.entities.app_invoke_entities import (
InvokeFrom,
)
from core.app.entities.queue_entities import (
ChunkType,
MessageQueueMessage,
QueueAdvancedChatMessageEndEvent,
QueueAgentLogEvent,
@@ -70,13 +72,122 @@ from core.workflow.runtime import GraphRuntimeState
from core.workflow.system_variable import SystemVariable
from extensions.ext_database import db
from libs.datetime_utils import naive_utc_now
from models import Account, Conversation, EndUser, Message, MessageFile
from models import Account, Conversation, EndUser, LLMGenerationDetail, Message, MessageFile
from models.enums import CreatorUserRole
from models.workflow import Workflow
logger = logging.getLogger(__name__)
@dataclass
class StreamEventBuffer:
"""
Buffer for recording stream events in order to reconstruct the generation sequence.
Records the exact order of text chunks, thoughts, and tool calls as they stream.
"""
# Accumulated reasoning content (each thought block is a separate element)
reasoning_content: list[str] = field(default_factory=list)
# Current reasoning buffer (accumulates until we see a different event type)
_current_reasoning: str = ""
# Tool calls with their details
tool_calls: list[dict] = field(default_factory=list)
# Tool call ID to index mapping for updating results
_tool_call_id_map: dict[str, int] = field(default_factory=dict)
# Sequence of events in stream order
sequence: list[dict] = field(default_factory=list)
# Current position in answer text
_content_position: int = 0
# Track last event type to detect transitions
_last_event_type: str | None = None
def _flush_current_reasoning(self) -> None:
"""Flush accumulated reasoning to the list and add to sequence."""
if self._current_reasoning.strip():
self.reasoning_content.append(self._current_reasoning.strip())
self.sequence.append({"type": "reasoning", "index": len(self.reasoning_content) - 1})
self._current_reasoning = ""
def record_text_chunk(self, text: str) -> None:
"""Record a text chunk event."""
if not text:
return
# Flush any pending reasoning first
if self._last_event_type == "thought":
self._flush_current_reasoning()
text_len = len(text)
start_pos = self._content_position
# If last event was also content, extend it; otherwise create new
if self.sequence and self.sequence[-1].get("type") == "content":
self.sequence[-1]["end"] = start_pos + text_len
else:
self.sequence.append({"type": "content", "start": start_pos, "end": start_pos + text_len})
self._content_position += text_len
self._last_event_type = "content"
def record_thought_chunk(self, text: str) -> None:
"""Record a thought/reasoning chunk event."""
if not text:
return
# Accumulate thought content
self._current_reasoning += text
self._last_event_type = "thought"
def record_tool_call(self, tool_call_id: str, tool_name: str, tool_arguments: str) -> None:
"""Record a tool call event."""
if not tool_call_id:
return
# Flush any pending reasoning first
if self._last_event_type == "thought":
self._flush_current_reasoning()
# Check if this tool call already exists (we might get multiple chunks)
if tool_call_id in self._tool_call_id_map:
idx = self._tool_call_id_map[tool_call_id]
# Update arguments if provided
if tool_arguments:
self.tool_calls[idx]["arguments"] = tool_arguments
else:
# New tool call
tool_call = {
"id": tool_call_id or "",
"name": tool_name or "",
"arguments": tool_arguments or "",
"result": "",
"elapsed_time": None,
}
self.tool_calls.append(tool_call)
idx = len(self.tool_calls) - 1
self._tool_call_id_map[tool_call_id] = idx
self.sequence.append({"type": "tool_call", "index": idx})
self._last_event_type = "tool_call"
def record_tool_result(self, tool_call_id: str, result: str, tool_elapsed_time: float | None = None) -> None:
"""Record a tool result event (update existing tool call)."""
if not tool_call_id:
return
if tool_call_id in self._tool_call_id_map:
idx = self._tool_call_id_map[tool_call_id]
self.tool_calls[idx]["result"] = result
self.tool_calls[idx]["elapsed_time"] = tool_elapsed_time
def finalize(self) -> None:
"""Finalize the buffer, flushing any pending data."""
if self._last_event_type == "thought":
self._flush_current_reasoning()
def has_data(self) -> bool:
"""Check if there's any meaningful data recorded."""
return bool(self.reasoning_content or self.tool_calls or self.sequence)
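A minimal usage sketch (hypothetical inputs, not part of this diff): the buffer flushes pending reasoning whenever the event type changes, so the recorded sequence mirrors stream order.
buf = StreamEventBuffer()
buf.record_thought_chunk("I should call the weather tool.")
buf.record_tool_call("call-1", "get_weather", '{"city": "Paris"}')
buf.record_tool_result("call-1", "Sunny, 21°C", tool_elapsed_time=0.4)
buf.record_text_chunk("It is sunny in Paris.")
buf.finalize()
# The thought is flushed when the tool call arrives, preserving stream order:
assert buf.sequence == [
    {"type": "reasoning", "index": 0},
    {"type": "tool_call", "index": 0},
    {"type": "content", "start": 0, "end": 21},
]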
class AdvancedChatAppGenerateTaskPipeline(GraphRuntimeStateSupport):
"""
AdvancedChatAppGenerateTaskPipeline generates stream output and manages state for the application.
@@ -144,6 +255,8 @@ class AdvancedChatAppGenerateTaskPipeline(GraphRuntimeStateSupport):
self._workflow_run_id: str = ""
self._draft_var_saver_factory = draft_var_saver_factory
self._graph_runtime_state: GraphRuntimeState | None = None
# Stream event buffer for recording generation sequence
self._stream_buffer = StreamEventBuffer()
self._seed_graph_runtime_state_from_queue_manager()
def process(self) -> Union[ChatbotAppBlockingResponse, Generator[ChatbotAppStreamResponse, None, None]]:
@@ -358,6 +471,25 @@ class AdvancedChatAppGenerateTaskPipeline(GraphRuntimeStateSupport):
if node_finish_resp:
yield node_finish_resp
# For ANSWER nodes, check if we need to send a message_replace event
# Only send if the final output differs from the accumulated task_state.answer
# This happens when variables were updated by variable_assigner during workflow execution
if event.node_type == NodeType.ANSWER and event.outputs:
final_answer = event.outputs.get("answer")
if final_answer is not None and final_answer != self._task_state.answer:
logger.info(
"ANSWER node final output '%s' differs from accumulated answer '%s', sending message_replace event",
final_answer,
self._task_state.answer,
)
# Update the task state answer
self._task_state.answer = str(final_answer)
# Send message_replace event to update the UI
yield self._message_cycle_manager.message_replace_to_stream_response(
answer=str(final_answer),
reason="variable_update",
)
def _handle_node_failed_events(
self,
event: Union[QueueNodeFailedEvent, QueueNodeExceptionEvent],
@@ -383,7 +515,7 @@ class AdvancedChatAppGenerateTaskPipeline(GraphRuntimeStateSupport):
queue_message: Union[WorkflowQueueMessage, MessageQueueMessage] | None = None,
**kwargs,
) -> Generator[StreamResponse, None, None]:
"""Handle text chunk events."""
"""Handle text chunk events and record to stream buffer for sequence reconstruction."""
delta_text = event.text
if delta_text is None:
return
@@ -405,9 +537,52 @@ class AdvancedChatAppGenerateTaskPipeline(GraphRuntimeStateSupport):
if tts_publisher and queue_message:
tts_publisher.publish(queue_message)
self._task_state.answer += delta_text
tool_call = event.tool_call
tool_result = event.tool_result
tool_payload = tool_call or tool_result
tool_call_id = tool_payload.id if tool_payload and tool_payload.id else ""
tool_name = tool_payload.name if tool_payload and tool_payload.name else ""
tool_arguments = tool_call.arguments if tool_call and tool_call.arguments else ""
tool_files = tool_result.files if tool_result else []
tool_elapsed_time = tool_result.elapsed_time if tool_result else None
tool_icon = tool_payload.icon if tool_payload else None
tool_icon_dark = tool_payload.icon_dark if tool_payload else None
# Record stream event based on chunk type
chunk_type = event.chunk_type or ChunkType.TEXT
match chunk_type:
case ChunkType.TEXT:
self._stream_buffer.record_text_chunk(delta_text)
self._task_state.answer += delta_text
case ChunkType.THOUGHT:
# Reasoning should not be part of final answer text
self._stream_buffer.record_thought_chunk(delta_text)
case ChunkType.TOOL_CALL:
self._stream_buffer.record_tool_call(
tool_call_id=tool_call_id,
tool_name=tool_name,
tool_arguments=tool_arguments,
)
case ChunkType.TOOL_RESULT:
self._stream_buffer.record_tool_result(
tool_call_id=tool_call_id,
result=delta_text,
tool_elapsed_time=tool_elapsed_time,
)
self._task_state.answer += delta_text
case _:
pass
yield self._message_cycle_manager.message_to_stream_response(
answer=delta_text, message_id=self._message_id, from_variable_selector=event.from_variable_selector
answer=delta_text,
message_id=self._message_id,
from_variable_selector=event.from_variable_selector,
chunk_type=event.chunk_type.value if event.chunk_type else None,
tool_call_id=tool_call_id or None,
tool_name=tool_name or None,
tool_arguments=tool_arguments or None,
tool_files=tool_files,
tool_elapsed_time=tool_elapsed_time,
tool_icon=tool_icon,
tool_icon_dark=tool_icon_dark,
)
def _handle_iteration_start_event(
@@ -775,6 +950,7 @@ class AdvancedChatAppGenerateTaskPipeline(GraphRuntimeStateSupport):
# If there are assistant files, remove markdown image links from answer
answer_text = self._task_state.answer
answer_text = self._strip_think_blocks(answer_text)
if self._recorded_files:
# Remove markdown image links since we're storing files separately
answer_text = re.sub(r"!\[.*?\]\(.*?\)", "", answer_text).strip()
@@ -826,6 +1002,54 @@ class AdvancedChatAppGenerateTaskPipeline(GraphRuntimeStateSupport):
]
session.add_all(message_files)
# Save generation detail (reasoning/tool calls/sequence) from stream buffer
self._save_generation_detail(session=session, message=message)
@staticmethod
def _strip_think_blocks(text: str) -> str:
"""Remove <think>...</think> blocks (including their content) from text."""
if not text or "<think" not in text.lower():
return text
clean_text = re.sub(r"<think[^>]*>.*?</think>", "", text, flags=re.IGNORECASE | re.DOTALL)
clean_text = re.sub(r"\n\s*\n", "\n\n", clean_text).strip()
return clean_text
def _save_generation_detail(self, *, session: Session, message: Message) -> None:
"""
Save LLM generation detail for Chatflow using the stream event buffer.
The buffer records the exact order of events as they streamed,
allowing accurate reconstruction of the generation sequence.
"""
# Finalize the stream buffer to flush any pending data
self._stream_buffer.finalize()
# Only save if there's meaningful data
if not self._stream_buffer.has_data():
return
reasoning_content = self._stream_buffer.reasoning_content
tool_calls = self._stream_buffer.tool_calls
sequence = self._stream_buffer.sequence
# Check if generation detail already exists for this message
existing = session.query(LLMGenerationDetail).filter_by(message_id=message.id).first()
if existing:
existing.reasoning_content = json.dumps(reasoning_content) if reasoning_content else None
existing.tool_calls = json.dumps(tool_calls) if tool_calls else None
existing.sequence = json.dumps(sequence) if sequence else None
else:
generation_detail = LLMGenerationDetail(
tenant_id=self._application_generate_entity.app_config.tenant_id,
app_id=self._application_generate_entity.app_config.app_id,
message_id=message.id,
reasoning_content=json.dumps(reasoning_content) if reasoning_content else None,
tool_calls=json.dumps(tool_calls) if tool_calls else None,
sequence=json.dumps(sequence) if sequence else None,
)
session.add(generation_detail)
def _seed_graph_runtime_state_from_queue_manager(self) -> None:
"""Bootstrap the cached runtime state from the queue manager when present."""
candidate = self._base_task_pipeline.queue_manager.graph_runtime_state

View File

@@ -3,10 +3,8 @@ from typing import cast
from sqlalchemy import select
from core.agent.cot_chat_agent_runner import CotChatAgentRunner
from core.agent.cot_completion_agent_runner import CotCompletionAgentRunner
from core.agent.agent_app_runner import AgentAppRunner
from core.agent.entities import AgentEntity
from core.agent.fc_agent_runner import FunctionCallAgentRunner
from core.app.apps.agent_chat.app_config_manager import AgentChatAppConfig
from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
from core.app.apps.base_app_runner import AppRunner
@@ -14,8 +12,7 @@ from core.app.entities.app_invoke_entities import AgentChatAppGenerateEntity
from core.app.entities.queue_entities import QueueAnnotationReplyEvent
from core.memory.token_buffer_memory import TokenBufferMemory
from core.model_manager import ModelInstance
from core.model_runtime.entities.llm_entities import LLMMode
from core.model_runtime.entities.model_entities import ModelFeature, ModelPropertyKey
from core.model_runtime.entities.model_entities import ModelFeature
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from core.moderation.base import ModerationError
from extensions.ext_database import db
@@ -194,22 +191,7 @@ class AgentChatAppRunner(AppRunner):
raise ValueError("Message not found")
db.session.close()
runner_cls: type[FunctionCallAgentRunner] | type[CotChatAgentRunner] | type[CotCompletionAgentRunner]
# start agent runner
if agent_entity.strategy == AgentEntity.Strategy.CHAIN_OF_THOUGHT:
# check LLM mode
if model_schema.model_properties.get(ModelPropertyKey.MODE) == LLMMode.CHAT:
runner_cls = CotChatAgentRunner
elif model_schema.model_properties.get(ModelPropertyKey.MODE) == LLMMode.COMPLETION:
runner_cls = CotCompletionAgentRunner
else:
raise ValueError(f"Invalid LLM mode: {model_schema.model_properties.get(ModelPropertyKey.MODE)}")
elif agent_entity.strategy == AgentEntity.Strategy.FUNCTION_CALLING:
runner_cls = FunctionCallAgentRunner
else:
raise ValueError(f"Invalid agent strategy: {agent_entity.strategy}")
runner = runner_cls(
runner = AgentAppRunner(
tenant_id=app_config.tenant_id,
application_generate_entity=application_generate_entity,
conversation=conversation_result,

View File

@@ -671,7 +671,7 @@ class WorkflowResponseConverter:
task_id=task_id,
data=AgentLogStreamResponse.Data(
node_execution_id=event.node_execution_id,
id=event.id,
message_id=event.id,
parent_id=event.parent_id,
label=event.label,
error=event.error,

View File

@@ -130,7 +130,7 @@ class PipelineGenerator(BaseAppGenerator):
pipeline=pipeline, workflow=workflow, start_node_id=start_node_id
)
documents: list[Document] = []
if invoke_from == InvokeFrom.PUBLISHED and not is_retry and not args.get("original_document_id"):
if invoke_from == InvokeFrom.PUBLISHED_PIPELINE and not is_retry and not args.get("original_document_id"):
from services.dataset_service import DocumentService
for datasource_info in datasource_info_list:
@@ -156,7 +156,7 @@ class PipelineGenerator(BaseAppGenerator):
for i, datasource_info in enumerate(datasource_info_list):
workflow_run_id = str(uuid.uuid4())
document_id = args.get("original_document_id") or None
if invoke_from == InvokeFrom.PUBLISHED and not is_retry:
if invoke_from == InvokeFrom.PUBLISHED_PIPELINE and not is_retry:
document_id = document_id or documents[i].id
document_pipeline_execution_log = DocumentPipelineExecutionLog(
document_id=document_id,

View File

@@ -13,6 +13,7 @@ from core.app.apps.common.workflow_response_converter import WorkflowResponseConverter
from core.app.entities.app_invoke_entities import InvokeFrom, WorkflowAppGenerateEntity
from core.app.entities.queue_entities import (
AppQueueEvent,
ChunkType,
MessageQueueMessage,
QueueAgentLogEvent,
QueueErrorEvent,
@@ -483,11 +484,33 @@ class WorkflowAppGenerateTaskPipeline(GraphRuntimeStateSupport):
if delta_text is None:
return
tool_call = event.tool_call
tool_result = event.tool_result
tool_payload = tool_call or tool_result
tool_call_id = tool_payload.id if tool_payload and tool_payload.id else None
tool_name = tool_payload.name if tool_payload and tool_payload.name else None
tool_arguments = tool_call.arguments if tool_call else None
tool_elapsed_time = tool_result.elapsed_time if tool_result else None
tool_files = tool_result.files if tool_result else []
tool_icon = tool_payload.icon if tool_payload else None
tool_icon_dark = tool_payload.icon_dark if tool_payload else None
# only publish tts message at text chunk streaming
if tts_publisher and queue_message:
tts_publisher.publish(queue_message)
yield self._text_chunk_to_stream_response(delta_text, from_variable_selector=event.from_variable_selector)
yield self._text_chunk_to_stream_response(
text=delta_text,
from_variable_selector=event.from_variable_selector,
chunk_type=event.chunk_type,
tool_call_id=tool_call_id,
tool_name=tool_name,
tool_arguments=tool_arguments,
tool_files=tool_files,
tool_elapsed_time=tool_elapsed_time,
tool_icon=tool_icon,
tool_icon_dark=tool_icon_dark,
)
def _handle_agent_log_event(self, event: QueueAgentLogEvent, **kwargs) -> Generator[StreamResponse, None, None]:
"""Handle agent log events."""
@@ -650,16 +673,61 @@ class WorkflowAppGenerateTaskPipeline(GraphRuntimeStateSupport):
session.add(workflow_app_log)
def _text_chunk_to_stream_response(
self, text: str, from_variable_selector: list[str] | None = None
self,
text: str,
from_variable_selector: list[str] | None = None,
chunk_type: ChunkType | None = None,
tool_call_id: str | None = None,
tool_name: str | None = None,
tool_arguments: str | None = None,
tool_files: list[str] | None = None,
tool_error: str | None = None,
tool_elapsed_time: float | None = None,
tool_icon: str | dict | None = None,
tool_icon_dark: str | dict | None = None,
) -> TextChunkStreamResponse:
"""
Handle completed event.
:param text: text
:return:
"""
from core.app.entities.task_entities import ChunkType as ResponseChunkType
response_chunk_type = ResponseChunkType(chunk_type.value) if chunk_type else ResponseChunkType.TEXT
data = TextChunkStreamResponse.Data(
text=text,
from_variable_selector=from_variable_selector,
chunk_type=response_chunk_type,
)
if response_chunk_type == ResponseChunkType.TOOL_CALL:
data = data.model_copy(
update={
"tool_call_id": tool_call_id,
"tool_name": tool_name,
"tool_arguments": tool_arguments,
"tool_icon": tool_icon,
"tool_icon_dark": tool_icon_dark,
}
)
elif response_chunk_type == ResponseChunkType.TOOL_RESULT:
data = data.model_copy(
update={
"tool_call_id": tool_call_id,
"tool_name": tool_name,
"tool_arguments": tool_arguments,
"tool_files": tool_files,
"tool_error": tool_error,
"tool_elapsed_time": tool_elapsed_time,
"tool_icon": tool_icon,
"tool_icon_dark": tool_icon_dark,
}
)
response = TextChunkStreamResponse(
task_id=self._application_generate_entity.task_id,
data=TextChunkStreamResponse.Data(text=text, from_variable_selector=from_variable_selector),
data=data,
)
return response

View File

@@ -455,12 +455,20 @@ class WorkflowBasedAppRunner:
)
)
elif isinstance(event, NodeRunStreamChunkEvent):
from core.app.entities.queue_entities import ChunkType as QueueChunkType
if event.is_final and not event.chunk:
return
self._publish_event(
QueueTextChunkEvent(
text=event.chunk,
from_variable_selector=list(event.selector),
in_iteration_id=event.in_iteration_id,
in_loop_id=event.in_loop_id,
chunk_type=QueueChunkType(event.chunk_type.value),
tool_call=event.tool_call,
tool_result=event.tool_result,
)
)
elif isinstance(event, NodeRunRetrieverResourceEvent):

View File

@@ -42,7 +42,8 @@ class InvokeFrom(StrEnum):
# DEBUGGER indicates that this invocation is from
# the workflow (or chatflow) edit page.
DEBUGGER = "debugger"
PUBLISHED = "published"
# PUBLISHED_PIPELINE indicates that this invocation runs a published RAG pipeline workflow.
PUBLISHED_PIPELINE = "published"
# VALIDATION indicates that this invocation is from validation.
VALIDATION = "validation"

View File

@@ -0,0 +1,70 @@
"""
LLM Generation Detail entities.
Defines the structure for storing and transmitting LLM generation details
including reasoning content, tool calls, and their sequence.
"""
from typing import Literal
from pydantic import BaseModel, Field
class ContentSegment(BaseModel):
"""Represents a content segment in the generation sequence."""
type: Literal["content"] = "content"
start: int = Field(..., description="Start position in the text")
end: int = Field(..., description="End position in the text")
class ReasoningSegment(BaseModel):
"""Represents a reasoning segment in the generation sequence."""
type: Literal["reasoning"] = "reasoning"
index: int = Field(..., description="Index into reasoning_content array")
class ToolCallSegment(BaseModel):
"""Represents a tool call segment in the generation sequence."""
type: Literal["tool_call"] = "tool_call"
index: int = Field(..., description="Index into tool_calls array")
SequenceSegment = ContentSegment | ReasoningSegment | ToolCallSegment
class ToolCallDetail(BaseModel):
"""Represents a tool call with its arguments and result."""
id: str = Field(default="", description="Unique identifier for the tool call")
name: str = Field(..., description="Name of the tool")
arguments: str = Field(default="", description="JSON string of tool arguments")
result: str = Field(default="", description="Result from the tool execution")
elapsed_time: float | None = Field(default=None, description="Elapsed time in seconds")
class LLMGenerationDetailData(BaseModel):
"""
Domain model for LLM generation detail.
Contains the structured data for reasoning content, tool calls,
and their display sequence.
"""
reasoning_content: list[str] = Field(default_factory=list, description="List of reasoning segments")
tool_calls: list[ToolCallDetail] = Field(default_factory=list, description="List of tool call details")
sequence: list[SequenceSegment] = Field(default_factory=list, description="Display order of segments")
def is_empty(self) -> bool:
"""Check if there's any meaningful generation detail."""
return not self.reasoning_content and not self.tool_calls
def to_response_dict(self) -> dict:
"""Convert to dictionary for API response."""
return {
"reasoning_content": self.reasoning_content,
"tool_calls": [tc.model_dump() for tc in self.tool_calls],
"sequence": [seg.model_dump() for seg in self.sequence],
}
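As an illustration, a client could replay the stored sequence against the final answer text. The sample values below are hypothetical:
detail = LLMGenerationDetailData(
    reasoning_content=["I should call the weather tool."],
    tool_calls=[ToolCallDetail(name="get_weather", arguments='{"city": "Paris"}', result="Sunny, 21°C")],
    sequence=[ReasoningSegment(index=0), ToolCallSegment(index=0), ContentSegment(start=0, end=21)],
)
answer = "It is sunny in Paris."
for seg in detail.sequence:
    # Content segments index into the answer text; the others index side arrays.
    if isinstance(seg, ContentSegment):
        print(answer[seg.start:seg.end])
    elif isinstance(seg, ReasoningSegment):
        print(f"[reasoning] {detail.reasoning_content[seg.index]}")
    else:
        print(f"[tool] {detail.tool_calls[seg.index].name}")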

View File

@@ -7,7 +7,7 @@ from pydantic import BaseModel, ConfigDict, Field
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk
from core.rag.entities.citation_metadata import RetrievalSourceMetadata
from core.workflow.entities import AgentNodeStrategyInit
from core.workflow.entities import AgentNodeStrategyInit, ToolCall, ToolResult
from core.workflow.enums import WorkflowNodeExecutionMetadataKey
from core.workflow.nodes import NodeType
@@ -177,6 +177,17 @@ class QueueLoopCompletedEvent(AppQueueEvent):
error: str | None = None
class ChunkType(StrEnum):
"""Stream chunk type for LLM-related events."""
TEXT = "text" # Normal text streaming
TOOL_CALL = "tool_call" # Tool call arguments streaming
TOOL_RESULT = "tool_result" # Tool execution result
THOUGHT = "thought" # Agent thinking process (ReAct)
THOUGHT_START = "thought_start" # Agent thought start
THOUGHT_END = "thought_end" # Agent thought end
class QueueTextChunkEvent(AppQueueEvent):
"""
QueueTextChunkEvent entity
@@ -191,6 +202,16 @@ class QueueTextChunkEvent(AppQueueEvent):
in_loop_id: str | None = None
"""loop id if node is in loop"""
# Extended fields for Agent/Tool streaming
chunk_type: ChunkType = ChunkType.TEXT
"""type of the chunk"""
# Tool streaming payloads
tool_call: ToolCall | None = None
"""structured tool call info"""
tool_result: ToolResult | None = None
"""structured tool result info"""
class QueueAgentMessageEvent(AppQueueEvent):
"""

View File

@@ -113,6 +113,38 @@ class MessageStreamResponse(StreamResponse):
answer: str
from_variable_selector: list[str] | None = None
# Extended fields for Agent/Tool streaming (typed as plain str/dict to avoid a circular import)
chunk_type: str | None = None
"""type of the chunk: text, tool_call, tool_result, thought"""
# Tool call fields (when chunk_type == "tool_call")
tool_call_id: str | None = None
"""unique identifier for this tool call"""
tool_name: str | None = None
"""name of the tool being called"""
tool_arguments: str | None = None
"""accumulated tool arguments JSON"""
# Tool result fields (when chunk_type == "tool_result")
tool_files: list[str] | None = None
"""file IDs produced by tool"""
tool_error: str | None = None
"""error message if tool failed"""
tool_elapsed_time: float | None = None
"""elapsed time spent executing the tool"""
tool_icon: str | dict | None = None
"""icon of the tool"""
tool_icon_dark: str | dict | None = None
"""dark theme icon of the tool"""
def model_dump(self, *args, **kwargs) -> dict[str, object]:
kwargs.setdefault("exclude_none", True)
return super().model_dump(*args, **kwargs)
def model_dump_json(self, *args, **kwargs) -> str:
kwargs.setdefault("exclude_none", True)
return super().model_dump_json(*args, **kwargs)
class MessageAudioStreamResponse(StreamResponse):
"""
@@ -582,6 +614,17 @@ class LoopNodeCompletedStreamResponse(StreamResponse):
data: Data
class ChunkType(StrEnum):
"""Stream chunk type for LLM-related events."""
TEXT = "text" # Normal text streaming
TOOL_CALL = "tool_call" # Tool call arguments streaming
TOOL_RESULT = "tool_result" # Tool execution result
THOUGHT = "thought" # Agent thinking process (ReAct)
THOUGHT_START = "thought_start" # Agent thought start
THOUGHT_END = "thought_end" # Agent thought end
class TextChunkStreamResponse(StreamResponse):
"""
TextChunkStreamResponse entity
@@ -595,6 +638,36 @@ class TextChunkStreamResponse(StreamResponse):
text: str
from_variable_selector: list[str] | None = None
# Extended fields for Agent/Tool streaming
chunk_type: ChunkType = ChunkType.TEXT
"""type of the chunk"""
# Tool call fields (when chunk_type == TOOL_CALL)
tool_call_id: str | None = None
"""unique identifier for this tool call"""
tool_name: str | None = None
"""name of the tool being called"""
tool_arguments: str | None = None
"""accumulated tool arguments JSON"""
# Tool result fields (when chunk_type == TOOL_RESULT)
tool_files: list[str] | None = None
"""file IDs produced by tool"""
tool_error: str | None = None
"""error message if tool failed"""
# Tool elapsed time fields (when chunk_type == TOOL_RESULT)
tool_elapsed_time: float | None = None
"""elapsed time spent executing the tool"""
def model_dump(self, *args, **kwargs) -> dict[str, object]:
kwargs.setdefault("exclude_none", True)
return super().model_dump(*args, **kwargs)
def model_dump_json(self, *args, **kwargs) -> str:
kwargs.setdefault("exclude_none", True)
return super().model_dump_json(*args, **kwargs)
event: StreamEvent = StreamEvent.TEXT_CHUNK
data: Data
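Because model_dump and model_dump_json default to exclude_none=True, a plain text chunk serializes without the tool fields. A small sketch (output shown approximately):
data = TextChunkStreamResponse.Data(text="hello")
print(data.model_dump_json())
# → {"text":"hello","chunk_type":"text"} — the None-valued tool_* fields are omitted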
@@ -743,7 +816,7 @@ class AgentLogStreamResponse(StreamResponse):
"""
node_execution_id: str
id: str
message_id: str
label: str
parent_id: str | None = None
error: str | None = None

View File

@@ -0,0 +1,60 @@
import logging
from core.variables import Variable
from core.workflow.constants import CONVERSATION_VARIABLE_NODE_ID
from core.workflow.conversation_variable_updater import ConversationVariableUpdater
from core.workflow.enums import NodeType
from core.workflow.graph_engine.layers.base import GraphEngineLayer
from core.workflow.graph_events import GraphEngineEvent, NodeRunSucceededEvent
from core.workflow.nodes.variable_assigner.common import helpers as common_helpers
logger = logging.getLogger(__name__)
class ConversationVariablePersistenceLayer(GraphEngineLayer):
def __init__(self, conversation_variable_updater: ConversationVariableUpdater) -> None:
super().__init__()
self._conversation_variable_updater = conversation_variable_updater
def on_graph_start(self) -> None:
pass
def on_event(self, event: GraphEngineEvent) -> None:
if not isinstance(event, NodeRunSucceededEvent):
return
if event.node_type != NodeType.VARIABLE_ASSIGNER:
return
if self.graph_runtime_state is None:
return
updated_variables = common_helpers.get_updated_variables(event.node_run_result.process_data) or []
if not updated_variables:
return
conversation_id = self.graph_runtime_state.system_variable.conversation_id
if conversation_id is None:
return
updated_any = False
for item in updated_variables:
selector = item.selector
if len(selector) < 2:
logger.warning("Conversation variable selector invalid. selector=%s", selector)
continue
if selector[0] != CONVERSATION_VARIABLE_NODE_ID:
continue
variable = self.graph_runtime_state.variable_pool.get(selector)
if not isinstance(variable, Variable):
logger.warning(
"Conversation variable not found in variable pool. selector=%s",
selector,
)
continue
self._conversation_variable_updater.update(conversation_id=conversation_id, variable=variable)
updated_any = True
if updated_any:
self._conversation_variable_updater.flush()
def on_graph_end(self, error: Exception | None) -> None:
pass
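For reference, a sketch of how this layer is wired up, mirroring the AdvancedChatAppRunner change earlier in this commit. Registering persistence as a graph engine layer keeps conversation-variable writes out of node execution itself:
updater = ConversationVariableUpdater(session_factory.get_session_maker())
workflow_entry.graph_engine.layer(ConversationVariablePersistenceLayer(updater))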

View File

@@ -1,4 +1,5 @@
import logging
import re
import time
from collections.abc import Generator
from threading import Thread
@@ -58,7 +59,7 @@ from core.prompt.utils.prompt_template_parser import PromptTemplateParser
from events.message_event import message_was_created
from extensions.ext_database import db
from libs.datetime_utils import naive_utc_now
from models.model import AppMode, Conversation, Message, MessageAgentThought
from models.model import AppMode, Conversation, LLMGenerationDetail, Message, MessageAgentThought
logger = logging.getLogger(__name__)
@@ -68,6 +69,8 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline):
EasyUIBasedGenerateTaskPipeline generates stream output and manages state for the application.
"""
_THINK_PATTERN = re.compile(r"<think[^>]*>(.*?)</think>", re.IGNORECASE | re.DOTALL)
_task_state: EasyUITaskState
_application_generate_entity: Union[ChatAppGenerateEntity, CompletionAppGenerateEntity, AgentChatAppGenerateEntity]
@@ -409,11 +412,136 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline):
)
)
# Save LLM generation detail if there's reasoning_content
self._save_generation_detail(session=session, message=message, llm_result=llm_result)
message_was_created.send(
message,
application_generate_entity=self._application_generate_entity,
)
def _save_generation_detail(self, *, session: Session, message: Message, llm_result: LLMResult) -> None:
"""
Save LLM generation detail for Completion/Chat/Agent-Chat applications.
For Agent-Chat, also merges MessageAgentThought records.
"""
import json
reasoning_list: list[str] = []
tool_calls_list: list[dict] = []
sequence: list[dict] = []
answer = message.answer or ""
# Check if this is Agent-Chat mode by looking for agent thoughts
agent_thoughts = (
session.query(MessageAgentThought)
.filter_by(message_id=message.id)
.order_by(MessageAgentThought.position.asc())
.all()
)
if agent_thoughts:
# Agent-Chat mode: merge MessageAgentThought records
content_pos = 0
cleaned_answer_parts: list[str] = []
for thought in agent_thoughts:
# Add thought/reasoning
if thought.thought:
reasoning_text = thought.thought
if "<think" in reasoning_text.lower():
clean_text, extracted_reasoning = self._split_reasoning_from_answer(reasoning_text)
if extracted_reasoning:
reasoning_text = extracted_reasoning
thought.thought = clean_text or extracted_reasoning
reasoning_list.append(reasoning_text)
sequence.append({"type": "reasoning", "index": len(reasoning_list) - 1})
# Add tool calls
if thought.tool:
tool_calls_list.append(
{
"name": thought.tool,
"arguments": thought.tool_input or "",
"result": thought.observation or "",
}
)
sequence.append({"type": "tool_call", "index": len(tool_calls_list) - 1})
# Add answer content if present
if thought.answer:
content_text = thought.answer
if "<think" in content_text.lower():
clean_answer, extracted_reasoning = self._split_reasoning_from_answer(content_text)
if extracted_reasoning:
reasoning_list.append(extracted_reasoning)
sequence.append({"type": "reasoning", "index": len(reasoning_list) - 1})
content_text = clean_answer
thought.answer = clean_answer or content_text
if content_text:
start = content_pos
end = content_pos + len(content_text)
sequence.append({"type": "content", "start": start, "end": end})
content_pos = end
cleaned_answer_parts.append(content_text)
if cleaned_answer_parts:
merged_answer = "".join(cleaned_answer_parts)
message.answer = merged_answer
llm_result.message.content = merged_answer
else:
# Completion/Chat mode: use reasoning_content from llm_result
reasoning_content = llm_result.reasoning_content
if not reasoning_content and answer:
# Extract reasoning from <think> blocks and clean the final answer
clean_answer, reasoning_content = self._split_reasoning_from_answer(answer)
if reasoning_content:
answer = clean_answer
llm_result.message.content = clean_answer
llm_result.reasoning_content = reasoning_content
message.answer = clean_answer
if reasoning_content:
reasoning_list = [reasoning_content]
# Content comes first, then reasoning
if answer:
sequence.append({"type": "content", "start": 0, "end": len(answer)})
sequence.append({"type": "reasoning", "index": 0})
# Only save if there's meaningful generation detail
if not reasoning_list and not tool_calls_list:
return
# Check if generation detail already exists
existing = session.query(LLMGenerationDetail).filter_by(message_id=message.id).first()
if existing:
existing.reasoning_content = json.dumps(reasoning_list) if reasoning_list else None
existing.tool_calls = json.dumps(tool_calls_list) if tool_calls_list else None
existing.sequence = json.dumps(sequence) if sequence else None
else:
generation_detail = LLMGenerationDetail(
tenant_id=self._application_generate_entity.app_config.tenant_id,
app_id=self._application_generate_entity.app_config.app_id,
message_id=message.id,
reasoning_content=json.dumps(reasoning_list) if reasoning_list else None,
tool_calls=json.dumps(tool_calls_list) if tool_calls_list else None,
sequence=json.dumps(sequence) if sequence else None,
)
session.add(generation_detail)
@classmethod
def _split_reasoning_from_answer(cls, text: str) -> tuple[str, str]:
"""
Extract reasoning segments from <think> blocks and return (clean_text, reasoning).
"""
matches = cls._THINK_PATTERN.findall(text)
reasoning_content = "\n".join(match.strip() for match in matches) if matches else ""
clean_text = cls._THINK_PATTERN.sub("", text)
clean_text = re.sub(r"\n\s*\n", "\n\n", clean_text).strip()
return clean_text, reasoning_content or ""
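A self-contained sketch of the split, restating the same pattern for illustration:
import re
THINK_PATTERN = re.compile(r"<think[^>]*>(.*?)</think>", re.IGNORECASE | re.DOTALL)
text = "<think>Check the units first.</think>The answer is 42."
# Captured groups become the reasoning; the blocks are stripped from the answer.
reasoning = "\n".join(m.strip() for m in THINK_PATTERN.findall(text))
clean = re.sub(r"\n\s*\n", "\n\n", THINK_PATTERN.sub("", text)).strip()
assert clean == "The answer is 42."
assert reasoning == "Check the units first."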
def _handle_stop(self, event: QueueStopEvent):
"""
Handle stop.

View File

@@ -232,15 +232,31 @@ class MessageCycleManager:
answer: str,
message_id: str,
from_variable_selector: list[str] | None = None,
chunk_type: str | None = None,
tool_call_id: str | None = None,
tool_name: str | None = None,
tool_arguments: str | None = None,
tool_files: list[str] | None = None,
tool_error: str | None = None,
tool_elapsed_time: float | None = None,
tool_icon: str | dict | None = None,
tool_icon_dark: str | dict | None = None,
event_type: StreamEvent | None = None,
) -> MessageStreamResponse:
"""
Message to stream response.
:param answer: answer
:param message_id: message id
:param from_variable_selector: from variable selector
:param chunk_type: type of the chunk (text, tool_call, tool_result, thought)
:param tool_call_id: unique identifier for this tool call
:param tool_name: name of the tool being called
:param tool_arguments: accumulated tool arguments JSON
:param tool_files: file IDs produced by the tool
:param tool_error: error message if the tool failed
:param tool_elapsed_time: elapsed time spent executing the tool
:param tool_icon: icon of the tool
:param tool_icon_dark: dark theme icon of the tool
:param event_type: stream event to emit (defaults to MESSAGE)
:return:
"""
return MessageStreamResponse(
response = MessageStreamResponse(
task_id=self._application_generate_entity.task_id,
id=message_id,
answer=answer,
@@ -248,6 +264,35 @@ class MessageCycleManager:
event=event_type or StreamEvent.MESSAGE,
)
if chunk_type:
response = response.model_copy(update={"chunk_type": chunk_type})
if chunk_type == "tool_call":
response = response.model_copy(
update={
"tool_call_id": tool_call_id,
"tool_name": tool_name,
"tool_arguments": tool_arguments,
"tool_icon": tool_icon,
"tool_icon_dark": tool_icon_dark,
}
)
elif chunk_type == "tool_result":
response = response.model_copy(
update={
"tool_call_id": tool_call_id,
"tool_name": tool_name,
"tool_arguments": tool_arguments,
"tool_files": tool_files,
"tool_error": tool_error,
"tool_elapsed_time": tool_elapsed_time,
"tool_icon": tool_icon,
"tool_icon_dark": tool_icon_dark,
}
)
return response
def message_replace_to_stream_response(self, answer: str, reason: str = "") -> MessageReplaceStreamResponse:
"""
Message replace to stream response.