feat: stream generation output

Author: Novice
Date: 2025-12-09 16:22:17 +08:00
parent 2b23c43434
commit 2d2ce5df85
14 changed files with 160 additions and 104 deletions

View File

@@ -441,7 +441,6 @@ class ResponseStreamCoordinator:
tool_arguments=event.tool_arguments,
tool_files=event.tool_files,
tool_error=event.tool_error,
round_index=event.round_index,
)
events.append(updated_event)
else:

View File

@@ -51,9 +51,6 @@ class NodeRunStreamChunkEvent(GraphNodeEventBase):
tool_files: list[str] = Field(default_factory=list, description="file IDs produced by tool")
tool_error: str | None = Field(default=None, description="error message if tool failed")
# Thought fields (when chunk_type == THOUGHT)
round_index: int | None = Field(default=None, description="current iteration round")
class NodeRunRetrieverResourceEvent(GraphNodeEventBase):
retriever_resources: Sequence[RetrievalSourceMetadata] = Field(..., description="retriever resources")

View File

@@ -74,7 +74,6 @@ class ThoughtChunkEvent(StreamChunkEvent):
"""Agent thought streaming event - Agent thinking process (ReAct)."""
chunk_type: ChunkType = Field(default=ChunkType.THOUGHT, frozen=True)
round_index: int = Field(default=1, description="current iteration round")
class StreamCompletedEvent(NodeEventBase):
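
Note: after this removal, thought chunks carry no per-round counter. A self-contained sketch of the resulting event shape (ChunkType and StreamChunkEvent here are stand-ins for the real base types, which this diff does not show in full):

from enum import Enum
from pydantic import BaseModel, Field

class ChunkType(str, Enum):  # stand-in for the module's enum
    TEXT = "text"
    THOUGHT = "thought"

class StreamChunkEvent(BaseModel):  # stand-in for the real base event
    selector: list[str]
    chunk: str
    is_final: bool = False
    chunk_type: ChunkType = ChunkType.TEXT

class ThoughtChunkEvent(StreamChunkEvent):
    # frozen=True pins the chunk type; with round_index gone, consumers
    # can no longer read a per-round counter off thought chunks.
    chunk_type: ChunkType = Field(default=ChunkType.THOUGHT, frozen=True)

event = ThoughtChunkEvent(selector=["node", "generation", "thought"], chunk="hmm")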

View File

@@ -598,7 +598,6 @@ class Node(Generic[NodeDataT]):
chunk=event.chunk,
is_final=event.is_final,
chunk_type=ChunkType.THOUGHT,
round_index=event.round_index,
)
@_dispatch.register
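
Note: the @_dispatch.register decorator above suggests single-dispatch fan-out by event type. A minimal sketch of that pattern with stand-in types (not the real Node internals):

from dataclasses import dataclass
from functools import singledispatch

@dataclass
class ThoughtChunk:  # stand-in event type
    chunk: str
    is_final: bool = False

@singledispatch
def dispatch(event) -> None:
    raise TypeError(f"unhandled event: {type(event).__name__}")

@dispatch.register
def _(event: ThoughtChunk) -> None:
    # Re-emit as a THOUGHT-typed stream chunk; per this commit,
    # round_index is no longer copied across.
    print("THOUGHT", event.chunk, event.is_final)

dispatch(ThoughtChunk("pondering"))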

View File

@@ -277,7 +277,7 @@ class LLMNode(Node[LLMNodeData]):
structured_output: LLMStructuredOutput | None = None
for event in generator:
if isinstance(event, StreamChunkEvent):
if isinstance(event, (StreamChunkEvent, ThoughtChunkEvent)):
yield event
elif isinstance(event, ModelInvokeCompletedEvent):
# Raw text
@@ -340,6 +340,16 @@ class LLMNode(Node[LLMNodeData]):
chunk="",
is_final=True,
)
yield StreamChunkEvent(
selector=[self._node_id, "generation", "content"],
chunk="",
is_final=True,
)
yield ThoughtChunkEvent(
selector=[self._node_id, "generation", "thought"],
chunk="",
is_final=True,
)
yield StreamCompletedEvent(
node_run_result=NodeRunResult(
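
Note: the empty is_final=True chunks above exist so every generation sub-stream gets an explicit terminator even when no content was produced. A hypothetical consumer that relies on this contract:

from collections import defaultdict

buffers: dict[tuple[str, ...], list[str]] = defaultdict(list)

def on_chunk(selector: list[str], chunk: str, is_final: bool) -> None:
    # Buffer per selector; an empty chunk with is_final=True still
    # closes the sub-stream, so no selector is left dangling.
    key = tuple(selector)
    if chunk:
        buffers[key].append(chunk)
    if is_final:
        print("/".join(key), "closed:", "".join(buffers.pop(key, [])))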
@@ -470,6 +480,8 @@ class LLMNode(Node[LLMNodeData]):
usage = LLMUsage.empty_usage()
finish_reason = None
full_text_buffer = io.StringIO()
think_parser = llm_utils.ThinkTagStreamParser()
reasoning_chunks: list[str] = []
# Initialize streaming metrics tracking
start_time = request_start_time if request_start_time is not None else time.perf_counter()
@@ -498,12 +510,32 @@ class LLMNode(Node[LLMNodeData]):
has_content = True
full_text_buffer.write(text_part)
# Text output: always forward raw chunk (keep <think> tags intact)
yield StreamChunkEvent(
selector=[node_id, "text"],
chunk=text_part,
is_final=False,
)
# Generation output: split out thoughts, forward only non-thought content chunks
for kind, segment in think_parser.process(text_part):
if not segment:
continue
if kind == "thought":
reasoning_chunks.append(segment)
yield ThoughtChunkEvent(
selector=[node_id, "generation", "thought"],
chunk=segment,
is_final=False,
)
else:
yield StreamChunkEvent(
selector=[node_id, "generation", "content"],
chunk=segment,
is_final=False,
)
# Update the overall metadata from this chunk
if not model and result.model:
model = result.model
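
Note: llm_utils.ThinkTagStreamParser is not shown in this diff. A minimal sketch of the interface the loop above relies on, assuming it splits streamed text on <think>/</think> boundaries and handles tags broken across chunks (the method names match the call sites; the internals are guesswork):

class ThinkTagStreamParser:
    OPEN, CLOSE = "<think>", "</think>"

    def __init__(self) -> None:
        self._buf = ""
        self._in_think = False

    def process(self, chunk: str):
        # Yield ("thought", seg) or ("content", seg) pairs as soon as they
        # can be classified; hold back any suffix that may be a split tag.
        self._buf += chunk
        while self._buf:
            tag = self.CLOSE if self._in_think else self.OPEN
            idx = self._buf.find(tag)
            if idx != -1:
                segment, self._buf = self._buf[:idx], self._buf[idx + len(tag):]
                if segment:
                    yield ("thought" if self._in_think else "content", segment)
                self._in_think = not self._in_think
                continue
            keep = 0
            for k in range(min(len(tag) - 1, len(self._buf)), 0, -1):
                if tag.startswith(self._buf[-k:]):
                    keep = k
                    break
            cut = len(self._buf) - keep
            emit, self._buf = self._buf[:cut], self._buf[cut:]
            if emit:
                yield ("thought" if self._in_think else "content", emit)
            break

    def flush(self):
        # End of stream: whatever is buffered belongs to the current state.
        if self._buf:
            yield ("thought" if self._in_think else "content", self._buf)
            self._buf = ""

p = ThinkTagStreamParser()
parts = list(p.process("<thi")) + list(p.process("nk>hm</think>hi")) + list(p.flush())
# parts == [("thought", "hm"), ("content", "hi")]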
@@ -518,16 +550,35 @@ class LLMNode(Node[LLMNodeData]):
except OutputParserError as e:
raise LLMNodeError(f"Failed to parse structured output: {e}")
for kind, segment in think_parser.flush():
if not segment:
continue
if kind == "thought":
reasoning_chunks.append(segment)
yield ThoughtChunkEvent(
selector=[node_id, "generation", "thought"],
chunk=segment,
is_final=False,
)
else:
yield StreamChunkEvent(
selector=[node_id, "generation", "content"],
chunk=segment,
is_final=False,
)
# Extract reasoning content from <think> tags in the main text
full_text = full_text_buffer.getvalue()
if reasoning_format == "tagged":
# Keep <think> tags in text for backward compatibility
clean_text = full_text
reasoning_content = ""
reasoning_content = "".join(reasoning_chunks)
else:
# Extract clean text and reasoning from <think> tags
clean_text, reasoning_content = LLMNode._split_reasoning(full_text, reasoning_format)
if reasoning_chunks and not reasoning_content:
reasoning_content = "".join(reasoning_chunks)
# Calculate streaming metrics
end_time = time.perf_counter()
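
Note: LLMNode._split_reasoning is outside this hunk. For the non-tagged formats, a plausible stand-in that strips <think> spans from the buffered text (hypothetical helper; the regex semantics are an assumption):

import re

_THINK_RE = re.compile(r"<think>(.*?)</think>", re.DOTALL)

def split_reasoning(full_text: str) -> tuple[str, str]:
    # Return (clean_text, reasoning_content): the text with <think> spans
    # removed, plus the concatenated contents of those spans.
    reasoning = "".join(m.group(1) for m in _THINK_RE.finditer(full_text))
    clean = _THINK_RE.sub("", full_text)
    return clean, reasoning

clean, reasoning = split_reasoning("<think>hm</think>answer")
# clean == "answer", reasoning == "hm"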
@@ -1398,8 +1449,6 @@ class LLMNode(Node[LLMNodeData]):
finish_reason = None
agent_result: AgentResult | None = None
# Track current round for ThoughtChunkEvent
current_round = 1
think_parser = llm_utils.ThinkTagStreamParser()
reasoning_chunks: list[str] = []
@@ -1431,12 +1480,6 @@ class LLMNode(Node[LLMNodeData]):
else:
agent_logs.append(agent_log_event)
# Extract round number from ROUND log type
if output.log_type == AgentLog.LogType.ROUND:
round_index = output.data.get("round_index")
if isinstance(round_index, int):
current_round = round_index
# Emit tool call events when tool call starts
if output.log_type == AgentLog.LogType.TOOL_CALL and output.status == AgentLog.LogStatus.START:
tool_name = output.data.get("tool_name", "")
@@ -1450,26 +1493,34 @@ class LLMNode(Node[LLMNodeData]):
tool_call_id=tool_call_id,
tool_name=tool_name,
tool_arguments=tool_arguments,
is_final=True,
is_final=False,
)
# Emit tool result events when tool call completes
if output.log_type == AgentLog.LogType.TOOL_CALL and output.status == AgentLog.LogStatus.SUCCESS:
# Emit tool result events when tool call completes (both success and error)
if output.log_type == AgentLog.LogType.TOOL_CALL and output.status != AgentLog.LogStatus.START:
tool_name = output.data.get("tool_name", "")
tool_output = output.data.get("output", "")
tool_call_id = output.data.get("tool_call_id", "")
tool_files = []
tool_error = None
# Extract file IDs if present
# Extract file IDs if present (only for success case)
files_data = output.data.get("files")
if files_data and isinstance(files_data, list):
tool_files = files_data
# Check for error in meta
meta = output.data.get("meta")
if meta and isinstance(meta, dict) and meta.get("error"):
tool_error = meta.get("error")
# Check for error from multiple sources
if output.status == AgentLog.LogStatus.ERROR:
# Priority: output.error > data.error > meta.error
tool_error = output.error or output.data.get("error")
meta = output.data.get("meta")
if not tool_error and meta and isinstance(meta, dict):
tool_error = meta.get("error")
else:
# For success case, check meta for potential errors
meta = output.data.get("meta")
if meta and isinstance(meta, dict) and meta.get("error"):
tool_error = meta.get("error")
yield ToolResultChunkEvent(
selector=[self._node_id, "generation", "tool_results"],
@@ -1478,7 +1529,7 @@ class LLMNode(Node[LLMNodeData]):
tool_name=tool_name,
tool_files=tool_files,
tool_error=tool_error,
is_final=True,
is_final=False,
)
elif isinstance(output, LLMResultChunk):
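
Note: the error-extraction branches above reduce to a single priority chain. A self-contained sketch with stand-in types (the real AgentLog is not shown in this diff):

from dataclasses import dataclass, field
from enum import Enum
from typing import Any

class LogStatus(Enum):  # stand-in for AgentLog.LogStatus
    START = "start"
    SUCCESS = "success"
    ERROR = "error"

@dataclass
class ToolLog:  # stand-in for the agent log output object
    status: LogStatus
    error: str | None = None
    data: dict[str, Any] = field(default_factory=dict)

def extract_tool_error(output: ToolLog) -> str | None:
    # ERROR status: output.error > data["error"] > data["meta"]["error"].
    # Any other status: only data["meta"]["error"] can report a failure.
    meta = output.data.get("meta")
    meta_error = meta.get("error") if isinstance(meta, dict) else None
    if output.status is LogStatus.ERROR:
        return output.error or output.data.get("error") or meta_error
    return meta_error

log = ToolLog(status=LogStatus.ERROR, data={"error": "boom"})
# extract_tool_error(log) == "boom"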
@@ -1502,7 +1553,6 @@ class LLMNode(Node[LLMNodeData]):
yield ThoughtChunkEvent(
selector=[self._node_id, "generation", "thought"],
chunk=segment,
round_index=current_round,
is_final=False,
)
else:
@@ -1548,7 +1598,6 @@ class LLMNode(Node[LLMNodeData]):
yield ThoughtChunkEvent(
selector=[self._node_id, "generation", "thought"],
chunk=segment,
round_index=current_round,
is_final=False,
)
else:
@@ -1580,7 +1629,27 @@ class LLMNode(Node[LLMNodeData]):
yield ThoughtChunkEvent(
selector=[self._node_id, "generation", "thought"],
chunk="",
round_index=current_round,
is_final=True,
)
# Close tool_calls stream (already sent via ToolCallChunkEvent)
yield ToolCallChunkEvent(
selector=[self._node_id, "generation", "tool_calls"],
chunk="",
tool_call_id="",
tool_name="",
tool_arguments="",
is_final=True,
)
# Close tool_results stream (already sent via ToolResultChunkEvent)
yield ToolResultChunkEvent(
selector=[self._node_id, "generation", "tool_results"],
chunk="",
tool_call_id="",
tool_name="",
tool_files=[],
tool_error=None,
is_final=True,
)
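
Note: taken together, the agent run now terminates every generation sub-stream explicitly. A quick enumeration of the selectors closed with an empty is_final=True chunk (node_id is a placeholder; the selector paths are taken from the diff):

node_id = "llm"
closed = [
    [node_id, "generation", "content"],
    [node_id, "generation", "thought"],
    [node_id, "generation", "tool_calls"],
    [node_id, "generation", "tool_results"],
]
for sel in closed:
    print("/".join(sel), "-> closed with empty final chunk")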