feat: stream generation output

Author: Novice
Date: 2025-12-09 16:22:17 +08:00
parent 2b23c43434
commit 2d2ce5df85
14 changed files with 160 additions and 104 deletions

View File

@@ -441,7 +441,6 @@ class ResponseStreamCoordinator:
tool_arguments=event.tool_arguments,
tool_files=event.tool_files,
tool_error=event.tool_error,
round_index=event.round_index,
)
events.append(updated_event)
else:

View File

@@ -51,9 +51,6 @@ class NodeRunStreamChunkEvent(GraphNodeEventBase):
tool_files: list[str] = Field(default_factory=list, description="file IDs produced by tool")
tool_error: str | None = Field(default=None, description="error message if tool failed")
# Thought fields (when chunk_type == THOUGHT)
round_index: int | None = Field(default=None, description="current iteration round")
class NodeRunRetrieverResourceEvent(GraphNodeEventBase):
retriever_resources: Sequence[RetrievalSourceMetadata] = Field(..., description="retriever resources")

View File

@@ -74,7 +74,6 @@ class ThoughtChunkEvent(StreamChunkEvent):
"""Agent thought streaming event - Agent thinking process (ReAct)."""
chunk_type: ChunkType = Field(default=ChunkType.THOUGHT, frozen=True)
round_index: int = Field(default=1, description="current iteration round")
class StreamCompletedEvent(NodeEventBase):
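
Note: after this removal, thought chunks carry no per-round counter. A self-contained sketch of the resulting event shape (ChunkType and StreamChunkEvent here are stand-ins for the real base types, which this diff does not show in full):

from enum import Enum
from pydantic import BaseModel, Field

class ChunkType(str, Enum):  # stand-in for the module's enum
    TEXT = "text"
    THOUGHT = "thought"

class StreamChunkEvent(BaseModel):  # stand-in for the real base event
    selector: list[str]
    chunk: str
    is_final: bool = False
    chunk_type: ChunkType = ChunkType.TEXT

class ThoughtChunkEvent(StreamChunkEvent):
    # frozen=True pins the chunk type; with round_index gone, consumers
    # can no longer read a per-round counter off thought chunks.
    chunk_type: ChunkType = Field(default=ChunkType.THOUGHT, frozen=True)

event = ThoughtChunkEvent(selector=["node", "generation", "thought"], chunk="hmm")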

View File

@@ -598,7 +598,6 @@ class Node(Generic[NodeDataT]):
chunk=event.chunk,
is_final=event.is_final,
chunk_type=ChunkType.THOUGHT,
round_index=event.round_index,
)
@_dispatch.register
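
Note: the @_dispatch.register decorator above suggests single-dispatch fan-out by event type. A minimal sketch of that pattern with stand-in types (not the real Node internals):

from dataclasses import dataclass
from functools import singledispatch

@dataclass
class ThoughtChunk:  # stand-in event type
    chunk: str
    is_final: bool = False

@singledispatch
def dispatch(event) -> None:
    raise TypeError(f"unhandled event: {type(event).__name__}")

@dispatch.register
def _(event: ThoughtChunk) -> None:
    # Re-emit as a THOUGHT-typed stream chunk; per this commit,
    # round_index is no longer copied across.
    print("THOUGHT", event.chunk, event.is_final)

dispatch(ThoughtChunk("pondering"))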

View File

@@ -277,7 +277,7 @@ class LLMNode(Node[LLMNodeData]):
structured_output: LLMStructuredOutput | None = None
for event in generator:
if isinstance(event, StreamChunkEvent):
if isinstance(event, (StreamChunkEvent, ThoughtChunkEvent)):
yield event
elif isinstance(event, ModelInvokeCompletedEvent):
# Raw text
@@ -340,6 +340,16 @@ class LLMNode(Node[LLMNodeData]):
chunk="",
is_final=True,
)
yield StreamChunkEvent(
selector=[self._node_id, "generation", "content"],
chunk="",
is_final=True,
)
yield ThoughtChunkEvent(
selector=[self._node_id, "generation", "thought"],
chunk="",
is_final=True,
)
yield StreamCompletedEvent(
node_run_result=NodeRunResult(
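
Note: the empty is_final=True chunks above exist so every generation sub-stream gets an explicit terminator even when no content was produced. A hypothetical consumer that relies on this contract:

from collections import defaultdict

buffers: dict[tuple[str, ...], list[str]] = defaultdict(list)

def on_chunk(selector: list[str], chunk: str, is_final: bool) -> None:
    # Buffer per selector; an empty chunk with is_final=True still
    # closes the sub-stream, so no selector is left dangling.
    key = tuple(selector)
    if chunk:
        buffers[key].append(chunk)
    if is_final:
        print("/".join(key), "closed:", "".join(buffers.pop(key, [])))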
@@ -470,6 +480,8 @@ class LLMNode(Node[LLMNodeData]):
usage = LLMUsage.empty_usage()
finish_reason = None
full_text_buffer = io.StringIO()
think_parser = llm_utils.ThinkTagStreamParser()
reasoning_chunks: list[str] = []
# Initialize streaming metrics tracking
start_time = request_start_time if request_start_time is not None else time.perf_counter()
@@ -498,12 +510,32 @@ class LLMNode(Node[LLMNodeData]):
has_content = True
full_text_buffer.write(text_part)
# Text output: always forward raw chunk (keep <think> tags intact)
yield StreamChunkEvent(
selector=[node_id, "text"],
chunk=text_part,
is_final=False,
)
# Generation output: split out thoughts, forward only non-thought content chunks
for kind, segment in think_parser.process(text_part):
if not segment:
continue
if kind == "thought":
reasoning_chunks.append(segment)
yield ThoughtChunkEvent(
selector=[node_id, "generation", "thought"],
chunk=segment,
is_final=False,
)
else:
yield StreamChunkEvent(
selector=[node_id, "generation", "content"],
chunk=segment,
is_final=False,
)
# Update the overall metadata from this chunk
if not model and result.model:
model = result.model
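
Note: llm_utils.ThinkTagStreamParser is not shown in this diff. A minimal sketch of the interface the loop above relies on, assuming it splits streamed text on <think>/</think> boundaries and handles tags broken across chunks (the method names match the call sites; the internals are guesswork):

class ThinkTagStreamParser:
    OPEN, CLOSE = "<think>", "</think>"

    def __init__(self) -> None:
        self._buf = ""
        self._in_think = False

    def process(self, chunk: str):
        # Yield ("thought", seg) or ("content", seg) pairs as soon as they
        # can be classified; hold back any suffix that may be a split tag.
        self._buf += chunk
        while self._buf:
            tag = self.CLOSE if self._in_think else self.OPEN
            idx = self._buf.find(tag)
            if idx != -1:
                segment, self._buf = self._buf[:idx], self._buf[idx + len(tag):]
                if segment:
                    yield ("thought" if self._in_think else "content", segment)
                self._in_think = not self._in_think
                continue
            keep = 0
            for k in range(min(len(tag) - 1, len(self._buf)), 0, -1):
                if tag.startswith(self._buf[-k:]):
                    keep = k
                    break
            cut = len(self._buf) - keep
            emit, self._buf = self._buf[:cut], self._buf[cut:]
            if emit:
                yield ("thought" if self._in_think else "content", emit)
            break

    def flush(self):
        # End of stream: whatever is buffered belongs to the current state.
        if self._buf:
            yield ("thought" if self._in_think else "content", self._buf)
            self._buf = ""

p = ThinkTagStreamParser()
parts = list(p.process("<thi")) + list(p.process("nk>hm</think>hi")) + list(p.flush())
# parts == [("thought", "hm"), ("content", "hi")]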
@@ -518,16 +550,35 @@ class LLMNode(Node[LLMNodeData]):
except OutputParserError as e:
raise LLMNodeError(f"Failed to parse structured output: {e}")
for kind, segment in think_parser.flush():
if not segment:
continue
if kind == "thought":
reasoning_chunks.append(segment)
yield ThoughtChunkEvent(
selector=[node_id, "generation", "thought"],
chunk=segment,
is_final=False,
)
else:
yield StreamChunkEvent(
selector=[node_id, "generation", "content"],
chunk=segment,
is_final=False,
)
# Extract reasoning content from <think> tags in the main text
full_text = full_text_buffer.getvalue()
if reasoning_format == "tagged":
# Keep <think> tags in text for backward compatibility
clean_text = full_text
reasoning_content = ""
reasoning_content = "".join(reasoning_chunks)
else:
# Extract clean text and reasoning from <think> tags
clean_text, reasoning_content = LLMNode._split_reasoning(full_text, reasoning_format)
if reasoning_chunks and not reasoning_content:
reasoning_content = "".join(reasoning_chunks)
# Calculate streaming metrics
end_time = time.perf_counter()
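
Note: LLMNode._split_reasoning is outside this hunk. For the non-tagged formats, a plausible stand-in that strips <think> spans from the buffered text (hypothetical helper; the regex semantics are an assumption):

import re

_THINK_RE = re.compile(r"<think>(.*?)</think>", re.DOTALL)

def split_reasoning(full_text: str) -> tuple[str, str]:
    # Return (clean_text, reasoning_content): the text with <think> spans
    # removed, plus the concatenated contents of those spans.
    reasoning = "".join(m.group(1) for m in _THINK_RE.finditer(full_text))
    clean = _THINK_RE.sub("", full_text)
    return clean, reasoning

clean, reasoning = split_reasoning("<think>hm</think>answer")
# clean == "answer", reasoning == "hm"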
@@ -1398,8 +1449,6 @@ class LLMNode(Node[LLMNodeData]):
finish_reason = None
agent_result: AgentResult | None = None
# Track current round for ThoughtChunkEvent
current_round = 1
think_parser = llm_utils.ThinkTagStreamParser()
reasoning_chunks: list[str] = []
@@ -1431,12 +1480,6 @@ class LLMNode(Node[LLMNodeData]):
else:
agent_logs.append(agent_log_event)
# Extract round number from ROUND log type
if output.log_type == AgentLog.LogType.ROUND:
round_index = output.data.get("round_index")
if isinstance(round_index, int):
current_round = round_index
# Emit tool call events when tool call starts
if output.log_type == AgentLog.LogType.TOOL_CALL and output.status == AgentLog.LogStatus.START:
tool_name = output.data.get("tool_name", "")
@@ -1450,26 +1493,34 @@ class LLMNode(Node[LLMNodeData]):
tool_call_id=tool_call_id,
tool_name=tool_name,
tool_arguments=tool_arguments,
is_final=True,
is_final=False,
)
# Emit tool result events when tool call completes
if output.log_type == AgentLog.LogType.TOOL_CALL and output.status == AgentLog.LogStatus.SUCCESS:
# Emit tool result events when tool call completes (both success and error)
if output.log_type == AgentLog.LogType.TOOL_CALL and output.status != AgentLog.LogStatus.START:
tool_name = output.data.get("tool_name", "")
tool_output = output.data.get("output", "")
tool_call_id = output.data.get("tool_call_id", "")
tool_files = []
tool_error = None
# Extract file IDs if present
# Extract file IDs if present (only for success case)
files_data = output.data.get("files")
if files_data and isinstance(files_data, list):
tool_files = files_data
# Check for error in meta
meta = output.data.get("meta")
if meta and isinstance(meta, dict) and meta.get("error"):
tool_error = meta.get("error")
# Check for error from multiple sources
if output.status == AgentLog.LogStatus.ERROR:
# Priority: output.error > data.error > meta.error
tool_error = output.error or output.data.get("error")
meta = output.data.get("meta")
if not tool_error and meta and isinstance(meta, dict):
tool_error = meta.get("error")
else:
# For success case, check meta for potential errors
meta = output.data.get("meta")
if meta and isinstance(meta, dict) and meta.get("error"):
tool_error = meta.get("error")
yield ToolResultChunkEvent(
selector=[self._node_id, "generation", "tool_results"],
@@ -1478,7 +1529,7 @@ class LLMNode(Node[LLMNodeData]):
tool_name=tool_name,
tool_files=tool_files,
tool_error=tool_error,
is_final=True,
is_final=False,
)
elif isinstance(output, LLMResultChunk):
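
Note: the error-extraction branches above reduce to a single priority chain. A self-contained sketch with stand-in types (the real AgentLog is not shown in this diff):

from dataclasses import dataclass, field
from enum import Enum
from typing import Any

class LogStatus(Enum):  # stand-in for AgentLog.LogStatus
    START = "start"
    SUCCESS = "success"
    ERROR = "error"

@dataclass
class ToolLog:  # stand-in for the agent log output object
    status: LogStatus
    error: str | None = None
    data: dict[str, Any] = field(default_factory=dict)

def extract_tool_error(output: ToolLog) -> str | None:
    # ERROR status: output.error > data["error"] > data["meta"]["error"].
    # Any other status: only data["meta"]["error"] can report a failure.
    meta = output.data.get("meta")
    meta_error = meta.get("error") if isinstance(meta, dict) else None
    if output.status is LogStatus.ERROR:
        return output.error or output.data.get("error") or meta_error
    return meta_error

log = ToolLog(status=LogStatus.ERROR, data={"error": "boom"})
# extract_tool_error(log) == "boom"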
@@ -1502,7 +1553,6 @@ class LLMNode(Node[LLMNodeData]):
yield ThoughtChunkEvent(
selector=[self._node_id, "generation", "thought"],
chunk=segment,
round_index=current_round,
is_final=False,
)
else:
@@ -1548,7 +1598,6 @@ class LLMNode(Node[LLMNodeData]):
yield ThoughtChunkEvent(
selector=[self._node_id, "generation", "thought"],
chunk=segment,
round_index=current_round,
is_final=False,
)
else:
@@ -1580,7 +1629,27 @@ class LLMNode(Node[LLMNodeData]):
yield ThoughtChunkEvent(
selector=[self._node_id, "generation", "thought"],
chunk="",
round_index=current_round,
is_final=True,
)
# Close tool_calls stream (already sent via ToolCallChunkEvent)
yield ToolCallChunkEvent(
selector=[self._node_id, "generation", "tool_calls"],
chunk="",
tool_call_id="",
tool_name="",
tool_arguments="",
is_final=True,
)
# Close tool_results stream (already sent via ToolResultChunkEvent)
yield ToolResultChunkEvent(
selector=[self._node_id, "generation", "tool_results"],
chunk="",
tool_call_id="",
tool_name="",
tool_files=[],
tool_error=None,
is_final=True,
)
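
Note: taken together, the agent run now terminates every generation sub-stream explicitly. A quick enumeration of the selectors closed with an empty is_final=True chunk (node_id is a placeholder; the selector paths are taken from the diff):

node_id = "llm"
closed = [
    [node_id, "generation", "content"],
    [node_id, "generation", "thought"],
    [node_id, "generation", "tool_calls"],
    [node_id, "generation", "tool_results"],
]
for sel in closed:
    print("/".join(sel), "-> closed with empty final chunk")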