Mirror of https://github.com/langgenius/dify.git (synced 2026-05-04 09:28:04 +08:00)
Merge remote-tracking branch 'origin/main' into feat/trigger
@@ -104,7 +104,7 @@ class HttpRequestNode(Node):
                 status=WorkflowNodeExecutionStatus.FAILED,
                 outputs={
                     "status_code": response.status_code,
-                    "body": response.text if not files else "",
+                    "body": response.text if not files.value else "",
                     "headers": response.headers,
                     "files": files,
                 },
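The fix above suggests that `files` is a segment-style wrapper whose truthiness does not reflect its payload, so `not files` never triggered the empty-body fallback even when no files were extracted. A minimal sketch of the distinction, using a hypothetical stub class rather than dify's actual segment type:

# Illustrative only -- ArrayFileSegmentStub stands in for a segment-like
# wrapper around a list of files; it is not dify's real class.
class ArrayFileSegmentStub:
    def __init__(self, value: list[str]):
        self.value = value  # the actual list of extracted files

files = ArrayFileSegmentStub(value=[])

# A plain object without __bool__/__len__ is always truthy, so the old
# guard `not files` was False even with zero extracted files...
print(not files)        # False -- the wrapper itself is truthy
# ...while the new guard inspects the payload and behaves as intended.
print(not files.value)  # True  -- empty list, so blank out the body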
@@ -3,6 +3,7 @@ import io
 import json
 import logging
 import re
+import time
 from collections.abc import Generator, Mapping, Sequence
 from typing import TYPE_CHECKING, Any, Literal
 
@@ -384,6 +385,8 @@ class LLMNode(Node):
                 output_schema = LLMNode.fetch_structured_output_schema(
                     structured_output=structured_output or {},
                 )
+                request_start_time = time.perf_counter()
+
                 invoke_result = invoke_llm_with_structured_output(
                     provider=model_instance.provider,
                     model_schema=model_schema,
@@ -396,6 +399,8 @@ class LLMNode(Node):
                     user=user_id,
                 )
             else:
+                request_start_time = time.perf_counter()
+
                 invoke_result = model_instance.invoke_llm(
                     prompt_messages=list(prompt_messages),
                     model_parameters=node_data_model.completion_params,
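Both invocation branches now capture `request_start_time` immediately before the model call, so the measured span covers only the LLM invocation itself. A small sketch of the pattern; the helper name is hypothetical, and `time.perf_counter()` is the right clock here because it is monotonic and high-resolution, so a wall-clock adjustment can never produce a negative duration:

import time

def invoke_with_timing(invoke):
    # Capture the monotonic start timestamp right before the call.
    request_start_time = time.perf_counter()
    result = invoke()
    latency = time.perf_counter() - request_start_time
    return result, round(latency, 3)  # same 3-decimal rounding as the diff

# Hypothetical usage with a stand-in for model_instance.invoke_llm:
result, latency = invoke_with_timing(lambda: "llm response")
print(f"latency={latency}s")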
@@ -411,6 +416,7 @@ class LLMNode(Node):
             node_id=node_id,
             node_type=node_type,
             reasoning_format=reasoning_format,
+            request_start_time=request_start_time,
         )
 
     @staticmethod
@@ -422,14 +428,20 @@ class LLMNode(Node):
         node_id: str,
         node_type: NodeType,
         reasoning_format: Literal["separated", "tagged"] = "tagged",
+        request_start_time: float | None = None,
     ) -> Generator[NodeEventBase | LLMStructuredOutput, None, None]:
         # For blocking mode
         if isinstance(invoke_result, LLMResult):
+            duration = None
+            if request_start_time is not None:
+                duration = time.perf_counter() - request_start_time
+                invoke_result.usage.latency = round(duration, 3)
             event = LLMNode.handle_blocking_result(
                 invoke_result=invoke_result,
                 saver=file_saver,
                 file_outputs=file_outputs,
                 reasoning_format=reasoning_format,
+                request_latency=duration,
             )
             yield event
             return
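Note that `request_start_time` is threaded through as an optional parameter (`float | None = None`), so existing callers that do not pass it keep working and `usage.latency` is only stamped when timing data actually exists. A one-function sketch of that None-guard, with an illustrative name:

import time

def compute_duration(request_start_time: float | None) -> float | None:
    # No timing info: return None and leave usage.latency untouched.
    if request_start_time is None:
        return None
    return time.perf_counter() - request_start_time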
@@ -441,6 +453,12 @@ class LLMNode(Node):
         usage = LLMUsage.empty_usage()
         finish_reason = None
         full_text_buffer = io.StringIO()
+
+        # Initialize streaming metrics tracking
+        start_time = request_start_time if request_start_time is not None else time.perf_counter()
+        first_token_time = None
+        has_content = False
+
         collected_structured_output = None  # Collect structured_output from streaming chunks
         # Consume the invoke result and handle generator exception
         try:
@@ -457,6 +475,11 @@ class LLMNode(Node):
                 file_saver=file_saver,
                 file_outputs=file_outputs,
             ):
+                # Detect first token for TTFT calculation
+                if text_part and not has_content:
+                    first_token_time = time.perf_counter()
+                    has_content = True
+
                 full_text_buffer.write(text_part)
                 yield StreamChunkEvent(
                     selector=[node_id, "text"],
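The guard `if text_part and not has_content` takes the first-token timestamp exactly once, on the first non-empty chunk, so chunks that carry no text (for example usage-only or metadata chunks) never count as the first token. A self-contained sketch of the same detection over a stand-in chunk stream:

import time

def measure_ttft(chunks) -> float | None:
    # Stand-in for the real streaming loop; returns time-to-first-token.
    start = time.perf_counter()
    first_token_time = None
    has_content = False
    for text_part in chunks:
        if text_part and not has_content:
            first_token_time = time.perf_counter()
            has_content = True
    return (first_token_time - start) if has_content else None

print(measure_ttft(["", "Hel", "lo"]))  # leading "" is ignored
print(measure_ttft([""]))               # None: no content at all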
@@ -489,6 +512,16 @@ class LLMNode(Node):
         # Extract clean text and reasoning from <think> tags
         clean_text, reasoning_content = LLMNode._split_reasoning(full_text, reasoning_format)
 
+        # Calculate streaming metrics
+        end_time = time.perf_counter()
+        total_duration = end_time - start_time
+        usage.latency = round(total_duration, 3)
+        if has_content and first_token_time:
+            gen_ai_server_time_to_first_token = first_token_time - start_time
+            llm_streaming_time_to_generate = end_time - first_token_time
+            usage.time_to_first_token = round(gen_ai_server_time_to_first_token, 3)
+            usage.time_to_generate = round(llm_streaming_time_to_generate, 3)
+
         yield ModelInvokeCompletedEvent(
             # Use clean_text for separated mode, full_text for tagged mode
             text=clean_text if reasoning_format == "separated" else full_text,
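The three usage fields computed here decompose the request span: time to first token plus time to generate equals total latency, up to rounding. A tiny worked example with made-up timestamps:

# start ---(time_to_first_token)--- first_token ---(time_to_generate)--- end
start_time, first_token_time, end_time = 10.000, 10.420, 12.350

latency = round(end_time - start_time, 3)                      # 2.35
time_to_first_token = round(first_token_time - start_time, 3)  # 0.42
time_to_generate = round(end_time - first_token_time, 3)       # 1.93

# The decomposition holds up to floating-point rounding.
assert abs(latency - (time_to_first_token + time_to_generate)) < 1e-9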
@@ -1068,6 +1101,7 @@ class LLMNode(Node):
         saver: LLMFileSaver,
         file_outputs: list["File"],
         reasoning_format: Literal["separated", "tagged"] = "tagged",
+        request_latency: float | None = None,
     ) -> ModelInvokeCompletedEvent:
         buffer = io.StringIO()
         for text_part in LLMNode._save_multimodal_output_and_convert_result_to_markdown(
@@ -1088,7 +1122,7 @@ class LLMNode(Node):
         # Extract clean text and reasoning from <think> tags
         clean_text, reasoning_content = LLMNode._split_reasoning(full_text, reasoning_format)
 
-        return ModelInvokeCompletedEvent(
+        event = ModelInvokeCompletedEvent(
             # Use clean_text for separated mode, full_text for tagged mode
             text=clean_text if reasoning_format == "separated" else full_text,
             usage=invoke_result.usage,
@@ -1098,6 +1132,9 @@ class LLMNode(Node):
             # Pass structured output if enabled
             structured_output=getattr(invoke_result, "structured_output", None),
         )
+        if request_latency is not None:
+            event.usage.latency = round(request_latency, 3)
+        return event
 
     @staticmethod
     def save_multimodal_image_output(
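Together with the previous hunk, this turns a direct `return ModelInvokeCompletedEvent(...)` into an `event` local so that `usage.latency` can be patched before returning. A simplified sketch of that refactor with stand-in dataclasses (not dify's real types):

from dataclasses import dataclass, field

@dataclass
class Usage:
    latency: float = 0.0

@dataclass
class CompletedEvent:
    text: str
    usage: Usage = field(default_factory=Usage)

def build_event(text: str, request_latency: float | None) -> CompletedEvent:
    event = CompletedEvent(text=text)   # was: return CompletedEvent(...)
    if request_latency is not None:
        # Patch the latency onto the event before handing it back.
        event.usage.latency = round(request_latency, 3)
    return event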