feat: add llm first token timeout config

2026-05-05 09:58:04 +08:00 · 2026-01-15 17:56:21 +08:00
parent bdd8d5b470
commit 5db06175de
10 changed files with 617 additions and 52 deletions
--- a/api/core/workflow/utils/generator_timeout.py
+++ b/api/core/workflow/utils/generator_timeout.py
@ -0,0 +1,54 @@
+"""
+Generator timeout utilities for workflow nodes.
+
+Provides timeout wrappers for streaming generators, primarily used for
+LLM response streaming where we need to enforce time-to-first-token limits.
+"""
+
+import time
+from collections.abc import Generator
+from typing import TypeVar
+
+T = TypeVar("T")
+
+
+class FirstTokenTimeoutError(Exception):
+    """Signal that a generator produced no first item within its allowed time.
+
+    The exception message embeds the limit, expressed in milliseconds, and the
+    same value stays available to handlers through the ``timeout_ms`` attribute.
+
+    Args:
+        timeout_ms: The first-item limit that was exceeded, in milliseconds.
+    """
+
+    def __init__(self, timeout_ms: int):
+        message = f"Generator timed out after {timeout_ms}ms without yielding first item"
+        super().__init__(message)
+        # Kept on the instance so callers can report or log the limit
+        # without parsing the message text.
+        self.timeout_ms = timeout_ms
+
+
+def with_first_token_timeout(
+    generator: Generator[T, None, None],
+    timeout_seconds: float,
+) -> Generator[T, None, None]:
+    """
+    Wrap a generator with first token timeout monitoring.
+
+    Only the time until the FIRST item is measured. Once the first item has
+    arrived in time, monitoring stops and every later item is passed through
+    without any further timing checks.
+
+    The clock starts when the returned generator is first iterated, not when
+    this function is called, because the body of a generator function does not
+    run until the first ``next()``.
+
+    Limitations:
+        The elapsed time is checked only when the source actually hands over
+        its first item. This wrapper therefore cannot interrupt a source that
+        blocks indefinitely, and it raises nothing if the source finishes
+        without yielding at all. When the first item arrives too late, that
+        item is discarded and the error is raised instead.
+
+    Args:
+        generator: The source generator to wrap
+        timeout_seconds: Maximum time to wait for first item (in seconds)
+
+    Yields:
+        Items from the source generator
+
+    Raises:
+        FirstTokenTimeoutError: If the first item arrives after more than
+            ``timeout_seconds`` have elapsed
+    """
+    start_time = time.monotonic()
+    first_token_received = False
+
+    try:
+        for item in generator:
+            if not first_token_received:
+                elapsed = time.monotonic() - start_time
+                if elapsed > timeout_seconds:
+                    # round() rather than int(): binary floating point can leave
+                    # the product just below the intended whole number, and
+                    # truncation would then report one millisecond too few.
+                    raise FirstTokenTimeoutError(round(timeout_seconds * 1000))
+                first_token_received = True
+
+            yield item
+    finally:
+        # A plain for-loop never closes its iterator, so without this the
+        # source would stay suspended (holding whatever it has open) after a
+        # timeout or after the consumer abandons the wrapper early. Closing an
+        # already-finished generator is a no-op. Plain iterators without a
+        # close() method are tolerated.
+        close_source = getattr(generator, "close", None)
+        if callable(close_source):
+            close_source()