Merge branch 'main' into feat/agent-node-v2

2026-04-29 06:58:05 +08:00 · 2026-01-07 17:34:23 +08:00
parent 1584a78fc9 187bfafe8b
commit eec57e84e4
802 changed files with 41190 additions and 6172 deletions
--- a/api/core/workflow/graph_engine/command_channels/redis_channel.py
+++ b/api/core/workflow/graph_engine/command_channels/redis_channel.py
@ -9,7 +9,7 @@ Each instance uses a unique key for its command queue.
 import json
 from typing import TYPE_CHECKING, Any, final

-from ..entities.commands import AbortCommand, CommandType, GraphEngineCommand, PauseCommand
+from ..entities.commands import AbortCommand, CommandType, GraphEngineCommand, PauseCommand, UpdateVariablesCommand

 if TYPE_CHECKING:
    from extensions.ext_redis import RedisClientWrapper
@ -113,6 +113,8 @@ class RedisChannel:
                return AbortCommand.model_validate(data)
            if command_type == CommandType.PAUSE:
                return PauseCommand.model_validate(data)
+            if command_type == CommandType.UPDATE_VARIABLES:
+                return UpdateVariablesCommand.model_validate(data)

            # For other command types, use base class
            return GraphEngineCommand.model_validate(data)
--- a/api/core/workflow/graph_engine/command_processing/__init__.py
+++ b/api/core/workflow/graph_engine/command_processing/__init__.py
@ -5,11 +5,12 @@ This package handles external commands sent to the engine
 during execution.
 """

-from .command_handlers import AbortCommandHandler, PauseCommandHandler
+from .command_handlers import AbortCommandHandler, PauseCommandHandler, UpdateVariablesCommandHandler
 from .command_processor import CommandProcessor

 __all__ = [
    "AbortCommandHandler",
    "CommandProcessor",
    "PauseCommandHandler",
+    "UpdateVariablesCommandHandler",
 ]
--- a/api/core/workflow/graph_engine/command_processing/command_handlers.py
+++ b/api/core/workflow/graph_engine/command_processing/command_handlers.py
@ -4,9 +4,10 @@ from typing import final
 from typing_extensions import override

 from core.workflow.entities.pause_reason import SchedulingPause
+from core.workflow.runtime import VariablePool

 from ..domain.graph_execution import GraphExecution
-from ..entities.commands import AbortCommand, GraphEngineCommand, PauseCommand
+from ..entities.commands import AbortCommand, GraphEngineCommand, PauseCommand, UpdateVariablesCommand
 from .command_processor import CommandHandler

 logger = logging.getLogger(__name__)
@ -31,3 +32,25 @@ class PauseCommandHandler(CommandHandler):
        reason = command.reason
        pause_reason = SchedulingPause(message=reason)
        execution.pause(pause_reason)
+
+
+@final
+class UpdateVariablesCommandHandler(CommandHandler):
+    """Apply the updates carried by an ``UpdateVariablesCommand`` to a variable pool."""
+
+    def __init__(self, variable_pool: VariablePool) -> None:
+        """Store the variable pool that ``handle`` writes to."""
+        self._variable_pool = variable_pool
+
+    @override
+    def handle(self, command: GraphEngineCommand, execution: GraphExecution) -> None:
+        """Add every variable carried by ``command`` to the variable pool.
+
+        Updates are applied one at a time. An update that raises ``ValueError``
+        is logged as a warning and skipped; the remaining updates are still
+        processed.
+
+        Args:
+            command: Must be an ``UpdateVariablesCommand`` (checked with ``assert``).
+            execution: Read only for ``workflow_id``, which is included in log messages.
+        """
+        # NOTE(review): assert statements are removed under ``python -O``, so this
+        # type check does not run in optimized mode.
+        assert isinstance(command, UpdateVariablesCommand)
+        for update in command.updates:
+            try:
+                variable = update.value
+                # The variable's own selector is used as its key in the pool.
+                self._variable_pool.add(variable.selector, variable)
+                logger.debug("Updated variable %s for workflow %s", variable.selector, execution.workflow_id)
+            except ValueError as exc:
+                logger.warning(
+                    "Skipping invalid variable selector %s for workflow %s: %s",
+                    # getattr with a default so building the log message cannot raise AttributeError.
+                    getattr(update.value, "selector", None),
+                    execution.workflow_id,
+                    exc,
+                )
--- a/api/core/workflow/graph_engine/entities/commands.py
+++ b/api/core/workflow/graph_engine/entities/commands.py
@ -5,17 +5,21 @@ This module defines command types that can be sent to a running GraphEngine
 instance to control its execution flow.
 """

-from enum import StrEnum
+from collections.abc import Sequence
+from enum import StrEnum, auto
 from typing import Any

 from pydantic import BaseModel, Field

+from core.variables.variables import VariableUnion
+

 class CommandType(StrEnum):
    """Types of commands that can be sent to GraphEngine."""

-    ABORT = "abort"
-    PAUSE = "pause"
+    # On a StrEnum, auto() produces the lower-cased member name, so ABORT and PAUSE
+    # keep the string values they had before ("abort", "pause").
+    ABORT = auto()
+    PAUSE = auto()
+    UPDATE_VARIABLES = auto()


 class GraphEngineCommand(BaseModel):
@ -37,3 +41,16 @@ class PauseCommand(GraphEngineCommand):

    command_type: CommandType = Field(default=CommandType.PAUSE, description="Type of command")
    reason: str = Field(default="unknown reason", description="reason for pause")
+
+
+class VariableUpdate(BaseModel):
+    """Represents a single variable update instruction."""
+
+    # The variable carries its own selector: UpdateVariablesCommandHandler passes
+    # ``value.selector`` together with ``value`` to ``VariablePool.add``.
+    value: VariableUnion = Field(description="New variable value")
+
+
+class UpdateVariablesCommand(GraphEngineCommand):
+    """Command to update a group of variables in the variable pool."""
+
+    # Defaults to CommandType.UPDATE_VARIABLES.
+    command_type: CommandType = Field(default=CommandType.UPDATE_VARIABLES, description="Type of command")
+    # default_factory=list: a command created without ``updates`` carries an empty list.
+    updates: Sequence[VariableUpdate] = Field(default_factory=list, description="Variable updates")
--- a/api/core/workflow/graph_engine/graph_engine.py
+++ b/api/core/workflow/graph_engine/graph_engine.py
@ -5,9 +5,12 @@ This engine uses a modular architecture with separated packages following
 Domain-Driven Design principles for improved maintainability and testability.
 """

+from __future__ import annotations
+
 import contextvars
 import logging
 import queue
+import threading
 from collections.abc import Generator
 from typing import TYPE_CHECKING, cast, final

@ -30,8 +33,13 @@ from core.workflow.runtime import GraphRuntimeState, ReadOnlyGraphRuntimeStateWr
 if TYPE_CHECKING:  # pragma: no cover - used only for static analysis
    from core.workflow.runtime.graph_runtime_state import GraphProtocol

-from .command_processing import AbortCommandHandler, CommandProcessor, PauseCommandHandler
-from .entities.commands import AbortCommand, PauseCommand
+from .command_processing import (
+    AbortCommandHandler,
+    CommandProcessor,
+    PauseCommandHandler,
+    UpdateVariablesCommandHandler,
+)
+from .entities.commands import AbortCommand, PauseCommand, UpdateVariablesCommand
 from .error_handler import ErrorHandler
 from .event_management import EventHandler, EventManager
 from .graph_state_manager import GraphStateManager
@ -70,10 +78,13 @@ class GraphEngine:
        scale_down_idle_time: float | None = None,
    ) -> None:
        """Initialize the graph engine with all subsystems and dependencies."""
+        # stop event
+        self._stop_event = threading.Event()

        # Bind runtime state to current workflow context
        self._graph = graph
        self._graph_runtime_state = graph_runtime_state
+        self._graph_runtime_state.stop_event = self._stop_event
        self._graph_runtime_state.configure(graph=cast("GraphProtocol", graph))
        self._command_channel = command_channel

@ -140,6 +151,9 @@ class GraphEngine:
        pause_handler = PauseCommandHandler()
        self._command_processor.register_handler(PauseCommand, pause_handler)

+        update_variables_handler = UpdateVariablesCommandHandler(self._graph_runtime_state.variable_pool)
+        self._command_processor.register_handler(UpdateVariablesCommand, update_variables_handler)
+
        # === Extensibility ===
        # Layers allow plugins to extend engine functionality
        self._layers: list[GraphEngineLayer] = []
@ -169,6 +183,7 @@ class GraphEngine:
            max_workers=self._max_workers,
            scale_up_threshold=self._scale_up_threshold,
            scale_down_idle_time=self._scale_down_idle_time,
+            stop_event=self._stop_event,
        )

        # === Orchestration ===
@ -199,6 +214,7 @@ class GraphEngine:
            event_handler=self._event_handler_registry,
            execution_coordinator=self._execution_coordinator,
            event_emitter=self._event_manager,
+            stop_event=self._stop_event,
        )

        # === Validation ===
@ -212,9 +228,16 @@ class GraphEngine:
            if id(node.graph_runtime_state) != expected_state_id:
                raise ValueError(f"GraphRuntimeState consistency violation: Node '{node.id}' has a different instance")

-    def layer(self, layer: GraphEngineLayer) -> "GraphEngine":
+    def _bind_layer_context(
+        self,
+        layer: GraphEngineLayer,
+    ) -> None:
+        """Inject this engine's runtime context into ``layer``.
+
+        Calls ``layer.initialize`` with a read-only wrapper around the engine's
+        runtime state and with the engine's command channel. Nothing is caught
+        here, so an exception raised by ``layer.initialize`` propagates to the
+        caller.
+
+        Args:
+            layer: The layer to initialize.
+        """
+        layer.initialize(ReadOnlyGraphRuntimeStateWrapper(self._graph_runtime_state), self._command_channel)
+
+    def layer(self, layer: GraphEngineLayer) -> GraphEngine:
        """Add a layer for extending functionality."""
        self._layers.append(layer)
+        self._bind_layer_context(layer)
        return self

    def run(self) -> Generator[GraphEngineEvent, None, None]:
@ -301,14 +324,7 @@ class GraphEngine:
    def _initialize_layers(self) -> None:
        """Initialize layers with context."""
        self._event_manager.set_layers(self._layers)
-        # Create a read-only wrapper for the runtime state
-        read_only_state = ReadOnlyGraphRuntimeStateWrapper(self._graph_runtime_state)
        for layer in self._layers:
-            try:
-                layer.initialize(read_only_state, self._command_channel)
-            except Exception as e:
-                logger.warning("Failed to initialize layer %s: %s", layer.__class__.__name__, e)
-
            try:
                layer.on_graph_start()
            except Exception as e:
@ -316,6 +332,7 @@ class GraphEngine:

    def _start_execution(self, *, resume: bool = False) -> None:
        """Start execution subsystems."""
+        self._stop_event.clear()
        paused_nodes: list[str] = []
        if resume:
            paused_nodes = self._graph_runtime_state.consume_paused_nodes()
@ -343,13 +360,12 @@ class GraphEngine:

    def _stop_execution(self) -> None:
        """Stop execution subsystems."""
+        self._stop_event.set()
        self._dispatcher.stop()
        self._worker_pool.stop()
        # Don't mark complete here as the dispatcher already does it

        # Notify layers
-        logger = logging.getLogger(__name__)
-
        for layer in self._layers:
            try:
                layer.on_graph_end(self._graph_execution.error)
--- a/api/core/workflow/graph_engine/graph_traversal/skip_propagator.py
+++ b/api/core/workflow/graph_engine/graph_traversal/skip_propagator.py
@ -60,6 +60,7 @@ class SkipPropagator:
        if edge_states["has_taken"]:
            # Enqueue node
            self._state_manager.enqueue_node(downstream_node_id)
+            self._state_manager.start_execution(downstream_node_id)
            return

        # All edges are skipped, propagate skip to this node
--- a/api/core/workflow/graph_engine/layers/README.md
+++ b/api/core/workflow/graph_engine/layers/README.md
@ -8,7 +8,7 @@ Pluggable middleware for engine extensions.

 Abstract base class for layers.

-- `initialize()` - Receive runtime context
+- `initialize()` - Receive runtime context (runtime state is bound here and always available to hooks)
 - `on_graph_start()` - Execution start hook
 - `on_event()` - Process all events
 - `on_graph_end()` - Execution end hook
@ -34,6 +34,9 @@ engine.layer(debug_layer)
 engine.run()
 ```

+`engine.layer()` binds the read-only runtime state before execution, so
+`graph_runtime_state` is always available inside layer hooks.
+
 ## Custom Layers

 ```python
--- a/api/core/workflow/graph_engine/layers/base.py
+++ b/api/core/workflow/graph_engine/layers/base.py
@ -13,6 +13,14 @@ from core.workflow.nodes.base.node import Node
 from core.workflow.runtime import ReadOnlyGraphRuntimeState


+class GraphEngineLayerNotInitializedError(Exception):
+    """Raised when a layer's runtime state is accessed before initialization."""
+
+    def __init__(self, layer_name: str | None = None) -> None:
+        """Build the error message.
+
+        Args:
+            layer_name: Name shown in the message; when ``None`` or empty,
+                ``"GraphEngineLayer"`` is used instead.
+        """
+        name = layer_name or "GraphEngineLayer"
+        super().__init__(f"{name} runtime state is not initialized. Bind the layer to a GraphEngine before access.")
+
+
 class GraphEngineLayer(ABC):
    """
    Abstract base class for GraphEngine layers.
@ -28,22 +36,27 @@ class GraphEngineLayer(ABC):

    def __init__(self) -> None:
        """Initialize the layer. Subclasses can override with custom parameters."""
-        self.graph_runtime_state: ReadOnlyGraphRuntimeState | None = None
+        self._graph_runtime_state: ReadOnlyGraphRuntimeState | None = None
        self.command_channel: CommandChannel | None = None

+    @property
+    def graph_runtime_state(self) -> ReadOnlyGraphRuntimeState:
+        """Return the read-only runtime state injected by ``initialize``.
+
+        Raises:
+            GraphEngineLayerNotInitializedError: If no runtime state has been set
+                yet. The error message names the concrete layer class.
+        """
+        if self._graph_runtime_state is None:
+            raise GraphEngineLayerNotInitializedError(type(self).__name__)
+        return self._graph_runtime_state
+
    def initialize(self, graph_runtime_state: ReadOnlyGraphRuntimeState, command_channel: CommandChannel) -> None:
        """
        Initialize the layer with engine dependencies.

-        Called by GraphEngine before execution starts to inject the read-only runtime state
-        and command channel. This allows layers to observe engine context and send
-        commands, but prevents direct state modification.
-
+        Called by GraphEngine to inject the read-only runtime state and command channel.
+        This is invoked when the layer is registered with a `GraphEngine` instance.
+        Implementations should be idempotent.
+
        Args:
            graph_runtime_state: Read-only view of the runtime state
            command_channel: Channel for sending commands to the engine
        """
-        self.graph_runtime_state = graph_runtime_state
+        self._graph_runtime_state = graph_runtime_state
        self.command_channel = command_channel

    @abstractmethod
--- a/api/core/workflow/graph_engine/layers/debug_logging.py
+++ b/api/core/workflow/graph_engine/layers/debug_logging.py
@ -109,10 +109,8 @@ class DebugLoggingLayer(GraphEngineLayer):
        self.logger.info("=" * 80)
        self.logger.info("🚀 GRAPH EXECUTION STARTED")
        self.logger.info("=" * 80)
-
-        if self.graph_runtime_state:
-            # Log initial state
-            self.logger.info("Initial State:")
+        # Log initial state
+        self.logger.info("Initial State:")

    @override
    def on_event(self, event: GraphEngineEvent) -> None:
@ -243,8 +241,7 @@ class DebugLoggingLayer(GraphEngineLayer):
        self.logger.info("  Node retries: %s", self.retry_count)

        # Log final state if available
-        if self.graph_runtime_state and self.include_outputs:
-            if self.graph_runtime_state.outputs:
-                self.logger.info("Final outputs: %s", self._format_dict(self.graph_runtime_state.outputs))
+        if self.include_outputs and self.graph_runtime_state.outputs:
+            self.logger.info("Final outputs: %s", self._format_dict(self.graph_runtime_state.outputs))

        self.logger.info("=" * 80)
--- a/api/core/workflow/graph_engine/layers/persistence.py
+++ b/api/core/workflow/graph_engine/layers/persistence.py
@ -337,8 +337,6 @@ class WorkflowPersistenceLayer(GraphEngineLayer):
        if update_finished:
            execution.finished_at = naive_utc_now()
        runtime_state = self.graph_runtime_state
-        if runtime_state is None:
-            return
        execution.total_tokens = runtime_state.total_tokens
        execution.total_steps = runtime_state.node_run_steps
        execution.outputs = execution.outputs or runtime_state.outputs
@ -404,6 +402,4 @@ class WorkflowPersistenceLayer(GraphEngineLayer):

    def _system_variables(self) -> Mapping[str, Any]:
        runtime_state = self.graph_runtime_state
-        if runtime_state is None:
-            return {}
        return runtime_state.variable_pool.get_by_prefix(SYSTEM_VARIABLE_NODE_ID)
--- a/api/core/workflow/graph_engine/manager.py
+++ b/api/core/workflow/graph_engine/manager.py
@ -3,14 +3,20 @@ GraphEngine Manager for sending control commands via Redis channel.

 This module provides a simplified interface for controlling workflow executions
 using the new Redis command channel, without requiring user permission checks.
-Supports stop, pause, and resume operations.
 """

 import logging
+from collections.abc import Sequence
 from typing import final

 from core.workflow.graph_engine.command_channels.redis_channel import RedisChannel
-from core.workflow.graph_engine.entities.commands import AbortCommand, GraphEngineCommand, PauseCommand
+from core.workflow.graph_engine.entities.commands import (
+    AbortCommand,
+    GraphEngineCommand,
+    PauseCommand,
+    UpdateVariablesCommand,
+    VariableUpdate,
+)
 from extensions.ext_redis import redis_client

 logger = logging.getLogger(__name__)
@ -23,7 +29,6 @@ class GraphEngineManager:

    This class provides a simple interface for controlling workflow executions
    by sending commands through Redis channels, without user validation.
-    Supports stop and pause operations.
    """

    @staticmethod
@ -45,6 +50,16 @@ class GraphEngineManager:
        pause_command = PauseCommand(reason=reason or "User requested pause")
        GraphEngineManager._send_command(task_id, pause_command)

+    @staticmethod
+    def send_update_variables_command(task_id: str, updates: Sequence[VariableUpdate]) -> None:
+        """Send a command to update variables in a running workflow.
+
+        Does nothing when ``updates`` is empty.
+
+        Args:
+            task_id: Identifier forwarded to ``_send_command`` to select the target channel.
+            updates: Variable updates to wrap in a single ``UpdateVariablesCommand``.
+        """
+
+        # An empty batch is not sent at all.
+        if not updates:
+            return
+
+        update_command = UpdateVariablesCommand(updates=updates)
+        GraphEngineManager._send_command(task_id, update_command)
+
    @staticmethod
    def _send_command(task_id: str, command: GraphEngineCommand) -> None:
        """Send a command to the workflow-specific Redis channel."""
--- a/api/core/workflow/graph_engine/orchestration/dispatcher.py
+++ b/api/core/workflow/graph_engine/orchestration/dispatcher.py
@ -44,6 +44,7 @@ class Dispatcher:
        event_queue: queue.Queue[GraphNodeEventBase],
        event_handler: "EventHandler",
        execution_coordinator: ExecutionCoordinator,
+        stop_event: threading.Event,
        event_emitter: EventManager | None = None,
    ) -> None:
        """
@ -61,7 +62,7 @@ class Dispatcher:
        self._event_emitter = event_emitter

        self._thread: threading.Thread | None = None
-        self._stop_event = threading.Event()
+        self._stop_event = stop_event
        self._start_time: float | None = None

    def start(self) -> None:
@ -69,16 +70,14 @@ class Dispatcher:
        if self._thread and self._thread.is_alive():
            return

-        self._stop_event.clear()
        self._start_time = time.time()
        self._thread = threading.Thread(target=self._dispatcher_loop, name="GraphDispatcher", daemon=True)
        self._thread.start()

    def stop(self) -> None:
        """Stop the dispatcher thread."""
-        self._stop_event.set()
        if self._thread and self._thread.is_alive():
-            self._thread.join(timeout=10.0)
+            self._thread.join(timeout=2.0)

    def _dispatcher_loop(self) -> None:
        """Main dispatcher loop."""
--- a/api/core/workflow/graph_engine/ready_queue/factory.py
+++ b/api/core/workflow/graph_engine/ready_queue/factory.py
@ -2,6 +2,8 @@
 Factory for creating ReadyQueue instances from serialized state.
 """

+from __future__ import annotations
+
 from typing import TYPE_CHECKING

 from .in_memory import InMemoryReadyQueue
@ -11,7 +13,7 @@ if TYPE_CHECKING:
    from .protocol import ReadyQueue


-def create_ready_queue_from_state(state: ReadyQueueState) -> "ReadyQueue":
+def create_ready_queue_from_state(state: ReadyQueueState) -> ReadyQueue:
    """
    Create a ReadyQueue instance from a serialized state.

--- a/api/core/workflow/graph_engine/response_coordinator/session.py
+++ b/api/core/workflow/graph_engine/response_coordinator/session.py
@ -5,6 +5,8 @@ This module contains the private ResponseSession class used internally
 by ResponseStreamCoordinator to manage streaming sessions.
 """

+from __future__ import annotations
+
 from dataclasses import dataclass

 from core.workflow.nodes.answer.answer_node import AnswerNode
@ -27,7 +29,7 @@ class ResponseSession:
    index: int = 0  # Current position in the template segments

    @classmethod
-    def from_node(cls, node: Node) -> "ResponseSession":
+    def from_node(cls, node: Node) -> ResponseSession:
        """
        Create a ResponseSession from an AnswerNode or EndNode.

--- a/api/core/workflow/graph_engine/worker.py
+++ b/api/core/workflow/graph_engine/worker.py
@ -42,6 +42,7 @@ class Worker(threading.Thread):
        event_queue: queue.Queue[GraphNodeEventBase],
        graph: Graph,
        layers: Sequence[GraphEngineLayer],
+        stop_event: threading.Event,
        worker_id: int = 0,
        flask_app: Flask | None = None,
        context_vars: contextvars.Context | None = None,
@ -65,13 +66,16 @@ class Worker(threading.Thread):
        self._worker_id = worker_id
        self._flask_app = flask_app
        self._context_vars = context_vars
-        self._stop_event = threading.Event()
        self._last_task_time = time.time()
+        self._stop_event = stop_event
        self._layers = layers if layers is not None else []

    def stop(self) -> None:
-        """Signal the worker to stop processing."""
-        self._stop_event.set()
+        """Worker is controlled via shared stop_event from GraphEngine.
+
+        This method is a no-op retained for backward compatibility.
+        """
+        pass

    @property
    def is_idle(self) -> bool:
--- a/api/core/workflow/graph_engine/worker_management/worker_pool.py
+++ b/api/core/workflow/graph_engine/worker_management/worker_pool.py
@ -41,6 +41,7 @@ class WorkerPool:
        event_queue: queue.Queue[GraphNodeEventBase],
        graph: Graph,
        layers: list[GraphEngineLayer],
+        stop_event: threading.Event,
        flask_app: "Flask | None" = None,
        context_vars: "Context | None" = None,
        min_workers: int | None = None,
@ -81,6 +82,7 @@ class WorkerPool:
        self._worker_counter = 0
        self._lock = threading.RLock()
        self._running = False
+        self._stop_event = stop_event

        # No longer tracking worker states with callbacks to avoid lock contention

@ -135,7 +137,7 @@ class WorkerPool:
            # Wait for workers to finish
            for worker in self._workers:
                if worker.is_alive():
-                    worker.join(timeout=10.0)
+                    worker.join(timeout=2.0)

            self._workers.clear()

@ -152,6 +154,7 @@ class WorkerPool:
            worker_id=worker_id,
            flask_app=self._flask_app,
            context_vars=self._context_vars,
+            stop_event=self._stop_event,
        )

        worker.start()