refactor: move workflow package to dify_graph (#32844)

2026-05-24 02:47:53 +08:00 · 2026-03-02 18:42:30 +08:00
parent 9c33923985
commit c917838f9c
613 changed files with 2008 additions and 2012 deletions
--- a/api/dify_graph/graph_engine/error_handler.py
+++ b/api/dify_graph/graph_engine/error_handler.py
@ -0,0 +1,211 @@
+"""
+Main error handler that coordinates error strategies.
+"""
+
+import logging
+import time
+from typing import TYPE_CHECKING, final
+
+from dify_graph.enums import (
+    ErrorStrategy as ErrorStrategyEnum,
+)
+from dify_graph.enums import (
+    WorkflowNodeExecutionMetadataKey,
+    WorkflowNodeExecutionStatus,
+)
+from dify_graph.graph import Graph
+from dify_graph.graph_events import (
+    GraphNodeEventBase,
+    NodeRunExceptionEvent,
+    NodeRunFailedEvent,
+    NodeRunRetryEvent,
+)
+from dify_graph.node_events import NodeRunResult
+
+if TYPE_CHECKING:
+    from .domain import GraphExecution
+
+logger = logging.getLogger(__name__)
+
+
+@final
+class ErrorHandler:
+    """
+    Coordinates error handling strategies for node failures.
+
+    This acts as a facade for the various error strategies,
+    selecting and applying the appropriate strategy based on
+    node configuration.
+    """
+
+    def __init__(self, graph: Graph, graph_execution: "GraphExecution") -> None:
+        """
+        Initialize the error handler.
+
+        Args:
+            graph: The workflow graph
+            graph_execution: The graph execution state
+        """
+        self._graph = graph
+        self._graph_execution = graph_execution
+
+    def handle_node_failure(self, event: NodeRunFailedEvent) -> GraphNodeEventBase | None:
+        """
+        Handle a node failure event.
+
+        Selects and applies the appropriate error strategy based on
+        the node's configuration.
+
+        Args:
+            event: The node failure event
+
+        Returns:
+            Optional new event to process, or None to abort
+        """
+        node = self._graph.nodes[event.node_id]
+        # Get retry count from NodeExecution
+        node_execution = self._graph_execution.get_or_create_node_execution(event.node_id)
+        retry_count = node_execution.retry_count
+
+        # First check if retry is configured and not exhausted
+        if node.retry and retry_count < node.retry_config.max_retries:
+            result = self._handle_retry(event, retry_count)
+            if result:
+                # Retry count will be incremented when NodeRunRetryEvent is handled
+                return result
+
+        # Apply configured error strategy
+        strategy = node.error_strategy
+
+        match strategy:
+            case None:
+                return self._handle_abort(event)
+            case ErrorStrategyEnum.FAIL_BRANCH:
+                return self._handle_fail_branch(event)
+            case ErrorStrategyEnum.DEFAULT_VALUE:
+                return self._handle_default_value(event)
+
+    def _handle_abort(self, event: NodeRunFailedEvent):
+        """
+        Handle error by aborting execution.
+
+        This is the default strategy when no other strategy is specified.
+        It stops the entire graph execution when a node fails.
+
+        Args:
+            event: The failure event
+
+        Returns:
+            None - signals abortion
+        """
+        logger.error("Node %s failed with ABORT strategy: %s", event.node_id, event.error)
+        # Return None to signal that execution should stop
+
+    def _handle_retry(self, event: NodeRunFailedEvent, retry_count: int):
+        """
+        Handle error by retrying the node.
+
+        This strategy re-attempts node execution up to a configured
+        maximum number of retries with configurable intervals.
+
+        Args:
+            event: The failure event
+            retry_count: Current retry attempt count
+
+        Returns:
+            NodeRunRetryEvent if retry should occur, None otherwise
+        """
+        node = self._graph.nodes[event.node_id]
+
+        # Check if we've exceeded max retries
+        if not node.retry or retry_count >= node.retry_config.max_retries:
+            return None
+
+        # Wait for retry interval
+        time.sleep(node.retry_config.retry_interval_seconds)
+
+        # Create retry event
+        return NodeRunRetryEvent(
+            id=event.id,
+            node_title=node.title,
+            node_id=event.node_id,
+            node_type=event.node_type,
+            node_run_result=event.node_run_result,
+            start_at=event.start_at,
+            error=event.error,
+            retry_index=retry_count + 1,
+        )
+
+    def _handle_fail_branch(self, event: NodeRunFailedEvent):
+        """
+        Handle error by taking the fail branch.
+
+        This strategy converts failures to exceptions and routes execution
+        through a designated fail-branch edge.
+
+        Args:
+            event: The failure event
+
+        Returns:
+            NodeRunExceptionEvent to continue via fail branch
+        """
+        outputs = {
+            "error_message": event.node_run_result.error,
+            "error_type": event.node_run_result.error_type,
+        }
+
+        return NodeRunExceptionEvent(
+            id=event.id,
+            node_id=event.node_id,
+            node_type=event.node_type,
+            start_at=event.start_at,
+            node_run_result=NodeRunResult(
+                status=WorkflowNodeExecutionStatus.EXCEPTION,
+                inputs=event.node_run_result.inputs,
+                process_data=event.node_run_result.process_data,
+                outputs=outputs,
+                edge_source_handle="fail-branch",
+                metadata={
+                    WorkflowNodeExecutionMetadataKey.ERROR_STRATEGY: ErrorStrategyEnum.FAIL_BRANCH,
+                },
+            ),
+            error=event.error,
+        )
+
+    def _handle_default_value(self, event: NodeRunFailedEvent):
+        """
+        Handle error by using default values.
+
+        This strategy allows nodes to fail gracefully by providing
+        predefined default output values.
+
+        Args:
+            event: The failure event
+
+        Returns:
+            NodeRunExceptionEvent with default values
+        """
+        node = self._graph.nodes[event.node_id]
+
+        outputs = {
+            **node.default_value_dict,
+            "error_message": event.node_run_result.error,
+            "error_type": event.node_run_result.error_type,
+        }
+
+        return NodeRunExceptionEvent(
+            id=event.id,
+            node_id=event.node_id,
+            node_type=event.node_type,
+            start_at=event.start_at,
+            node_run_result=NodeRunResult(
+                status=WorkflowNodeExecutionStatus.EXCEPTION,
+                inputs=event.node_run_result.inputs,
+                process_data=event.node_run_result.process_data,
+                outputs=outputs,
+                metadata={
+                    WorkflowNodeExecutionMetadataKey.ERROR_STRATEGY: ErrorStrategyEnum.DEFAULT_VALUE,
+                },
+            ),
+            error=event.error,
+        )