refactor: move workflow package to dify_graph (#32844)

2026-05-20 00:37:15 +08:00 · 2026-03-02 18:42:30 +08:00
parent 9c33923985
commit c917838f9c
613 changed files with 2008 additions and 2012 deletions
--- a/api/dify_graph/nodes/code/code_node.py
+++ b/api/dify_graph/nodes/code/code_node.py
@ -0,0 +1,495 @@
+from collections.abc import Mapping, Sequence
+from decimal import Decimal
+from textwrap import dedent
+from typing import TYPE_CHECKING, Any, Protocol, cast
+
+from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus
+from dify_graph.node_events import NodeRunResult
+from dify_graph.nodes.base.node import Node
+from dify_graph.nodes.code.entities import CodeLanguage, CodeNodeData
+from dify_graph.nodes.code.limits import CodeNodeLimits
+from dify_graph.variables.segments import ArrayFileSegment
+from dify_graph.variables.types import SegmentType
+
+from .exc import (
+    CodeNodeError,
+    DepthLimitError,
+    OutputValidationError,
+)
+
+if TYPE_CHECKING:
+    from dify_graph.entities import GraphInitParams
+    from dify_graph.runtime import GraphRuntimeState
+
+
+class WorkflowCodeExecutor(Protocol):
+    def execute(
+        self,
+        *,
+        language: CodeLanguage,
+        code: str,
+        inputs: Mapping[str, Any],
+    ) -> Mapping[str, Any]: ...
+
+    def is_execution_error(self, error: Exception) -> bool: ...
+
+
+def _build_default_config(*, language: CodeLanguage, code: str) -> Mapping[str, object]:
+    return {
+        "type": "code",
+        "config": {
+            "variables": [
+                {"variable": "arg1", "value_selector": []},
+                {"variable": "arg2", "value_selector": []},
+            ],
+            "code_language": language,
+            "code": code,
+            "outputs": {"result": {"type": "string", "children": None}},
+        },
+    }
+
+
+_DEFAULT_CODE_BY_LANGUAGE: Mapping[CodeLanguage, str] = {
+    CodeLanguage.PYTHON3: dedent(
+        """
+        def main(arg1: str, arg2: str):
+            return {
+                "result": arg1 + arg2,
+            }
+        """
+    ),
+    CodeLanguage.JAVASCRIPT: dedent(
+        """
+        function main({arg1, arg2}) {
+            return {
+                result: arg1 + arg2
+            }
+        }
+        """
+    ),
+}
+
+
+class CodeNode(Node[CodeNodeData]):
+    node_type = NodeType.CODE
+    _limits: CodeNodeLimits
+
+    def __init__(
+        self,
+        id: str,
+        config: Mapping[str, Any],
+        graph_init_params: "GraphInitParams",
+        graph_runtime_state: "GraphRuntimeState",
+        *,
+        code_executor: WorkflowCodeExecutor,
+        code_limits: CodeNodeLimits,
+    ) -> None:
+        super().__init__(
+            id=id,
+            config=config,
+            graph_init_params=graph_init_params,
+            graph_runtime_state=graph_runtime_state,
+        )
+        self._code_executor: WorkflowCodeExecutor = code_executor
+        self._limits = code_limits
+
+    @classmethod
+    def get_default_config(cls, filters: Mapping[str, object] | None = None) -> Mapping[str, object]:
+        """
+        Get default config of node.
+        :param filters: filter by node config parameters.
+        :return:
+        """
+        code_language = CodeLanguage.PYTHON3
+        if filters:
+            code_language = cast(CodeLanguage, filters.get("code_language", CodeLanguage.PYTHON3))
+
+        default_code = _DEFAULT_CODE_BY_LANGUAGE.get(code_language)
+        if default_code is None:
+            raise CodeNodeError(f"Unsupported code language: {code_language}")
+        return _build_default_config(language=code_language, code=default_code)
+
+    @classmethod
+    def version(cls) -> str:
+        return "1"
+
+    def _run(self) -> NodeRunResult:
+        # Get code language
+        code_language = self.node_data.code_language
+        code = self.node_data.code
+
+        # Get variables
+        variables = {}
+        for variable_selector in self.node_data.variables:
+            variable_name = variable_selector.variable
+            variable = self.graph_runtime_state.variable_pool.get(variable_selector.value_selector)
+            if isinstance(variable, ArrayFileSegment):
+                variables[variable_name] = [v.to_dict() for v in variable.value] if variable.value else None
+            else:
+                variables[variable_name] = variable.to_object() if variable else None
+        # Run code
+        try:
+            result = self._code_executor.execute(
+                language=code_language,
+                code=code,
+                inputs=variables,
+            )
+
+            # Transform result
+            result = self._transform_result(result=result, output_schema=self.node_data.outputs)
+        except CodeNodeError as e:
+            return NodeRunResult(
+                status=WorkflowNodeExecutionStatus.FAILED, inputs=variables, error=str(e), error_type=type(e).__name__
+            )
+        except Exception as e:
+            if not self._code_executor.is_execution_error(e):
+                raise
+            return NodeRunResult(
+                status=WorkflowNodeExecutionStatus.FAILED, inputs=variables, error=str(e), error_type=type(e).__name__
+            )
+
+        return NodeRunResult(status=WorkflowNodeExecutionStatus.SUCCEEDED, inputs=variables, outputs=result)
+
+    def _check_string(self, value: str | None, variable: str) -> str | None:
+        """
+        Check string
+        :param value: value
+        :param variable: variable
+        :return:
+        """
+        if value is None:
+            return None
+
+        if len(value) > self._limits.max_string_length:
+            raise OutputValidationError(
+                f"The length of output variable `{variable}` must be"
+                f" less than {self._limits.max_string_length} characters"
+            )
+
+        return value.replace("\x00", "")
+
+    def _check_boolean(self, value: bool | None, variable: str) -> bool | None:
+        if value is None:
+            return None
+
+        return value
+
+    def _check_number(self, value: int | float | None, variable: str) -> int | float | None:
+        """
+        Check number
+        :param value: value
+        :param variable: variable
+        :return:
+        """
+        if value is None:
+            return None
+
+        if value > self._limits.max_number or value < self._limits.min_number:
+            raise OutputValidationError(
+                f"Output variable `{variable}` is out of range,"
+                f" it must be between {self._limits.min_number} and {self._limits.max_number}."
+            )
+
+        if isinstance(value, float):
+            decimal_value = Decimal(str(value)).normalize()
+            precision = -decimal_value.as_tuple().exponent if decimal_value.as_tuple().exponent < 0 else 0  # type: ignore[operator]
+            # raise error if precision is too high
+            if precision > self._limits.max_precision:
+                raise OutputValidationError(
+                    f"Output variable `{variable}` has too high precision,"
+                    f" it must be less than {self._limits.max_precision} digits."
+                )
+
+        return value
+
+    def _transform_result(
+        self,
+        result: Mapping[str, Any],
+        output_schema: dict[str, CodeNodeData.Output] | None,
+        prefix: str = "",
+        depth: int = 1,
+    ):
+        # TODO(QuantumGhost): Replace native Python lists with `Array*Segment` classes.
+        # Note that `_transform_result` may produce lists containing `None` values,
+        # which don't conform to the type requirements of `Array*Segment` classes.
+        if depth > self._limits.max_depth:
+            raise DepthLimitError(f"Depth limit {self._limits.max_depth} reached, object too deep.")
+
+        transformed_result: dict[str, Any] = {}
+        if output_schema is None:
+            # validate output thought instance type
+            for output_name, output_value in result.items():
+                if isinstance(output_value, dict):
+                    self._transform_result(
+                        result=output_value,
+                        output_schema=None,
+                        prefix=f"{prefix}.{output_name}" if prefix else output_name,
+                        depth=depth + 1,
+                    )
+                elif isinstance(output_value, bool):
+                    self._check_boolean(output_value, variable=f"{prefix}.{output_name}" if prefix else output_name)
+                elif isinstance(output_value, int | float):
+                    self._check_number(
+                        value=output_value, variable=f"{prefix}.{output_name}" if prefix else output_name
+                    )
+                elif isinstance(output_value, str):
+                    self._check_string(
+                        value=output_value, variable=f"{prefix}.{output_name}" if prefix else output_name
+                    )
+                elif isinstance(output_value, list):
+                    first_element = output_value[0] if len(output_value) > 0 else None
+                    if first_element is not None:
+                        if isinstance(first_element, int | float) and all(
+                            value is None or isinstance(value, int | float) for value in output_value
+                        ):
+                            for i, value in enumerate(output_value):
+                                self._check_number(
+                                    value=value,
+                                    variable=f"{prefix}.{output_name}[{i}]" if prefix else f"{output_name}[{i}]",
+                                )
+                        elif isinstance(first_element, str) and all(
+                            value is None or isinstance(value, str) for value in output_value
+                        ):
+                            for i, value in enumerate(output_value):
+                                self._check_string(
+                                    value=value,
+                                    variable=f"{prefix}.{output_name}[{i}]" if prefix else f"{output_name}[{i}]",
+                                )
+                        elif (
+                            isinstance(first_element, dict)
+                            and all(value is None or isinstance(value, dict) for value in output_value)
+                            or isinstance(first_element, list)
+                            and all(value is None or isinstance(value, list) for value in output_value)
+                        ):
+                            for i, value in enumerate(output_value):
+                                if value is not None:
+                                    self._transform_result(
+                                        result=value,
+                                        output_schema=None,
+                                        prefix=f"{prefix}.{output_name}[{i}]" if prefix else f"{output_name}[{i}]",
+                                        depth=depth + 1,
+                                    )
+                        else:
+                            raise OutputValidationError(
+                                f"Output {prefix}.{output_name} is not a valid array."
+                                f" make sure all elements are of the same type."
+                            )
+                elif output_value is None:
+                    pass
+                else:
+                    raise OutputValidationError(f"Output {prefix}.{output_name} is not a valid type.")
+
+            return result
+
+        parameters_validated = {}
+        for output_name, output_config in output_schema.items():
+            dot = "." if prefix else ""
+            if output_name not in result:
+                raise OutputValidationError(f"Output {prefix}{dot}{output_name} is missing.")
+
+            if output_config.type == SegmentType.OBJECT:
+                # check if output is object
+                if not isinstance(result.get(output_name), dict):
+                    if result[output_name] is None:
+                        transformed_result[output_name] = None
+                    else:
+                        raise OutputValidationError(
+                            f"Output {prefix}{dot}{output_name} is not an object,"
+                            f" got {type(result.get(output_name))} instead."
+                        )
+                else:
+                    transformed_result[output_name] = self._transform_result(
+                        result=result[output_name],
+                        output_schema=output_config.children,
+                        prefix=f"{prefix}.{output_name}",
+                        depth=depth + 1,
+                    )
+            elif output_config.type == SegmentType.NUMBER:
+                # check if number available
+                value = result.get(output_name)
+                if value is not None and not isinstance(value, (int, float)):
+                    raise OutputValidationError(
+                        f"Output {prefix}{dot}{output_name} is not a number,"
+                        f" got {type(result.get(output_name))} instead."
+                    )
+                checked = self._check_number(value=value, variable=f"{prefix}{dot}{output_name}")
+                # If the output is a boolean and the output schema specifies a NUMBER type,
+                # convert the boolean value to an integer.
+                #
+                # This ensures compatibility with existing workflows that may use
+                # `True` and `False` as values for NUMBER type outputs.
+                transformed_result[output_name] = self._convert_boolean_to_int(checked)
+
+            elif output_config.type == SegmentType.STRING:
+                # check if string available
+                value = result.get(output_name)
+                if value is not None and not isinstance(value, str):
+                    raise OutputValidationError(
+                        f"Output {prefix}{dot}{output_name} must be a string, got {type(value).__name__} instead"
+                    )
+                transformed_result[output_name] = self._check_string(
+                    value=value,
+                    variable=f"{prefix}{dot}{output_name}",
+                )
+            elif output_config.type == SegmentType.BOOLEAN:
+                transformed_result[output_name] = self._check_boolean(
+                    value=result[output_name],
+                    variable=f"{prefix}{dot}{output_name}",
+                )
+            elif output_config.type == SegmentType.ARRAY_NUMBER:
+                # check if array of number available
+                value = result[output_name]
+                if not isinstance(value, list):
+                    if value is None:
+                        transformed_result[output_name] = None
+                    else:
+                        raise OutputValidationError(
+                            f"Output {prefix}{dot}{output_name} is not an array, got {type(value)} instead."
+                        )
+                else:
+                    if len(value) > self._limits.max_number_array_length:
+                        raise OutputValidationError(
+                            f"The length of output variable `{prefix}{dot}{output_name}` must be"
+                            f" less than {self._limits.max_number_array_length} elements."
+                        )
+
+                    for i, inner_value in enumerate(value):
+                        if not isinstance(inner_value, (int, float)):
+                            raise OutputValidationError(
+                                f"The element at index {i} of output variable `{prefix}{dot}{output_name}` must be"
+                                f" a number."
+                            )
+                        _ = self._check_number(value=inner_value, variable=f"{prefix}{dot}{output_name}[{i}]")
+                    transformed_result[output_name] = [
+                        # If the element is a boolean and the output schema specifies a `array[number]` type,
+                        # convert the boolean value to an integer.
+                        #
+                        # This ensures compatibility with existing workflows that may use
+                        # `True` and `False` as values for NUMBER type outputs.
+                        self._convert_boolean_to_int(v)
+                        for v in value
+                    ]
+            elif output_config.type == SegmentType.ARRAY_STRING:
+                # check if array of string available
+                if not isinstance(result[output_name], list):
+                    if result[output_name] is None:
+                        transformed_result[output_name] = None
+                    else:
+                        raise OutputValidationError(
+                            f"Output {prefix}{dot}{output_name} is not an array,"
+                            f" got {type(result.get(output_name))} instead."
+                        )
+                else:
+                    if len(result[output_name]) > self._limits.max_string_array_length:
+                        raise OutputValidationError(
+                            f"The length of output variable `{prefix}{dot}{output_name}` must be"
+                            f" less than {self._limits.max_string_array_length} elements."
+                        )
+
+                    transformed_result[output_name] = [
+                        self._check_string(value=value, variable=f"{prefix}{dot}{output_name}[{i}]")
+                        for i, value in enumerate(result[output_name])
+                    ]
+            elif output_config.type == SegmentType.ARRAY_OBJECT:
+                # check if array of object available
+                if not isinstance(result[output_name], list):
+                    if result[output_name] is None:
+                        transformed_result[output_name] = None
+                    else:
+                        raise OutputValidationError(
+                            f"Output {prefix}{dot}{output_name} is not an array,"
+                            f" got {type(result.get(output_name))} instead."
+                        )
+                else:
+                    if len(result[output_name]) > self._limits.max_object_array_length:
+                        raise OutputValidationError(
+                            f"The length of output variable `{prefix}{dot}{output_name}` must be"
+                            f" less than {self._limits.max_object_array_length} elements."
+                        )
+
+                    for i, value in enumerate(result[output_name]):
+                        if not isinstance(value, dict):
+                            if value is None:
+                                pass
+                            else:
+                                raise OutputValidationError(
+                                    f"Output {prefix}{dot}{output_name}[{i}] is not an object,"
+                                    f" got {type(value)} instead at index {i}."
+                                )
+
+                    transformed_result[output_name] = [
+                        None
+                        if value is None
+                        else self._transform_result(
+                            result=value,
+                            output_schema=output_config.children,
+                            prefix=f"{prefix}{dot}{output_name}[{i}]",
+                            depth=depth + 1,
+                        )
+                        for i, value in enumerate(result[output_name])
+                    ]
+            elif output_config.type == SegmentType.ARRAY_BOOLEAN:
+                # check if array of object available
+                value = result[output_name]
+                if not isinstance(value, list):
+                    if value is None:
+                        transformed_result[output_name] = None
+                    else:
+                        raise OutputValidationError(
+                            f"Output {prefix}{dot}{output_name} is not an array,"
+                            f" got {type(result.get(output_name))} instead."
+                        )
+                else:
+                    for i, inner_value in enumerate(value):
+                        if inner_value is not None and not isinstance(inner_value, bool):
+                            raise OutputValidationError(
+                                f"Output {prefix}{dot}{output_name}[{i}] is not a boolean,"
+                                f" got {type(inner_value)} instead."
+                            )
+                        _ = self._check_boolean(value=inner_value, variable=f"{prefix}{dot}{output_name}[{i}]")
+                    transformed_result[output_name] = value
+
+            else:
+                raise OutputValidationError(f"Output type {output_config.type} is not supported.")
+
+            parameters_validated[output_name] = True
+
+        # check if all output parameters are validated
+        if len(parameters_validated) != len(result):
+            raise CodeNodeError("Not all output parameters are validated.")
+
+        return transformed_result
+
+    @classmethod
+    def _extract_variable_selector_to_variable_mapping(
+        cls,
+        *,
+        graph_config: Mapping[str, Any],
+        node_id: str,
+        node_data: Mapping[str, Any],
+    ) -> Mapping[str, Sequence[str]]:
+        _ = graph_config  # Explicitly mark as unused
+        # Create typed NodeData from dict
+        typed_node_data = CodeNodeData.model_validate(node_data)
+
+        return {
+            node_id + "." + variable_selector.variable: variable_selector.value_selector
+            for variable_selector in typed_node_data.variables
+        }
+
+    @property
+    def retry(self) -> bool:
+        return self.node_data.retry_config.retry_enabled
+
+    @staticmethod
+    def _convert_boolean_to_int(value: bool | int | float | None) -> int | float | None:
+        """This function convert boolean to integers when the output schema specifies a NUMBER type.
+
+        This ensures compatibility with existing workflows that may use
+        `True` and `False` as values for NUMBER type outputs.
+        """
+        if value is None:
+            return None
+        if isinstance(value, bool):
+            return int(value)
+        return value