# dify/api/dify_graph/nodes/code/code_node.py

from collections.abc import Mapping, Sequence
from decimal import Decimal
from textwrap import dedent
from typing import TYPE_CHECKING, Any, Protocol, cast

from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus
from dify_graph.node_events import NodeRunResult
from dify_graph.nodes.base.node import Node
from dify_graph.nodes.code.entities import CodeLanguage, CodeNodeData
from dify_graph.nodes.code.limits import CodeNodeLimits
from dify_graph.variables.segments import ArrayFileSegment
from dify_graph.variables.types import SegmentType

from .exc import (
    CodeNodeError,
    DepthLimitError,
    OutputValidationError,
)

if TYPE_CHECKING:
    from dify_graph.entities import GraphInitParams
    from dify_graph.runtime import GraphRuntimeState


class WorkflowCodeExecutor(Protocol):
    """Structural interface of the backend that ``CodeNode`` delegates code execution to.

    Any object providing these two methods can be injected as ``code_executor``.
    """

    def execute(
        self,
        *,
        language: CodeLanguage,
        code: str,
        inputs: Mapping[str, Any],
    ) -> Mapping[str, Any]:
        """
        Run ``code`` written in ``language`` with the given ``inputs``.

        :param language: language of the code to run
        :param code: source code to run
        :param inputs: mapping of input variable names to their values
        :return: mapping of output names to the values produced by the code
        """
        ...

    def is_execution_error(self, error: Exception) -> bool:
        """
        Tell whether ``error`` is an execution failure of this executor.

        ``CodeNode._run`` reports such errors as a failed node result and
        re-raises every other non-``CodeNodeError`` exception.

        :param error: exception raised while executing or validating the code
        :return: True when the error should be reported as a node failure
        """
        ...


def _build_default_config(*, language: CodeLanguage, code: str) -> Mapping[str, object]:
    """
    Assemble the default code-node configuration for one language.

    :param language: language stored under ``config.code_language``
    :param code: source snippet stored under ``config.code``
    :return: mapping with ``type`` set to ``"code"`` and a ``config`` that declares
        two input variables (``arg1`` and ``arg2``, both with empty selectors) and
        a single string output named ``result``
    """
    # Each variable gets its own (empty) selector list.
    input_variables = [{"variable": name, "value_selector": []} for name in ("arg1", "arg2")]
    result_output = {"type": "string", "children": None}
    node_config = {
        "variables": input_variables,
        "code_language": language,
        "code": code,
        "outputs": {"result": result_output},
    }
    return {"type": "code", "config": node_config}


# Starter snippet for each language that `CodeNode.get_default_config` can serve;
# a language missing from this mapping is rejected there as unsupported.
# Both snippets define a `main` entry point that receives `arg1` and `arg2` and
# returns an object with a single `result` key, matching the variables and the
# output declared by `_build_default_config`.
# NOTE: every literal starts with a newline, which `dedent` keeps, so each
# snippet begins with an empty line.
_DEFAULT_CODE_BY_LANGUAGE: Mapping[CodeLanguage, str] = {
    CodeLanguage.PYTHON3: dedent(
        """
        def main(arg1: str, arg2: str):
            return {
                "result": arg1 + arg2,
            }
        """
    ),
    CodeLanguage.JAVASCRIPT: dedent(
        """
        function main({arg1, arg2}) {
            return {
                result: arg1 + arg2
            }
        }
        """
    ),
}


class CodeNode(Node[CodeNodeData]):
    node_type = NodeType.CODE
    _limits: CodeNodeLimits

    def __init__(
        self,
        id: str,
        config: Mapping[str, Any],
        graph_init_params: "GraphInitParams",
        graph_runtime_state: "GraphRuntimeState",
        *,
        code_executor: WorkflowCodeExecutor,
        code_limits: CodeNodeLimits,
    ) -> None:
        """
        Initialise the base node and remember the injected collaborators.

        :param id: forwarded unchanged to the base ``Node`` initialiser
        :param config: forwarded unchanged to the base ``Node`` initialiser
        :param graph_init_params: forwarded unchanged to the base ``Node`` initialiser
        :param graph_runtime_state: forwarded unchanged to the base ``Node`` initialiser
        :param code_executor: backend used by ``_run`` to execute the node's code
        :param code_limits: limits applied when validating the code's outputs
        """
        super().__init__(
            id=id,
            config=config,
            graph_init_params=graph_init_params,
            graph_runtime_state=graph_runtime_state,
        )
        # Both collaborators are keyword-only and stored as private attributes.
        self._limits = code_limits
        self._code_executor: WorkflowCodeExecutor = code_executor

    @classmethod
    def get_default_config(cls, filters: Mapping[str, object] | None = None) -> Mapping[str, object]:
        """
        Build the default node config for the requested code language.

        :param filters: optional mapping; its ``code_language`` entry selects the
            language. Python 3 is used when ``filters`` is empty/None or has no
            such entry.
        :return: default config containing the starter snippet of that language
        :raises CodeNodeError: when no starter snippet exists for the language
        """
        requested = filters.get("code_language", CodeLanguage.PYTHON3) if filters else CodeLanguage.PYTHON3
        # `cast` only informs the type checker; the value is used as-is for the lookup.
        code_language = cast(CodeLanguage, requested)

        snippet = _DEFAULT_CODE_BY_LANGUAGE.get(code_language)
        if snippet is not None:
            return _build_default_config(language=code_language, code=snippet)
        raise CodeNodeError(f"Unsupported code language: {code_language}")

    @classmethod
    def version(cls) -> str:
        return "1"

    def _run(self) -> NodeRunResult:
        """
        Execute the node's code and wrap the outcome in a ``NodeRunResult``.

        Inputs are read from the variable pool, the code is run through the
        injected executor and its output is validated by ``_transform_result``.

        :return: a SUCCEEDED result carrying the validated outputs, or a FAILED
            result when a ``CodeNodeError`` is raised or the executor classifies
            the exception as an execution error
        :raises Exception: any other exception is re-raised unchanged
        """
        node_data = self.node_data
        language = node_data.code_language
        source = node_data.code

        # Resolve every declared input variable into a plain Python value.
        inputs = {}
        for selector in node_data.variables:
            segment = self.graph_runtime_state.variable_pool.get(selector.value_selector)
            if isinstance(segment, ArrayFileSegment):
                # File arrays are passed as a list of dicts; an empty array becomes None.
                inputs[selector.variable] = [item.to_dict() for item in segment.value] if segment.value else None
            elif segment:
                inputs[selector.variable] = segment.to_object()
            else:
                inputs[selector.variable] = None

        try:
            raw_outputs = self._code_executor.execute(
                language=language,
                code=source,
                inputs=inputs,
            )
            outputs = self._transform_result(result=raw_outputs, output_schema=node_data.outputs)
        except Exception as error:
            # CodeNodeError is always reported as a failure (the executor is not
            # consulted for it); anything else only if the executor says so.
            reportable = isinstance(error, CodeNodeError) or self._code_executor.is_execution_error(error)
            if not reportable:
                raise
            return NodeRunResult(
                status=WorkflowNodeExecutionStatus.FAILED,
                inputs=inputs,
                error=str(error),
                error_type=type(error).__name__,
            )

        return NodeRunResult(status=WorkflowNodeExecutionStatus.SUCCEEDED, inputs=inputs, outputs=outputs)

    def _check_string(self, value: str | None, variable: str) -> str | None:
        """
        Check string
        :param value: value
        :param variable: variable
        :return:
        """
        if value is None:
            return None

        if len(value) > self._limits.max_string_length:
            raise OutputValidationError(
                f"The length of output variable `{variable}` must be"
                f" less than {self._limits.max_string_length} characters"
            )

        return value.replace("\x00", "")

    def _check_boolean(self, value: bool | None, variable: str) -> bool | None:
        if value is None:
            return None

        return value

    def _check_number(self, value: int | float | None, variable: str) -> int | float | None:
        """
        Check number
        :param value: value
        :param variable: variable
        :return:
        """
        if value is None:
            return None

        if value > self._limits.max_number or value < self._limits.min_number:
            raise OutputValidationError(
                f"Output variable `{variable}` is out of range,"
                f" it must be between {self._limits.min_number} and {self._limits.max_number}."
            )

        if isinstance(value, float):
            decimal_value = Decimal(str(value)).normalize()
            precision = -decimal_value.as_tuple().exponent if decimal_value.as_tuple().exponent < 0 else 0  # type: ignore[operator]
            # raise error if precision is too high
            if precision > self._limits.max_precision:
                raise OutputValidationError(
                    f"Output variable `{variable}` has too high precision,"
                    f" it must be less than {self._limits.max_precision} digits."
                )

        return value

    def _transform_result(
        self,
        result: Mapping[str, Any],
        output_schema: dict[str, CodeNodeData.Output] | None,
        prefix: str = "",
        depth: int = 1,
    ):
        # TODO(QuantumGhost): Replace native Python lists with `Array*Segment` classes.
        # Note that `_transform_result` may produce lists containing `None` values,
        # which don't conform to the type requirements of `Array*Segment` classes.
        if depth > self._limits.max_depth:
            raise DepthLimitError(f"Depth limit {self._limits.max_depth} reached, object too deep.")

        transformed_result: dict[str, Any] = {}
        if output_schema is None:
            # validate output thought instance type
            for output_name, output_value in result.items():
                if isinstance(output_value, dict):
                    self._transform_result(
                        result=output_value,
                        output_schema=None,
                        prefix=f"{prefix}.{output_name}" if prefix else output_name,
                        depth=depth + 1,
                    )
                elif isinstance(output_value, bool):
                    self._check_boolean(output_value, variable=f"{prefix}.{output_name}" if prefix else output_name)
                elif isinstance(output_value, int | float):
                    self._check_number(
                        value=output_value, variable=f"{prefix}.{output_name}" if prefix else output_name
                    )
                elif isinstance(output_value, str):
                    self._check_string(
                        value=output_value, variable=f"{prefix}.{output_name}" if prefix else output_name
                    )
                elif isinstance(output_value, list):
                    first_element = output_value[0] if len(output_value) > 0 else None
                    if first_element is not None:
                        if isinstance(first_element, int | float) and all(
                            value is None or isinstance(value, int | float) for value in output_value
                        ):
                            for i, value in enumerate(output_value):
                                self._check_number(
                                    value=value,
                                    variable=f"{prefix}.{output_name}[{i}]" if prefix else f"{output_name}[{i}]",
                                )
                        elif isinstance(first_element, str) and all(
                            value is None or isinstance(value, str) for value in output_value
                        ):
                            for i, value in enumerate(output_value):
                                self._check_string(
                                    value=value,
                                    variable=f"{prefix}.{output_name}[{i}]" if prefix else f"{output_name}[{i}]",
                                )
                        elif (
                            isinstance(first_element, dict)
                            and all(value is None or isinstance(value, dict) for value in output_value)
                            or isinstance(first_element, list)
                            and all(value is None or isinstance(value, list) for value in output_value)
                        ):
                            for i, value in enumerate(output_value):
                                if value is not None:
                                    self._transform_result(
                                        result=value,
                                        output_schema=None,
                                        prefix=f"{prefix}.{output_name}[{i}]" if prefix else f"{output_name}[{i}]",
                                        depth=depth + 1,
                                    )
                        else:
                            raise OutputValidationError(
                                f"Output {prefix}.{output_name} is not a valid array."
                                f" make sure all elements are of the same type."
                            )
                elif output_value is None:
                    pass
                else:
                    raise OutputValidationError(f"Output {prefix}.{output_name} is not a valid type.")

            return result

        parameters_validated = {}
        for output_name, output_config in output_schema.items():
            dot = "." if prefix else ""
            if output_name not in result:
                raise OutputValidationError(f"Output {prefix}{dot}{output_name} is missing.")

            if output_config.type == SegmentType.OBJECT:
                # check if output is object
                if not isinstance(result.get(output_name), dict):
                    if result[output_name] is None:
                        transformed_result[output_name] = None
                    else:
                        raise OutputValidationError(
                            f"Output {prefix}{dot}{output_name} is not an object,"
                            f" got {type(result.get(output_name))} instead."
                        )
                else:
                    transformed_result[output_name] = self._transform_result(
                        result=result[output_name],
                        output_schema=output_config.children,
                        prefix=f"{prefix}.{output_name}",
                        depth=depth + 1,
                    )
            elif output_config.type == SegmentType.NUMBER:
                # check if number available
                value = result.get(output_name)
                if value is not None and not isinstance(value, (int, float)):
                    raise OutputValidationError(
                        f"Output {prefix}{dot}{output_name} is not a number,"
                        f" got {type(result.get(output_name))} instead."
                    )
                checked = self._check_number(value=value, variable=f"{prefix}{dot}{output_name}")
                # If the output is a boolean and the output schema specifies a NUMBER type,
                # convert the boolean value to an integer.
                #
                # This ensures compatibility with existing workflows that may use
                # `True` and `False` as values for NUMBER type outputs.
                transformed_result[output_name] = self._convert_boolean_to_int(checked)

            elif output_config.type == SegmentType.STRING:
                # check if string available
                value = result.get(output_name)
                if value is not None and not isinstance(value, str):
                    raise OutputValidationError(
                        f"Output {prefix}{dot}{output_name} must be a string, got {type(value).__name__} instead"
                    )
                transformed_result[output_name] = self._check_string(
                    value=value,
                    variable=f"{prefix}{dot}{output_name}",
                )
            elif output_config.type == SegmentType.BOOLEAN:
                transformed_result[output_name] = self._check_boolean(
                    value=result[output_name],
                    variable=f"{prefix}{dot}{output_name}",
                )
            elif output_config.type == SegmentType.ARRAY_NUMBER:
                # check if array of number available
                value = result[output_name]
                if not isinstance(value, list):
                    if value is None:
                        transformed_result[output_name] = None
                    else:
                        raise OutputValidationError(
                            f"Output {prefix}{dot}{output_name} is not an array, got {type(value)} instead."
                        )
                else:
                    if len(value) > self._limits.max_number_array_length:
                        raise OutputValidationError(
                            f"The length of output variable `{prefix}{dot}{output_name}` must be"
                            f" less than {self._limits.max_number_array_length} elements."
                        )

                    for i, inner_value in enumerate(value):
                        if not isinstance(inner_value, (int, float)):
                            raise OutputValidationError(
                                f"The element at index {i} of output variable `{prefix}{dot}{output_name}` must be"
                                f" a number."
                            )
                        _ = self._check_number(value=inner_value, variable=f"{prefix}{dot}{output_name}[{i}]")
                    transformed_result[output_name] = [
                        # If the element is a boolean and the output schema specifies a `array[number]` type,
                        # convert the boolean value to an integer.
                        #
                        # This ensures compatibility with existing workflows that may use
                        # `True` and `False` as values for NUMBER type outputs.
                        self._convert_boolean_to_int(v)
                        for v in value
                    ]
            elif output_config.type == SegmentType.ARRAY_STRING:
                # check if array of string available
                if not isinstance(result[output_name], list):
                    if result[output_name] is None:
                        transformed_result[output_name] = None
                    else:
                        raise OutputValidationError(
                            f"Output {prefix}{dot}{output_name} is not an array,"
                            f" got {type(result.get(output_name))} instead."
                        )
                else:
                    if len(result[output_name]) > self._limits.max_string_array_length:
                        raise OutputValidationError(
                            f"The length of output variable `{prefix}{dot}{output_name}` must be"
                            f" less than {self._limits.max_string_array_length} elements."
                        )

                    transformed_result[output_name] = [
                        self._check_string(value=value, variable=f"{prefix}{dot}{output_name}[{i}]")
                        for i, value in enumerate(result[output_name])
                    ]
            elif output_config.type == SegmentType.ARRAY_OBJECT:
                # check if array of object available
                if not isinstance(result[output_name], list):
                    if result[output_name] is None:
                        transformed_result[output_name] = None
                    else:
                        raise OutputValidationError(
                            f"Output {prefix}{dot}{output_name} is not an array,"
                            f" got {type(result.get(output_name))} instead."
                        )
                else:
                    if len(result[output_name]) > self._limits.max_object_array_length:
                        raise OutputValidationError(
                            f"The length of output variable `{prefix}{dot}{output_name}` must be"
                            f" less than {self._limits.max_object_array_length} elements."
                        )

                    for i, value in enumerate(result[output_name]):
                        if not isinstance(value, dict):
                            if value is None:
                                pass
                            else:
                                raise OutputValidationError(
                                    f"Output {prefix}{dot}{output_name}[{i}] is not an object,"
                                    f" got {type(value)} instead at index {i}."
                                )

                    transformed_result[output_name] = [
                        None
                        if value is None
                        else self._transform_result(
                            result=value,
                            output_schema=output_config.children,
                            prefix=f"{prefix}{dot}{output_name}[{i}]",
                            depth=depth + 1,
                        )
                        for i, value in enumerate(result[output_name])
                    ]
            elif output_config.type == SegmentType.ARRAY_BOOLEAN:
                # check if array of object available
                value = result[output_name]
                if not isinstance(value, list):
                    if value is None:
                        transformed_result[output_name] = None
                    else:
                        raise OutputValidationError(
                            f"Output {prefix}{dot}{output_name} is not an array,"
                            f" got {type(result.get(output_name))} instead."
                        )
                else:
                    for i, inner_value in enumerate(value):
                        if inner_value is not None and not isinstance(inner_value, bool):
                            raise OutputValidationError(
                                f"Output {prefix}{dot}{output_name}[{i}] is not a boolean,"
                                f" got {type(inner_value)} instead."
                            )
                        _ = self._check_boolean(value=inner_value, variable=f"{prefix}{dot}{output_name}[{i}]")
                    transformed_result[output_name] = value

            else:
                raise OutputValidationError(f"Output type {output_config.type} is not supported.")

            parameters_validated[output_name] = True

        # check if all output parameters are validated
        if len(parameters_validated) != len(result):
            raise CodeNodeError("Not all output parameters are validated.")

        return transformed_result

    @classmethod
    def _extract_variable_selector_to_variable_mapping(
        cls,
        *,
        graph_config: Mapping[str, Any],
        node_id: str,
        node_data: Mapping[str, Any],
    ) -> Mapping[str, Sequence[str]]:
        """
        Map every input variable of the node to the selector it reads from.

        :param graph_config: not used by this node type
        :param node_id: id of the node, used as key prefix
        :param node_data: raw node data, validated into ``CodeNodeData``
        :return: mapping from ``"<node_id>.<variable>"`` to that variable's value selector
        """
        _ = graph_config  # Explicitly mark as unused
        parsed = CodeNodeData.model_validate(node_data)

        mapping: dict[str, Sequence[str]] = {}
        for selector in parsed.variables:
            key = ".".join((node_id, selector.variable))
            mapping[key] = selector.value_selector
        return mapping

    @property
    def retry(self) -> bool:
        return self.node_data.retry_config.retry_enabled

    @staticmethod
    def _convert_boolean_to_int(value: bool | int | float | None) -> int | float | None:
        """This function convert boolean to integers when the output schema specifies a NUMBER type.

        This ensures compatibility with existing workflows that may use
        `True` and `False` as values for NUMBER type outputs.
        """
        if value is None:
            return None
        if isinstance(value, bool):
            return int(value)
        return value