feat(api): Implement truncation for WorkflowNodeExecution

This commit is contained in:
QuantumGhost
2025-08-29 14:49:09 +08:00
parent 2fd337e610
commit 6b9d2e98b9
10 changed files with 366 additions and 46 deletions

View File

@ -11,7 +11,7 @@ from datetime import datetime
from enum import StrEnum
from typing import Any, Optional
from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, PrivateAttr
from core.workflow.nodes.enums import NodeType
@ -90,6 +90,7 @@ class WorkflowNodeExecution(BaseModel):
title: str # Display title of the node
# Execution data
# The `inputs` and `outputs` fields hold the full content
inputs: Optional[Mapping[str, Any]] = None # Input variables used by this node
process_data: Optional[Mapping[str, Any]] = None # Intermediate processing data
outputs: Optional[Mapping[str, Any]] = None # Output variables produced by this node
@ -106,6 +107,58 @@ class WorkflowNodeExecution(BaseModel):
created_at: datetime # When execution started
finished_at: Optional[datetime] = None # When execution completed
# Truncated variants of `inputs` / `outputs` / `process_data`, kept out of the
# serialized model state via pydantic PrivateAttr. `None` means no truncated
# variant has been set (see the `*_truncated` properties below).
_truncated_inputs: Mapping[str, Any] | None = PrivateAttr(None)
_truncated_outputs: Mapping[str, Any] | None = PrivateAttr(None)
_truncated_process_data: Mapping[str, Any] | None = PrivateAttr(None)
def get_truncated_inputs(self) -> Mapping[str, Any] | None:
return self._truncated_inputs
def get_truncated_outputs(self) -> Mapping[str, Any] | None:
return self._truncated_outputs
def get_truncated_process_data(self) -> Mapping[str, Any] | None:
return self._truncated_process_data
def set_truncated_inputs(self, truncated_inputs: Mapping[str, Any] | None):
self._truncated_inputs = truncated_inputs
def set_truncated_outputs(self, truncated_outputs: Mapping[str, Any] | None):
self._truncated_outputs = truncated_outputs
def set_truncated_process_data(self, truncated_process_data: Mapping[str, Any] | None):
self._truncated_process_data = truncated_process_data
def get_response_inputs(self) -> Mapping[str, Any] | None:
inputs = self.get_truncated_inputs()
if inputs:
return inputs
return self.inputs
@property
def inputs_truncated(self):
    """Whether a truncated variant of ``inputs`` has been set."""
    return self._truncated_inputs is not None
@property
def outputs_truncated(self):
    """Whether a truncated variant of ``outputs`` has been set."""
    return self._truncated_outputs is not None
@property
def process_data_truncated(self):
    """Whether a truncated variant of ``process_data`` has been set."""
    return self._truncated_process_data is not None
def get_response_outputs(self) -> Mapping[str, Any] | None:
outputs = self.get_truncated_outputs()
if outputs is not None:
return outputs
return self.outputs
def get_response_process_data(self) -> Mapping[str, Any] | None:
process_data = self.get_truncated_process_data()
if process_data is not None:
return process_data
return self.process_data
def update_from_mapping(
self,
inputs: Optional[Mapping[str, Any]] = None,

View File

@ -30,6 +30,12 @@ class WorkflowNodeExecutionRepository(Protocol):
"""
Save or update a NodeExecution instance.
This method saves all data on the `WorkflowNodeExecution` object, except for `inputs`, `process_data`,
and `outputs`. Its primary purpose is to persist the status and various metadata, such as execution time
and execution-related details.
Its main purpose is to save the status and various metadata (execution time, execution metadata, etc.).
This method handles both creating new records and updating existing ones.
The implementation should determine whether to create or update based on
the execution's ID or other identifying fields.
@ -39,6 +45,14 @@ class WorkflowNodeExecutionRepository(Protocol):
"""
...
def save_execution_data(self, execution: WorkflowNodeExecution):
    """Persist the ``inputs``, ``process_data``, and ``outputs`` of *execution*.

    Complements ``save``, which deliberately skips these (potentially large)
    fields. Any of the three fields that is ``None`` on *execution* is left
    unmodified in storage rather than overwritten.
    """
    ...
def get_by_workflow_run(
self,
workflow_run_id: str,

View File

@ -188,6 +188,7 @@ class WorkflowCycleManager:
)
self._workflow_node_execution_repository.save(domain_execution)
self._workflow_node_execution_repository.save_execution_data(domain_execution)
return domain_execution
def handle_workflow_node_execution_failed(
@ -220,6 +221,7 @@ class WorkflowCycleManager:
)
self._workflow_node_execution_repository.save(domain_execution)
self._workflow_node_execution_repository.save_execution_data(domain_execution)
return domain_execution
def handle_workflow_node_execution_retried(
@ -242,7 +244,9 @@ class WorkflowCycleManager:
domain_execution.update_from_mapping(inputs=inputs, outputs=outputs, metadata=metadata)
return self._save_and_cache_node_execution(domain_execution)
execution = self._save_and_cache_node_execution(domain_execution)
self._workflow_node_execution_repository.save_execution_data(execution)
return execution
def _get_workflow_execution_or_raise_error(self, id: str, /) -> WorkflowExecution:
# Check cache first
@ -275,7 +279,10 @@ class WorkflowCycleManager:
return execution
def _save_and_cache_node_execution(self, execution: WorkflowNodeExecution) -> WorkflowNodeExecution:
"""Save node execution to repository and cache it if it has an ID."""
"""Save node execution to repository and cache it if it has an ID.
This does not persist the `inputs` / `process_data` / `outputs` fields of the execution model.
"""
self._workflow_node_execution_repository.save(execution)
if execution.node_execution_id:
self._node_execution_cache[execution.node_execution_id] = execution

View File

@ -1,6 +1,6 @@
from collections.abc import Mapping
from decimal import Decimal
from typing import Any
from typing import Any, overload
from pydantic import BaseModel
@ -9,6 +9,11 @@ from core.variables import Segment
class WorkflowRuntimeTypeConverter:
@overload
def to_json_encodable(self, value: Mapping[str, Any]) -> Mapping[str, Any]: ...
@overload
def to_json_encodable(self, value: None) -> None: ...
def to_json_encodable(self, value: Mapping[str, Any] | None) -> Mapping[str, Any] | None:
    """Convert *value* into a JSON-encodable mapping; ``None`` passes through unchanged."""
    encoded = self._to_json_encodable_recursive(value)
    if encoded is None or isinstance(encoded, Mapping):
        return encoded
    return dict(encoded)