Merge remote-tracking branch 'origin/main' into feat/trigger

yessenia
2025-09-25 17:14:24 +08:00
3013 changed files with 148826 additions and 44294 deletions


@ -2,26 +2,28 @@ import json
import logging
from collections.abc import Generator, Mapping, Sequence
from datetime import datetime
from enum import Enum, StrEnum
from typing import TYPE_CHECKING, Any, Optional, Union
from enum import StrEnum
from typing import TYPE_CHECKING, Any, Optional, Union, cast
from uuid import uuid4
import sqlalchemy as sa
from sqlalchemy import DateTime, exists, orm, select
from sqlalchemy import DateTime, Select, exists, orm, select
from core.file.constants import maybe_file_object
from core.file.models import File
from core.variables import utils as variable_utils
from core.variables.variables import FloatVariable, IntegerVariable, StringVariable
from core.workflow.constants import CONVERSATION_VARIABLE_NODE_ID, SYSTEM_VARIABLE_NODE_ID
from core.workflow.nodes.enums import NodeType
from core.workflow.enums import NodeType
from extensions.ext_storage import Storage
from factories.variable_factory import TypeMismatchError, build_segment_with_type
from libs.datetime_utils import naive_utc_now
from libs.uuid_utils import uuidv7
from ._workflow_exc import NodeNotFoundError, WorkflowDataError
if TYPE_CHECKING:
from models.model import AppMode
from models.model import AppMode, UploadFile
from sqlalchemy import Index, PrimaryKeyConstraint, String, UniqueConstraint, func
from sqlalchemy.orm import Mapped, declared_attr, mapped_column
@ -35,19 +37,20 @@ from libs import helper
from .account import Account
from .base import Base
from .engine import db
from .enums import CreatorUserRole, DraftVariableType
from .enums import CreatorUserRole, DraftVariableType, ExecutionOffLoadType
from .types import EnumText, StringUUID
logger = logging.getLogger(__name__)
class WorkflowType(Enum):
class WorkflowType(StrEnum):
"""
Workflow Type Enum
"""
WORKFLOW = "workflow"
CHAT = "chat"
RAG_PIPELINE = "rag-pipeline"
@classmethod
def value_of(cls, value: str) -> "WorkflowType":
@ -130,7 +133,7 @@ class Workflow(Base):
_features: Mapped[str] = mapped_column("features", sa.TEXT)
created_by: Mapped[str] = mapped_column(StringUUID, nullable=False)
created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp())
updated_by: Mapped[Optional[str]] = mapped_column(StringUUID)
updated_by: Mapped[str | None] = mapped_column(StringUUID)
updated_at: Mapped[datetime] = mapped_column(
DateTime,
nullable=False,
@ -143,6 +146,9 @@ class Workflow(Base):
_conversation_variables: Mapped[str] = mapped_column(
"conversation_variables", sa.Text, nullable=False, server_default="{}"
)
_rag_pipeline_variables: Mapped[str] = mapped_column(
"rag_pipeline_variables", db.Text, nullable=False, server_default="{}"
)
VERSION_DRAFT = "draft"
@ -159,6 +165,7 @@ class Workflow(Base):
created_by: str,
environment_variables: Sequence[Variable],
conversation_variables: Sequence[Variable],
rag_pipeline_variables: list[dict],
marked_name: str = "",
marked_comment: str = "",
) -> "Workflow":
@ -173,6 +180,7 @@ class Workflow(Base):
workflow.created_by = created_by
workflow.environment_variables = environment_variables or []
workflow.conversation_variables = conversation_variables or []
workflow.rag_pipeline_variables = rag_pipeline_variables or []
workflow.marked_name = marked_name
workflow.marked_comment = marked_comment
workflow.created_at = naive_utc_now()
@ -224,7 +232,7 @@ class Workflow(Base):
raise WorkflowDataError("nodes not found in workflow graph")
try:
node_config = next(filter(lambda node: node["id"] == node_id, nodes))
node_config: dict[str, Any] = next(filter(lambda node: node["id"] == node_id, nodes))
except StopIteration:
raise NodeNotFoundError(node_id)
assert isinstance(node_config, dict)
@ -282,7 +290,7 @@ class Workflow(Base):
return self._features
@features.setter
def features(self, value: str) -> None:
def features(self, value: str):
self._features = value
@property
@ -337,7 +345,7 @@ class Workflow(Base):
else:
yield from ((node["id"], node["data"]) for node in graph_dict["nodes"])
def user_input_form(self, to_old_structure: bool = False) -> list:
def user_input_form(self, to_old_structure: bool = False) -> list[Any]:
# get start node from graph
if not self.graph:
return []
@ -354,7 +362,7 @@ class Workflow(Base):
variables: list[Any] = start_node.get("data", {}).get("variables", [])
if to_old_structure:
old_structure_variables = []
old_structure_variables: list[dict[str, Any]] = []
for variable in variables:
old_structure_variables.append({variable["type"]: variable})
@ -362,6 +370,12 @@ class Workflow(Base):
return variables
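# A minimal sketch of the old-structure conversion in `user_input_form` above
# (hypothetical variable definition; field values are illustrative, not from this diff):
#
#   variables = [{"type": "text-input", "variable": "query", "label": "Query"}]
#   # with to_old_structure=True, each entry is wrapped in a dict keyed by its type:
#   # [{"text-input": {"type": "text-input", "variable": "query", "label": "Query"}}]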
def rag_pipeline_user_input_form(self) -> list:
# rag pipeline variables are stored on the workflow itself, not in a start node
variables: list[Any] = self.rag_pipeline_variables
return variables
@property
def unique_hash(self) -> str:
"""
@ -394,9 +408,7 @@ class Workflow(Base):
@property
def environment_variables(self) -> Sequence[StringVariable | IntegerVariable | FloatVariable | SecretVariable]:
# TODO: find some way to init `self._environment_variables` when instance created.
if self._environment_variables is None:
self._environment_variables = "{}"
# _environment_variables is guaranteed to be non-None due to server_default="{}"
# Use workflow.tenant_id to avoid relying on request user in background threads
tenant_id = self.tenant_id
@ -404,23 +416,24 @@ class Workflow(Base):
if not tenant_id:
return []
environment_variables_dict: dict[str, Any] = json.loads(self._environment_variables)
environment_variables_dict: dict[str, Any] = json.loads(self._environment_variables or "{}")
results = [
variable_factory.build_environment_variable_from_mapping(v) for v in environment_variables_dict.values()
]
# decrypt secret variables value
def decrypt_func(var):
def decrypt_func(var: Variable) -> StringVariable | IntegerVariable | FloatVariable | SecretVariable:
if isinstance(var, SecretVariable):
return var.model_copy(update={"value": encrypter.decrypt_token(tenant_id=tenant_id, token=var.value)})
elif isinstance(var, (StringVariable, IntegerVariable, FloatVariable)):
return var
else:
raise AssertionError("this statement should be unreachable.")
# Other variable types are not supported for environment variables
raise AssertionError(f"Unexpected variable type for environment variable: {type(var)}")
decrypted_results: list[SecretVariable | StringVariable | IntegerVariable | FloatVariable] = list(
map(decrypt_func, results)
)
decrypted_results: list[SecretVariable | StringVariable | IntegerVariable | FloatVariable] = [
decrypt_func(var) for var in results
]
return decrypted_results
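# A round-trip sketch for the property above (assumes an existing `workflow` bound to
# a tenant; `secret_var` is an illustrative SecretVariable, not from this diff): the
# setter encrypts SecretVariable values via `encrypter.encrypt_token`, and this getter
# transparently decrypts them on read.
#
#   workflow.environment_variables = [secret_var]    # stored encrypted in `_environment_variables`
#   plain = workflow.environment_variables[0].value  # read back decrypted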
@environment_variables.setter
@ -448,7 +461,7 @@ class Workflow(Base):
value[i] = origin_variables_dictionary[variable.id].model_copy(update={"name": variable.name})
# encrypt secret variables value
def encrypt_func(var):
def encrypt_func(var: Variable) -> Variable:
if isinstance(var, SecretVariable):
return var.model_copy(update={"value": encrypter.encrypt_token(tenant_id=tenant_id, token=var.value)})
else:
@ -473,26 +486,42 @@ class Workflow(Base):
"features": self.features_dict,
"environment_variables": [var.model_dump(mode="json") for var in environment_variables],
"conversation_variables": [var.model_dump(mode="json") for var in self.conversation_variables],
"rag_pipeline_variables": self.rag_pipeline_variables,
}
return result
@property
def conversation_variables(self) -> Sequence[Variable]:
# TODO: find some way to init `self._conversation_variables` when instance created.
if self._conversation_variables is None:
self._conversation_variables = "{}"
# _conversation_variables is guaranteed to be non-None due to server_default="{}"
variables_dict: dict[str, Any] = json.loads(self._conversation_variables)
results = [variable_factory.build_conversation_variable_from_mapping(v) for v in variables_dict.values()]
return results
@conversation_variables.setter
def conversation_variables(self, value: Sequence[Variable]) -> None:
def conversation_variables(self, value: Sequence[Variable]):
self._conversation_variables = json.dumps(
{var.name: var.model_dump() for var in value},
ensure_ascii=False,
)
@property
def rag_pipeline_variables(self) -> list[dict]:
# TODO: find some way to init `self._rag_pipeline_variables` when instance created.
if self._rag_pipeline_variables is None:
self._rag_pipeline_variables = "{}"
variables_dict: dict[str, Any] = json.loads(self._rag_pipeline_variables)
results = list(variables_dict.values())
return results
@rag_pipeline_variables.setter
def rag_pipeline_variables(self, values: list[dict]) -> None:
self._rag_pipeline_variables = json.dumps(
{item["variable"]: item for item in values},
ensure_ascii=False,
)
@staticmethod
def version_from_datetime(d: datetime) -> str:
return str(d)
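# A round-trip sketch for `rag_pipeline_variables` (hypothetical values): the setter
# serializes the list as a JSON object keyed by each item's "variable" name, and the
# getter returns the object's values as a list, so entries need unique variable names.
#
#   workflow.rag_pipeline_variables = [{"variable": "chunk_size", "type": "number"}]
#   assert workflow.rag_pipeline_variables == [{"variable": "chunk_size", "type": "number"}]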
@ -550,18 +579,18 @@ class WorkflowRun(Base):
type: Mapped[str] = mapped_column(String(255))
triggered_from: Mapped[str] = mapped_column(String(255))
version: Mapped[str] = mapped_column(String(255))
graph: Mapped[Optional[str]] = mapped_column(sa.Text)
inputs: Mapped[Optional[str]] = mapped_column(sa.Text)
graph: Mapped[str | None] = mapped_column(sa.Text)
inputs: Mapped[str | None] = mapped_column(sa.Text)
status: Mapped[str] = mapped_column(String(255)) # running, succeeded, failed, stopped, partial-succeeded
outputs: Mapped[Optional[str]] = mapped_column(sa.Text, default="{}")
error: Mapped[Optional[str]] = mapped_column(sa.Text)
outputs: Mapped[str | None] = mapped_column(sa.Text, default="{}")
error: Mapped[str | None] = mapped_column(sa.Text)
elapsed_time: Mapped[float] = mapped_column(sa.Float, nullable=False, server_default=sa.text("0"))
total_tokens: Mapped[int] = mapped_column(sa.BigInteger, server_default=sa.text("0"))
total_steps: Mapped[int] = mapped_column(sa.Integer, server_default=sa.text("0"), nullable=True)
created_by_role: Mapped[str] = mapped_column(String(255)) # account, end_user
created_by: Mapped[str] = mapped_column(StringUUID, nullable=False)
created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp())
finished_at: Mapped[Optional[datetime]] = mapped_column(DateTime)
finished_at: Mapped[datetime | None] = mapped_column(DateTime)
exceptions_count: Mapped[int] = mapped_column(sa.Integer, server_default=sa.text("0"), nullable=True)
@property
@ -625,7 +654,7 @@ class WorkflowRun(Base):
}
@classmethod
def from_dict(cls, data: dict) -> "WorkflowRun":
def from_dict(cls, data: dict[str, Any]) -> "WorkflowRun":
return cls(
id=data.get("id"),
tenant_id=data.get("tenant_id"),
@ -657,9 +686,10 @@ class WorkflowNodeExecutionTriggeredFrom(StrEnum):
SINGLE_STEP = "single-step"
WORKFLOW_RUN = "workflow-run"
RAG_PIPELINE_RUN = "rag-pipeline-run"
class WorkflowNodeExecutionModel(Base):
class WorkflowNodeExecutionModel(Base): # This model is expected to have `offload_data` preloaded in most cases.
"""
Workflow Node Execution
@ -710,7 +740,8 @@ class WorkflowNodeExecutionModel(Base):
__tablename__ = "workflow_node_executions"
@declared_attr
def __table_args__(cls): # noqa
@classmethod
def __table_args__(cls) -> Any:
return (
PrimaryKeyConstraint("id", name="workflow_node_execution_pkey"),
Index(
@ -747,7 +778,7 @@ class WorkflowNodeExecutionModel(Base):
# `__table_args__` is now a plain classmethod, so `cls` is bound normally and
# class attributes such as `created_at` can be referenced here without a
# `type: ignore` comment.
cls.created_at.desc(), # type: ignore
cls.created_at.desc(),
),
)
@ -756,24 +787,50 @@ class WorkflowNodeExecutionModel(Base):
app_id: Mapped[str] = mapped_column(StringUUID)
workflow_id: Mapped[str] = mapped_column(StringUUID)
triggered_from: Mapped[str] = mapped_column(String(255))
workflow_run_id: Mapped[Optional[str]] = mapped_column(StringUUID)
workflow_run_id: Mapped[str | None] = mapped_column(StringUUID)
index: Mapped[int] = mapped_column(sa.Integer)
predecessor_node_id: Mapped[Optional[str]] = mapped_column(String(255))
node_execution_id: Mapped[Optional[str]] = mapped_column(String(255))
predecessor_node_id: Mapped[str | None] = mapped_column(String(255))
node_execution_id: Mapped[str | None] = mapped_column(String(255))
node_id: Mapped[str] = mapped_column(String(255))
node_type: Mapped[str] = mapped_column(String(255))
title: Mapped[str] = mapped_column(String(255))
inputs: Mapped[Optional[str]] = mapped_column(sa.Text)
process_data: Mapped[Optional[str]] = mapped_column(sa.Text)
outputs: Mapped[Optional[str]] = mapped_column(sa.Text)
inputs: Mapped[str | None] = mapped_column(sa.Text)
process_data: Mapped[str | None] = mapped_column(sa.Text)
outputs: Mapped[str | None] = mapped_column(sa.Text)
status: Mapped[str] = mapped_column(String(255))
error: Mapped[Optional[str]] = mapped_column(sa.Text)
error: Mapped[str | None] = mapped_column(sa.Text)
elapsed_time: Mapped[float] = mapped_column(sa.Float, server_default=sa.text("0"))
execution_metadata: Mapped[Optional[str]] = mapped_column(sa.Text)
execution_metadata: Mapped[str | None] = mapped_column(sa.Text)
created_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.current_timestamp())
created_by_role: Mapped[str] = mapped_column(String(255))
created_by: Mapped[str] = mapped_column(StringUUID)
finished_at: Mapped[Optional[datetime]] = mapped_column(DateTime)
finished_at: Mapped[datetime | None] = mapped_column(DateTime)
offload_data: Mapped[list["WorkflowNodeExecutionOffload"]] = orm.relationship(
"WorkflowNodeExecutionOffload",
primaryjoin="WorkflowNodeExecutionModel.id == foreign(WorkflowNodeExecutionOffload.node_execution_id)",
uselist=True,
lazy="raise",
back_populates="execution",
)
@staticmethod
def preload_offload_data(
query: Select[tuple["WorkflowNodeExecutionModel"]] | orm.Query["WorkflowNodeExecutionModel"],
):
return query.options(orm.selectinload(WorkflowNodeExecutionModel.offload_data))
@staticmethod
def preload_offload_data_and_files(
query: Select[tuple["WorkflowNodeExecutionModel"]] | orm.Query["WorkflowNodeExecutionModel"],
):
return query.options(
orm.selectinload(WorkflowNodeExecutionModel.offload_data).options(
# Using `joinedload` instead of a nested `selectinload` to minimize database
# roundtrips: the related `UploadFile` rows are fetched together with the
# offload records rather than through an extra query per relationship.
orm.joinedload(WorkflowNodeExecutionOffload.file),
)
)
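# A usage sketch for the preload helpers above (assumes a SQLAlchemy `session`;
# `run_id` is hypothetical): `offload_data` is declared with lazy="raise", so it
# must be preloaded on the statement before instances are accessed.
#
#   stmt = select(WorkflowNodeExecutionModel).where(
#       WorkflowNodeExecutionModel.workflow_run_id == run_id
#   )
#   stmt = WorkflowNodeExecutionModel.preload_offload_data_and_files(stmt)
#   for execution in session.scalars(stmt):
#       print(execution.inputs_truncated, execution.outputs_truncated)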
@property
def created_by_account(self):
@ -809,25 +866,148 @@ class WorkflowNodeExecutionModel(Base):
return json.loads(self.execution_metadata) if self.execution_metadata else {}
@property
def extras(self):
def extras(self) -> dict[str, Any]:
from core.tools.tool_manager import ToolManager
extras = {}
extras: dict[str, Any] = {}
if self.execution_metadata_dict:
from core.workflow.nodes import NodeType
if self.node_type == NodeType.TOOL.value and "tool_info" in self.execution_metadata_dict:
tool_info = self.execution_metadata_dict["tool_info"]
tool_info: dict[str, Any] = self.execution_metadata_dict["tool_info"]
extras["icon"] = ToolManager.get_tool_icon(
tenant_id=self.tenant_id,
provider_type=tool_info["provider_type"],
provider_id=tool_info["provider_id"],
)
elif self.node_type == NodeType.DATASOURCE.value and "datasource_info" in self.execution_metadata_dict:
datasource_info = self.execution_metadata_dict["datasource_info"]
extras["icon"] = datasource_info.get("icon")
return extras
def _get_offload_by_type(self, type_: ExecutionOffLoadType) -> Optional["WorkflowNodeExecutionOffload"]:
return next((i for i in self.offload_data if i.type_ == type_), None)
class WorkflowAppLogCreatedFrom(Enum):
@property
def inputs_truncated(self) -> bool:
"""Check if inputs were truncated (offloaded to external storage)."""
return self._get_offload_by_type(ExecutionOffLoadType.INPUTS) is not None
@property
def outputs_truncated(self) -> bool:
"""Check if outputs were truncated (offloaded to external storage)."""
return self._get_offload_by_type(ExecutionOffLoadType.OUTPUTS) is not None
@property
def process_data_truncated(self) -> bool:
"""Check if process_data were truncated (offloaded to external storage)."""
return self._get_offload_by_type(ExecutionOffLoadType.PROCESS_DATA) is not None
@staticmethod
def _load_full_content(session: orm.Session, file_id: str, storage: Storage):
from .model import UploadFile
stmt = sa.select(UploadFile).where(UploadFile.id == file_id)
file = session.scalars(stmt).first()
assert file is not None, f"UploadFile with id {file_id} should exist but was not found"
content = storage.load(file.key)
return json.loads(content)
def load_full_inputs(self, session: orm.Session, storage: Storage) -> Mapping[str, Any] | None:
offload = self._get_offload_by_type(ExecutionOffLoadType.INPUTS)
if offload is None:
return self.inputs_dict
return self._load_full_content(session, offload.file_id, storage)
def load_full_outputs(self, session: orm.Session, storage: Storage) -> Mapping[str, Any] | None:
offload: WorkflowNodeExecutionOffload | None = self._get_offload_by_type(ExecutionOffLoadType.OUTPUTS)
if offload is None:
return self.outputs_dict
return self._load_full_content(session, offload.file_id, storage)
def load_full_process_data(self, session: orm.Session, storage: Storage) -> Mapping[str, Any] | None:
offload: WorkflowNodeExecutionOffload | None = self._get_offload_by_type(ExecutionOffLoadType.PROCESS_DATA)
if offload is None:
return self.process_data_dict
return self._load_full_content(session, offload.file_id, storage)
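# A usage sketch for the loaders above (assumes `session`, a configured `storage`,
# and a hypothetical `execution_id`): each `load_full_*` method falls back to the
# inline `*_dict` value when nothing was offloaded, so it is safe to call
# unconditionally, provided `offload_data` was preloaded.
#
#   stmt = WorkflowNodeExecutionModel.preload_offload_data(
#       select(WorkflowNodeExecutionModel).where(WorkflowNodeExecutionModel.id == execution_id)
#   )
#   execution = session.scalars(stmt).one()
#   full_inputs = execution.load_full_inputs(session, storage)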
class WorkflowNodeExecutionOffload(Base):
__tablename__ = "workflow_node_execution_offload"
__table_args__ = (
# PostgreSQL 14 treats NULL values as distinct in unique constraints by default,
# allowing multiple records with NULL values for the same column combination.
#
# This behavior allows us to have multiple records with NULL node_execution_id,
# simplifying the garbage collection process.
UniqueConstraint(
"node_execution_id",
"type",
# Note: PostgreSQL 15+ supports explicit `nulls distinct` behavior through
# `postgresql_nulls_not_distinct=False`, which would make our intention clearer.
# We rely on PostgreSQL's default behavior of treating NULLs as distinct values.
# postgresql_nulls_not_distinct=False,
),
)
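# An illustrative consequence of the NULLs-distinct semantics described above
# (sketch only): detaching a record never violates the ("node_execution_id", "type")
# constraint, because any number of rows may share (NULL, <type>).
#
#   offload.node_execution_id = None   # mark for garbage collection
#   session.flush()                    # OK even if other detached rows of the same type exist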
_HASH_COL_SIZE = 64
id: Mapped[str] = mapped_column(
StringUUID,
primary_key=True,
server_default=sa.text("uuidv7()"),
)
created_at: Mapped[datetime] = mapped_column(
DateTime, default=naive_utc_now, server_default=func.current_timestamp()
)
tenant_id: Mapped[str] = mapped_column(StringUUID)
app_id: Mapped[str] = mapped_column(StringUUID)
# `node_execution_id` indicates the `WorkflowNodeExecutionModel` associated with this offload record.
# A value of `None` signifies that this offload record is not linked to any execution record
# and should be considered for garbage collection.
node_execution_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
type_: Mapped[ExecutionOffLoadType] = mapped_column(EnumText(ExecutionOffLoadType), name="type", nullable=False)
# Design Decision: Combining inputs and outputs into a single object was considered to reduce I/O
# operations. However, due to the current design of `WorkflowNodeExecutionRepository`,
# the `save` method is called at two distinct times:
#
# - When the node starts execution: the `inputs` field exists, but the `outputs` field is absent
# - When the node completes execution (either succeeded or failed): the `outputs` field becomes available
#
# It's difficult to correlate these two successive calls to `save` for combined storage.
# Converting the `WorkflowNodeExecutionRepository` to buffer the first `save` call and flush
# when execution completes was also considered, but this would make the execution state unobservable
# until completion, significantly damaging the observability of workflow execution.
#
# Given these constraints, `inputs` and `outputs` are stored separately to maintain real-time
# observability and system reliability.
# `file_id` references the storage object containing the offloaded data.
file_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
execution: Mapped[WorkflowNodeExecutionModel] = orm.relationship(
foreign_keys=[node_execution_id],
lazy="raise",
uselist=False,
primaryjoin="WorkflowNodeExecutionOffload.node_execution_id == WorkflowNodeExecutionModel.id",
back_populates="offload_data",
)
file: Mapped[Optional["UploadFile"]] = orm.relationship(
foreign_keys=[file_id],
lazy="raise",
uselist=False,
primaryjoin="WorkflowNodeExecutionOffload.file_id == UploadFile.id",
)
class WorkflowAppLogCreatedFrom(StrEnum):
"""
Workflow App Log Created From Enum
"""
@ -883,6 +1063,7 @@ class WorkflowAppLog(Base):
__table_args__ = (
sa.PrimaryKeyConstraint("id", name="workflow_app_log_pkey"),
sa.Index("workflow_app_log_app_idx", "tenant_id", "app_id"),
sa.Index("workflow_app_log_workflow_run_id_idx", "workflow_run_id"),
)
id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"))
@ -939,7 +1120,7 @@ class ConversationVariable(Base):
DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp()
)
def __init__(self, *, id: str, app_id: str, conversation_id: str, data: str) -> None:
def __init__(self, *, id: str, app_id: str, conversation_id: str, data: str):
self.id = id
self.app_id = app_id
self.conversation_id = conversation_id
@ -988,7 +1169,10 @@ class WorkflowDraftVariable(Base):
]
__tablename__ = "workflow_draft_variables"
__table_args__ = (UniqueConstraint(*unique_app_id_node_id_name()),)
__table_args__ = (
UniqueConstraint(*unique_app_id_node_id_name()),
Index("workflow_draft_variable_file_id_idx", "file_id"),
)
# Required for instance variable annotation.
__allow_unmapped__ = True
@ -1049,9 +1233,16 @@ class WorkflowDraftVariable(Base):
selector: Mapped[str] = mapped_column(sa.String(255), nullable=False, name="selector")
# The data type of this variable's value
#
# If the variable is offloaded, `value_type` represents the type of the truncated value,
# which may differ from the original value's type. Typically, they are the same,
# but in cases where the structurally truncated value still exceeds the size limit,
# text slicing is applied, and the `value_type` is converted to `STRING`.
value_type: Mapped[SegmentType] = mapped_column(EnumText(SegmentType, length=20))
# The variable's value serialized as a JSON string
#
# If the variable is offloaded, `value` contains a truncated version, not the full original value.
value: Mapped[str] = mapped_column(sa.Text, nullable=False, name="value")
# Controls whether the variable should be displayed in the variable inspection panel
@ -1071,6 +1262,35 @@ class WorkflowDraftVariable(Base):
default=None,
)
# Reference to WorkflowDraftVariableFile for offloaded large variables
#
# Indicates whether the current draft variable is offloaded.
# If not offloaded, this field will be None.
file_id: Mapped[str | None] = mapped_column(
StringUUID,
nullable=True,
default=None,
comment="Reference to WorkflowDraftVariableFile if variable is offloaded to external storage",
)
is_default_value: Mapped[bool] = mapped_column(
sa.Boolean,
nullable=False,
default=False,
comment=(
"Indicates whether the current value is the default for a conversation variable. "
"Always `FALSE` for other types of variables."
),
)
# Relationship to WorkflowDraftVariableFile
variable_file: Mapped[Optional["WorkflowDraftVariableFile"]] = orm.relationship(
foreign_keys=[file_id],
lazy="raise",
uselist=False,
primaryjoin="WorkflowDraftVariableFile.id == WorkflowDraftVariable.file_id",
)
# Cache for deserialized value
#
# NOTE(QuantumGhost): This field serves two purposes:
@ -1084,7 +1304,7 @@ class WorkflowDraftVariable(Base):
# making this attribute harder to access from outside the class.
__value: Segment | None
def __init__(self, *args, **kwargs):
def __init__(self, *args: Any, **kwargs: Any) -> None:
"""
The constructor of `WorkflowDraftVariable` is not intended for
direct use outside this file. Its sole purpose is to set up private state
@ -1102,15 +1322,15 @@ class WorkflowDraftVariable(Base):
self.__value = None
def get_selector(self) -> list[str]:
selector = json.loads(self.selector)
selector: Any = json.loads(self.selector)
if not isinstance(selector, list):
logger.error(
"invalid selector loaded from database, type=%s, value=%s",
type(selector),
type(selector).__name__,
self.selector,
)
raise ValueError("invalid selector.")
return selector
return cast(list[str], selector)
def _set_selector(self, value: list[str]):
self.selector = json.dumps(value)
@ -1120,7 +1340,7 @@ class WorkflowDraftVariable(Base):
return self.build_segment_with_type(self.value_type, value)
@staticmethod
def rebuild_file_types(value: Any) -> Any:
def rebuild_file_types(value: Any):
# NOTE(QuantumGhost): Temporary workaround for structured data handling.
# By this point, `output` has been converted to dict by
# `WorkflowEntry.handle_special_values`, so we need to
@ -1133,15 +1353,17 @@ class WorkflowDraftVariable(Base):
# `WorkflowEntry.handle_special_values`, making a comprehensive migration challenging.
if isinstance(value, dict):
if not maybe_file_object(value):
return value
return cast(Any, value)
return File.model_validate(value)
elif isinstance(value, list) and value:
first = value[0]
value_list = cast(list[Any], value)
first: Any = value_list[0]
if not maybe_file_object(first):
return value
return [File.model_validate(i) for i in value]
return cast(Any, value)
file_list: list[File] = [File.model_validate(cast(dict[str, Any], i)) for i in value_list]
return cast(Any, file_list)
else:
return value
return cast(Any, value)
@classmethod
def build_segment_with_type(cls, segment_type: SegmentType, value: Any) -> Segment:
@ -1218,6 +1440,9 @@ class WorkflowDraftVariable(Base):
case _:
return DraftVariableType.NODE
def is_truncated(self) -> bool:
return self.file_id is not None
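# A defensive-access sketch (illustrative only): for offloaded variables, `value`
# holds only the truncated preview, and `variable_file` uses lazy="raise", so the
# relationship must be eagerly loaded before the full content can be resolved.
#
#   if draft_variable.is_truncated():
#       # resolve the full payload via variable_file -> upload_file -> storage
#       ...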
@classmethod
def _new(
cls,
@ -1228,6 +1453,7 @@ class WorkflowDraftVariable(Base):
value: Segment,
node_execution_id: str | None,
description: str = "",
file_id: str | None = None,
) -> "WorkflowDraftVariable":
variable = WorkflowDraftVariable()
variable.created_at = _naive_utc_datetime()
@ -1237,6 +1463,7 @@ class WorkflowDraftVariable(Base):
variable.node_id = node_id
variable.name = name
variable.set_value(value)
variable.file_id = file_id
variable._set_selector(list(variable_utils.to_selector(node_id, name)))
variable.node_execution_id = node_execution_id
return variable
@ -1292,6 +1519,7 @@ class WorkflowDraftVariable(Base):
node_execution_id: str,
visible: bool = True,
editable: bool = True,
file_id: str | None = None,
) -> "WorkflowDraftVariable":
variable = cls._new(
app_id=app_id,
@ -1299,6 +1527,7 @@ class WorkflowDraftVariable(Base):
name=name,
node_execution_id=node_execution_id,
value=value,
file_id=file_id,
)
variable.visible = visible
variable.editable = editable
@ -1309,6 +1538,93 @@ class WorkflowDraftVariable(Base):
return self.last_edited_at is not None
class WorkflowDraftVariableFile(Base):
"""Stores metadata about files associated with large workflow draft variables.
This model acts as an intermediary between WorkflowDraftVariable and UploadFile,
allowing for proper cleanup of orphaned files when variables are updated or deleted.
The MIME type of the stored content is recorded in `UploadFile.mime_type`.
Possible values are 'application/json' for non-string values serialized as JSON,
and 'text/plain' for string values stored as raw text.
"""
__tablename__ = "workflow_draft_variable_files"
# Primary key
id: Mapped[str] = mapped_column(
StringUUID,
primary_key=True,
default=uuidv7,
server_default=sa.text("uuidv7()"),
)
created_at: Mapped[datetime] = mapped_column(
DateTime,
nullable=False,
default=_naive_utc_datetime,
server_default=func.current_timestamp(),
)
tenant_id: Mapped[str] = mapped_column(
StringUUID,
nullable=False,
comment="The tenant to which the WorkflowDraftVariableFile belongs, referencing Tenant.id",
)
app_id: Mapped[str] = mapped_column(
StringUUID,
nullable=False,
comment="The application to which the WorkflowDraftVariableFile belongs, referencing App.id",
)
user_id: Mapped[str] = mapped_column(
StringUUID,
nullable=False,
comment="The owner to of the WorkflowDraftVariableFile, referencing Account.id",
)
# Reference to the `UploadFile.id` field
upload_file_id: Mapped[str] = mapped_column(
StringUUID,
nullable=False,
comment="Reference to UploadFile containing the large variable data",
)
# -------------- metadata about the variable content --------------
# The `size` is already recorded on `UploadFile`; it is duplicated here to avoid an additional database lookup.
size: Mapped[int] = mapped_column(
sa.BigInteger,
nullable=False,
comment="Size of the original variable content in bytes",
)
length: Mapped[int | None] = mapped_column(
sa.Integer,
nullable=True,
comment=(
"Length of the original variable content. For array and array-like types, "
"this represents the number of elements. For object types, it indicates the number of keys. "
"For other types, the value is NULL."
),
)
# The `value_type` field records the type of the original value.
value_type: Mapped[SegmentType] = mapped_column(
EnumText(SegmentType, length=20),
nullable=False,
)
# Relationship to UploadFile
upload_file: Mapped["UploadFile"] = orm.relationship(
foreign_keys=[upload_file_id],
lazy="raise",
uselist=False,
primaryjoin="WorkflowDraftVariableFile.upload_file_id == UploadFile.id",
)
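# A restore sketch for offloaded draft-variable content (assumes a configured
# `storage` instance; not part of this diff): the recorded MIME type distinguishes
# string values stored as raw text from everything else, which is JSON-encoded.
#
#   raw = storage.load(variable_file.upload_file.key)
#   if variable_file.upload_file.mime_type == "text/plain":
#       value = raw.decode("utf-8")
#   else:
#       value = json.loads(raw)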
def is_system_variable_editable(name: str) -> bool:
return name in _EDITABLE_SYSTEM_VARIABLE