refactor(api): rename dify_graph to graphon (#34095)

This commit is contained in:
99
2026-03-25 21:58:56 +08:00
committed by GitHub
parent 7e9d00a5a6
commit 52e7492cbc
898 changed files with 2687 additions and 2687 deletions

View File

@ -0,0 +1,10 @@
from .entities import BaseIterationNodeData, BaseIterationState, BaseLoopNodeData, BaseLoopState
from .usage_tracking_mixin import LLMUsageTrackingMixin
# Public names re-exported by this package (imported above from `.entities`
# and `.usage_tracking_mixin`).
__all__ = [
"BaseIterationNodeData",
"BaseIterationState",
"BaseLoopNodeData",
"BaseLoopState",
"LLMUsageTrackingMixin",
]

View File

@ -0,0 +1,87 @@
from __future__ import annotations
from collections.abc import Sequence
from enum import StrEnum
from typing import Any
from pydantic import BaseModel, field_validator
from graphon.entities.base_node_data import BaseNodeData
class VariableSelector(BaseModel):
    """
    Variable Selector.
    """

    # Name/key under which the variable is referenced.
    variable: str
    # Ordered path segments that locate the referenced value.
    value_selector: Sequence[str]
class OutputVariableType(StrEnum):
STRING = "string"
NUMBER = "number"
INTEGER = "integer"
SECRET = "secret"
BOOLEAN = "boolean"
OBJECT = "object"
FILE = "file"
ARRAY = "array"
ARRAY_STRING = "array[string]"
ARRAY_NUMBER = "array[number]"
ARRAY_OBJECT = "array[object]"
ARRAY_BOOLEAN = "array[boolean]"
ARRAY_FILE = "array[file]"
ANY = "any"
ARRAY_ANY = "array[any]"
class OutputVariableEntity(BaseModel):
    """
    Output Variable Entity.
    """

    variable: str
    # Falls back to ANY when the payload does not specify a type.
    value_type: OutputVariableType = OutputVariableType.ANY
    value_selector: Sequence[str]

    @field_validator("value_type", mode="before")
    @classmethod
    def normalize_value_type(cls, v: Any) -> Any:
        """
        Lower-case legacy `Array[...]` spellings before enum validation.

        Only string inputs that start with the exact prefix `Array[` are
        rewritten (e.g. `Array[String]` -> `array[string]`); every other value
        is passed through untouched.
        """
        is_legacy_array = isinstance(v, str) and v.startswith("Array[")
        return v.lower() if is_legacy_array else v
class BaseIterationNodeData(BaseNodeData):
    # Optional id of a start node; defaults to None when not configured.
    # NOTE(review): presumably the first node inside the iteration body - confirm.
    start_node_id: str | None = None
class BaseIterationState(BaseModel):
    # Id of the iteration node this state refers to.
    iteration_node_id: str
    index: int
    inputs: dict

    class MetaData(BaseModel):
        # Declares no fields at this level.
        pass

    # Declared after the nested class so the annotation refers to it.
    metadata: MetaData
class BaseLoopNodeData(BaseNodeData):
    # Optional id of a start node; defaults to None when not configured.
    # NOTE(review): presumably the first node inside the loop body - confirm.
    start_node_id: str | None = None
class BaseLoopState(BaseModel):
    # Id of the loop node this state refers to.
    loop_node_id: str
    index: int
    inputs: dict

    class MetaData(BaseModel):
        # Declares no fields at this level.
        pass

    # Declared after the nested class so the annotation refers to it.
    metadata: MetaData

View File

@ -0,0 +1,787 @@
from __future__ import annotations
import logging
import operator
from abc import abstractmethod
from collections.abc import Generator, Mapping, Sequence
from datetime import UTC, datetime
from functools import singledispatchmethod
from types import MappingProxyType
from typing import Any, ClassVar, Generic, TypeVar, cast, get_args, get_origin
from uuid import uuid4
from graphon.entities import GraphInitParams
from graphon.entities.base_node_data import BaseNodeData, RetryConfig
from graphon.entities.graph_config import NodeConfigDict
from graphon.enums import (
ErrorStrategy,
NodeExecutionType,
NodeState,
NodeType,
WorkflowNodeExecutionStatus,
)
from graphon.graph_events import (
GraphNodeEventBase,
NodeRunAgentLogEvent,
NodeRunFailedEvent,
NodeRunHumanInputFormFilledEvent,
NodeRunHumanInputFormTimeoutEvent,
NodeRunIterationFailedEvent,
NodeRunIterationNextEvent,
NodeRunIterationStartedEvent,
NodeRunIterationSucceededEvent,
NodeRunLoopFailedEvent,
NodeRunLoopNextEvent,
NodeRunLoopStartedEvent,
NodeRunLoopSucceededEvent,
NodeRunPauseRequestedEvent,
NodeRunRetrieverResourceEvent,
NodeRunStartedEvent,
NodeRunStreamChunkEvent,
NodeRunSucceededEvent,
NodeRunVariableUpdatedEvent,
)
from graphon.node_events import (
AgentLogEvent,
HumanInputFormFilledEvent,
HumanInputFormTimeoutEvent,
IterationFailedEvent,
IterationNextEvent,
IterationStartedEvent,
IterationSucceededEvent,
LoopFailedEvent,
LoopNextEvent,
LoopStartedEvent,
LoopSucceededEvent,
NodeEventBase,
NodeRunResult,
PauseRequestedEvent,
RunRetrieverResourceEvent,
StreamChunkEvent,
StreamCompletedEvent,
VariableUpdatedEvent,
)
from graphon.runtime import GraphRuntimeState
# Type variable for the node-data model a `Node` is parameterized with; bound to BaseNodeData.
NodeDataT = TypeVar("NodeDataT", bound=BaseNodeData)
# Sentinel default used by `require_run_context_value` so that a missing key can be
# told apart from a key whose stored value is None.
_MISSING_RUN_CONTEXT_VALUE = object()
logger = logging.getLogger(__name__)
class Node(Generic[NodeDataT]):
"""Node serves as the foundational class for all node implementations.
Nodes are allowed to maintain transient states (e.g., `LLMNode` uses the `_file_output`
attribute to track files generated by the LLM). However, these states are not persisted
when the workflow is suspended or resumed. If a node needs its state to be preserved
across workflow suspension and resumption, it should include the relevant state data
in its output.
"""
# Declared only; `__init_subclass__` reads it as the registry key for each subclass.
node_type: ClassVar[NodeType]
execution_type: NodeExecutionType = NodeExecutionType.EXECUTABLE
# Node-data model class; replaced per subclass by `__init_subclass__`.
_node_data_type: ClassVar[type[BaseNodeData]] = BaseNodeData
def __init_subclass__(cls, **kwargs: Any) -> None:
    """
    Resolve the node data type of a new subclass and register the subclass.

    When a subclass is defined as `class MyNode(Node[MyNodeData])`, this hook:

    1. Calls `_extract_node_data_type_from_generic()`, which inspects
       `__orig_bases__` to find the `Node[T]` parameterization.
    2. Takes `T` (e.g. `MyNodeData`), which that helper has validated to be
       a `BaseNodeData` subclass.
    3. Stores it in `_node_data_type`, which `validate_node_data` uses to
       build the typed node data.
    4. Registers the class in `Node._registry[node_type][version()]` and
       refreshes that bucket's `"latest"` entry.

    This eliminates the need for subclasses to manually implement boilerplate
    accessor methods like `_get_title()`, `_get_error_strategy()`, etc.

    Flow:
    ::
        class CodeNode(Node[CodeNodeData])
          -> __orig_bases__ == (Node[CodeNodeData],)
          -> get_origin(base) is Node, get_args(base) == (CodeNodeData,)
          -> validate: is it a type? is it a BaseNodeData subclass?
          -> cls._node_data_type = CodeNodeData

    Example:
        class CodeNode(Node[CodeNodeData]):  # CodeNodeData is auto-extracted
            node_type = BuiltinNodeTypes.CODE
            # No need to implement _get_title, _get_error_strategy, etc.

    `node_type` and `version()` are read here, at class-creation time, so a
    subclass must define both.

    Raises:
        TypeError: If no `Node[T]` base is found, or if the generic argument
            is rejected by `_extract_node_data_type_from_generic()`.
    """
    super().__init_subclass__(**kwargs)

    # Skip base class itself
    if cls is Node:
        return

    node_data_type = cls._extract_node_data_type_from_generic()
    if node_data_type is None:
        raise TypeError(f"{cls.__name__} must inherit from Node[T] with a BaseNodeData subtype")
    cls._node_data_type = node_data_type

    # Only treat nodes from the base graphon package as production
    # registrations. Higher-layer packages may still register subclasses,
    # but graphon itself should not know their module identities.
    # This prevents test helper subclasses from polluting the global registry and
    # accidentally overriding real node types (e.g., a test Answer node).
    module_name = getattr(cls, "__module__", "")

    node_type = cls.node_type
    version = cls.version()
    bucket = Node._registry.setdefault(node_type, {})
    if module_name.startswith("graphon.nodes."):
        # Production node definitions take precedence and may override
        bucket[version] = cls  # type: ignore[index]
    else:
        # External/test subclasses may register but must not override production
        bucket.setdefault(version, cls)  # type: ignore[index]

    # Maintain a "latest" pointer preferring numeric versions; fallback to lexicographic
    version_keys = [v for v in bucket if v != "latest"]
    numeric_pairs: list[tuple[str, int]] = []
    for v in version_keys:
        try:
            numeric_pairs.append((v, int(v)))
        except ValueError:
            # Non-numeric version strings are left to the lexicographic
            # fallback below instead of aborting class creation.
            continue
    if numeric_pairs:
        latest_key = max(numeric_pairs, key=operator.itemgetter(1))[0]
    else:
        latest_key = max(version_keys) if version_keys else version
    bucket["latest"] = bucket[latest_key]

    # Bump the counter returned by `get_registry_version()`.
    Node._registry_version += 1
@classmethod
def _extract_node_data_type_from_generic(cls) -> type[BaseNodeData] | None:
    """
    Find the `T` in the `Node[T]` base this class was declared with.

    Walks `__orig_bases__`, which keeps the original generic bases before
    type erasure; for `class CodeNode(Node[CodeNodeData])` it is
    `(Node[CodeNodeData],)`. The first base whose origin is `Node` decides
    the result.

    Returns:
        The BaseNodeData subtype used as the generic argument, or None when
        no `Node[...]` base is present.

    Raises:
        TypeError: If the `Node[...]` base does not carry exactly one
            argument, or that argument is not a BaseNodeData subclass.
    """
    for generic_base in getattr(cls, "__orig_bases__", ()):  # type: ignore[attr-defined]
        # get_origin(Node[CodeNodeData]) -> Node
        if get_origin(generic_base) is not Node:
            continue

        # get_args(Node[CodeNodeData]) -> (CodeNodeData,)
        type_args = get_args(generic_base)
        if len(type_args) != 1:
            raise TypeError(f"{cls.__name__} must specify exactly one node data generic argument")

        (candidate,) = type_args
        is_node_data_class = isinstance(candidate, type) and issubclass(candidate, BaseNodeData)
        if not is_node_data_class:
            raise TypeError(f"{cls.__name__} must parameterize Node with a BaseNodeData subtype")
        return candidate

    return None
# Global registry populated via __init_subclass__: node type -> {version string or "latest" -> node class}.
_registry: ClassVar[dict[NodeType, dict[str, type[Node]]]] = {}
# Incremented by __init_subclass__ every time a subclass is registered.
_registry_version: ClassVar[int] = 0
@classmethod
def get_registry_version(cls) -> int:
    """Return the current value of the registry change counter."""
    current_version = cls._registry_version
    return current_version
def __init__(
    self,
    id: str,
    config: NodeConfigDict,
    graph_init_params: GraphInitParams,
    graph_runtime_state: GraphRuntimeState,
) -> None:
    """
    Bind the node to its graph context and hydrate its typed node data.

    Args:
        id: Identifier stored on the public `id` attribute.
        config: Node configuration; `config["id"]` and `config["data"]` are read.
        graph_init_params: Source of workflow id, graph config, call depth
            and run context.
        graph_runtime_state: Runtime state shared with the rest of the graph.
    """
    # Graph-level context. The run context is copied and wrapped so it is
    # read-only through this node.
    self._graph_init_params = graph_init_params
    self._run_context = MappingProxyType(dict(graph_init_params.run_context))

    self.id = id
    self.workflow_id = graph_init_params.workflow_id
    self.graph_config = graph_init_params.graph_config
    self.workflow_call_depth = graph_init_params.call_depth
    self.graph_runtime_state = graph_runtime_state
    self.state: NodeState = NodeState.UNKNOWN  # node execution state

    # Per-node execution bookkeeping; the execution id is filled in lazily
    # by `ensure_execution_id()`.
    self._node_id = config["id"]
    self._node_execution_id: str = ""
    self._start_at = datetime.now(UTC).replace(tzinfo=None)

    # Validate the raw payload against the subclass-declared data model,
    # then give subclasses their initialization hook.
    self._node_data = self.validate_node_data(config["data"])
    self.post_init()
@classmethod
def validate_node_data(cls, node_data: BaseNodeData | Mapping[str, Any]) -> NodeDataT:
    """Validate a node payload against the subclass-declared NodeData model.

    The payload is always re-validated from a plain dict (a `model_dump` of
    the instance, or a copy of the mapping) rather than via
    `from_attributes=True`, so that compatibility extras stored on
    `BaseNodeData` survive the handoff to the concrete node data model.
    Human Input delivery methods are one such extra field until graphon owns
    that schema.
    """
    payload = (
        node_data.model_dump(mode="python")
        if isinstance(node_data, BaseNodeData)
        else dict(node_data)
    )
    validated = cls._node_data_type.model_validate(payload)
    return cast(NodeDataT, validated)
def init_node_data(self, data: BaseNodeData | Mapping[str, Any]) -> None:
    """Hydrate `_node_data` for legacy callers that bypass `__init__`."""
    # `validate_node_data` accepts both payload forms itself, so the value is
    # forwarded as-is.
    validated = self.validate_node_data(data)
    self._node_data = validated
def post_init(self) -> None:
    """Optional hook for subclasses requiring extra initialization.

    Called at the end of `__init__`; the base implementation does nothing.
    """
    return None
@property
def graph_init_params(self) -> GraphInitParams:
    """The graph init params this node was constructed with."""
    return self._graph_init_params


@property
def run_context(self) -> Mapping[str, Any]:
    """Read-only mapping of run-context values captured at construction."""
    return self._run_context


def get_run_context_value(self, key: str, default: Any = None) -> Any:
    """Look up `key` in the run context, returning `default` when absent."""
    context = self._run_context
    return context.get(key, default)


def require_run_context_value(self, key: str) -> Any:
    """Look up `key` in the run context, failing when it is absent.

    A stored value of None counts as present; only a missing key fails.

    Raises:
        ValueError: If `key` is not in the run context.
    """
    sentinel = _MISSING_RUN_CONTEXT_VALUE
    found = self.get_run_context_value(key, sentinel)
    if found is not sentinel:
        return found
    raise ValueError(f"run_context missing required key: {key}")
@property
def execution_id(self) -> str:
    """Current execution id; an empty string until one has been assigned."""
    return self._node_execution_id


def ensure_execution_id(self) -> str:
    """Return the node's execution id, assigning one first if needed.

    An id that is already set is reused. Otherwise an id restored from the
    runtime state is preferred, and a fresh UUID4 string is the fallback.
    """
    if not self._node_execution_id:
        restored = self._restore_execution_id_from_runtime_state()
        self._node_execution_id = restored if restored else str(uuid4())
    return self._node_execution_id
def _restore_execution_id_from_runtime_state(self) -> str | None:
graph_execution = self.graph_runtime_state.graph_execution
try:
node_executions = graph_execution.node_executions
except AttributeError:
return None
if not isinstance(node_executions, dict):
return None
node_execution = node_executions.get(self._node_id)
if node_execution is None:
return None
execution_id = node_execution.execution_id
if not execution_id:
return None
return str(execution_id)
@abstractmethod
def _run(self) -> NodeRunResult | Generator[NodeEventBase, None, None]:
"""
Run node
:return:
"""
raise NotImplementedError
def populate_start_event(self, event: NodeRunStartedEvent) -> None:
    """Allow subclasses to enrich the started event without cross-node imports in the base class.

    The base implementation leaves `event` untouched.
    """
    # Nothing to add at this level; subclasses override to mutate `event`.
    return None
def run(self) -> Generator[GraphNodeEventBase, None, None]:
    """Run the node and yield graph-level events describing its progress.

    A `NodeRunStartedEvent` is always yielded first. After that, `_run()` is
    invoked: a plain `NodeRunResult` is converted into a single terminal
    event, while a generator result is translated event by event. Any
    exception raised while running is logged and reported as a
    `NodeRunFailedEvent` instead of propagating.
    """
    execution_id = self.ensure_execution_id()
    self._start_at = datetime.now(UTC).replace(tzinfo=None)

    # Create and push start event with required fields
    start_event = NodeRunStartedEvent(
        id=execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        node_title=self.title,
        in_iteration_id=None,
        start_at=self._start_at,
    )
    # Enrichment is best-effort: a failing hook must not stop the node.
    try:
        self.populate_start_event(start_event)
    except Exception:
        logger.warning("Failed to populate start event for node %s", self._node_id, exc_info=True)
    yield start_event

    try:
        outcome = self._run()

        # Single-result form: one terminal event and we are done.
        if isinstance(outcome, NodeRunResult):
            yield self._convert_node_run_result_to_graph_node_event(outcome)
            return

        # Event-stream form.
        for emitted in outcome:
            # NOTE: this is necessary because iteration and loop nodes yield GraphNodeEventBase
            if isinstance(emitted, NodeEventBase):  # pyright: ignore[reportUnnecessaryIsInstance]
                yield self._dispatch(emitted)
                continue

            # Graph events not tagged with an iteration or loop id are
            # stamped with this node's execution id; all others pass through
            # unchanged.
            is_untagged_graph_event = isinstance(emitted, GraphNodeEventBase) and not (  # pyright: ignore[reportUnnecessaryIsInstance]
                emitted.in_iteration_id or emitted.in_loop_id
            )
            if is_untagged_graph_event:
                emitted.id = self.execution_id
            yield emitted
    except Exception as e:
        logger.exception("Node %s failed to run", self._node_id)
        failure_result = NodeRunResult(
            status=WorkflowNodeExecutionStatus.FAILED,
            error=str(e),
            error_type="WorkflowNodeError",
        )
        finished_at = datetime.now(UTC).replace(tzinfo=None)
        yield NodeRunFailedEvent(
            id=self.execution_id,
            node_id=self._node_id,
            node_type=self.node_type,
            start_at=self._start_at,
            finished_at=finished_at,
            node_run_result=failure_result,
            error=str(e),
        )
@classmethod
def extract_variable_selector_to_variable_mapping(
    cls,
    *,
    graph_config: Mapping[str, Any],
    config: NodeConfigDict,
) -> Mapping[str, Sequence[str]]:
    """Extract the variable selectors referenced by a node configuration.

    `config` is the node definition object; its `data` field is validated
    into the node's data model and handed, together with its `id`, to
    `_extract_variable_selector_to_variable_mapping`, which subclasses
    override.

    The returned mapping has the following structure:

        {'1747829548239.#1747829667553.result#': ['1747829667553', 'result']}

    For loop and iteration nodes, the mapping may look like this:

        {
            "1748332301644.input_selector": ["1748332363630", "result"],
            "1748332325079.1748332325079.#sys.workflow_id#": ["sys", "workflow_id"],
        }

    where `1748332301644` is the ID of the loop / iteration node,
    and `1748332325079` is the ID of the node inside the loop or iteration node.

    Each key consists of two parts separated by a dot (`.`): the current node
    ID (the `node_id` argument of `_extract_variable_selector_to_variable_mapping`)
    and the variable selector enclosed in `#` symbols.

    Each value is a list of strings representing the variable selector: the
    first element is the node ID of the referenced variable, the second is the
    variable name within that node.

    Reading the first example: the node with ID `1747829548239` references the
    variable `result` from the node with ID `1747829667553`. For example, if
    `1747829548239` is a LLM node, its prompt may contain a reference to the
    `result` output variable of node `1747829667553`.

    :param graph_config: graph config
    :param config: node config
    :return: mapping from reference key to variable selector
    """
    validated_data = cls.validate_node_data(config["data"])
    return cls._extract_variable_selector_to_variable_mapping(
        graph_config=graph_config,
        node_id=config["id"],
        node_data=validated_data,
    )
@classmethod
def _extract_variable_selector_to_variable_mapping(
cls,
*,
graph_config: Mapping[str, Any],
node_id: str,
node_data: NodeDataT,
) -> Mapping[str, Sequence[str]]:
return {}
def blocks_variable_output(self, variable_selectors: set[tuple[str, ...]]) -> bool:
    """
    Report whether this node blocks the output of the given variables.

    Used to determine if a node must complete execution before the specified
    variables can be used in streaming output. The base implementation never
    blocks.

    :param variable_selectors: Set of variable selectors, each as a tuple (e.g., ('conversation', 'str'))
    :return: True if this node blocks output of any of the specified variables, False otherwise
    """
    blocks_output = False
    return blocks_output
@classmethod
def get_default_config(cls, filters: Mapping[str, object] | None = None) -> Mapping[str, object]:
    """Return the default configuration for this node type.

    The base implementation ignores `filters` and returns an empty mapping.
    """
    default_config: dict[str, object] = {}
    return default_config
@classmethod
@abstractmethod
def version(cls) -> str:
    """Return the version string of this node type.

    `__init_subclass__` calls this while registering a subclass, so every
    subclass has to override it.
    """
    # NOTE(QuantumGhost): Node versions must remain unique per `NodeType` so
    # registry lookups can resolve numeric versions and `latest`.
    message = "subclasses of BaseNode must implement `version` method."
    raise NotImplementedError(message)
@classmethod
def get_node_type_classes_mapping(cls) -> Mapping[NodeType, Mapping[str, type[Node]]]:
    """Return the currently registered node classes, grouped by node type.

    Each per-type version map is wrapped in a read-only `MappingProxyType`;
    the outer dict is built fresh on every call.

    This accessor intentionally performs no imports. The embedding layer that
    owns bootstrap (for example `core.workflow.node_factory`) must import any
    extension node packages before calling it so their subclasses register via
    `__init_subclass__`.
    """
    snapshot: dict[NodeType, Mapping[str, type[Node]]] = {}
    for node_type, version_map in cls._registry.items():
        snapshot[node_type] = MappingProxyType(version_map)
    return snapshot
@property
def retry(self) -> bool:
    """Whether this node retries; the base implementation always says no."""
    retry_enabled = False
    return retry_enabled
def _get_error_strategy(self) -> ErrorStrategy | None:
"""Get the error strategy for this node."""
return self._node_data.error_strategy
def _get_retry_config(self) -> RetryConfig:
"""Get the retry configuration for this node."""
return self._node_data.retry_config
def _get_title(self) -> str:
"""Get the node title."""
return self._node_data.title
def _get_description(self) -> str | None:
"""Get the node description."""
return self._node_data.desc
def _get_default_value_dict(self) -> dict[str, Any]:
"""Get the default values dictionary for this node."""
return self._node_data.default_value_dict
# Public read-only properties; each forwards to the matching `_get_*` method
# so subclasses can override the lookup in one place.
@property
def error_strategy(self) -> ErrorStrategy | None:
    """Error strategy configured for this node, if any."""
    strategy = self._get_error_strategy()
    return strategy


@property
def retry_config(self) -> RetryConfig:
    """Retry configuration for this node."""
    config = self._get_retry_config()
    return config


@property
def title(self) -> str:
    """Title of this node."""
    node_title = self._get_title()
    return node_title


@property
def description(self) -> str | None:
    """Description of this node, if any."""
    node_description = self._get_description()
    return node_description


@property
def default_value_dict(self) -> dict[str, Any]:
    """Default values dictionary for this node."""
    defaults = self._get_default_value_dict()
    return defaults


@property
def node_data(self) -> NodeDataT:
    """Typed access to this node's configuration data."""
    return self._node_data
def _convert_node_run_result_to_graph_node_event(self, result: NodeRunResult) -> GraphNodeEventBase:
    """Turn a finished `NodeRunResult` into the matching terminal graph event.

    FAILED becomes a `NodeRunFailedEvent`, SUCCEEDED a `NodeRunSucceededEvent`.

    Raises:
        Exception: If the result carries any other status.
    """
    # Captured up front so both event kinds carry the same timestamp logic.
    finished_at = datetime.now(UTC).replace(tzinfo=None)
    status = result.status

    if status == WorkflowNodeExecutionStatus.FAILED:
        return NodeRunFailedEvent(
            id=self.execution_id,
            node_id=self.id,
            node_type=self.node_type,
            start_at=self._start_at,
            finished_at=finished_at,
            node_run_result=result,
            error=result.error,
        )

    if status == WorkflowNodeExecutionStatus.SUCCEEDED:
        return NodeRunSucceededEvent(
            id=self.execution_id,
            node_id=self.id,
            node_type=self.node_type,
            start_at=self._start_at,
            finished_at=finished_at,
            node_run_result=result,
        )

    raise Exception(f"result status {result.status} not supported")
@singledispatchmethod
def _dispatch(self, event: NodeEventBase) -> GraphNodeEventBase:
    """Translate a node-level event into its graph-level counterpart.

    This is the fallback for event types without a registered handler.

    Raises:
        NotImplementedError: Always, naming the unsupported event type.
    """
    unsupported = f"Node {self._node_id} does not support event type {type(event)}"
    raise NotImplementedError(unsupported)
@_dispatch.register
def _(self, event: StreamChunkEvent) -> NodeRunStreamChunkEvent:
    """Wrap a stream chunk in a graph event tagged with this node's identity."""
    chunk_event = NodeRunStreamChunkEvent(
        id=self.execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        selector=event.selector,
        chunk=event.chunk,
        is_final=event.is_final,
    )
    return chunk_event
@_dispatch.register
def _(self, event: StreamCompletedEvent) -> NodeRunSucceededEvent | NodeRunFailedEvent:
    """Turn the end-of-stream event into a succeeded or failed graph event.

    Raises:
        NotImplementedError: If the carried run result is neither SUCCEEDED
            nor FAILED.
    """
    finished_at = datetime.now(UTC).replace(tzinfo=None)
    run_result = event.node_run_result
    status = run_result.status

    if status == WorkflowNodeExecutionStatus.SUCCEEDED:
        return NodeRunSucceededEvent(
            id=self.execution_id,
            node_id=self._node_id,
            node_type=self.node_type,
            start_at=self._start_at,
            finished_at=finished_at,
            node_run_result=run_result,
        )

    if status == WorkflowNodeExecutionStatus.FAILED:
        return NodeRunFailedEvent(
            id=self.execution_id,
            node_id=self._node_id,
            node_type=self.node_type,
            start_at=self._start_at,
            finished_at=finished_at,
            node_run_result=run_result,
            error=run_result.error,
        )

    raise NotImplementedError(
        f"Node {self._node_id} does not support status {event.node_run_result.status}"
    )
@_dispatch.register
def _(self, event: VariableUpdatedEvent) -> NodeRunVariableUpdatedEvent:
    """Wrap a variable update in a graph event tagged with this node's identity."""
    updated_event = NodeRunVariableUpdatedEvent(
        id=self.execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        variable=event.variable,
    )
    return updated_event


@_dispatch.register
def _(self, event: PauseRequestedEvent) -> NodeRunPauseRequestedEvent:
    """Wrap a pause request in a graph event carrying a PAUSED run result."""
    paused_result = NodeRunResult(status=WorkflowNodeExecutionStatus.PAUSED)
    return NodeRunPauseRequestedEvent(
        id=self.execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        node_run_result=paused_result,
        reason=event.reason,
    )
@_dispatch.register
def _(self, event: AgentLogEvent) -> NodeRunAgentLogEvent:
    """Copy an agent log entry into a graph event tagged with this node's identity."""
    log_event = NodeRunAgentLogEvent(
        id=self.execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        message_id=event.message_id,
        label=event.label,
        node_execution_id=event.node_execution_id,
        parent_id=event.parent_id,
        error=event.error,
        status=event.status,
        data=event.data,
        metadata=event.metadata,
    )
    return log_event
@_dispatch.register
def _(self, event: HumanInputFormFilledEvent) -> NodeRunHumanInputFormFilledEvent:
    """Wrap a filled human-input form in a graph event tagged with this node's identity.

    The node title comes from the event, not from this node's data.
    """
    return NodeRunHumanInputFormFilledEvent(
        id=self.execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        node_title=event.node_title,
        rendered_content=event.rendered_content,
        action_id=event.action_id,
        action_text=event.action_text,
    )


@_dispatch.register
def _(self, event: HumanInputFormTimeoutEvent) -> NodeRunHumanInputFormTimeoutEvent:
    """Wrap a human-input form timeout in a graph event tagged with this node's identity.

    The node title comes from the event, not from this node's data.
    """
    return NodeRunHumanInputFormTimeoutEvent(
        id=self.execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        node_title=event.node_title,
        expiration_time=event.expiration_time,
    )
@_dispatch.register
def _(self, event: LoopStartedEvent) -> NodeRunLoopStartedEvent:
    """Wrap a loop-started event; the title is taken from this node's data."""
    started = NodeRunLoopStartedEvent(
        id=self.execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        node_title=self.node_data.title,
        start_at=event.start_at,
        inputs=event.inputs,
        metadata=event.metadata,
        predecessor_node_id=event.predecessor_node_id,
    )
    return started


@_dispatch.register
def _(self, event: LoopNextEvent) -> NodeRunLoopNextEvent:
    """Wrap a loop-next event; the title is taken from this node's data."""
    advanced = NodeRunLoopNextEvent(
        id=self.execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        node_title=self.node_data.title,
        index=event.index,
        pre_loop_output=event.pre_loop_output,
    )
    return advanced


@_dispatch.register
def _(self, event: LoopSucceededEvent) -> NodeRunLoopSucceededEvent:
    """Wrap a loop-succeeded event; the title is taken from this node's data."""
    succeeded = NodeRunLoopSucceededEvent(
        id=self.execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        node_title=self.node_data.title,
        start_at=event.start_at,
        inputs=event.inputs,
        outputs=event.outputs,
        metadata=event.metadata,
        steps=event.steps,
    )
    return succeeded


@_dispatch.register
def _(self, event: LoopFailedEvent) -> NodeRunLoopFailedEvent:
    """Wrap a loop-failed event; the title is taken from this node's data."""
    failed = NodeRunLoopFailedEvent(
        id=self.execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        node_title=self.node_data.title,
        start_at=event.start_at,
        inputs=event.inputs,
        outputs=event.outputs,
        metadata=event.metadata,
        steps=event.steps,
        error=event.error,
    )
    return failed
@_dispatch.register
def _(self, event: IterationStartedEvent) -> NodeRunIterationStartedEvent:
    """Wrap an iteration-started event; the title is taken from this node's data."""
    started = NodeRunIterationStartedEvent(
        id=self.execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        node_title=self.node_data.title,
        start_at=event.start_at,
        inputs=event.inputs,
        metadata=event.metadata,
        predecessor_node_id=event.predecessor_node_id,
    )
    return started


@_dispatch.register
def _(self, event: IterationNextEvent) -> NodeRunIterationNextEvent:
    """Wrap an iteration-next event; the title is taken from this node's data."""
    advanced = NodeRunIterationNextEvent(
        id=self.execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        node_title=self.node_data.title,
        index=event.index,
        pre_iteration_output=event.pre_iteration_output,
    )
    return advanced


@_dispatch.register
def _(self, event: IterationSucceededEvent) -> NodeRunIterationSucceededEvent:
    """Wrap an iteration-succeeded event; the title is taken from this node's data."""
    succeeded = NodeRunIterationSucceededEvent(
        id=self.execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        node_title=self.node_data.title,
        start_at=event.start_at,
        inputs=event.inputs,
        outputs=event.outputs,
        metadata=event.metadata,
        steps=event.steps,
    )
    return succeeded


@_dispatch.register
def _(self, event: IterationFailedEvent) -> NodeRunIterationFailedEvent:
    """Wrap an iteration-failed event; the title is taken from this node's data."""
    failed = NodeRunIterationFailedEvent(
        id=self.execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        node_title=self.node_data.title,
        start_at=event.start_at,
        inputs=event.inputs,
        outputs=event.outputs,
        metadata=event.metadata,
        steps=event.steps,
        error=event.error,
    )
    return failed
@_dispatch.register
def _(self, event: RunRetrieverResourceEvent) -> NodeRunRetrieverResourceEvent:
    """Wrap retriever resources in a graph event, adding this node's version."""
    retriever_event = NodeRunRetrieverResourceEvent(
        id=self.execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        retriever_resources=event.retriever_resources,
        context=event.context,
        node_version=self.version(),
    )
    return retriever_event

View File

@ -0,0 +1,150 @@
"""Template structures for Response nodes (Answer and End).
This module provides a unified template structure for both Answer and End nodes,
similar to SegmentGroup but focused on template representation without values.
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from collections.abc import Sequence
from dataclasses import dataclass
from typing import Any, Union
from graphon.nodes.base.variable_template_parser import VariableTemplateParser
@dataclass(frozen=True)
class TemplateSegment(ABC):
    """Abstract base for the pieces a template is made of."""

    @abstractmethod
    def __str__(self) -> str:
        """Render the segment back to its textual form."""
        return None
@dataclass(frozen=True)
class TextSegment(TemplateSegment):
    """Literal text inside a template."""

    text: str

    def __str__(self) -> str:
        # The literal renders as itself.
        literal = self.text
        return literal
@dataclass(frozen=True)
class VariableSegment(TemplateSegment):
    """Reference to a variable inside a template."""

    selector: Sequence[str]
    variable_name: str | None = None  # Optional variable name for End nodes

    def __str__(self) -> str:
        # Renders in placeholder syntax: the dotted selector wrapped in {{# ... #}}.
        dotted_path = ".".join(self.selector)
        return "".join(("{{#", dotted_path, "#}}"))
# Type alias for the concrete segment kinds; used as the element type of `Template.segments`.
TemplateSegmentUnion = Union[TextSegment, VariableSegment]
@dataclass(frozen=True)
class Template:
    """Unified template structure for Response nodes.

    Similar to SegmentGroup, but represents the template structure
    without variable values - only marking variable selectors.
    """

    # Ordered text and variable segments making up the template.
    segments: list[TemplateSegmentUnion]

    @classmethod
    def from_answer_template(cls, template_str: str) -> Template:
        """Create a Template from an Answer node template string.

        Example:
            "Hello, {{#node1.name#}}" -> [TextSegment("Hello, "), VariableSegment(["node1", "name"])]

        Args:
            template_str: The answer template string

        Returns:
            Template instance
        """
        import re

        parser = VariableTemplateParser(template_str)
        segments: list[TemplateSegmentUnion] = []

        # Map each variable key (including its surrounding `#`) to its selector.
        variable_selectors = parser.extract_variable_selectors()
        var_map = {var.variable: var.value_selector for var in variable_selectors}

        # Split the template by variable placeholders while preserving order.
        # The single capturing group keeps each `#...#` key in the result, so
        # text lands on even indices and variable keys on odd indices.
        pattern = r"\{\{(#[a-zA-Z0-9_]{1,50}(?:\.[a-zA-Z_][a-zA-Z0-9_]{0,29}){1,10}#)\}\}"
        parts = re.split(pattern, template_str)

        for i, part in enumerate(parts):
            # Empty strings appear where placeholders touch each other or the
            # ends of the template.
            if not part:
                continue

            if i % 2 == 1:
                # Odd index: `part` is a variable key exactly as captured,
                # still wrapped in `#`, which is also how `var_map` is keyed.
                var_key = part
                if var_key in var_map:
                    segments.append(VariableSegment(selector=list(var_map[var_key])))
                else:
                    # This shouldn't happen with valid templates; keep the
                    # placeholder as literal text.
                    segments.append(TextSegment(text="{{" + part + "}}"))
            else:
                # Even index: plain text.
                segments.append(TextSegment(text=part))

        return cls(segments=segments)

    @classmethod
    def from_end_outputs(cls, outputs_config: list[dict[str, Any]]) -> Template:
        """Create a Template from an End node outputs configuration.

        End nodes are treated as templates of concatenated variables with newlines.
        Outputs whose `value_selector` is missing or empty contribute nothing,
        and never leave a leading, trailing or doubled newline separator behind.

        Example:
            [{"variable": "text", "value_selector": ["node1", "text"]},
             {"variable": "result", "value_selector": ["node2", "result"]}]
            ->
            [VariableSegment(["node1", "text"]),
             TextSegment("\\n"),
             VariableSegment(["node2", "result"])]

        Args:
            outputs_config: List of output configurations with variable and value_selector

        Returns:
            Template instance
        """
        segments: list[TemplateSegmentUnion] = []

        for output in outputs_config:
            value_selector = output.get("value_selector", [])
            if not value_selector:
                continue

            # Separate from the previously emitted variable only; deciding
            # here (instead of by position in `outputs_config`) keeps skipped
            # outputs from producing stray separators.
            if segments:
                segments.append(TextSegment(text="\n"))

            variable_name = output.get("variable", "")
            segments.append(VariableSegment(selector=list(value_selector), variable_name=variable_name))

        return cls(segments=segments)

    def __str__(self) -> str:
        """String representation of the template."""
        return "".join(str(segment) for segment in self.segments)

View File

@ -0,0 +1,28 @@
from graphon.model_runtime.entities.llm_entities import LLMUsage
from graphon.runtime import GraphRuntimeState
class LLMUsageTrackingMixin:
    """Provides shared helpers for merging and recording LLM usage within workflow nodes."""

    # Supplied by the class this mixin is combined with.
    graph_runtime_state: GraphRuntimeState

    @staticmethod
    def _merge_usage(current: LLMUsage, new_usage: LLMUsage | None) -> LLMUsage:
        """Combine two usage snapshots.

        `current` is returned unchanged when `new_usage` is None or has no
        positive token count. When `current` itself has zero tokens,
        `new_usage` is returned as-is; otherwise the result is
        `current.plus(new_usage)`.
        """
        has_new_tokens = new_usage is not None and new_usage.total_tokens > 0
        if not has_new_tokens:
            return current
        return new_usage if current.total_tokens == 0 else current.plus(new_usage)

    def _accumulate_usage(self, usage: LLMUsage) -> None:
        """Add `usage` to the accumulator kept on the graph runtime state.

        Usage without a positive token count is ignored. A zero-token
        accumulator is replaced by a copy of `usage`; otherwise the two are
        summed with `plus`.
        """
        if usage.total_tokens <= 0:
            return

        accumulated = self.graph_runtime_state.llm_usage
        updated = usage.model_copy() if accumulated.total_tokens == 0 else accumulated.plus(usage)
        self.graph_runtime_state.llm_usage = updated

View File

@ -0,0 +1,130 @@
import re
from collections.abc import Mapping, Sequence
from typing import Any
from .entities import VariableSelector
# Matches a `{{#node_id.var...#}}` placeholder. Group 1 is the key including its `#`
# delimiters; the inner path group is also capturing, so `findall` yields tuples.
REGEX = re.compile(r"\{\{(#[a-zA-Z0-9_]{1,50}(\.[a-zA-Z_][a-zA-Z0-9_]{0,29}){1,10}#)\}\}")
# Same pattern with a non-capturing inner group, so the key is the only captured group.
SELECTOR_PATTERN = re.compile(r"\{\{(#[a-zA-Z0-9_]{1,50}(?:\.[a-zA-Z_][a-zA-Z0-9_]{0,29}){1,10}#)\}\}")
def extract_selectors_from_template(template: str, /) -> Sequence[VariableSelector]:
    """Return one VariableSelector per `{{#node_id.var#}}` placeholder in `template`.

    Selectors are returned in order of appearance, duplicates included. Each
    selector's `variable` is the key with its `#` delimiters and its
    `value_selector` is the dotted path split into parts.

    Only real placeholders are reported: literal text that merely looks like
    a key (for example a bare `#a.b#` outside of `{{ }}`) is ignored.
    """
    # `findall` returns just the captured key of every placeholder, so plain
    # text between placeholders can never be mistaken for a selector.
    return [
        VariableSelector(variable=key, value_selector=key[1:-1].split("."))
        for key in SELECTOR_PATTERN.findall(template)
    ]
class VariableTemplateParser:
    """
    !NOTE: Consider to use the new `segments` module instead of this class.

    A class for parsing and manipulating template variables in a string.

    Rules:

    1. Template variables must be enclosed in `{{}}`.
    2. The template variable Key can only be: #node_id.var1.var2#.
    3. The template variable Key cannot contain new lines or spaces, and must comply with rule 2.

    Example usage:

        template = "Hello, {{#node_id.query.name#}}! Your age is {{#node_id.query.age#}}."
        parser = VariableTemplateParser(template)

        # Extract template variable keys
        variable_keys = parser.extract()
        print(variable_keys)
        # Output: ['#node_id.query.name#', '#node_id.query.age#']

        # Extract variable selectors
        variable_selectors = parser.extract_variable_selectors()
        print(variable_selectors)
        # Output: [VariableSelector(variable='#node_id.query.name#', value_selector=['node_id', 'query', 'name']),
        #          VariableSelector(variable='#node_id.query.age#', value_selector=['node_id', 'query', 'age'])]

        # Format the template string
        inputs = {'#node_id.query.name#': 'John', '#node_id.query.age#': 25}
        formatted_string = parser.format(inputs)
        print(formatted_string)
        # Output: "Hello, John! Your age is 25."
    """

    def __init__(self, template: str):
        self.template = template
        # Unique variable keys, in order of first appearance.
        self.variable_keys = self.extract()

    def extract(self):
        """
        Extracts all the template variable keys from the template string.

        Returns:
            A list of unique template variable keys (each including its `#`
            delimiters), in order of first appearance.
        """
        # REGEX has two capturing groups, so every match is a tuple; the key
        # is the first element.
        matches = re.findall(REGEX, self.template)
        # dict.fromkeys removes duplicates while keeping insertion order, so
        # the result is the same on every run.
        return list(dict.fromkeys(match[0] for match in matches))

    def extract_variable_selectors(self) -> list[VariableSelector]:
        """
        Extracts the variable selectors from the template variable keys.

        Returns:
            A list of VariableSelector objects representing the variable selectors.
        """
        variable_selectors = []
        for variable_key in self.variable_keys:
            remove_hash = variable_key.replace("#", "")
            split_result = remove_hash.split(".")
            # A selector needs at least a node id and a variable name.
            if len(split_result) < 2:
                continue
            variable_selectors.append(VariableSelector(variable=variable_key, value_selector=split_result))
        return variable_selectors

    def format(self, inputs: Mapping[str, Any]) -> str:
        """
        Formats the template string by replacing the template variables with their corresponding values.

        Keys missing from `inputs` keep their placeholder (collapsed to single
        braces), None becomes an empty string, and any `<|...|>` span is
        removed from the final text.

        Args:
            inputs: A dictionary containing the values for the template variables.

        Returns:
            The formatted string with template variables replaced by their values.
        """

        def replacer(match):
            key = match.group(1)
            value = inputs.get(key, match.group(0))  # return original matched string if key not found
            if value is None:
                value = ""
            # convert the value to string
            if isinstance(value, list | dict | bool | int | float):
                value = str(value)
            # Collapse any placeholder inside the value to single braces so
            # it no longer matches the template syntax.
            return VariableTemplateParser.remove_template_variables(value)

        prompt = re.sub(REGEX, replacer, self.template)
        return re.sub(r"<\|.*?\|>", "", prompt)

    @classmethod
    def remove_template_variables(cls, text: str):
        """
        Neutralizes template variables in the given text.

        Every `{{#key#}}` placeholder is rewritten to `{#key#}`, which no
        longer matches the template syntax.

        Args:
            text: The text in which to neutralize the template variables.

        Returns:
            The text with each placeholder collapsed to single braces.
        """
        return re.sub(REGEX, r"{\1}", text)