feat: introduce trigger functionality (#27644)

Signed-off-by: lyzno1 <yuanyouhuilyz@gmail.com>
Co-authored-by: Stream <Stream_2@qq.com>
Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com>
Co-authored-by: zhsama <torvalds@linux.do>
Co-authored-by: Harry <xh001x@hotmail.com>
Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com>
Co-authored-by: yessenia <yessenia.contact@gmail.com>
Co-authored-by: hjlarry <hjlarry@163.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: WTW0313 <twwu@dify.ai>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Yeuoly
2025-11-12 17:59:37 +08:00
committed by GitHub
parent ca7794305b
commit b76e17b25d
785 changed files with 41186 additions and 3725 deletions

View File

@ -0,0 +1,165 @@
"""
Pydantic models for async workflow trigger system.
"""
from collections.abc import Mapping, Sequence
from enum import StrEnum
from typing import Any
from pydantic import BaseModel, ConfigDict, Field
from models.enums import AppTriggerType, WorkflowRunTriggeredFrom
class AsyncTriggerStatus(StrEnum):
"""Async trigger execution status"""
COMPLETED = "completed"
FAILED = "failed"
TIMEOUT = "timeout"
class TriggerMetadata(BaseModel):
    """Base trigger metadata; subclasses narrow `type` and add source-specific fields."""

    # Concrete trigger kind; stays UNKNOWN unless a subclass or caller overrides it.
    type: AppTriggerType = Field(default=AppTriggerType.UNKNOWN)
class TriggerData(BaseModel):
    """Base trigger data model for async workflow execution.

    Carries everything needed to start a workflow run from any trigger source;
    webhook/schedule/plugin subclasses pin `trigger_type` and `trigger_from`.
    """

    # Target application and owning tenant.
    app_id: str
    tenant_id: str
    # Specific workflow version to run; None presumably lets the executor resolve it — TODO confirm.
    workflow_id: str | None = None
    # Node id at which execution starts.
    root_node_id: str
    # User-supplied workflow inputs.
    inputs: Mapping[str, Any]
    # Files accompanying the inputs; defaults to an empty list.
    files: Sequence[Mapping[str, Any]] = Field(default_factory=list)
    # What kind of trigger fired and where the run originated from.
    trigger_type: AppTriggerType
    trigger_from: WorkflowRunTriggeredFrom
    # Optional source-specific metadata (e.g. plugin endpoint details).
    trigger_metadata: TriggerMetadata | None = None

    # Serialize enum fields as their raw values.
    model_config = ConfigDict(use_enum_values=True)
class WebhookTriggerData(TriggerData):
    """Trigger data for workflow runs started by an incoming webhook."""

    # Pin both discriminators to the webhook source.
    trigger_type: AppTriggerType = AppTriggerType.TRIGGER_WEBHOOK
    trigger_from: WorkflowRunTriggeredFrom = WorkflowRunTriggeredFrom.WEBHOOK
class ScheduleTriggerData(TriggerData):
    """Trigger data for workflow runs started by a schedule."""

    # Pin both discriminators to the schedule source.
    trigger_type: AppTriggerType = AppTriggerType.TRIGGER_SCHEDULE
    trigger_from: WorkflowRunTriggeredFrom = WorkflowRunTriggeredFrom.SCHEDULE
class PluginTriggerMetadata(TriggerMetadata):
    """Metadata describing the plugin endpoint/event that fired a trigger."""

    type: AppTriggerType = AppTriggerType.TRIGGER_PLUGIN
    # Identity of the plugin endpoint and provider that produced the event.
    endpoint_id: str
    plugin_unique_identifier: str
    provider_id: str
    # Name of the plugin event that fired.
    event_name: str
    # Icon filenames for light and dark UI themes.
    icon_filename: str
    icon_dark_filename: str
class PluginTriggerData(TriggerData):
    """Trigger data for workflow runs started by a plugin webhook."""

    # Pin both discriminators to the plugin source.
    trigger_type: AppTriggerType = AppTriggerType.TRIGGER_PLUGIN
    trigger_from: WorkflowRunTriggeredFrom = WorkflowRunTriggeredFrom.PLUGIN
    # Plugin and endpoint that originated the run.
    plugin_id: str
    endpoint_id: str
class PluginTriggerDispatchData(BaseModel):
    """Payload handed to Celery when dispatching plugin trigger events."""

    # Actor and tenant on whose behalf the dispatch runs.
    user_id: str
    tenant_id: str
    # Plugin endpoint, provider and subscription the events belong to.
    endpoint_id: str
    provider_id: str
    subscription_id: str
    # Dispatch timestamp; unit (s vs ms) not evident here — TODO confirm against producer.
    timestamp: int
    # Event names to dispatch in this batch.
    events: list[str]
    # Correlation id for tracing the dispatch.
    request_id: str
class WorkflowTaskData(BaseModel):
    """Lightweight data structure for Celery workflow tasks.

    Intentionally minimal: the task re-fetches everything else from the DB
    using this id, keeping the queued message small.
    """

    # Primary tracking ID - all other data can be fetched from DB
    workflow_trigger_log_id: str

    model_config = ConfigDict(arbitrary_types_allowed=True)
class AsyncTriggerExecutionResult(BaseModel):
    """Result from async trigger-based workflow execution."""

    # Identifier of the workflow execution this result belongs to.
    execution_id: str
    # Terminal status (completed / failed / timeout).
    status: AsyncTriggerStatus
    # Workflow outputs on success; None otherwise.
    result: Mapping[str, Any] | None = None
    # Error description on failure; None on success.
    error: str | None = None
    # Wall-clock duration in seconds, when available.
    elapsed_time: float | None = None
    # Total LLM tokens consumed, when available.
    total_tokens: int | None = None

    # Serialize enum fields as their raw values.
    model_config = ConfigDict(use_enum_values=True)
class AsyncTriggerResponse(BaseModel):
    """Immediate response returned when an async workflow is enqueued."""

    # Trigger-log row tracking this run.
    workflow_trigger_log_id: str
    # Celery task id of the enqueued job.
    task_id: str
    # Enqueue status string.
    status: str
    # Queue the task was routed to.
    queue: str

    model_config = ConfigDict(use_enum_values=True)
class TriggerLogResponse(BaseModel):
    """Response model for trigger log data."""

    # Log row identity and scoping.
    id: str
    tenant_id: str
    app_id: str
    workflow_id: str
    # NOTE(review): named trigger_type but typed WorkflowRunTriggeredFrom — confirm intended.
    trigger_type: WorkflowRunTriggeredFrom
    # Execution status and routing info.
    status: str
    queue_name: str
    retry_count: int
    # Populated once the task is enqueued / the run starts.
    celery_task_id: str | None = None
    workflow_run_id: str | None = None
    # Failure details and serialized outputs, when available.
    error: str | None = None
    outputs: str | None = None
    # Runtime metrics, when available.
    elapsed_time: float | None = None
    total_tokens: int | None = None
    # Lifecycle timestamps, serialized as strings.
    created_at: str | None = None
    triggered_at: str | None = None
    finished_at: str | None = None

    # Serialize enum fields as their raw values.
    model_config = ConfigDict(use_enum_values=True)
class WorkflowScheduleCFSPlanEntity(BaseModel):
    """
    CFS plan entity.

    Ensure each workflow run inside Dify is associated with a CFS (Completely
    Fair Scheduler) plan that governs how it is scheduled.
    """

    class Strategy(StrEnum):
        """
        CFS plan strategy.
        """

        TimeSlice = "time-slice"  # time-slice based plan
        Nop = "nop"  # no plan, just run the workflow

    # Which strategy governs this plan.
    schedule_strategy: Strategy
    # Scheduling granularity; -1 means infinite (no slicing). Unit not evident here — TODO confirm.
    granularity: int = Field(default=-1)

View File

@ -0,0 +1,151 @@
"""
Queue dispatcher system for async workflow execution.
Implements an ABC-based pattern for handling different subscription tiers
with appropriate queue routing and rate limiting.
"""
from abc import ABC, abstractmethod
from enum import StrEnum
from configs import dify_config
from extensions.ext_redis import redis_client
from services.billing_service import BillingService
from services.workflow.rate_limiter import TenantDailyRateLimiter
class QueuePriority(StrEnum):
"""Queue priorities for different subscription tiers"""
PROFESSIONAL = "workflow_professional" # Highest priority
TEAM = "workflow_team"
SANDBOX = "workflow_sandbox" # Free tier
class BaseQueueDispatcher(ABC):
    """Abstract dispatcher: ties a subscription tier to a queue, a priority,
    and a per-tenant daily execution quota."""

    def __init__(self):
        # Shared Redis-backed day-based limiter used by all quota operations.
        self.rate_limiter = TenantDailyRateLimiter(redis_client)

    @abstractmethod
    def get_queue_name(self) -> str:
        """Name of the queue this tier dispatches to."""

    @abstractmethod
    def get_daily_limit(self) -> int:
        """Maximum executions allowed per tenant per day."""

    @abstractmethod
    def get_priority(self) -> int:
        """Task priority level for this tier."""

    def check_daily_quota(self, tenant_id: str) -> bool:
        """Return True if the tenant still has daily quota left.

        Read-only: does not consume any quota.
        """
        limit = self.get_daily_limit()
        left = self.rate_limiter.get_remaining_quota(tenant_id=tenant_id, max_daily_limit=limit)
        return left > 0

    def consume_quota(self, tenant_id: str) -> bool:
        """Take one execution from today's quota.

        Returns True when successfully consumed, False when the limit is reached.
        """
        limit = self.get_daily_limit()
        return self.rate_limiter.check_and_consume(tenant_id=tenant_id, max_daily_limit=limit)
class ProfessionalQueueDispatcher(BaseQueueDispatcher):
    """Routes professional-tier tenants to the highest-priority queue."""

    def get_queue_name(self) -> str:
        return QueuePriority.PROFESSIONAL

    def get_daily_limit(self) -> int:
        # Effectively unlimited for this paid tier.
        return 1_000_000_000

    def get_priority(self) -> int:
        return 100
class TeamQueueDispatcher(BaseQueueDispatcher):
    """Routes team-tier tenants to the mid-priority queue."""

    def get_queue_name(self) -> str:
        return QueuePriority.TEAM

    def get_daily_limit(self) -> int:
        # Effectively unlimited for this paid tier.
        return 1_000_000_000

    def get_priority(self) -> int:
        return 50
class SandboxQueueDispatcher(BaseQueueDispatcher):
    """Routes free/sandbox tenants to the lowest-priority queue with a real daily cap."""

    def get_queue_name(self) -> str:
        return QueuePriority.SANDBOX

    def get_daily_limit(self) -> int:
        # Free tier is bounded by the configured per-app daily rate limit.
        return dify_config.APP_DAILY_RATE_LIMIT

    def get_priority(self) -> int:
        return 10
class QueueDispatcherManager:
    """Factory that selects a queue dispatcher from the tenant's billing plan."""

    # Billing plan name -> dispatcher class.
    # Add new tiers here as they're created; unknown plans fall back to sandbox.
    PLAN_DISPATCHER_MAP = {
        "professional": ProfessionalQueueDispatcher,
        "team": TeamQueueDispatcher,
        "sandbox": SandboxQueueDispatcher,
    }

    @classmethod
    def get_dispatcher(cls, tenant_id: str) -> BaseQueueDispatcher:
        """Return the dispatcher matching the tenant's subscription plan.

        Args:
            tenant_id: The tenant identifier

        Returns:
            Appropriate queue dispatcher instance
        """
        # Billing disabled (e.g. self-hosted): everyone gets the team tier.
        plan = "team"
        if dify_config.BILLING_ENABLED:
            try:
                billing_info = BillingService.get_info(tenant_id)
                plan = billing_info.get("subscription", {}).get("plan", "sandbox")
            except Exception:
                # Billing service outage: degrade to the free tier rather than fail.
                plan = "sandbox"
        dispatcher_class = cls.PLAN_DISPATCHER_MAP.get(plan, SandboxQueueDispatcher)
        return dispatcher_class()  # type: ignore

View File

@ -0,0 +1,183 @@
"""
Day-based rate limiter for workflow executions.
Implements UTC-based daily quotas that reset at midnight UTC for consistent rate limiting.
"""
from datetime import UTC, datetime, time, timedelta
from typing import Union
import pytz
from redis import Redis
from sqlalchemy import select
from extensions.ext_database import db
from extensions.ext_redis import RedisClientWrapper
from models.account import Account, TenantAccountJoin, TenantAccountRole
class TenantDailyRateLimiter:
    """
    Day-based rate limiter that resets at midnight UTC.

    This class provides Redis-based rate limiting with the following features:
    - Daily quotas that reset at midnight UTC for consistency
    - Atomic check-and-consume operations (single INCR; no read-then-write race)
    - Automatic cleanup of stale counters via key TTL
    - Timezone-aware reset times for better UX
    """

    def __init__(self, redis_client: Union[Redis, RedisClientWrapper]):
        self.redis = redis_client

    def get_tenant_owner_timezone(self, tenant_id: str) -> str:
        """
        Get timezone of tenant owner.

        Args:
            tenant_id: The tenant identifier

        Returns:
            Timezone string (e.g., 'America/New_York', 'UTC'); 'UTC' when the
            owner cannot be resolved or has no timezone set.
        """
        # Query to get tenant owner's timezone using scalar and select
        owner = db.session.scalar(
            select(Account)
            .join(TenantAccountJoin, TenantAccountJoin.account_id == Account.id)
            .where(TenantAccountJoin.tenant_id == tenant_id, TenantAccountJoin.role == TenantAccountRole.OWNER)
        )
        if not owner:
            return "UTC"
        return owner.timezone or "UTC"

    def _get_day_key(self, tenant_id: str) -> str:
        """
        Get Redis counter key for the current UTC day.

        Args:
            tenant_id: The tenant identifier

        Returns:
            Redis key for the current UTC day
        """
        utc_now = datetime.now(UTC)
        date_str = utc_now.strftime("%Y-%m-%d")
        return f"workflow:daily_limit:{tenant_id}:{date_str}"

    def _get_ttl_seconds(self) -> int:
        """
        Calculate seconds until UTC midnight.

        Returns:
            Number of seconds until UTC midnight
        """
        utc_now = datetime.now(UTC)
        # Get next midnight in UTC
        next_midnight = datetime.combine(utc_now.date() + timedelta(days=1), time.min)
        next_midnight = next_midnight.replace(tzinfo=UTC)
        return int((next_midnight - utc_now).total_seconds())

    def check_and_consume(self, tenant_id: str, max_daily_limit: int) -> bool:
        """
        Atomically consume one execution from today's quota.

        Uses a single INCR as the source of truth. The previous GET/SETEX
        "first execution" path raced under concurrency: two concurrent first
        requests could both observe a missing key and overwrite each other's
        count, losing an execution and allowing over-limit runs.

        Args:
            tenant_id: The tenant identifier
            max_daily_limit: Maximum daily limit

        Returns:
            True if quota consumed successfully, False if limit reached
        """
        key = self._get_day_key(tenant_id)
        # INCR creates the key atomically on first use; no check-then-set window.
        new_count = self.redis.incr(key)
        if new_count == 1:
            # First increment of the day: attach the TTL so the counter
            # expires at the next UTC midnight.
            self.redis.expire(key, self._get_ttl_seconds())
        if new_count <= max_daily_limit:
            return True
        # Over the limit: roll back our increment so usage reporting stays accurate.
        self.redis.decr(key)
        return False

    def get_remaining_quota(self, tenant_id: str, max_daily_limit: int) -> int:
        """
        Get remaining quota for the day.

        Args:
            tenant_id: The tenant identifier
            max_daily_limit: Maximum daily limit

        Returns:
            Number of remaining executions for the day
        """
        key = self._get_day_key(tenant_id)
        used = int(self.redis.get(key) or 0)
        return max(0, max_daily_limit - used)

    def get_current_usage(self, tenant_id: str) -> int:
        """
        Get current usage for the day.

        Args:
            tenant_id: The tenant identifier

        Returns:
            Number of executions used today
        """
        key = self._get_day_key(tenant_id)
        return int(self.redis.get(key) or 0)

    def reset_quota(self, tenant_id: str) -> bool:
        """
        Reset quota for testing purposes.

        Args:
            tenant_id: The tenant identifier

        Returns:
            True if key was deleted, False if key didn't exist
        """
        key = self._get_day_key(tenant_id)
        return bool(self.redis.delete(key))

    def get_quota_reset_time(self, tenant_id: str, timezone_str: str) -> datetime:
        """
        Get the time when quota will reset (next UTC midnight in tenant's timezone).

        Args:
            tenant_id: The tenant identifier (unused; kept for interface compatibility)
            timezone_str: Tenant's timezone for display purposes

        Returns:
            Datetime when quota resets (next UTC midnight in tenant's timezone)
        """
        tz = pytz.timezone(timezone_str)
        utc_now = datetime.now(UTC)
        # Get next midnight in UTC, then convert to tenant's timezone
        next_utc_midnight = datetime.combine(utc_now.date() + timedelta(days=1), time.min)
        next_utc_midnight = pytz.UTC.localize(next_utc_midnight)
        return next_utc_midnight.astimezone(tz)

View File

@ -0,0 +1,34 @@
from abc import ABC, abstractmethod
from enum import StrEnum
from services.workflow.entities import WorkflowScheduleCFSPlanEntity
class SchedulerCommand(StrEnum):
"""
Scheduler command.
"""
RESOURCE_LIMIT_REACHED = "resource_limit_reached"
NONE = "none"
class CFSPlanScheduler(ABC):
    """
    CFS plan scheduler.

    Abstract base deciding, per CFS (Completely Fair Scheduler) plan,
    whether a workflow run may be scheduled.
    """

    def __init__(self, plan: WorkflowScheduleCFSPlanEntity):
        """
        Initialize the CFS plan scheduler.

        Args:
            plan: The CFS plan (carries the strategy and granularity).
        """
        self.plan = plan

    @abstractmethod
    def can_schedule(self) -> SchedulerCommand:
        """
        Whether a workflow run can be scheduled.

        Returns:
            A SchedulerCommand — presumably NONE when scheduling may proceed
            and RESOURCE_LIMIT_REACHED when it may not; confirm against
            concrete implementations.
        """