Merge remote-tracking branch 'origin/main' into feat/trigger

This commit is contained in:
yessenia
2025-09-25 17:14:24 +08:00
3013 changed files with 148826 additions and 44294 deletions

View File

@ -1,16 +1,40 @@
from .clean_when_dataset_deleted import handle
from .clean_when_document_deleted import handle
from .create_document_index import handle
from .create_installed_app_when_app_created import handle
from .create_site_record_when_app_created import handle
from .delete_tool_parameters_cache_when_sync_draft_workflow import handle
from .sync_plugin_trigger_when_app_created import handle
from .sync_webhook_when_app_created import handle
from .sync_workflow_schedule_when_app_published import handle
from .update_app_dataset_join_when_app_model_config_updated import handle
from .update_app_dataset_join_when_app_published_workflow_updated import handle
from .update_app_triggers_when_app_published_workflow_updated import handle
from .clean_when_dataset_deleted import handle as handle_clean_when_dataset_deleted
from .clean_when_document_deleted import handle as handle_clean_when_document_deleted
from .create_document_index import handle as handle_create_document_index
from .create_installed_app_when_app_created import handle as handle_create_installed_app_when_app_created
from .create_site_record_when_app_created import handle as handle_create_site_record_when_app_created
from .delete_tool_parameters_cache_when_sync_draft_workflow import (
handle as handle_delete_tool_parameters_cache_when_sync_draft_workflow,
)
from .sync_plugin_trigger_when_app_created import handle as handle_sync_plugin_trigger_when_app_created
from .sync_webhook_when_app_created import handle as handle_sync_webhook_when_app_created
from .sync_workflow_schedule_when_app_published import handle as handle_sync_workflow_schedule_when_app_published
from .update_app_dataset_join_when_app_model_config_updated import (
handle as handle_update_app_dataset_join_when_app_model_config_updated,
)
from .update_app_dataset_join_when_app_published_workflow_updated import (
handle as handle_update_app_dataset_join_when_app_published_workflow_updated,
)
from .update_app_triggers_when_app_published_workflow_updated import (
handle as handle_update_app_triggers_when_app_published_workflow_updated,
)
# Consolidated handler replaces both deduct_quota_when_message_created and
# update_provider_last_used_at_when_message_created
from .update_provider_when_message_created import handle
from .update_provider_when_message_created import handle as handle_update_provider_when_message_created
# Explicit public API of this package: the aliased `handle` entry points
# imported above, one per event-handler module.
__all__ = [
    "handle_clean_when_dataset_deleted",
    "handle_clean_when_document_deleted",
    "handle_create_document_index",
    "handle_create_installed_app_when_app_created",
    "handle_create_site_record_when_app_created",
    "handle_delete_tool_parameters_cache_when_sync_draft_workflow",
    "handle_sync_plugin_trigger_when_app_created",
    "handle_sync_webhook_when_app_created",
    "handle_sync_workflow_schedule_when_app_published",
    "handle_update_app_dataset_join_when_app_model_config_updated",
    "handle_update_app_dataset_join_when_app_published_workflow_updated",
    "handle_update_app_triggers_when_app_published_workflow_updated",
    "handle_update_provider_when_message_created",
]

View File

@ -1,3 +1,5 @@
from sqlalchemy import select
from events.app_event import app_model_config_was_updated
from extensions.ext_database import db
from models.dataset import AppDatasetJoin
@ -13,7 +15,7 @@ def handle(sender, **kwargs):
dataset_ids = get_dataset_ids_from_model_config(app_model_config)
app_dataset_joins = db.session.query(AppDatasetJoin).where(AppDatasetJoin.app_id == app.id).all()
app_dataset_joins = db.session.scalars(select(AppDatasetJoin).where(AppDatasetJoin.app_id == app.id)).all()
removed_dataset_ids: set[str] = set()
if not app_dataset_joins:

View File

@ -1,5 +1,7 @@
from typing import cast
from sqlalchemy import select
from core.workflow.nodes import NodeType
from core.workflow.nodes.knowledge_retrieval.entities import KnowledgeRetrievalNodeData
from events.app_event import app_published_workflow_was_updated
@ -15,7 +17,7 @@ def handle(sender, **kwargs):
published_workflow = cast(Workflow, published_workflow)
dataset_ids = get_dataset_ids_from_workflow(published_workflow)
app_dataset_joins = db.session.query(AppDatasetJoin).where(AppDatasetJoin.app_id == app.id).all()
app_dataset_joins = db.session.scalars(select(AppDatasetJoin).where(AppDatasetJoin.app_id == app.id)).all()
removed_dataset_ids: set[str] = set()
if not app_dataset_joins:
@ -61,7 +63,7 @@ def get_dataset_ids_from_workflow(published_workflow: Workflow) -> set[str]:
try:
node_data = KnowledgeRetrievalNodeData(**node.get("data", {}))
dataset_ids.update(dataset_id for dataset_id in node_data.dataset_ids)
except Exception as e:
except Exception:
continue
return dataset_ids

View File

@ -1,7 +1,7 @@
import logging
import time as time_module
from datetime import datetime
from typing import Any, Optional
from typing import Any
from pydantic import BaseModel
from sqlalchemy import update
@ -10,23 +10,50 @@ from sqlalchemy.orm import Session
from configs import dify_config
from core.app.entities.app_invoke_entities import AgentChatAppGenerateEntity, ChatAppGenerateEntity
from core.entities.provider_entities import QuotaUnit, SystemConfiguration
from core.plugin.entities.plugin import ModelProviderID
from events.message_event import message_was_created
from extensions.ext_database import db
from extensions.ext_redis import redis_client, redis_fallback
from libs import datetime_utils
from models.model import Message
from models.provider import Provider, ProviderType
from models.provider_ids import ModelProviderID
logger = logging.getLogger(__name__)
# Redis cache key prefix for provider last used timestamps
_PROVIDER_LAST_USED_CACHE_PREFIX = "provider:last_used"
# Default TTL for cache entries (10 minutes)
_CACHE_TTL_SECONDS = 600
LAST_USED_UPDATE_WINDOW_SECONDS = 60 * 5
def _get_provider_cache_key(tenant_id: str, provider_name: str) -> str:
    """Build the Redis key under which a provider's last-used timestamp is cached."""
    return ":".join((_PROVIDER_LAST_USED_CACHE_PREFIX, tenant_id, provider_name))
@redis_fallback(default_return=None)
def _get_last_update_timestamp(cache_key: str) -> datetime | None:
    """Read the cached last-update timestamp for *cache_key*, or ``None`` if absent."""
    raw = redis_client.get(cache_key)
    if not raw:
        return None
    # Stored as a stringified POSIX timestamp; decode and convert back.
    return datetime.fromtimestamp(float(raw.decode("utf-8")))
@redis_fallback()
def _set_last_update_timestamp(cache_key: str, timestamp: datetime):
    """Cache *timestamp* under *cache_key*, expiring after the module-wide TTL."""
    # Persist as a stringified POSIX timestamp so it round-trips via float().
    value = str(timestamp.timestamp())
    redis_client.setex(cache_key, _CACHE_TTL_SECONDS, value)
class _ProviderUpdateFilters(BaseModel):
    """Filters for identifying Provider records to update.

    ``tenant_id`` and ``provider_name`` are always required; ``provider_type``
    and ``quota_type`` are optional and default to ``None``.
    """

    tenant_id: str
    provider_name: str
    # Defect fixed: the legacy `Optional[str]` duplicates of these two fields
    # were removed — `Optional` is no longer imported, so they would raise a
    # NameError at class-definition time.
    provider_type: str | None = None
    quota_type: str | None = None
class _ProviderUpdateAdditionalFilters(BaseModel):
@ -38,8 +65,8 @@ class _ProviderUpdateAdditionalFilters(BaseModel):
class _ProviderUpdateValues(BaseModel):
    """Values to update in Provider records."""

    # Defect fixed: the legacy `Optional[...]` duplicates of these fields were
    # removed — `Optional` is no longer imported, so they would raise a
    # NameError at class-definition time.
    last_used: datetime | None = None
    # Typed loosely because this may hold a SQLAlchemy column expression such
    # as `Provider.quota_used + <int>` rather than a plain value.
    quota_used: Any | None = None
class _ProviderUpdateOperation(BaseModel):
@ -139,7 +166,7 @@ def handle(sender: Message, **kwargs):
provider_name,
)
except Exception as e:
except Exception:
# Log failure with timing and context
duration = time_module.perf_counter() - start_time
@ -155,7 +182,7 @@ def handle(sender: Message, **kwargs):
def _calculate_quota_usage(
*, message: Message, system_configuration: SystemConfiguration, model_name: str
) -> Optional[int]:
) -> int | None:
"""Calculate quota usage based on message tokens and quota type."""
quota_unit = None
for quota_configuration in system_configuration.quota_configurations:
@ -177,7 +204,7 @@ def _calculate_quota_usage(
elif quota_unit == QuotaUnit.TIMES:
return 1
return None
except Exception as e:
except Exception:
logger.exception("Failed to calculate quota usage")
return None
@ -215,8 +242,23 @@ def _execute_provider_updates(updates_to_perform: list[_ProviderUpdateOperation]
# Prepare values dict for SQLAlchemy update
update_values = {}
# Updating `last_used` is removed due to performance reasons.
# ref: https://github.com/langgenius/dify/issues/24526
# NOTE: For frequently used providers under high load, this implementation may experience
# race conditions or update contention despite the time-window optimization:
# 1. Multiple concurrent requests might check the same cache key simultaneously
# 2. Redis cache operations are not atomic with database updates
# 3. Heavy providers could still face database lock contention during peak usage
# The current implementation is acceptable for most scenarios, but future optimization
# considerations could include: batched updates, or async processing.
if values.last_used is not None:
cache_key = _get_provider_cache_key(filters.tenant_id, filters.provider_name)
now = datetime_utils.naive_utc_now()
last_update = _get_last_update_timestamp(cache_key)
if last_update is None or (now - last_update).total_seconds() > LAST_USED_UPDATE_WINDOW_SECONDS:
update_values["last_used"] = values.last_used
_set_last_update_timestamp(cache_key, now)
if values.quota_used is not None:
update_values["quota_used"] = values.quota_used
# Skip the current update operation if no updates are required.