diff --git a/.github/workflows/api-tests.yml b/.github/workflows/api-tests.yml index deba7d6b30..12d7ff33c7 100644 --- a/.github/workflows/api-tests.yml +++ b/.github/workflows/api-tests.yml @@ -27,7 +27,7 @@ jobs: persist-credentials: false - name: Setup UV and Python - uses: astral-sh/setup-uv@6ee6290f1cbc4156c0bdd66691b2c144ef8df19a # v7.4.0 + uses: astral-sh/setup-uv@e06108dd0aef18192324c70427afc47652e63a82 # v7.5.0 with: enable-cache: true python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/autofix.yml b/.github/workflows/autofix.yml index 80f892589d..73ca94f98f 100644 --- a/.github/workflows/autofix.yml +++ b/.github/workflows/autofix.yml @@ -39,7 +39,7 @@ jobs: with: python-version: "3.11" - - uses: astral-sh/setup-uv@6ee6290f1cbc4156c0bdd66691b2c144ef8df19a # v7.4.0 + - uses: astral-sh/setup-uv@e06108dd0aef18192324c70427afc47652e63a82 # v7.5.0 - name: Generate Docker Compose if: steps.docker-compose-changes.outputs.any_changed == 'true' diff --git a/.github/workflows/build-push.yml b/.github/workflows/build-push.yml index 94466d151c..1ae8d44482 100644 --- a/.github/workflows/build-push.yml +++ b/.github/workflows/build-push.yml @@ -113,7 +113,7 @@ jobs: context: "web" steps: - name: Download digests - uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: path: /tmp/digests pattern: digests-${{ matrix.context }}-* diff --git a/.github/workflows/db-migration-test.yml b/.github/workflows/db-migration-test.yml index 570dd3fd8c..c567a4bfe0 100644 --- a/.github/workflows/db-migration-test.yml +++ b/.github/workflows/db-migration-test.yml @@ -19,7 +19,7 @@ jobs: persist-credentials: false - name: Setup UV and Python - uses: astral-sh/setup-uv@6ee6290f1cbc4156c0bdd66691b2c144ef8df19a # v7.4.0 + uses: astral-sh/setup-uv@e06108dd0aef18192324c70427afc47652e63a82 # v7.5.0 with: enable-cache: true python-version: "3.12" @@ -69,7 +69,7 @@ jobs: persist-credentials: false - name: Setup UV and Python - uses: astral-sh/setup-uv@6ee6290f1cbc4156c0bdd66691b2c144ef8df19a # v7.4.0 + uses: astral-sh/setup-uv@e06108dd0aef18192324c70427afc47652e63a82 # v7.5.0 with: enable-cache: true python-version: "3.12" diff --git a/.github/workflows/main-ci.yml b/.github/workflows/main-ci.yml index fd104e9496..4f00a9101c 100644 --- a/.github/workflows/main-ci.yml +++ b/.github/workflows/main-ci.yml @@ -28,7 +28,7 @@ jobs: migration-changed: ${{ steps.changes.outputs.migration }} steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2 + - uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4.0.1 id: changes with: filters: | diff --git a/.github/workflows/pyrefly-diff.yml b/.github/workflows/pyrefly-diff.yml index ea152dec97..f50df229d5 100644 --- a/.github/workflows/pyrefly-diff.yml +++ b/.github/workflows/pyrefly-diff.yml @@ -22,7 +22,7 @@ jobs: fetch-depth: 0 - name: Setup Python & UV - uses: astral-sh/setup-uv@6ee6290f1cbc4156c0bdd66691b2c144ef8df19a # v7.4.0 + uses: astral-sh/setup-uv@e06108dd0aef18192324c70427afc47652e63a82 # v7.5.0 with: enable-cache: true diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index 868bacc6e5..5e037d2541 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -33,7 +33,7 @@ jobs: - name: Setup UV and Python if: steps.changed-files.outputs.any_changed == 'true' - uses: 
astral-sh/setup-uv@6ee6290f1cbc4156c0bdd66691b2c144ef8df19a # v7.4.0 + uses: astral-sh/setup-uv@e06108dd0aef18192324c70427afc47652e63a82 # v7.5.0 with: enable-cache: false python-version: "3.12" diff --git a/.github/workflows/translate-i18n-claude.yml b/.github/workflows/translate-i18n-claude.yml index 62724c84e5..9af6649328 100644 --- a/.github/workflows/translate-i18n-claude.yml +++ b/.github/workflows/translate-i18n-claude.yml @@ -120,7 +120,7 @@ jobs: - name: Run Claude Code for Translation Sync if: steps.detect_changes.outputs.CHANGED_FILES != '' - uses: anthropics/claude-code-action@26ec041249acb0a944c0a47b6c0c13f05dbc5b44 # v1.0.70 + uses: anthropics/claude-code-action@cd77b50d2b0808657f8e6774085c8bf54484351c # v1.0.72 with: anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/vdb-tests.yml b/.github/workflows/vdb-tests.yml index 84a1182f94..0b771c1af7 100644 --- a/.github/workflows/vdb-tests.yml +++ b/.github/workflows/vdb-tests.yml @@ -31,7 +31,7 @@ jobs: remove_tool_cache: true - name: Setup UV and Python - uses: astral-sh/setup-uv@6ee6290f1cbc4156c0bdd66691b2c144ef8df19a # v7.4.0 + uses: astral-sh/setup-uv@e06108dd0aef18192324c70427afc47652e63a82 # v7.5.0 with: enable-cache: true python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/web-tests.yml b/.github/workflows/web-tests.yml index d3cc633e56..8999b8d544 100644 --- a/.github/workflows/web-tests.yml +++ b/.github/workflows/web-tests.yml @@ -77,7 +77,7 @@ jobs: uses: ./.github/actions/setup-web - name: Download blob reports - uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: path: web/.vitest-reports pattern: blob-report-* diff --git a/.gitignore b/.gitignore index 8200d70afe..aaca9f2b0a 100644 --- a/.gitignore +++ b/.gitignore @@ -237,3 +237,6 @@ scripts/stress-test/reports/ # settings *.local.json *.local.md + +# Code Agent Folder +.qoder/* \ No newline at end of file diff --git a/api/.env.example b/api/.env.example index 8fbe2e4643..8195a3c074 100644 --- a/api/.env.example +++ b/api/.env.example @@ -22,10 +22,10 @@ APP_WEB_URL=http://localhost:3000 # Files URL FILES_URL=http://localhost:5001 -# INTERNAL_FILES_URL is used for plugin daemon communication within Docker network. -# Set this to the internal Docker service URL for proper plugin file access. -# Example: INTERNAL_FILES_URL=http://api:5001 -INTERNAL_FILES_URL=http://127.0.0.1:5001 +# INTERNAL_FILES_URL is used by services running in Docker to reach the API file endpoints. +# For Docker Desktop (Mac/Windows), use http://host.docker.internal:5001 when the API runs on the host. +# For Docker Compose on Linux, use http://api:5001 when the API runs inside the Docker network. +INTERNAL_FILES_URL=http://host.docker.internal:5001 # TRIGGER URL TRIGGER_URL=http://localhost:5001 @@ -180,7 +180,7 @@ CONSOLE_CORS_ALLOW_ORIGINS=http://localhost:3000,* COOKIE_DOMAIN= # Vector database configuration -# Supported values are `weaviate`, `oceanbase`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `opengauss`, `tablestore`,`vastbase`,`tidb`,`tidb_on_qdrant`,`baidu`,`lindorm`,`huawei_cloud`,`upstash`, `matrixone`. 
+# Supported values are `weaviate`, `oceanbase`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `opengauss`, `tablestore`,`vastbase`,`tidb`,`tidb_on_qdrant`,`baidu`,`lindorm`,`huawei_cloud`,`upstash`, `matrixone`, `hologres`. VECTOR_STORE=weaviate # Prefix used to create collection name in vector database VECTOR_INDEX_NAME_PREFIX=Vector_index @@ -217,6 +217,20 @@ COUCHBASE_PASSWORD=password COUCHBASE_BUCKET_NAME=Embeddings COUCHBASE_SCOPE_NAME=_default +# Hologres configuration +# access_key_id is used as the PG username, access_key_secret is used as the PG password +HOLOGRES_HOST= +HOLOGRES_PORT=80 +HOLOGRES_DATABASE= +HOLOGRES_ACCESS_KEY_ID= +HOLOGRES_ACCESS_KEY_SECRET= +HOLOGRES_SCHEMA=public +HOLOGRES_TOKENIZER=jieba +HOLOGRES_DISTANCE_METHOD=Cosine +HOLOGRES_BASE_QUANTIZATION_TYPE=rabitq +HOLOGRES_MAX_DEGREE=64 +HOLOGRES_EF_CONSTRUCTION=400 + # Milvus configuration MILVUS_URI=http://127.0.0.1:19530 MILVUS_TOKEN= diff --git a/api/.importlinter b/api/.importlinter index 5c0a6e1288..a836d09088 100644 --- a/api/.importlinter +++ b/api/.importlinter @@ -43,7 +43,6 @@ forbidden_modules = extensions.ext_redis allow_indirect_imports = True ignore_imports = - dify_graph.nodes.agent.agent_node -> extensions.ext_database dify_graph.nodes.llm.node -> extensions.ext_database dify_graph.model_runtime.model_providers.__base.ai_model -> extensions.ext_redis dify_graph.model_runtime.model_providers.model_provider_factory -> extensions.ext_redis @@ -90,9 +89,6 @@ forbidden_modules = core.trigger core.variables ignore_imports = - dify_graph.nodes.agent.agent_node -> core.model_manager - dify_graph.nodes.agent.agent_node -> core.provider_manager - dify_graph.nodes.agent.agent_node -> core.tools.tool_manager dify_graph.nodes.llm.llm_utils -> core.model_manager dify_graph.nodes.llm.protocols -> core.model_manager dify_graph.nodes.llm.llm_utils -> dify_graph.model_runtime.model_providers.__base.large_language_model @@ -100,9 +96,6 @@ ignore_imports = dify_graph.nodes.tool.tool_node -> core.callback_handler.workflow_tool_callback_handler dify_graph.nodes.tool.tool_node -> core.tools.tool_engine dify_graph.nodes.tool.tool_node -> core.tools.tool_manager - dify_graph.nodes.agent.agent_node -> core.agent.entities - dify_graph.nodes.agent.agent_node -> core.agent.plugin_entities - dify_graph.nodes.knowledge_retrieval.knowledge_retrieval_node -> core.app.app_config.entities dify_graph.nodes.parameter_extractor.parameter_extractor_node -> core.prompt.advanced_prompt_transform dify_graph.nodes.parameter_extractor.parameter_extractor_node -> core.prompt.simple_prompt_transform dify_graph.nodes.parameter_extractor.parameter_extractor_node -> dify_graph.model_runtime.model_providers.__base.large_language_model @@ -110,12 +103,9 @@ ignore_imports = dify_graph.nodes.parameter_extractor.parameter_extractor_node -> core.model_manager dify_graph.nodes.question_classifier.question_classifier_node -> core.model_manager dify_graph.nodes.tool.tool_node -> core.tools.utils.message_transformer - dify_graph.nodes.agent.agent_node -> models.model - dify_graph.nodes.llm.node -> core.helper.code_executor dify_graph.nodes.llm.node -> core.llm_generator.output_parser.errors dify_graph.nodes.llm.node -> core.llm_generator.output_parser.structured_output dify_graph.nodes.llm.node -> core.model_manager - dify_graph.nodes.agent.entities -> core.prompt.entities.advanced_prompt_entities 
    dify_graph.nodes.llm.entities -> core.prompt.entities.advanced_prompt_entities
    dify_graph.nodes.llm.node -> core.prompt.entities.advanced_prompt_entities
    dify_graph.nodes.llm.node -> core.prompt.utils.prompt_message_util
@@ -124,17 +114,12 @@ ignore_imports =
    dify_graph.nodes.parameter_extractor.parameter_extractor_node -> core.prompt.utils.prompt_message_util
    dify_graph.nodes.question_classifier.entities -> core.prompt.entities.advanced_prompt_entities
    dify_graph.nodes.question_classifier.question_classifier_node -> core.prompt.utils.prompt_message_util
-    dify_graph.nodes.knowledge_index.entities -> core.rag.retrieval.retrieval_methods
    dify_graph.nodes.llm.node -> models.dataset
-    dify_graph.nodes.agent.agent_node -> core.tools.utils.message_transformer
    dify_graph.nodes.llm.file_saver -> core.tools.signature
    dify_graph.nodes.llm.file_saver -> core.tools.tool_file_manager
    dify_graph.nodes.tool.tool_node -> core.tools.errors
-    dify_graph.nodes.agent.agent_node -> extensions.ext_database
    dify_graph.nodes.llm.node -> extensions.ext_database
-    dify_graph.nodes.agent.agent_node -> models
    dify_graph.nodes.llm.node -> models.model
-    dify_graph.nodes.agent.agent_node -> services
    dify_graph.nodes.tool.tool_node -> services
    dify_graph.model_runtime.model_providers.__base.ai_model -> configs
    dify_graph.model_runtime.model_providers.__base.ai_model -> extensions.ext_redis
diff --git a/api/README.md b/api/README.md
index b647367046..9f247da8f0 100644
--- a/api/README.md
+++ b/api/README.md
@@ -42,7 +42,7 @@ The scripts resolve paths relative to their location, so you can run them from a
 1. Set up your application by visiting `http://localhost:3000`.
 
-1. Start the worker service (async and scheduler tasks, runs from `api`).
+1. Optional: start the worker service (async tasks, runs from `api`).
 
    ```bash
    ./dev/start-worker
@@ -54,6 +54,87 @@ The scripts resolve paths relative to their location, so you can run them from a
    ./dev/start-beat
    ```
 
+### Manual commands
+
+<details>
+<summary>Show manual setup and run steps</summary>
+
+These commands assume you start from the repository root.
+
+1. Start the Docker Compose stack.
+
+   The backend requires middleware, including PostgreSQL, Redis, and Weaviate, which can be started together using Docker Compose.
+
+   ```bash
+   cp docker/middleware.env.example docker/middleware.env
+   # Use the mysql profile or another vector database profile if you are not using postgres/weaviate.
+   docker compose -f docker/docker-compose.middleware.yaml --profile postgresql --profile weaviate -p dify up -d
+   ```
+
+1. Copy env files.
+
+   ```bash
+   cp api/.env.example api/.env
+   cp web/.env.example web/.env.local
+   ```
+
+1. Install UV if needed.
+
+   ```bash
+   pip install uv
+   # Or on macOS
+   brew install uv
+   ```
+
+1. Install API dependencies.
+
+   ```bash
+   cd api
+   uv sync --group dev
+   ```
+
+1. Install web dependencies.
+
+   ```bash
+   cd web
+   pnpm install
+   cd ..
+   ```
+
+1. Start the backend (runs migrations first, in a new terminal).
+
+   ```bash
+   cd api
+   uv run flask db upgrade
+   uv run flask run --host 0.0.0.0 --port=5001 --debug
+   ```
+
+1. Start the Dify [web](../web) service (in a new terminal).
+
+   ```bash
+   cd web
+   pnpm dev:inspect
+   ```
+
+1. Set up your application by visiting `http://localhost:3000`.
+
+1. Optional: start the worker service (async tasks, in a new terminal).
+
+   ```bash
+   cd api
+   # Note: the enterprise_telemetry queue is only used in Enterprise Edition.
+   uv run celery -A app.celery worker -P threads -c 2 --loglevel INFO -Q dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention,enterprise_telemetry
+   ```
+
+1. Optional: start Celery Beat (scheduled tasks, in a new terminal).
+
+   ```bash
+   cd api
+   uv run celery -A app.celery beat
+   ```
+
+</details>
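+
+Once the backend and web services are up, you can sanity-check the API from another terminal. This is a minimal check, assuming the default port used in the steps above; the exact response body depends on the route, but the service should answer over HTTP:
+
+```bash
+# Expect an HTTP response from the API on port 5001 (the exact body depends on the route).
+curl -i http://localhost:5001/
+```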
+ ### Environment notes > [!IMPORTANT] diff --git a/api/app_factory.py b/api/app_factory.py index dcbc821687..11568f139f 100644 --- a/api/app_factory.py +++ b/api/app_factory.py @@ -81,6 +81,7 @@ def initialize_extensions(app: DifyApp): ext_commands, ext_compress, ext_database, + ext_enterprise_telemetry, ext_fastopenapi, ext_forward_refs, ext_hosting_provider, @@ -131,6 +132,7 @@ def initialize_extensions(app: DifyApp): ext_commands, ext_fastopenapi, ext_otel, + ext_enterprise_telemetry, ext_request_logging, ext_session_factory, ] diff --git a/api/commands/vector.py b/api/commands/vector.py index 4df194026b..5f41d469c8 100644 --- a/api/commands/vector.py +++ b/api/commands/vector.py @@ -160,6 +160,7 @@ def migrate_knowledge_vector_database(): } lower_collection_vector_types = { VectorType.ANALYTICDB, + VectorType.HOLOGRES, VectorType.CHROMA, VectorType.MYSCALE, VectorType.PGVECTO_RS, diff --git a/api/configs/app_config.py b/api/configs/app_config.py index d3b1cf9d5b..831f0a49e0 100644 --- a/api/configs/app_config.py +++ b/api/configs/app_config.py @@ -8,7 +8,7 @@ from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, Settings from libs.file_utils import search_file_upwards from .deploy import DeploymentConfig -from .enterprise import EnterpriseFeatureConfig +from .enterprise import EnterpriseFeatureConfig, EnterpriseTelemetryConfig from .extra import ExtraServiceConfig from .feature import FeatureConfig from .middleware import MiddlewareConfig @@ -73,6 +73,8 @@ class DifyConfig( # Enterprise feature configs # **Before using, please contact business@dify.ai by email to inquire about licensing matters.** EnterpriseFeatureConfig, + # Enterprise telemetry configs + EnterpriseTelemetryConfig, ): model_config = SettingsConfigDict( # read from dotenv format config file diff --git a/api/configs/enterprise/__init__.py b/api/configs/enterprise/__init__.py index f8447c6979..c7289de393 100644 --- a/api/configs/enterprise/__init__.py +++ b/api/configs/enterprise/__init__.py @@ -22,3 +22,49 @@ class EnterpriseFeatureConfig(BaseSettings): ENTERPRISE_REQUEST_TIMEOUT: int = Field( ge=1, description="Maximum timeout in seconds for enterprise requests", default=5 ) + + +class EnterpriseTelemetryConfig(BaseSettings): + """ + Configuration for enterprise telemetry. 
+ """ + + ENTERPRISE_TELEMETRY_ENABLED: bool = Field( + description="Enable enterprise telemetry collection (also requires ENTERPRISE_ENABLED=true).", + default=False, + ) + + ENTERPRISE_OTLP_ENDPOINT: str = Field( + description="Enterprise OTEL collector endpoint.", + default="", + ) + + ENTERPRISE_OTLP_HEADERS: str = Field( + description="Auth headers for OTLP export (key=value,key2=value2).", + default="", + ) + + ENTERPRISE_OTLP_PROTOCOL: str = Field( + description="OTLP protocol: 'http' or 'grpc' (default: http).", + default="http", + ) + + ENTERPRISE_OTLP_API_KEY: str = Field( + description="Bearer token for enterprise OTLP export authentication.", + default="", + ) + + ENTERPRISE_INCLUDE_CONTENT: bool = Field( + description="Include input/output content in traces (privacy toggle).", + default=True, + ) + + ENTERPRISE_SERVICE_NAME: str = Field( + description="Service name for OTEL resource.", + default="dify", + ) + + ENTERPRISE_OTEL_SAMPLING_RATE: float = Field( + description="Sampling rate for enterprise traces (0.0 to 1.0, default 1.0 = 100%).", + default=1.0, + ) diff --git a/api/configs/middleware/__init__.py b/api/configs/middleware/__init__.py index 0532a42371..15ac8bf0bf 100644 --- a/api/configs/middleware/__init__.py +++ b/api/configs/middleware/__init__.py @@ -26,6 +26,7 @@ from .vdb.chroma_config import ChromaConfig from .vdb.clickzetta_config import ClickzettaConfig from .vdb.couchbase_config import CouchbaseConfig from .vdb.elasticsearch_config import ElasticsearchConfig +from .vdb.hologres_config import HologresConfig from .vdb.huawei_cloud_config import HuaweiCloudConfig from .vdb.iris_config import IrisVectorConfig from .vdb.lindorm_config import LindormConfig @@ -347,6 +348,7 @@ class MiddlewareConfig( AnalyticdbConfig, ChromaConfig, ClickzettaConfig, + HologresConfig, HuaweiCloudConfig, IrisVectorConfig, MilvusConfig, diff --git a/api/configs/middleware/vdb/hologres_config.py b/api/configs/middleware/vdb/hologres_config.py new file mode 100644 index 0000000000..9812cce268 --- /dev/null +++ b/api/configs/middleware/vdb/hologres_config.py @@ -0,0 +1,68 @@ +from holo_search_sdk.types import BaseQuantizationType, DistanceType, TokenizerType +from pydantic import Field +from pydantic_settings import BaseSettings + + +class HologresConfig(BaseSettings): + """ + Configuration settings for Hologres vector database. + + Hologres is compatible with PostgreSQL protocol. + access_key_id is used as the PostgreSQL username, + and access_key_secret is used as the PostgreSQL password. 
+ """ + + HOLOGRES_HOST: str | None = Field( + description="Hostname or IP address of the Hologres instance.", + default=None, + ) + + HOLOGRES_PORT: int = Field( + description="Port number for connecting to the Hologres instance.", + default=80, + ) + + HOLOGRES_DATABASE: str | None = Field( + description="Name of the Hologres database to connect to.", + default=None, + ) + + HOLOGRES_ACCESS_KEY_ID: str | None = Field( + description="Alibaba Cloud AccessKey ID, also used as the PostgreSQL username.", + default=None, + ) + + HOLOGRES_ACCESS_KEY_SECRET: str | None = Field( + description="Alibaba Cloud AccessKey Secret, also used as the PostgreSQL password.", + default=None, + ) + + HOLOGRES_SCHEMA: str = Field( + description="Schema name in the Hologres database.", + default="public", + ) + + HOLOGRES_TOKENIZER: TokenizerType = Field( + description="Tokenizer for full-text search index (e.g., 'jieba', 'ik', 'standard', 'simple').", + default="jieba", + ) + + HOLOGRES_DISTANCE_METHOD: DistanceType = Field( + description="Distance method for vector index (e.g., 'Cosine', 'Euclidean', 'InnerProduct').", + default="Cosine", + ) + + HOLOGRES_BASE_QUANTIZATION_TYPE: BaseQuantizationType = Field( + description="Base quantization type for vector index (e.g., 'rabitq', 'sq8', 'fp16', 'fp32').", + default="rabitq", + ) + + HOLOGRES_MAX_DEGREE: int = Field( + description="Max degree (M) parameter for HNSW vector index.", + default=64, + ) + + HOLOGRES_EF_CONSTRUCTION: int = Field( + description="ef_construction parameter for HNSW vector index.", + default=400, + ) diff --git a/api/controllers/console/app/app.py b/api/controllers/console/app/app.py index 33b3c9ec36..5ac0e342e6 100644 --- a/api/controllers/console/app/app.py +++ b/api/controllers/console/app/app.py @@ -25,7 +25,8 @@ from controllers.console.wraps import ( ) from core.ops.ops_trace_manager import OpsTraceManager from core.rag.retrieval.retrieval_methods import RetrievalMethod -from dify_graph.enums import NodeType, WorkflowExecutionStatus +from core.trigger.constants import TRIGGER_NODE_TYPES +from dify_graph.enums import WorkflowExecutionStatus from dify_graph.file import helpers as file_helpers from extensions.ext_database import db from libs.login import current_account_with_tenant, login_required @@ -508,11 +509,7 @@ class AppListApi(Resource): .scalars() .all() ) - trigger_node_types = { - NodeType.TRIGGER_WEBHOOK, - NodeType.TRIGGER_SCHEDULE, - NodeType.TRIGGER_PLUGIN, - } + trigger_node_types = TRIGGER_NODE_TYPES for workflow in draft_workflows: node_id = None try: diff --git a/api/controllers/console/app/mcp_server.py b/api/controllers/console/app/mcp_server.py index dd982b6d7b..2025048e09 100644 --- a/api/controllers/console/app/mcp_server.py +++ b/api/controllers/console/app/mcp_server.py @@ -1,5 +1,4 @@ import json -from enum import StrEnum from flask_restx import Resource, marshal_with from pydantic import BaseModel, Field @@ -11,6 +10,7 @@ from controllers.console.wraps import account_initialization_required, edit_perm from extensions.ext_database import db from fields.app_fields import app_server_fields from libs.login import current_account_with_tenant, login_required +from models.enums import AppMCPServerStatus from models.model import AppMCPServer DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}" @@ -19,11 +19,6 @@ DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}" app_server_model = console_ns.model("AppServer", app_server_fields) -class AppMCPServerStatus(StrEnum): - ACTIVE = "active" - INACTIVE = 
"inactive" - - class MCPServerCreatePayload(BaseModel): description: str | None = Field(default=None, description="Server description") parameters: dict = Field(..., description="Server parameters configuration") @@ -117,9 +112,10 @@ class AppMCPServerController(Resource): server.parameters = json.dumps(payload.parameters, ensure_ascii=False) if payload.status: - if payload.status not in [status.value for status in AppMCPServerStatus]: + try: + server.status = AppMCPServerStatus(payload.status) + except ValueError: raise ValueError("Invalid status") - server.status = payload.status db.session.commit() return server diff --git a/api/controllers/console/app/workflow.py b/api/controllers/console/app/workflow.py index 9759e0815a..837245ecb1 100644 --- a/api/controllers/console/app/workflow.py +++ b/api/controllers/console/app/workflow.py @@ -22,6 +22,7 @@ from core.app.apps.workflow.app_generator import SKIP_PREPARE_USER_INPUTS_KEY from core.app.entities.app_invoke_entities import InvokeFrom from core.helper.trace_id_helper import get_external_trace_id from core.plugin.impl.exc import PluginInvokeError +from core.trigger.constants import TRIGGER_SCHEDULE_NODE_TYPE from core.trigger.debug.event_selectors import ( TriggerDebugEvent, TriggerDebugEventPoller, @@ -1209,7 +1210,7 @@ class DraftWorkflowTriggerNodeApi(Resource): node_type: NodeType = draft_workflow.get_node_type_from_node_config(node_config) event: TriggerDebugEvent | None = None # for schedule trigger, when run single node, just execute directly - if node_type == NodeType.TRIGGER_SCHEDULE: + if node_type == TRIGGER_SCHEDULE_NODE_TYPE: event = TriggerDebugEvent( workflow_args={}, node_id=node_id, diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py index ddad7f40ca..eebba57fa3 100644 --- a/api/controllers/console/datasets/datasets.py +++ b/api/controllers/console/datasets/datasets.py @@ -263,6 +263,7 @@ def _get_retrieval_methods_by_vector_type(vector_type: str | None, is_mock: bool VectorType.BAIDU, VectorType.ALIBABACLOUD_MYSQL, VectorType.IRIS, + VectorType.HOLOGRES, } semantic_methods = {"retrieval_method": [RetrievalMethod.SEMANTIC_SEARCH.value]} diff --git a/api/controllers/console/workspace/account.py b/api/controllers/console/workspace/account.py index 708df62642..0d8960c9bd 100644 --- a/api/controllers/console/workspace/account.py +++ b/api/controllers/console/workspace/account.py @@ -43,6 +43,7 @@ from libs.datetime_utils import naive_utc_now from libs.helper import EmailStr, TimestampField, extract_remote_ip, timezone from libs.login import current_account_with_tenant, login_required from models import AccountIntegrate, InvitationCode +from models.account import AccountStatus, InvitationCodeStatus from services.account_service import AccountService from services.billing_service import BillingService from services.errors.account import CurrentPasswordIncorrectError as ServiceCurrentPasswordIncorrectError @@ -215,7 +216,7 @@ class AccountInitApi(Resource): db.session.query(InvitationCode) .where( InvitationCode.code == args.invitation_code, - InvitationCode.status == "unused", + InvitationCode.status == InvitationCodeStatus.UNUSED, ) .first() ) @@ -223,7 +224,7 @@ class AccountInitApi(Resource): if not invitation_code: raise InvalidInvitationCodeError() - invitation_code.status = "used" + invitation_code.status = InvitationCodeStatus.USED invitation_code.used_at = naive_utc_now() invitation_code.used_by_tenant_id = account.current_tenant_id invitation_code.used_by_account_id 
= account.id @@ -231,7 +232,7 @@ class AccountInitApi(Resource): account.interface_language = args.interface_language account.timezone = args.timezone account.interface_theme = "light" - account.status = "active" + account.status = AccountStatus.ACTIVE account.initialized_at = naive_utc_now() db.session.commit() diff --git a/api/controllers/console/workspace/plugin.py b/api/controllers/console/workspace/plugin.py index 2f06f72f29..ee537367c7 100644 --- a/api/controllers/console/workspace/plugin.py +++ b/api/controllers/console/workspace/plugin.py @@ -5,6 +5,7 @@ from typing import Any, Literal from flask import request, send_file from flask_restx import Resource from pydantic import BaseModel, Field +from werkzeug.datastructures import FileStorage from werkzeug.exceptions import Forbidden from configs import dify_config @@ -169,6 +170,20 @@ register_enum_models( ) +def _read_upload_content(file: FileStorage, max_size: int) -> bytes: + """ + Read the uploaded file and validate its actual size before delegating to the plugin service. + + FileStorage.content_length is not reliable for multipart test uploads and may be zero even when + content exists, so the controllers validate against the loaded bytes instead. + """ + content = file.read() + if len(content) > max_size: + raise ValueError("File size exceeds the maximum allowed size") + + return content + + @console_ns.route("/workspaces/current/plugin/debugging-key") class PluginDebuggingKeyApi(Resource): @setup_required @@ -284,12 +299,7 @@ class PluginUploadFromPkgApi(Resource): _, tenant_id = current_account_with_tenant() file = request.files["pkg"] - - # check file size - if file.content_length > dify_config.PLUGIN_MAX_PACKAGE_SIZE: - raise ValueError("File size exceeds the maximum allowed size") - - content = file.read() + content = _read_upload_content(file, dify_config.PLUGIN_MAX_PACKAGE_SIZE) try: response = PluginService.upload_pkg(tenant_id, content) except PluginDaemonClientSideError as e: @@ -328,12 +338,7 @@ class PluginUploadFromBundleApi(Resource): _, tenant_id = current_account_with_tenant() file = request.files["bundle"] - - # check file size - if file.content_length > dify_config.PLUGIN_MAX_BUNDLE_SIZE: - raise ValueError("File size exceeds the maximum allowed size") - - content = file.read() + content = _read_upload_content(file, dify_config.PLUGIN_MAX_BUNDLE_SIZE) try: response = PluginService.upload_bundle(tenant_id, content) except PluginDaemonClientSideError as e: diff --git a/api/controllers/mcp/mcp.py b/api/controllers/mcp/mcp.py index 2bc6640807..9ddaaa315b 100644 --- a/api/controllers/mcp/mcp.py +++ b/api/controllers/mcp/mcp.py @@ -6,13 +6,13 @@ from pydantic import BaseModel, Field, ValidationError from sqlalchemy.orm import Session from controllers.common.schema import register_schema_model -from controllers.console.app.mcp_server import AppMCPServerStatus from controllers.mcp import mcp_ns from core.mcp import types as mcp_types from core.mcp.server.streamable_http import handle_mcp_request from dify_graph.variables.input_entities import VariableEntity from extensions.ext_database import db from libs import helper +from models.enums import AppMCPServerStatus from models.model import App, AppMCPServer, AppMode, EndUser diff --git a/api/controllers/service_api/wraps.py b/api/controllers/service_api/wraps.py index cc55c69c48..7aa5b2f092 100644 --- a/api/controllers/service_api/wraps.py +++ b/api/controllers/service_api/wraps.py @@ -3,7 +3,7 @@ import time from collections.abc import Callable from enum import 
StrEnum, auto from functools import wraps -from typing import Concatenate, ParamSpec, TypeVar, cast +from typing import Concatenate, ParamSpec, TypeVar, cast, overload from flask import current_app, request from flask_login import user_logged_in @@ -44,10 +44,22 @@ class FetchUserArg(BaseModel): required: bool = False -def validate_app_token(view: Callable[P, R] | None = None, *, fetch_user_arg: FetchUserArg | None = None): - def decorator(view_func: Callable[P, R]): +@overload +def validate_app_token(view: Callable[P, R]) -> Callable[P, R]: ... + + +@overload +def validate_app_token( + view: None = None, *, fetch_user_arg: FetchUserArg | None = None +) -> Callable[[Callable[P, R]], Callable[P, R]]: ... + + +def validate_app_token( + view: Callable[P, R] | None = None, *, fetch_user_arg: FetchUserArg | None = None +) -> Callable[P, R] | Callable[[Callable[P, R]], Callable[P, R]]: + def decorator(view_func: Callable[P, R]) -> Callable[P, R]: @wraps(view_func) - def decorated_view(*args: P.args, **kwargs: P.kwargs): + def decorated_view(*args: P.args, **kwargs: P.kwargs) -> R: api_token = validate_and_get_api_token("app") app_model = db.session.query(App).where(App.id == api_token.app_id).first() @@ -213,10 +225,20 @@ def cloud_edition_billing_rate_limit_check(resource: str, api_token_type: str): return interceptor -def validate_dataset_token(view: Callable[Concatenate[T, P], R] | None = None): - def decorator(view: Callable[Concatenate[T, P], R]): - @wraps(view) - def decorated(*args: P.args, **kwargs: P.kwargs): +@overload +def validate_dataset_token(view: Callable[Concatenate[T, P], R]) -> Callable[P, R]: ... + + +@overload +def validate_dataset_token(view: None = None) -> Callable[[Callable[Concatenate[T, P], R]], Callable[P, R]]: ... + + +def validate_dataset_token( + view: Callable[Concatenate[T, P], R] | None = None, +) -> Callable[P, R] | Callable[[Callable[Concatenate[T, P], R]], Callable[P, R]]: + def decorator(view_func: Callable[Concatenate[T, P], R]) -> Callable[P, R]: + @wraps(view_func) + def decorated(*args: P.args, **kwargs: P.kwargs) -> R: api_token = validate_and_get_api_token("dataset") # get url path dataset_id from positional args or kwargs @@ -287,7 +309,7 @@ def validate_dataset_token(view: Callable[Concatenate[T, P], R] | None = None): raise Unauthorized("Tenant owner account does not exist.") else: raise Unauthorized("Tenant does not exist.") - return view(api_token.tenant_id, *args, **kwargs) + return view_func(api_token.tenant_id, *args, **kwargs) # type: ignore[arg-type] return decorated diff --git a/api/core/agent/cot_agent_runner.py b/api/core/agent/cot_agent_runner.py index c6ecd5509b..9271ed10bd 100644 --- a/api/core/agent/cot_agent_runner.py +++ b/api/core/agent/cot_agent_runner.py @@ -6,6 +6,7 @@ from typing import Any from core.agent.base_agent_runner import BaseAgentRunner from core.agent.entities import AgentScratchpadUnit +from core.agent.errors import AgentMaxIterationError from core.agent.output_parser.cot_output_parser import CotAgentOutputParser from core.app.apps.base_app_queue_manager import PublishFrom from core.app.entities.queue_entities import QueueAgentThoughtEvent, QueueMessageEndEvent, QueueMessageFileEvent @@ -22,7 +23,6 @@ from dify_graph.model_runtime.entities.message_entities import ( ToolPromptMessage, UserPromptMessage, ) -from dify_graph.nodes.agent.exc import AgentMaxIterationError from models.model import Message logger = logging.getLogger(__name__) diff --git a/api/core/agent/errors.py b/api/core/agent/errors.py new file mode 
100644 index 0000000000..ed504d500a --- /dev/null +++ b/api/core/agent/errors.py @@ -0,0 +1,9 @@ +class AgentMaxIterationError(Exception): + """Raised when an agent runner exceeds the configured max iteration count.""" + + def __init__(self, max_iteration: int): + self.max_iteration = max_iteration + super().__init__( + f"Agent exceeded the maximum iteration limit of {max_iteration}. " + f"The agent was unable to complete the task within the allowed number of iterations." + ) diff --git a/api/core/agent/fc_agent_runner.py b/api/core/agent/fc_agent_runner.py index 3271fe319b..5e13a13b21 100644 --- a/api/core/agent/fc_agent_runner.py +++ b/api/core/agent/fc_agent_runner.py @@ -5,6 +5,7 @@ from copy import deepcopy from typing import Any, Union from core.agent.base_agent_runner import BaseAgentRunner +from core.agent.errors import AgentMaxIterationError from core.app.apps.base_app_queue_manager import PublishFrom from core.app.entities.queue_entities import QueueAgentThoughtEvent, QueueMessageEndEvent, QueueMessageFileEvent from core.prompt.agent_history_prompt_transform import AgentHistoryPromptTransform @@ -25,7 +26,6 @@ from dify_graph.model_runtime.entities import ( UserPromptMessage, ) from dify_graph.model_runtime.entities.message_entities import ImagePromptMessageContent, PromptMessageContentUnionTypes -from dify_graph.nodes.agent.exc import AgentMaxIterationError from models.model import Message logger = logging.getLogger(__name__) diff --git a/api/core/app/apps/advanced_chat/generate_task_pipeline.py b/api/core/app/apps/advanced_chat/generate_task_pipeline.py index a1cb375e24..6583ba51e9 100644 --- a/api/core/app/apps/advanced_chat/generate_task_pipeline.py +++ b/api/core/app/apps/advanced_chat/generate_task_pipeline.py @@ -69,7 +69,7 @@ from dify_graph.entities.pause_reason import HumanInputRequired from dify_graph.enums import WorkflowExecutionStatus from dify_graph.model_runtime.entities.llm_entities import LLMUsage from dify_graph.model_runtime.utils.encoders import jsonable_encoder -from dify_graph.nodes import NodeType +from dify_graph.nodes import BuiltinNodeTypes from dify_graph.repositories.draft_variable_repository import DraftVariableSaverFactory from dify_graph.runtime import GraphRuntimeState from dify_graph.system_variable import SystemVariable @@ -357,7 +357,7 @@ class AdvancedChatAppGenerateTaskPipeline(GraphRuntimeStateSupport): ) -> Generator[StreamResponse, None, None]: """Handle node succeeded events.""" # Record files if it's an answer node or end node - if event.node_type in [NodeType.ANSWER, NodeType.END, NodeType.LLM]: + if event.node_type in [BuiltinNodeTypes.ANSWER, BuiltinNodeTypes.END, BuiltinNodeTypes.LLM]: self._recorded_files.extend( self._workflow_response_converter.fetch_files_from_node_outputs(event.outputs or {}) ) diff --git a/api/core/app/apps/common/workflow_response_converter.py b/api/core/app/apps/common/workflow_response_converter.py index 67dc9909a1..5665a2b76c 100644 --- a/api/core/app/apps/common/workflow_response_converter.py +++ b/api/core/app/apps/common/workflow_response_converter.py @@ -48,12 +48,13 @@ from core.app.entities.task_entities import ( from core.plugin.impl.datasource import PluginDatasourceManager from core.tools.entities.tool_entities import ToolProviderType from core.tools.tool_manager import ToolManager +from core.trigger.constants import TRIGGER_PLUGIN_NODE_TYPE from core.trigger.trigger_manager import TriggerManager from core.workflow.workflow_entry import WorkflowEntry from dify_graph.entities.pause_reason import 
HumanInputRequired from dify_graph.entities.workflow_start_reason import WorkflowStartReason from dify_graph.enums import ( - NodeType, + BuiltinNodeTypes, SystemVariableKey, WorkflowExecutionStatus, WorkflowNodeExecutionMetadataKey, @@ -442,7 +443,7 @@ class WorkflowResponseConverter: event: QueueNodeStartedEvent, task_id: str, ) -> NodeStartStreamResponse | None: - if event.node_type in {NodeType.ITERATION, NodeType.LOOP}: + if event.node_type in {BuiltinNodeTypes.ITERATION, BuiltinNodeTypes.LOOP}: return None run_id = self._ensure_workflow_run_id() snapshot = self._store_snapshot(event) @@ -464,13 +465,13 @@ class WorkflowResponseConverter: ) try: - if event.node_type == NodeType.TOOL: + if event.node_type == BuiltinNodeTypes.TOOL: response.data.extras["icon"] = ToolManager.get_tool_icon( tenant_id=self._application_generate_entity.app_config.tenant_id, provider_type=ToolProviderType(event.provider_type), provider_id=event.provider_id, ) - elif event.node_type == NodeType.DATASOURCE: + elif event.node_type == BuiltinNodeTypes.DATASOURCE: manager = PluginDatasourceManager() provider_entity = manager.fetch_datasource_provider( self._application_generate_entity.app_config.tenant_id, @@ -479,7 +480,7 @@ class WorkflowResponseConverter: response.data.extras["icon"] = provider_entity.declaration.identity.generate_datasource_icon_url( self._application_generate_entity.app_config.tenant_id ) - elif event.node_type == NodeType.TRIGGER_PLUGIN: + elif event.node_type == TRIGGER_PLUGIN_NODE_TYPE: response.data.extras["icon"] = TriggerManager.get_trigger_plugin_icon( self._application_generate_entity.app_config.tenant_id, event.provider_id, @@ -496,7 +497,7 @@ class WorkflowResponseConverter: event: QueueNodeSucceededEvent | QueueNodeFailedEvent | QueueNodeExceptionEvent, task_id: str, ) -> NodeFinishStreamResponse | None: - if event.node_type in {NodeType.ITERATION, NodeType.LOOP}: + if event.node_type in {BuiltinNodeTypes.ITERATION, BuiltinNodeTypes.LOOP}: return None run_id = self._ensure_workflow_run_id() snapshot = self._pop_snapshot(event.node_execution_id) @@ -554,7 +555,7 @@ class WorkflowResponseConverter: event: QueueNodeRetryEvent, task_id: str, ) -> NodeRetryStreamResponse | None: - if event.node_type in {NodeType.ITERATION, NodeType.LOOP}: + if event.node_type in {BuiltinNodeTypes.ITERATION, BuiltinNodeTypes.LOOP}: return None run_id = self._ensure_workflow_run_id() @@ -612,7 +613,7 @@ class WorkflowResponseConverter: data=IterationNodeStartStreamResponse.Data( id=event.node_id, node_id=event.node_id, - node_type=event.node_type.value, + node_type=event.node_type, title=event.node_title, created_at=int(time.time()), extras={}, @@ -635,7 +636,7 @@ class WorkflowResponseConverter: data=IterationNodeNextStreamResponse.Data( id=event.node_id, node_id=event.node_id, - node_type=event.node_type.value, + node_type=event.node_type, title=event.node_title, index=event.index, created_at=int(time.time()), @@ -662,7 +663,7 @@ class WorkflowResponseConverter: data=IterationNodeCompletedStreamResponse.Data( id=event.node_id, node_id=event.node_id, - node_type=event.node_type.value, + node_type=event.node_type, title=event.node_title, outputs=new_outputs, outputs_truncated=outputs_truncated, @@ -692,7 +693,7 @@ class WorkflowResponseConverter: data=LoopNodeStartStreamResponse.Data( id=event.node_id, node_id=event.node_id, - node_type=event.node_type.value, + node_type=event.node_type, title=event.node_title, created_at=int(time.time()), extras={}, @@ -715,7 +716,7 @@ class WorkflowResponseConverter: 
data=LoopNodeNextStreamResponse.Data( id=event.node_id, node_id=event.node_id, - node_type=event.node_type.value, + node_type=event.node_type, title=event.node_title, index=event.index, # The `pre_loop_output` field is not utilized by the frontend. @@ -744,7 +745,7 @@ class WorkflowResponseConverter: data=LoopNodeCompletedStreamResponse.Data( id=event.node_id, node_id=event.node_id, - node_type=event.node_type.value, + node_type=event.node_type, title=event.node_title, outputs=new_outputs, outputs_truncated=outputs_truncated, diff --git a/api/core/app/apps/pipeline/pipeline_runner.py b/api/core/app/apps/pipeline/pipeline_runner.py index 4222aae809..e767766bdb 100644 --- a/api/core/app/apps/pipeline/pipeline_runner.py +++ b/api/core/app/apps/pipeline/pipeline_runner.py @@ -12,7 +12,7 @@ from core.app.entities.app_invoke_entities import ( build_dify_run_context, ) from core.app.workflow.layers.persistence import PersistenceWorkflowInfo, WorkflowPersistenceLayer -from core.workflow.node_factory import DifyNodeFactory +from core.workflow.node_factory import DifyNodeFactory, get_default_root_node_id from core.workflow.workflow_entry import WorkflowEntry from dify_graph.entities.graph_init_params import GraphInitParams from dify_graph.enums import WorkflowType @@ -274,6 +274,8 @@ class PipelineRunner(WorkflowBasedAppRunner): graph_init_params=graph_init_params, graph_runtime_state=graph_runtime_state, ) + if start_node_id is None: + start_node_id = get_default_root_node_id(graph_config) graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id=start_node_id) if not graph: diff --git a/api/core/app/apps/workflow/app_generator.py b/api/core/app/apps/workflow/app_generator.py index 32a7a3ccec..84bebb8bab 100644 --- a/api/core/app/apps/workflow/app_generator.py +++ b/api/core/app/apps/workflow/app_generator.py @@ -159,9 +159,12 @@ class WorkflowAppGenerator(BaseAppGenerator): inputs: Mapping[str, Any] = args["inputs"] - extras = { + extras: dict[str, Any] = { **extract_external_trace_id_from_args(args), } + parent_trace_context = args.get("_parent_trace_context") + if parent_trace_context: + extras["parent_trace_context"] = parent_trace_context workflow_run_id = str(workflow_run_id or uuid.uuid4()) # FIXME (Yeuoly): we need to remove the SKIP_PREPARE_USER_INPUTS_KEY from the args # trigger shouldn't prepare user inputs diff --git a/api/core/app/apps/workflow_app_runner.py b/api/core/app/apps/workflow_app_runner.py index 3812fac28a..25d3c8bd2a 100644 --- a/api/core/app/apps/workflow_app_runner.py +++ b/api/core/app/apps/workflow_app_runner.py @@ -3,7 +3,10 @@ import time from collections.abc import Mapping, Sequence from typing import Any, cast +from pydantic import ValidationError + from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom +from core.app.entities.agent_strategy import AgentStrategyInfo from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom, build_dify_run_context from core.app.entities.queue_entities import ( AppQueueEvent, @@ -29,7 +32,8 @@ from core.app.entities.queue_entities import ( QueueWorkflowStartedEvent, QueueWorkflowSucceededEvent, ) -from core.workflow.node_factory import DifyNodeFactory +from core.rag.entities.citation_metadata import RetrievalSourceMetadata +from core.workflow.node_factory import DifyNodeFactory, get_default_root_node_id, resolve_workflow_node_class from core.workflow.workflow_entry import WorkflowEntry from dify_graph.entities import GraphInitParams from dify_graph.entities.graph_config 
import NodeConfigDictAdapter @@ -63,7 +67,6 @@ from dify_graph.graph_events import ( NodeRunSucceededEvent, ) from dify_graph.graph_events.graph import GraphRunAbortedEvent -from dify_graph.nodes.node_mapping import NODE_TYPE_CLASSES_MAPPING from dify_graph.runtime import GraphRuntimeState, VariablePool from dify_graph.system_variable import SystemVariable from dify_graph.variable_loader import DUMMY_VARIABLE_LOADER, VariableLoader, load_into_variable_pool @@ -137,6 +140,9 @@ class WorkflowBasedAppRunner: graph_runtime_state=graph_runtime_state, ) + if root_node_id is None: + root_node_id = get_default_root_node_id(graph_config) + # init graph graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id=root_node_id) @@ -308,7 +314,7 @@ class WorkflowBasedAppRunner: # Get node class node_type = target_node_config["data"].type node_version = str(target_node_config["data"].version) - node_cls = NODE_TYPE_CLASSES_MAPPING[node_type][node_version] + node_cls = resolve_workflow_node_class(node_type=node_type, node_version=node_version) # Use the variable pool from graph_runtime_state instead of creating a new one variable_pool = graph_runtime_state.variable_pool @@ -336,6 +342,18 @@ class WorkflowBasedAppRunner: return graph, variable_pool + @staticmethod + def _build_agent_strategy_info(event: NodeRunStartedEvent) -> AgentStrategyInfo | None: + raw_agent_strategy = event.extras.get("agent_strategy") + if raw_agent_strategy is None: + return None + + try: + return AgentStrategyInfo.model_validate(raw_agent_strategy) + except ValidationError: + logger.warning("Invalid agent strategy payload for node %s", event.node_id, exc_info=True) + return None + def _handle_event(self, workflow_entry: WorkflowEntry, event: GraphEngineEvent): """ Handle event @@ -421,7 +439,7 @@ class WorkflowBasedAppRunner: start_at=event.start_at, in_iteration_id=event.in_iteration_id, in_loop_id=event.in_loop_id, - agent_strategy=event.agent_strategy, + agent_strategy=self._build_agent_strategy_info(event), provider_type=event.provider_type, provider_id=event.provider_id, ) @@ -490,7 +508,9 @@ class WorkflowBasedAppRunner: elif isinstance(event, NodeRunRetrieverResourceEvent): self._publish_event( QueueRetrieverResourcesEvent( - retriever_resources=event.retriever_resources, + retriever_resources=[ + RetrievalSourceMetadata.model_validate(resource) for resource in event.retriever_resources + ], in_iteration_id=event.in_iteration_id, in_loop_id=event.in_loop_id, ) diff --git a/api/core/app/entities/__init__.py b/api/core/app/entities/__init__.py index e69de29bb2..8e41acee32 100644 --- a/api/core/app/entities/__init__.py +++ b/api/core/app/entities/__init__.py @@ -0,0 +1,3 @@ +from .agent_strategy import AgentStrategyInfo + +__all__ = ["AgentStrategyInfo"] diff --git a/api/core/app/entities/agent_strategy.py b/api/core/app/entities/agent_strategy.py new file mode 100644 index 0000000000..b063a12f4f --- /dev/null +++ b/api/core/app/entities/agent_strategy.py @@ -0,0 +1,8 @@ +from pydantic import BaseModel, ConfigDict + + +class AgentStrategyInfo(BaseModel): + name: str + icon: str | None = None + + model_config = ConfigDict(extra="forbid") diff --git a/api/core/app/entities/queue_entities.py b/api/core/app/entities/queue_entities.py index d42df0d1bf..8899d80db8 100644 --- a/api/core/app/entities/queue_entities.py +++ b/api/core/app/entities/queue_entities.py @@ -5,13 +5,12 @@ from typing import Any from pydantic import BaseModel, ConfigDict, Field +from core.app.entities.agent_strategy import 
AgentStrategyInfo from core.rag.entities.citation_metadata import RetrievalSourceMetadata -from dify_graph.entities import AgentNodeStrategyInit from dify_graph.entities.pause_reason import PauseReason from dify_graph.entities.workflow_start_reason import WorkflowStartReason -from dify_graph.enums import WorkflowNodeExecutionMetadataKey +from dify_graph.enums import NodeType, WorkflowNodeExecutionMetadataKey from dify_graph.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk -from dify_graph.nodes import NodeType class QueueEvent(StrEnum): @@ -314,7 +313,7 @@ class QueueNodeStartedEvent(AppQueueEvent): in_iteration_id: str | None = None in_loop_id: str | None = None start_at: datetime - agent_strategy: AgentNodeStrategyInit | None = None + agent_strategy: AgentStrategyInfo | None = None # FIXME(-LAN-): only for ToolNode, need to refactor provider_type: str # should be a core.tools.entities.tool_entities.ToolProviderType diff --git a/api/core/app/entities/task_entities.py b/api/core/app/entities/task_entities.py index b58dae0ff2..46a8ab52f2 100644 --- a/api/core/app/entities/task_entities.py +++ b/api/core/app/entities/task_entities.py @@ -4,8 +4,8 @@ from typing import Any from pydantic import BaseModel, ConfigDict, Field +from core.app.entities.agent_strategy import AgentStrategyInfo from core.rag.entities.citation_metadata import RetrievalSourceMetadata -from dify_graph.entities import AgentNodeStrategyInit from dify_graph.entities.workflow_start_reason import WorkflowStartReason from dify_graph.enums import WorkflowExecutionStatus, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus from dify_graph.model_runtime.entities.llm_entities import LLMResult, LLMUsage @@ -349,7 +349,7 @@ class NodeStartStreamResponse(StreamResponse): extras: dict[str, object] = Field(default_factory=dict) iteration_id: str | None = None loop_id: str | None = None - agent_strategy: AgentNodeStrategyInit | None = None + agent_strategy: AgentStrategyInfo | None = None event: StreamEvent = StreamEvent.NODE_STARTED workflow_run_id: str diff --git a/api/core/app/layers/conversation_variable_persist_layer.py b/api/core/app/layers/conversation_variable_persist_layer.py index e495abf855..d227e4e904 100644 --- a/api/core/app/layers/conversation_variable_persist_layer.py +++ b/api/core/app/layers/conversation_variable_persist_layer.py @@ -2,7 +2,7 @@ import logging from dify_graph.constants import CONVERSATION_VARIABLE_NODE_ID from dify_graph.conversation_variable_updater import ConversationVariableUpdater -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from dify_graph.graph_engine.layers.base import GraphEngineLayer from dify_graph.graph_events import GraphEngineEvent, NodeRunSucceededEvent from dify_graph.nodes.variable_assigner.common import helpers as common_helpers @@ -22,7 +22,7 @@ class ConversationVariablePersistenceLayer(GraphEngineLayer): def on_event(self, event: GraphEngineEvent) -> None: if not isinstance(event, NodeRunSucceededEvent): return - if event.node_type != NodeType.VARIABLE_ASSIGNER: + if event.node_type != BuiltinNodeTypes.VARIABLE_ASSIGNER: return if self.graph_runtime_state is None: return diff --git a/api/core/app/workflow/layers/llm_quota.py b/api/core/app/workflow/layers/llm_quota.py index 2e930a1f58..faf1516c40 100644 --- a/api/core/app/workflow/layers/llm_quota.py +++ b/api/core/app/workflow/layers/llm_quota.py @@ -12,7 +12,7 @@ from typing_extensions import override from core.app.llm import deduct_llm_quota, 
ensure_llm_quota_available from core.errors.error import QuotaExceededError from core.model_manager import ModelInstance -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from dify_graph.graph_engine.entities.commands import AbortCommand, CommandType from dify_graph.graph_engine.layers.base import GraphEngineLayer from dify_graph.graph_events import GraphEngineEvent, GraphNodeEventBase @@ -113,11 +113,11 @@ class LLMQuotaLayer(GraphEngineLayer): def _extract_model_instance(node: Node) -> ModelInstance | None: try: match node.node_type: - case NodeType.LLM: + case BuiltinNodeTypes.LLM: return cast("LLMNode", node).model_instance - case NodeType.PARAMETER_EXTRACTOR: + case BuiltinNodeTypes.PARAMETER_EXTRACTOR: return cast("ParameterExtractorNode", node).model_instance - case NodeType.QUESTION_CLASSIFIER: + case BuiltinNodeTypes.QUESTION_CLASSIFIER: return cast("QuestionClassifierNode", node).model_instance case _: return None diff --git a/api/core/app/workflow/layers/observability.py b/api/core/app/workflow/layers/observability.py index ab73db59f1..4b20477a7f 100644 --- a/api/core/app/workflow/layers/observability.py +++ b/api/core/app/workflow/layers/observability.py @@ -16,7 +16,7 @@ from opentelemetry.trace import Span, SpanKind, Tracer, get_tracer, set_span_in_ from typing_extensions import override from configs import dify_config -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.graph_engine.layers.base import GraphEngineLayer from dify_graph.graph_events import GraphNodeEventBase from dify_graph.nodes.base.node import Node @@ -74,16 +74,13 @@ class ObservabilityLayer(GraphEngineLayer): def _build_parser_registry(self) -> None: """Initialize parser registry for node types.""" self._parsers = { - NodeType.TOOL: ToolNodeOTelParser(), - NodeType.LLM: LLMNodeOTelParser(), - NodeType.KNOWLEDGE_RETRIEVAL: RetrievalNodeOTelParser(), + BuiltinNodeTypes.TOOL: ToolNodeOTelParser(), + BuiltinNodeTypes.LLM: LLMNodeOTelParser(), + BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: RetrievalNodeOTelParser(), } def _get_parser(self, node: Node) -> NodeOTelParser: - node_type = getattr(node, "node_type", None) - if isinstance(node_type, NodeType): - return self._parsers.get(node_type, self._default_parser) - return self._default_parser + return self._parsers.get(node.node_type, self._default_parser) @override def on_graph_start(self) -> None: diff --git a/api/core/callback_handler/index_tool_callback_handler.py b/api/core/callback_handler/index_tool_callback_handler.py index d0279349ca..b054409681 100644 --- a/api/core/callback_handler/index_tool_callback_handler.py +++ b/api/core/callback_handler/index_tool_callback_handler.py @@ -12,6 +12,7 @@ from core.rag.models.document import Document from extensions.ext_database import db from models.dataset import ChildChunk, DatasetQuery, DocumentSegment from models.dataset import Document as DatasetDocument +from models.enums import CreatorUserRole _logger = logging.getLogger(__name__) @@ -38,7 +39,9 @@ class DatasetIndexToolCallbackHandler: source="app", source_app_id=self._app_id, created_by_role=( - "account" if self._invoke_from in {InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER} else "end_user" + CreatorUserRole.ACCOUNT + if self._invoke_from in {InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER} + else CreatorUserRole.END_USER ), created_by=self._user_id, ) diff --git a/api/core/datasource/datasource_manager.py b/api/core/datasource/datasource_manager.py index 15cd319750..4fa941ae16 
100644 --- a/api/core/datasource/datasource_manager.py +++ b/api/core/datasource/datasource_manager.py @@ -24,12 +24,12 @@ from core.datasource.utils.message_transformer import DatasourceFileMessageTrans from core.datasource.website_crawl.website_crawl_provider import WebsiteCrawlDatasourcePluginProviderController from core.db.session_factory import session_factory from core.plugin.impl.datasource import PluginDatasourceManager +from core.workflow.nodes.datasource.entities import DatasourceParameter, OnlineDriveDownloadFileParam from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus from dify_graph.enums import WorkflowNodeExecutionMetadataKey from dify_graph.file import File from dify_graph.file.enums import FileTransferMethod, FileType from dify_graph.node_events import NodeRunResult, StreamChunkEvent, StreamCompletedEvent -from dify_graph.repositories.datasource_manager_protocol import DatasourceParameter, OnlineDriveDownloadFileParam from factories import file_factory from models.model import UploadFile from models.tools import ToolFile diff --git a/api/core/llm_generator/entities.py b/api/core/llm_generator/entities.py index 3bb8d2c899..6573bcbe95 100644 --- a/api/core/llm_generator/entities.py +++ b/api/core/llm_generator/entities.py @@ -9,6 +9,7 @@ class RuleGeneratePayload(BaseModel): instruction: str = Field(..., description="Rule generation instruction") model_config_data: ModelConfig = Field(..., alias="model_config", description="Model configuration") no_variable: bool = Field(default=False, description="Whether to exclude variables") + app_id: str | None = Field(default=None, description="App ID for prompt generation tracing") class RuleCodeGeneratePayload(RuleGeneratePayload): @@ -18,3 +19,4 @@ class RuleCodeGeneratePayload(RuleGeneratePayload): class RuleStructuredOutputPayload(BaseModel): instruction: str = Field(..., description="Structured output generation instruction") model_config_data: ModelConfig = Field(..., alias="model_config", description="Model configuration") + app_id: str | None = Field(default=None, description="App ID for prompt generation tracing") diff --git a/api/core/ops/aliyun_trace/aliyun_trace.py b/api/core/ops/aliyun_trace/aliyun_trace.py index 19111cc917..18f35b5b9c 100644 --- a/api/core/ops/aliyun_trace/aliyun_trace.py +++ b/api/core/ops/aliyun_trace/aliyun_trace.py @@ -58,7 +58,7 @@ from core.ops.entities.trace_entity import ( ) from core.repositories import DifyCoreRepositoryFactory from dify_graph.entities import WorkflowNodeExecution -from dify_graph.enums import NodeType, WorkflowNodeExecutionMetadataKey +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionMetadataKey from extensions.ext_database import db from models import WorkflowNodeExecutionTriggeredFrom @@ -302,11 +302,11 @@ class AliyunDataTrace(BaseTraceInstance): self, node_execution: WorkflowNodeExecution, trace_info: WorkflowTraceInfo, trace_metadata: TraceMetadata ): try: - if node_execution.node_type == NodeType.LLM: + if node_execution.node_type == BuiltinNodeTypes.LLM: node_span = self.build_workflow_llm_span(trace_info, node_execution, trace_metadata) - elif node_execution.node_type == NodeType.KNOWLEDGE_RETRIEVAL: + elif node_execution.node_type == BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: node_span = self.build_workflow_retrieval_span(trace_info, node_execution, trace_metadata) - elif node_execution.node_type == NodeType.TOOL: + elif node_execution.node_type == BuiltinNodeTypes.TOOL: node_span = 
self.build_workflow_tool_span(trace_info, node_execution, trace_metadata) else: node_span = self.build_workflow_task_span(trace_info, node_execution, trace_metadata) diff --git a/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py b/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py index 452255f69e..7cb54b2c88 100644 --- a/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py +++ b/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py @@ -155,8 +155,8 @@ def wrap_span_metadata(metadata, **kwargs): return metadata -# Mapping from NodeType string values to OpenInference span kinds. -# NodeType values not listed here default to CHAIN. +# Mapping from built-in node type strings to OpenInference span kinds. +# Node types not listed here default to CHAIN. _NODE_TYPE_TO_SPAN_KIND: dict[str, OpenInferenceSpanKindValues] = { "llm": OpenInferenceSpanKindValues.LLM, "knowledge-retrieval": OpenInferenceSpanKindValues.RETRIEVER, @@ -168,7 +168,7 @@ _NODE_TYPE_TO_SPAN_KIND: dict[str, OpenInferenceSpanKindValues] = { def _get_node_span_kind(node_type: str) -> OpenInferenceSpanKindValues: """Return the OpenInference span kind for a given workflow node type. - Covers every ``NodeType`` enum value. Nodes that do not have a + Covers every built-in node type string. Nodes that do not have a specialised span kind (e.g. ``start``, ``end``, ``if-else``, ``code``, ``loop``, ``iteration``, etc.) are mapped to ``CHAIN``. """ diff --git a/api/core/ops/entities/trace_entity.py b/api/core/ops/entities/trace_entity.py index 50a2cdea63..45b2f635ba 100644 --- a/api/core/ops/entities/trace_entity.py +++ b/api/core/ops/entities/trace_entity.py @@ -9,8 +9,8 @@ from pydantic import BaseModel, ConfigDict, field_serializer, field_validator class BaseTraceInfo(BaseModel): message_id: str | None = None message_data: Any | None = None - inputs: Union[str, dict[str, Any], list] | None = None - outputs: Union[str, dict[str, Any], list] | None = None + inputs: Union[str, dict[str, Any], list[Any]] | None = None + outputs: Union[str, dict[str, Any], list[Any]] | None = None start_time: datetime | None = None end_time: datetime | None = None metadata: dict[str, Any] @@ -18,7 +18,7 @@ class BaseTraceInfo(BaseModel): @field_validator("inputs", "outputs") @classmethod - def ensure_type(cls, v): + def ensure_type(cls, v: str | dict[str, Any] | list[Any] | None) -> str | dict[str, Any] | list[Any] | None: if v is None: return None if isinstance(v, str | dict | list): @@ -27,6 +27,48 @@ class BaseTraceInfo(BaseModel): model_config = ConfigDict(protected_namespaces=()) + @property + def resolved_trace_id(self) -> str | None: + """Get trace_id with intelligent fallback. + + Priority: + 1. External trace_id (from X-Trace-Id header) + 2. workflow_run_id (if this trace type has it) + 3. message_id (as final fallback) + """ + if self.trace_id: + return self.trace_id + + # Try workflow_run_id (only exists on workflow-related traces) + workflow_run_id = getattr(self, "workflow_run_id", None) + if workflow_run_id: + return workflow_run_id + + # Final fallback to message_id + return str(self.message_id) if self.message_id else None + + @property + def resolved_parent_context(self) -> tuple[str | None, str | None]: + """Resolve cross-workflow parent linking from metadata. + + Extracts typed parent IDs from the untyped ``parent_trace_context`` + metadata dict (set by tool_node when invoking nested workflows). 
+ + Returns: + (trace_correlation_override, parent_span_id_source) where + trace_correlation_override is the outer workflow_run_id and + parent_span_id_source is the outer node_execution_id. + """ + parent_ctx = self.metadata.get("parent_trace_context") + if not isinstance(parent_ctx, dict): + return None, None + trace_override = parent_ctx.get("parent_workflow_run_id") + parent_span = parent_ctx.get("parent_node_execution_id") + return ( + trace_override if isinstance(trace_override, str) else None, + parent_span if isinstance(parent_span, str) else None, + ) + @field_serializer("start_time", "end_time") def serialize_datetime(self, dt: datetime | None) -> str | None: if dt is None: @@ -48,7 +90,10 @@ class WorkflowTraceInfo(BaseTraceInfo): workflow_run_version: str error: str | None = None total_tokens: int + prompt_tokens: int | None = None + completion_tokens: int | None = None file_list: list[str] + invoked_by: str | None = None query: str metadata: dict[str, Any] @@ -59,7 +104,7 @@ class MessageTraceInfo(BaseTraceInfo): answer_tokens: int total_tokens: int error: str | None = None - file_list: Union[str, dict[str, Any], list] | None = None + file_list: Union[str, dict[str, Any], list[Any]] | None = None message_file_data: Any | None = None conversation_mode: str gen_ai_server_time_to_first_token: float | None = None @@ -106,7 +151,7 @@ class ToolTraceInfo(BaseTraceInfo): tool_config: dict[str, Any] time_cost: Union[int, float] tool_parameters: dict[str, Any] - file_url: Union[str, None, list] = None + file_url: Union[str, None, list[str]] = None class GenerateNameTraceInfo(BaseTraceInfo): @@ -114,6 +159,79 @@ class GenerateNameTraceInfo(BaseTraceInfo): tenant_id: str +class PromptGenerationTraceInfo(BaseTraceInfo): + """Trace information for prompt generation operations (rule-generate, code-generate, etc.).""" + + tenant_id: str + user_id: str + app_id: str | None = None + + operation_type: str + instruction: str + + prompt_tokens: int + completion_tokens: int + total_tokens: int + + model_provider: str + model_name: str + + latency: float + + total_price: float | None = None + currency: str | None = None + + error: str | None = None + + model_config = ConfigDict(protected_namespaces=()) + + +class WorkflowNodeTraceInfo(BaseTraceInfo): + workflow_id: str + workflow_run_id: str + tenant_id: str + node_execution_id: str + node_id: str + node_type: str + title: str + + status: str + error: str | None = None + elapsed_time: float + + index: int + predecessor_node_id: str | None = None + + total_tokens: int = 0 + total_price: float = 0.0 + currency: str | None = None + + model_provider: str | None = None + model_name: str | None = None + prompt_tokens: int | None = None + completion_tokens: int | None = None + + tool_name: str | None = None + + iteration_id: str | None = None + iteration_index: int | None = None + loop_id: str | None = None + loop_index: int | None = None + parallel_id: str | None = None + + node_inputs: Mapping[str, Any] | None = None + node_outputs: Mapping[str, Any] | None = None + process_data: Mapping[str, Any] | None = None + + invoked_by: str | None = None + + model_config = ConfigDict(protected_namespaces=()) + + +class DraftNodeExecutionTrace(WorkflowNodeTraceInfo): + pass + + class TaskData(BaseModel): app_id: str trace_info_type: str @@ -128,11 +246,31 @@ trace_info_info_map = { "DatasetRetrievalTraceInfo": DatasetRetrievalTraceInfo, "ToolTraceInfo": ToolTraceInfo, "GenerateNameTraceInfo": GenerateNameTraceInfo, + "PromptGenerationTraceInfo": 
PromptGenerationTraceInfo, + "WorkflowNodeTraceInfo": WorkflowNodeTraceInfo, + "DraftNodeExecutionTrace": DraftNodeExecutionTrace, } +class OperationType(StrEnum): + """Operation type for token metric labels. + + Used as a metric attribute on ``dify.tokens.input`` / ``dify.tokens.output`` + counters so consumers can break down token usage by operation. + """ + + WORKFLOW = "workflow" + NODE_EXECUTION = "node_execution" + MESSAGE = "message" + RULE_GENERATE = "rule_generate" + CODE_GENERATE = "code_generate" + STRUCTURED_OUTPUT = "structured_output" + INSTRUCTION_MODIFY = "instruction_modify" + + class TraceTaskName(StrEnum): CONVERSATION_TRACE = "conversation" + DRAFT_NODE_EXECUTION_TRACE = "draft_node_execution" WORKFLOW_TRACE = "workflow" MESSAGE_TRACE = "message" MODERATION_TRACE = "moderation" @@ -140,4 +278,6 @@ class TraceTaskName(StrEnum): DATASET_RETRIEVAL_TRACE = "dataset_retrieval" TOOL_TRACE = "tool" GENERATE_NAME_TRACE = "generate_conversation_name" + PROMPT_GENERATION_TRACE = "prompt_generation" + NODE_EXECUTION_TRACE = "node_execution" DATASOURCE_TRACE = "datasource" diff --git a/api/core/ops/langfuse_trace/langfuse_trace.py b/api/core/ops/langfuse_trace/langfuse_trace.py index 28e800e6c7..6e62387a1f 100644 --- a/api/core/ops/langfuse_trace/langfuse_trace.py +++ b/api/core/ops/langfuse_trace/langfuse_trace.py @@ -28,7 +28,7 @@ from core.ops.langfuse_trace.entities.langfuse_trace_entity import ( ) from core.ops.utils import filter_none_values from core.repositories import DifyCoreRepositoryFactory -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from extensions.ext_database import db from models import EndUser, WorkflowNodeExecutionTriggeredFrom from models.enums import MessageStatus @@ -141,7 +141,7 @@ class LangFuseDataTrace(BaseTraceInstance): node_name = node_execution.title node_type = node_execution.node_type status = node_execution.status - if node_type == NodeType.LLM: + if node_type == BuiltinNodeTypes.LLM: inputs = node_execution.process_data.get("prompts", {}) if node_execution.process_data else {} else: inputs = node_execution.inputs or {} diff --git a/api/core/ops/langsmith_trace/langsmith_trace.py b/api/core/ops/langsmith_trace/langsmith_trace.py index b40bc89b71..32a0c77fe2 100644 --- a/api/core/ops/langsmith_trace/langsmith_trace.py +++ b/api/core/ops/langsmith_trace/langsmith_trace.py @@ -28,7 +28,7 @@ from core.ops.langsmith_trace.entities.langsmith_trace_entity import ( ) from core.ops.utils import filter_none_values, generate_dotted_order from core.repositories import DifyCoreRepositoryFactory -from dify_graph.enums import NodeType, WorkflowNodeExecutionMetadataKey +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionMetadataKey from extensions.ext_database import db from models import EndUser, MessageFile, WorkflowNodeExecutionTriggeredFrom @@ -163,7 +163,7 @@ class LangSmithDataTrace(BaseTraceInstance): node_name = node_execution.title node_type = node_execution.node_type status = node_execution.status - if node_type == NodeType.LLM: + if node_type == BuiltinNodeTypes.LLM: inputs = node_execution.process_data.get("prompts", {}) if node_execution.process_data else {} else: inputs = node_execution.inputs or {} @@ -197,7 +197,7 @@ class LangSmithDataTrace(BaseTraceInstance): "ls_model_name": process_data.get("model_name", ""), } ) - elif node_type == NodeType.KNOWLEDGE_RETRIEVAL: + elif node_type == BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: run_type = LangSmithRunType.retriever else: run_type = 
LangSmithRunType.tool diff --git a/api/core/ops/mlflow_trace/mlflow_trace.py b/api/core/ops/mlflow_trace/mlflow_trace.py index ba2cb9e0c3..ab4a7650ec 100644 --- a/api/core/ops/mlflow_trace/mlflow_trace.py +++ b/api/core/ops/mlflow_trace/mlflow_trace.py @@ -23,7 +23,7 @@ from core.ops.entities.trace_entity import ( TraceTaskName, WorkflowTraceInfo, ) -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from extensions.ext_database import db from models import EndUser from models.workflow import WorkflowNodeExecutionModel @@ -145,10 +145,10 @@ class MLflowDataTrace(BaseTraceInstance): "app_name": node.title, } - if node.node_type in (NodeType.LLM, NodeType.QUESTION_CLASSIFIER): + if node.node_type in (BuiltinNodeTypes.LLM, BuiltinNodeTypes.QUESTION_CLASSIFIER): inputs, llm_attributes = self._parse_llm_inputs_and_attributes(node) attributes.update(llm_attributes) - elif node.node_type == NodeType.HTTP_REQUEST: + elif node.node_type == BuiltinNodeTypes.HTTP_REQUEST: inputs = node.process_data # contains request URL if not inputs: @@ -180,9 +180,9 @@ class MLflowDataTrace(BaseTraceInstance): # End node span finished_at = node.created_at + timedelta(seconds=node.elapsed_time) outputs = json.loads(node.outputs) if node.outputs else {} - if node.node_type == NodeType.KNOWLEDGE_RETRIEVAL: + if node.node_type == BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: outputs = self._parse_knowledge_retrieval_outputs(outputs) - elif node.node_type == NodeType.LLM: + elif node.node_type == BuiltinNodeTypes.LLM: outputs = outputs.get("text", outputs) node_span.end( outputs=outputs, @@ -471,13 +471,13 @@ class MLflowDataTrace(BaseTraceInstance): def _get_node_span_type(self, node_type: str) -> str: """Map Dify node types to MLflow span types""" node_type_mapping = { - NodeType.LLM: SpanType.LLM, - NodeType.QUESTION_CLASSIFIER: SpanType.LLM, - NodeType.KNOWLEDGE_RETRIEVAL: SpanType.RETRIEVER, - NodeType.TOOL: SpanType.TOOL, - NodeType.CODE: SpanType.TOOL, - NodeType.HTTP_REQUEST: SpanType.TOOL, - NodeType.AGENT: SpanType.AGENT, + BuiltinNodeTypes.LLM: SpanType.LLM, + BuiltinNodeTypes.QUESTION_CLASSIFIER: SpanType.LLM, + BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: SpanType.RETRIEVER, + BuiltinNodeTypes.TOOL: SpanType.TOOL, + BuiltinNodeTypes.CODE: SpanType.TOOL, + BuiltinNodeTypes.HTTP_REQUEST: SpanType.TOOL, + BuiltinNodeTypes.AGENT: SpanType.AGENT, } return node_type_mapping.get(node_type, "CHAIN") # type: ignore[arg-type,call-overload] diff --git a/api/core/ops/opik_trace/opik_trace.py b/api/core/ops/opik_trace/opik_trace.py index eab51fd9f8..fb72bc2381 100644 --- a/api/core/ops/opik_trace/opik_trace.py +++ b/api/core/ops/opik_trace/opik_trace.py @@ -23,7 +23,7 @@ from core.ops.entities.trace_entity import ( WorkflowTraceInfo, ) from core.repositories import DifyCoreRepositoryFactory -from dify_graph.enums import NodeType, WorkflowNodeExecutionMetadataKey +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionMetadataKey from extensions.ext_database import db from models import EndUser, MessageFile, WorkflowNodeExecutionTriggeredFrom @@ -187,7 +187,7 @@ class OpikDataTrace(BaseTraceInstance): node_name = node_execution.title node_type = node_execution.node_type status = node_execution.status - if node_type == NodeType.LLM: + if node_type == BuiltinNodeTypes.LLM: inputs = node_execution.process_data.get("prompts", {}) if node_execution.process_data else {} else: inputs = node_execution.inputs or {} diff --git a/api/core/ops/ops_trace_manager.py b/api/core/ops/ops_trace_manager.py 
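Note on the resolved_trace_id property added to BaseTraceInfo in trace_entity.py above: it is a three-step fallback (external trace_id from the X-Trace-Id header, then workflow_run_id, then message_id). A simplified sketch of that behavior follows; the stand-in model declares workflow_run_id directly, while the real property reads it via getattr because only workflow-related subclasses define it.

from pydantic import BaseModel


class TraceInfoSketch(BaseModel):
    trace_id: str | None = None
    workflow_run_id: str | None = None
    message_id: str | None = None

    @property
    def resolved_trace_id(self) -> str | None:
        if self.trace_id:  # 1) explicit external trace id wins
            return self.trace_id
        if self.workflow_run_id:  # 2) fall back to the workflow run id
            return self.workflow_run_id
        return self.message_id  # 3) last resort: the message id, else None


assert TraceInfoSketch(trace_id="t1", workflow_run_id="w1").resolved_trace_id == "t1"
assert TraceInfoSketch(workflow_run_id="w1", message_id="m1").resolved_trace_id == "w1"
assert TraceInfoSketch(message_id="m1").resolved_trace_id == "m1"
assert TraceInfoSketch().resolved_trace_id is None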
index 33782e7949..d4b8917440 100644 --- a/api/core/ops/ops_trace_manager.py +++ b/api/core/ops/ops_trace_manager.py @@ -15,22 +15,32 @@ from sqlalchemy import select from sqlalchemy.orm import Session, sessionmaker from core.helper.encrypter import batch_decrypt_token, encrypt_token, obfuscated_token -from core.ops.entities.config_entity import OPS_FILE_PATH, TracingProviderEnum +from core.ops.entities.config_entity import ( + OPS_FILE_PATH, + TracingProviderEnum, +) from core.ops.entities.trace_entity import ( DatasetRetrievalTraceInfo, + DraftNodeExecutionTrace, GenerateNameTraceInfo, MessageTraceInfo, ModerationTraceInfo, + PromptGenerationTraceInfo, SuggestedQuestionTraceInfo, TaskData, ToolTraceInfo, TraceTaskName, + WorkflowNodeTraceInfo, WorkflowTraceInfo, ) from core.ops.utils import get_message_data +from extensions.ext_database import db from extensions.ext_storage import storage -from models.engine import db +from models.account import Tenant +from models.dataset import Dataset from models.model import App, AppModelConfig, Conversation, Message, MessageFile, TraceAppConfig +from models.provider import Provider, ProviderCredential, ProviderModel, ProviderModelCredential, ProviderType +from models.tools import ApiToolProvider, BuiltinToolProvider, MCPToolProvider, WorkflowToolProvider from models.workflow import WorkflowAppLog from tasks.ops_trace_task import process_trace_tasks @@ -40,9 +50,142 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) +def _lookup_app_and_workspace_names(app_id: str | None, tenant_id: str | None) -> tuple[str, str]: + """Return (app_name, workspace_name) for the given IDs. Falls back to empty strings.""" + app_name = "" + workspace_name = "" + if not app_id and not tenant_id: + return app_name, workspace_name + with Session(db.engine) as session: + if app_id: + name = session.scalar(select(App.name).where(App.id == app_id)) + if name: + app_name = name + if tenant_id: + name = session.scalar(select(Tenant.name).where(Tenant.id == tenant_id)) + if name: + workspace_name = name + return app_name, workspace_name + + +_PROVIDER_TYPE_TO_MODEL: dict[str, type] = { + "builtin": BuiltinToolProvider, + "plugin": BuiltinToolProvider, + "api": ApiToolProvider, + "workflow": WorkflowToolProvider, + "mcp": MCPToolProvider, +} + + +def _lookup_credential_name(credential_id: str | None, provider_type: str | None) -> str: + if not credential_id: + return "" + model_cls = _PROVIDER_TYPE_TO_MODEL.get(provider_type or "") + if not model_cls: + return "" + with Session(db.engine) as session: + name = session.scalar(select(model_cls.name).where(model_cls.id == credential_id)) # type: ignore[attr-defined] + return str(name) if name else "" + + +def _lookup_llm_credential_info( + tenant_id: str | None, provider: str | None, model: str | None, model_type: str | None = "llm" +) -> tuple[str | None, str]: + """ + Lookup LLM credential ID and name for the given provider and model. + Returns (credential_id, credential_name). + + Handles async timing issues gracefully - if credential is deleted between lookups, + returns the ID but empty name rather than failing. 
+ """ + if not tenant_id or not provider: + return None, "" + + try: + with Session(db.engine) as session: + # Try to find provider-level or model-level configuration + provider_record = session.scalar( + select(Provider).where( + Provider.tenant_id == tenant_id, + Provider.provider_name == provider, + Provider.provider_type == ProviderType.CUSTOM, + ) + ) + + if not provider_record: + return None, "" + + # Check if there's a model-specific config + credential_id = None + credential_name = "" + is_model_level = False + + if model: + # Try model-level first + model_record = session.scalar( + select(ProviderModel).where( + ProviderModel.tenant_id == tenant_id, + ProviderModel.provider_name == provider, + ProviderModel.model_name == model, + ProviderModel.model_type == model_type, + ) + ) + + if model_record and model_record.credential_id: + credential_id = model_record.credential_id + is_model_level = True + + if not credential_id and provider_record.credential_id: + # Fall back to provider-level credential + credential_id = provider_record.credential_id + is_model_level = False + + # Lookup credential_name if we have credential_id + if credential_id: + try: + if is_model_level: + # Query ProviderModelCredential + cred_name = session.scalar( + select(ProviderModelCredential.credential_name).where( + ProviderModelCredential.id == credential_id + ) + ) + else: + # Query ProviderCredential + cred_name = session.scalar( + select(ProviderCredential.credential_name).where(ProviderCredential.id == credential_id) + ) + + if cred_name: + credential_name = str(cred_name) + except Exception as e: + # Credential might have been deleted between lookups (async timing) + # Return ID but empty name rather than failing + logger.warning( + "Failed to lookup credential name for credential_id=%s (provider=%s, model=%s): %s", + credential_id, + provider, + model, + str(e), + ) + + return credential_id, credential_name + except Exception as e: + # Database query failed or other unexpected error + # Return empty rather than propagating error to telemetry emission + logger.warning( + "Failed to lookup LLM credential info for tenant_id=%s, provider=%s, model=%s: %s", + tenant_id, + provider, + model, + str(e), + ) + return None, "" + + class OpsTraceProviderConfigMap(collections.UserDict[str, dict[str, Any]]): - def __getitem__(self, key: str) -> dict[str, Any]: - match key: + def __getitem__(self, provider: str) -> dict[str, Any]: + match provider: case TracingProviderEnum.LANGFUSE: from core.ops.entities.config_entity import LangfuseConfig from core.ops.langfuse_trace.langfuse_trace import LangFuseDataTrace @@ -149,7 +292,7 @@ class OpsTraceProviderConfigMap(collections.UserDict[str, dict[str, Any]]): } case _: - raise KeyError(f"Unsupported tracing provider: {key}") + raise KeyError(f"Unsupported tracing provider: {provider}") provider_config_map = OpsTraceProviderConfigMap() @@ -314,6 +457,10 @@ class OpsTraceManager: if app_id is None: return None + # Handle storage_id format (tenant-{uuid}) - not a real app_id + if isinstance(app_id, str) and app_id.startswith("tenant-"): + return None + app: App | None = db.session.query(App).where(App.id == app_id).first() if app is None: @@ -466,8 +613,6 @@ class TraceTask: @classmethod def _get_workflow_run_repo(cls): - from repositories.factory import DifyAPIRepositoryFactory - if cls._workflow_run_repo is None: with cls._repo_lock: if cls._workflow_run_repo is None: @@ -478,6 +623,56 @@ class TraceTask: cls._workflow_run_repo = 
DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker) return cls._workflow_run_repo + @classmethod + def _get_user_id_from_metadata(cls, metadata: dict[str, Any]) -> str: + """Extract user ID from metadata, prioritizing end_user over account. + + Returns the actual user ID (end_user or account) who invoked the workflow, + regardless of invoke_from context. + """ + # Priority 1: End user (external users via API/WebApp) + if user_id := metadata.get("from_end_user_id"): + return f"end_user:{user_id}" + + # Priority 2: Account user (internal users via console/debugger) + if user_id := metadata.get("from_account_id"): + return f"account:{user_id}" + + # Priority 3: User (internal users via console/debugger) + if user_id := metadata.get("user_id"): + return f"user:{user_id}" + + return "anonymous" + + @classmethod + def _calculate_workflow_token_split(cls, workflow_run_id: str, tenant_id: str) -> tuple[int, int]: + from dify_graph.enums import WorkflowNodeExecutionMetadataKey + from models.workflow import WorkflowNodeExecutionModel + + with Session(db.engine) as session: + node_executions = session.scalars( + select(WorkflowNodeExecutionModel).where( + WorkflowNodeExecutionModel.tenant_id == tenant_id, + WorkflowNodeExecutionModel.workflow_run_id == workflow_run_id, + ) + ).all() + + total_prompt = 0 + total_completion = 0 + + for node_exec in node_executions: + metadata = node_exec.execution_metadata_dict + + prompt = metadata.get(WorkflowNodeExecutionMetadataKey.PROMPT_TOKENS) + if prompt is not None: + total_prompt += prompt + + completion = metadata.get(WorkflowNodeExecutionMetadataKey.COMPLETION_TOKENS) + if completion is not None: + total_completion += completion + + return (total_prompt, total_completion) + def __init__( self, trace_type: Any, @@ -498,6 +693,8 @@ class TraceTask: self.app_id = None self.trace_id = None self.kwargs = kwargs + if user_id is not None and "user_id" not in self.kwargs: + self.kwargs["user_id"] = user_id external_trace_id = kwargs.get("external_trace_id") if external_trace_id: self.trace_id = external_trace_id @@ -511,7 +708,7 @@ class TraceTask: TraceTaskName.WORKFLOW_TRACE: lambda: self.workflow_trace( workflow_run_id=self.workflow_run_id, conversation_id=self.conversation_id, user_id=self.user_id ), - TraceTaskName.MESSAGE_TRACE: lambda: self.message_trace(message_id=self.message_id), + TraceTaskName.MESSAGE_TRACE: lambda: self.message_trace(message_id=self.message_id, **self.kwargs), TraceTaskName.MODERATION_TRACE: lambda: self.moderation_trace( message_id=self.message_id, timer=self.timer, **self.kwargs ), @@ -527,6 +724,9 @@ class TraceTask: TraceTaskName.GENERATE_NAME_TRACE: lambda: self.generate_name_trace( conversation_id=self.conversation_id, timer=self.timer, **self.kwargs ), + TraceTaskName.PROMPT_GENERATION_TRACE: lambda: self.prompt_generation_trace(**self.kwargs), + TraceTaskName.NODE_EXECUTION_TRACE: lambda: self.node_execution_trace(**self.kwargs), + TraceTaskName.DRAFT_NODE_EXECUTION_TRACE: lambda: self.draft_node_execution_trace(**self.kwargs), } return preprocess_map.get(self.trace_type, lambda: None)() @@ -562,6 +762,10 @@ class TraceTask: total_tokens = workflow_run.total_tokens + prompt_tokens, completion_tokens = self._calculate_workflow_token_split( + workflow_run_id=workflow_run_id, tenant_id=tenant_id + ) + file_list = workflow_run_inputs.get("sys.file") or [] query = workflow_run_inputs.get("query") or workflow_run_inputs.get("sys.query") or "" @@ -582,7 +786,14 @@ class TraceTask: ) message_id = 
session.scalar(message_data_stmt) - metadata = { + from core.telemetry.gateway import is_enterprise_telemetry_enabled + + if is_enterprise_telemetry_enabled(): + app_name, workspace_name = _lookup_app_and_workspace_names(workflow_run.app_id, tenant_id) + else: + app_name, workspace_name = "", "" + + metadata: dict[str, Any] = { "workflow_id": workflow_id, "conversation_id": conversation_id, "workflow_run_id": workflow_run_id, @@ -595,8 +806,14 @@ class TraceTask: "triggered_from": workflow_run.triggered_from, "user_id": user_id, "app_id": workflow_run.app_id, + "app_name": app_name, + "workspace_name": workspace_name, } + parent_trace_context = self.kwargs.get("parent_trace_context") + if parent_trace_context: + metadata["parent_trace_context"] = parent_trace_context + workflow_trace_info = WorkflowTraceInfo( trace_id=self.trace_id, workflow_data=workflow_run.to_dict(), @@ -611,6 +828,8 @@ class TraceTask: workflow_run_version=workflow_run_version, error=error, total_tokens=total_tokens, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, file_list=file_list, query=query, metadata=metadata, @@ -618,20 +837,21 @@ class TraceTask: message_id=message_id, start_time=workflow_run.created_at, end_time=workflow_run.finished_at, + invoked_by=self._get_user_id_from_metadata(metadata), ) return workflow_trace_info - def message_trace(self, message_id: str | None): + def message_trace(self, message_id: str | None, **kwargs): if not message_id: return {} message_data = get_message_data(message_id) if not message_data: return {} conversation_mode_stmt = select(Conversation.mode).where(Conversation.id == message_data.conversation_id) - conversation_mode = db.session.scalars(conversation_mode_stmt).all() - if not conversation_mode or len(conversation_mode) == 0: + conversation_modes = db.session.scalars(conversation_mode_stmt).all() + if not conversation_modes or len(conversation_modes) == 0: return {} - conversation_mode = conversation_mode[0] + conversation_mode = conversation_modes[0] created_at = message_data.created_at inputs = message_data.message @@ -644,6 +864,19 @@ class TraceTask: streaming_metrics = self._extract_streaming_metrics(message_data) + tenant_id = "" + with Session(db.engine) as session: + tid = session.scalar(select(App.tenant_id).where(App.id == message_data.app_id)) + if tid: + tenant_id = str(tid) + + from core.telemetry.gateway import is_enterprise_telemetry_enabled + + if is_enterprise_telemetry_enabled(): + app_name, workspace_name = _lookup_app_and_workspace_names(message_data.app_id, tenant_id) + else: + app_name, workspace_name = "", "" + metadata = { "conversation_id": message_data.conversation_id, "ls_provider": message_data.model_provider, @@ -655,7 +888,14 @@ class TraceTask: "workflow_run_id": message_data.workflow_run_id, "from_source": message_data.from_source, "message_id": message_id, + "tenant_id": tenant_id, + "app_id": message_data.app_id, + "user_id": message_data.from_end_user_id or message_data.from_account_id, + "app_name": app_name, + "workspace_name": workspace_name, } + if node_execution_id := kwargs.get("node_execution_id"): + metadata["node_execution_id"] = node_execution_id message_tokens = message_data.message_tokens @@ -672,7 +912,9 @@ class TraceTask: outputs=message_data.answer, file_list=file_list, start_time=created_at, - end_time=created_at + timedelta(seconds=message_data.provider_response_latency), + end_time=message_data.updated_at + if message_data.updated_at and message_data.updated_at > created_at + else created_at + 
timedelta(seconds=message_data.provider_response_latency), metadata=metadata, message_file_data=message_file_data, conversation_mode=conversation_mode, @@ -697,6 +939,8 @@ class TraceTask: "preset_response": moderation_result.preset_response, "query": moderation_result.query, } + if node_execution_id := kwargs.get("node_execution_id"): + metadata["node_execution_id"] = node_execution_id # get workflow_app_log_id workflow_app_log_id = None @@ -738,6 +982,8 @@ class TraceTask: "workflow_run_id": message_data.workflow_run_id, "from_source": message_data.from_source, } + if node_execution_id := kwargs.get("node_execution_id"): + metadata["node_execution_id"] = node_execution_id # get workflow_app_log_id workflow_app_log_id = None @@ -777,6 +1023,52 @@ class TraceTask: if not message_data: return {} + tenant_id = "" + with Session(db.engine) as session: + tid = session.scalar(select(App.tenant_id).where(App.id == message_data.app_id)) + if tid: + tenant_id = str(tid) + + from core.telemetry.gateway import is_enterprise_telemetry_enabled + + if is_enterprise_telemetry_enabled(): + app_name, workspace_name = _lookup_app_and_workspace_names(message_data.app_id, tenant_id) + else: + app_name, workspace_name = "", "" + + doc_list = [doc.model_dump() for doc in documents] if documents else [] + dataset_ids: set[str] = set() + for doc in doc_list: + doc_meta = doc.get("metadata") or {} + did = doc_meta.get("dataset_id") + if did: + dataset_ids.add(did) + + embedding_models: dict[str, dict[str, str]] = {} + if dataset_ids: + with Session(db.engine) as session: + rows = session.execute( + select(Dataset.id, Dataset.embedding_model, Dataset.embedding_model_provider).where( + Dataset.id.in_(list(dataset_ids)) + ) + ).all() + for row in rows: + embedding_models[str(row[0])] = { + "embedding_model": row[1] or "", + "embedding_model_provider": row[2] or "", + } + + # Extract rerank model info from retrieval_model kwargs + rerank_model_provider = "" + rerank_model_name = "" + if "retrieval_model" in kwargs: + retrieval_model = kwargs["retrieval_model"] + if isinstance(retrieval_model, dict): + reranking_model = retrieval_model.get("reranking_model") + if isinstance(reranking_model, dict): + rerank_model_provider = reranking_model.get("reranking_provider_name", "") + rerank_model_name = reranking_model.get("reranking_model_name", "") + metadata = { "message_id": message_id, "ls_provider": message_data.model_provider, @@ -787,13 +1079,23 @@ class TraceTask: "agent_based": message_data.agent_based, "workflow_run_id": message_data.workflow_run_id, "from_source": message_data.from_source, + "tenant_id": tenant_id, + "app_id": message_data.app_id, + "user_id": message_data.from_end_user_id or message_data.from_account_id, + "app_name": app_name, + "workspace_name": workspace_name, + "embedding_models": embedding_models, + "rerank_model_provider": rerank_model_provider, + "rerank_model_name": rerank_model_name, } + if node_execution_id := kwargs.get("node_execution_id"): + metadata["node_execution_id"] = node_execution_id dataset_retrieval_trace_info = DatasetRetrievalTraceInfo( trace_id=self.trace_id, message_id=message_id, inputs=message_data.query or message_data.inputs, - documents=[doc.model_dump() for doc in documents] if documents else [], + documents=doc_list, start_time=timer.get("start"), end_time=timer.get("end"), metadata=metadata, @@ -836,6 +1138,10 @@ class TraceTask: "error": error, "tool_parameters": tool_parameters, } + if message_data.workflow_run_id: + metadata["workflow_run_id"] = 
message_data.workflow_run_id + if node_execution_id := kwargs.get("node_execution_id"): + metadata["node_execution_id"] = node_execution_id file_url = "" message_file_data = db.session.query(MessageFile).filter_by(message_id=message_id).first() @@ -890,6 +1196,8 @@ class TraceTask: "conversation_id": conversation_id, "tenant_id": tenant_id, } + if node_execution_id := kwargs.get("node_execution_id"): + metadata["node_execution_id"] = node_execution_id generate_name_trace_info = GenerateNameTraceInfo( trace_id=self.trace_id, @@ -904,6 +1212,182 @@ class TraceTask: return generate_name_trace_info + def prompt_generation_trace(self, **kwargs) -> PromptGenerationTraceInfo | dict: + tenant_id = kwargs.get("tenant_id", "") + user_id = kwargs.get("user_id", "") + app_id = kwargs.get("app_id") + operation_type = kwargs.get("operation_type", "") + instruction = kwargs.get("instruction", "") + generated_output = kwargs.get("generated_output", "") + + prompt_tokens = kwargs.get("prompt_tokens", 0) + completion_tokens = kwargs.get("completion_tokens", 0) + total_tokens = kwargs.get("total_tokens", 0) + + model_provider = kwargs.get("model_provider", "") + model_name = kwargs.get("model_name", "") + + latency = kwargs.get("latency", 0.0) + + timer = kwargs.get("timer") + start_time = timer.get("start") if timer else None + end_time = timer.get("end") if timer else None + + total_price = kwargs.get("total_price") + currency = kwargs.get("currency") + + error = kwargs.get("error") + + app_name = None + workspace_name = None + if app_id: + app_name, workspace_name = _lookup_app_and_workspace_names(app_id, tenant_id) + + metadata = { + "tenant_id": tenant_id, + "user_id": user_id, + "app_id": app_id or "", + "app_name": app_name, + "workspace_name": workspace_name, + "operation_type": operation_type, + "model_provider": model_provider, + "model_name": model_name, + } + if node_execution_id := kwargs.get("node_execution_id"): + metadata["node_execution_id"] = node_execution_id + + return PromptGenerationTraceInfo( + trace_id=self.trace_id, + inputs=instruction, + outputs=generated_output, + start_time=start_time, + end_time=end_time, + metadata=metadata, + tenant_id=tenant_id, + user_id=user_id, + app_id=app_id, + operation_type=operation_type, + instruction=instruction, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=total_tokens, + model_provider=model_provider, + model_name=model_name, + latency=latency, + total_price=total_price, + currency=currency, + error=error, + ) + + def node_execution_trace(self, **kwargs) -> WorkflowNodeTraceInfo | dict: + node_data: dict = kwargs.get("node_execution_data", {}) + if not node_data: + return {} + + from core.telemetry.gateway import is_enterprise_telemetry_enabled + + if is_enterprise_telemetry_enabled(): + app_name, workspace_name = _lookup_app_and_workspace_names( + node_data.get("app_id"), node_data.get("tenant_id") + ) + else: + app_name, workspace_name = "", "" + + # Try tool credential lookup first + credential_id = node_data.get("credential_id") + if is_enterprise_telemetry_enabled(): + credential_name = _lookup_credential_name(credential_id, node_data.get("credential_provider_type")) + # If no credential_id found (e.g., LLM nodes), try LLM credential lookup + if not credential_id: + llm_cred_id, llm_cred_name = _lookup_llm_credential_info( + tenant_id=node_data.get("tenant_id"), + provider=node_data.get("model_provider"), + model=node_data.get("model_name"), + model_type="llm", + ) + if llm_cred_id: + credential_id = 
llm_cred_id + credential_name = llm_cred_name + else: + credential_name = "" + metadata: dict[str, Any] = { + "tenant_id": node_data.get("tenant_id"), + "app_id": node_data.get("app_id"), + "app_name": app_name, + "workspace_name": workspace_name, + "user_id": node_data.get("user_id"), + "invoke_from": node_data.get("invoke_from"), + "credential_id": credential_id, + "credential_name": credential_name, + "dataset_ids": node_data.get("dataset_ids"), + "dataset_names": node_data.get("dataset_names"), + "plugin_name": node_data.get("plugin_name"), + } + + parent_trace_context = node_data.get("parent_trace_context") + if parent_trace_context: + metadata["parent_trace_context"] = parent_trace_context + + message_id: str | None = None + conversation_id = node_data.get("conversation_id") + workflow_execution_id = node_data.get("workflow_execution_id") + if conversation_id and workflow_execution_id and not parent_trace_context: + with Session(db.engine) as session: + msg_id = session.scalar( + select(Message.id).where( + Message.conversation_id == conversation_id, + Message.workflow_run_id == workflow_execution_id, + ) + ) + if msg_id: + message_id = str(msg_id) + metadata["message_id"] = message_id + if conversation_id: + metadata["conversation_id"] = conversation_id + + return WorkflowNodeTraceInfo( + trace_id=self.trace_id, + message_id=message_id, + start_time=node_data.get("created_at"), + end_time=node_data.get("finished_at"), + metadata=metadata, + workflow_id=node_data.get("workflow_id", ""), + workflow_run_id=node_data.get("workflow_execution_id", ""), + tenant_id=node_data.get("tenant_id", ""), + node_execution_id=node_data.get("node_execution_id", ""), + node_id=node_data.get("node_id", ""), + node_type=node_data.get("node_type", ""), + title=node_data.get("title", ""), + status=node_data.get("status", ""), + error=node_data.get("error"), + elapsed_time=node_data.get("elapsed_time", 0.0), + index=node_data.get("index", 0), + predecessor_node_id=node_data.get("predecessor_node_id"), + total_tokens=node_data.get("total_tokens", 0), + total_price=node_data.get("total_price", 0.0), + currency=node_data.get("currency"), + model_provider=node_data.get("model_provider"), + model_name=node_data.get("model_name"), + prompt_tokens=node_data.get("prompt_tokens"), + completion_tokens=node_data.get("completion_tokens"), + tool_name=node_data.get("tool_name"), + iteration_id=node_data.get("iteration_id"), + iteration_index=node_data.get("iteration_index"), + loop_id=node_data.get("loop_id"), + loop_index=node_data.get("loop_index"), + parallel_id=node_data.get("parallel_id"), + node_inputs=node_data.get("node_inputs"), + node_outputs=node_data.get("node_outputs"), + process_data=node_data.get("process_data"), + invoked_by=self._get_user_id_from_metadata(metadata), + ) + + def draft_node_execution_trace(self, **kwargs) -> DraftNodeExecutionTrace | dict: + node_trace = self.node_execution_trace(**kwargs) + if not isinstance(node_trace, WorkflowNodeTraceInfo): + return node_trace + return DraftNodeExecutionTrace(**node_trace.model_dump()) + def _extract_streaming_metrics(self, message_data) -> dict: if not message_data.message_metadata: return {} @@ -937,13 +1421,17 @@ class TraceQueueManager: self.user_id = user_id self.trace_instance = OpsTraceManager.get_ops_trace_instance(app_id) self.flask_app = current_app._get_current_object() # type: ignore + + from core.telemetry.gateway import is_enterprise_telemetry_enabled + + self._enterprise_telemetry_enabled = is_enterprise_telemetry_enabled() if 
trace_manager_timer is None: self.start_timer() def add_trace_task(self, trace_task: TraceTask): global trace_manager_timer, trace_manager_queue try: - if self.trace_instance: + if self._enterprise_telemetry_enabled or self.trace_instance: trace_task.app_id = self.app_id trace_manager_queue.put(trace_task) except Exception: @@ -979,20 +1467,27 @@ class TraceQueueManager: def send_to_celery(self, tasks: list[TraceTask]): with self.flask_app.app_context(): for task in tasks: - if task.app_id is None: - continue + storage_id = task.app_id + if storage_id is None: + tenant_id = task.kwargs.get("tenant_id") + if tenant_id: + storage_id = f"tenant-{tenant_id}" + else: + logger.warning("Skipping trace without app_id or tenant_id, trace_type: %s", task.trace_type) + continue + file_id = uuid4().hex trace_info = task.execute() task_data = TaskData( - app_id=task.app_id, + app_id=storage_id, trace_info_type=type(trace_info).__name__, trace_info=trace_info.model_dump() if trace_info else None, ) - file_path = f"{OPS_FILE_PATH}{task.app_id}/{file_id}.json" + file_path = f"{OPS_FILE_PATH}{storage_id}/{file_id}.json" storage.save(file_path, task_data.model_dump_json().encode("utf-8")) file_info = { "file_id": file_id, - "app_id": task.app_id, + "app_id": storage_id, } process_trace_tasks.delay(file_info) # type: ignore diff --git a/api/core/ops/tencent_trace/tencent_trace.py b/api/core/ops/tencent_trace/tencent_trace.py index cbff1c9e1c..7e56b1effa 100644 --- a/api/core/ops/tencent_trace/tencent_trace.py +++ b/api/core/ops/tencent_trace/tencent_trace.py @@ -27,7 +27,7 @@ from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository from dify_graph.entities.workflow_node_execution import ( WorkflowNodeExecution, ) -from dify_graph.nodes import NodeType +from dify_graph.nodes import BuiltinNodeTypes from extensions.ext_database import db from models import Account, App, TenantAccountJoin, WorkflowNodeExecutionTriggeredFrom @@ -179,7 +179,7 @@ class TencentDataTrace(BaseTraceInstance): if node_span: self.trace_client.add_span(node_span) - if node_execution.node_type == NodeType.LLM: + if node_execution.node_type == BuiltinNodeTypes.LLM: self._record_llm_metrics(node_execution) except Exception: logger.exception("[Tencent APM] Failed to process node execution: %s", node_execution.id) @@ -192,15 +192,15 @@ class TencentDataTrace(BaseTraceInstance): ) -> SpanData | None: """Build span for different node types""" try: - if node_execution.node_type == NodeType.LLM: + if node_execution.node_type == BuiltinNodeTypes.LLM: return TencentSpanBuilder.build_workflow_llm_span( trace_id, workflow_span_id, trace_info, node_execution ) - elif node_execution.node_type == NodeType.KNOWLEDGE_RETRIEVAL: + elif node_execution.node_type == BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: return TencentSpanBuilder.build_workflow_retrieval_span( trace_id, workflow_span_id, trace_info, node_execution ) - elif node_execution.node_type == NodeType.TOOL: + elif node_execution.node_type == BuiltinNodeTypes.TOOL: return TencentSpanBuilder.build_workflow_tool_span( trace_id, workflow_span_id, trace_info, node_execution ) diff --git a/api/core/ops/weave_trace/weave_trace.py b/api/core/ops/weave_trace/weave_trace.py index 7b62207366..2a657b672c 100644 --- a/api/core/ops/weave_trace/weave_trace.py +++ b/api/core/ops/weave_trace/weave_trace.py @@ -31,7 +31,7 @@ from core.ops.entities.trace_entity import ( ) from core.ops.weave_trace.entities.weave_trace_entity import WeaveTraceModel from core.repositories import DifyCoreRepositoryFactory 
-from dify_graph.enums import NodeType, WorkflowNodeExecutionMetadataKey
+from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionMetadataKey
 from extensions.ext_database import db
 from models import EndUser, MessageFile, WorkflowNodeExecutionTriggeredFrom
@@ -175,7 +175,7 @@ class WeaveDataTrace(BaseTraceInstance):
             node_name = node_execution.title
             node_type = node_execution.node_type
             status = node_execution.status
-            if node_type == NodeType.LLM:
+            if node_type == BuiltinNodeTypes.LLM:
                 inputs = node_execution.process_data.get("prompts", {}) if node_execution.process_data else {}
             else:
                 inputs = node_execution.inputs or {}
diff --git a/api/core/plugin/backwards_invocation/node.py b/api/core/plugin/backwards_invocation/node.py
index 33c45c0007..d6aef93fc4 100644
--- a/api/core/plugin/backwards_invocation/node.py
+++ b/api/core/plugin/backwards_invocation/node.py
@@ -1,5 +1,5 @@
 from core.plugin.backwards_invocation.base import BaseBackwardsInvocation
-from dify_graph.enums import NodeType
+from dify_graph.enums import BuiltinNodeTypes
 from dify_graph.nodes.parameter_extractor.entities import (
     ModelConfig as ParameterExtractorModelConfig,
 )
@@ -52,7 +52,7 @@ class PluginNodeBackwardsInvocation(BaseBackwardsInvocation):
             instruction=instruction,  # instruct with variables are not supported
         )
         node_data_dict = node_data.model_dump()
-        node_data_dict["type"] = NodeType.PARAMETER_EXTRACTOR
+        node_data_dict["type"] = BuiltinNodeTypes.PARAMETER_EXTRACTOR
         execution = workflow_service.run_free_workflow_node(
             node_data_dict,
             tenant_id=tenant_id,
diff --git a/api/core/provider_manager.py b/api/core/provider_manager.py
index f82c3a846b..c538a557fb 100644
--- a/api/core/provider_manager.py
+++ b/api/core/provider_manager.py
@@ -305,9 +305,7 @@ class ProviderManager:
             available_models = provider_configurations.get_models(model_type=model_type, only_active=True)
 
             if available_models:
-                available_model = next(
-                    (model for model in available_models if model.model == "gpt-4"), available_models[0]
-                )
+                available_model = available_models[0]
 
                 default_model = TenantDefaultModel(
                     tenant_id=tenant_id,
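Note on the hunk above: auto-selection of a tenant default model no longer special-cases "gpt-4"; the first active model is used unconditionally. A small before/after sketch with a hypothetical model record type:

from dataclasses import dataclass


@dataclass
class ModelEntry:
    model: str


available_models = [ModelEntry("claude-3"), ModelEntry("gpt-4")]

# before: prefer "gpt-4" when present, otherwise take the first model
old_pick = next((m for m in available_models if m.model == "gpt-4"), available_models[0])
# after: always the first active model
new_pick = available_models[0]

assert old_pick.model == "gpt-4"
assert new_pick.model == "claude-3"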
@@ -627,7 +625,7 @@ class ProviderManager:
                 tenant_id=tenant_id,
                 # TODO: Use provider name with prefix after the data migration.
                 provider_name=ModelProviderID(provider_name).provider_name,
-                provider_type=ProviderType.SYSTEM.value,
+                provider_type=ProviderType.SYSTEM,
                 quota_type=quota.quota_type,
                 quota_limit=0,  # type: ignore
                 quota_used=0,
diff --git a/api/core/rag/datasource/vdb/hologres/__init__.py b/api/core/rag/datasource/vdb/hologres/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/rag/datasource/vdb/hologres/hologres_vector.py b/api/core/rag/datasource/vdb/hologres/hologres_vector.py
new file mode 100644
index 0000000000..36b259e494
--- /dev/null
+++ b/api/core/rag/datasource/vdb/hologres/hologres_vector.py
@@ -0,0 +1,361 @@
+import json
+import logging
+import time
+from typing import Any
+
+import holo_search_sdk as holo  # type: ignore
+from holo_search_sdk.types import BaseQuantizationType, DistanceType, TokenizerType
+from psycopg import sql as psql
+from pydantic import BaseModel, model_validator
+
+from configs import dify_config
+from core.rag.datasource.vdb.vector_base import BaseVector
+from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
+from core.rag.datasource.vdb.vector_type import VectorType
+from core.rag.embedding.embedding_base import Embeddings
+from core.rag.models.document import Document
+from extensions.ext_redis import redis_client
+from models.dataset import Dataset
+
+logger = logging.getLogger(__name__)
+
+
+class HologresVectorConfig(BaseModel):
+    """
+    Configuration for Hologres vector database connection.
+
+    In Hologres, access_key_id is used as the PostgreSQL username,
+    and access_key_secret is used as the PostgreSQL password.
+    """
+
+    host: str
+    port: int = 80
+    database: str
+    access_key_id: str
+    access_key_secret: str
+    schema_name: str = "public"
+    tokenizer: TokenizerType = "jieba"
+    distance_method: DistanceType = "Cosine"
+    base_quantization_type: BaseQuantizationType = "rabitq"
+    max_degree: int = 64
+    ef_construction: int = 400
+
+    @model_validator(mode="before")
+    @classmethod
+    def validate_config(cls, values: dict):
+        if not values.get("host"):
+            raise ValueError("config HOLOGRES_HOST is required")
+        if not values.get("database"):
+            raise ValueError("config HOLOGRES_DATABASE is required")
+        if not values.get("access_key_id"):
+            raise ValueError("config HOLOGRES_ACCESS_KEY_ID is required")
+        if not values.get("access_key_secret"):
+            raise ValueError("config HOLOGRES_ACCESS_KEY_SECRET is required")
+        return values
+
+
+class HologresVector(BaseVector):
+    """
+    Hologres vector storage implementation using holo-search-sdk.
+
+    Supports semantic search (vector), full-text search, and hybrid search.
+ """ + + def __init__(self, collection_name: str, config: HologresVectorConfig): + super().__init__(collection_name) + self._config = config + self._client = self._init_client(config) + self.table_name = f"embedding_{collection_name}".lower() + + def _init_client(self, config: HologresVectorConfig): + """Initialize and return a holo-search-sdk client.""" + client = holo.connect( + host=config.host, + port=config.port, + database=config.database, + access_key_id=config.access_key_id, + access_key_secret=config.access_key_secret, + schema=config.schema_name, + ) + client.connect() + return client + + def get_type(self) -> str: + return VectorType.HOLOGRES + + def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs): + """Create collection table with vector and full-text indexes, then add texts.""" + dimension = len(embeddings[0]) + self._create_collection(dimension) + self.add_texts(texts, embeddings) + + def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs): + """Add texts with embeddings to the collection using batch upsert.""" + if not documents: + return [] + + pks: list[str] = [] + batch_size = 100 + for i in range(0, len(documents), batch_size): + batch_docs = documents[i : i + batch_size] + batch_embeddings = embeddings[i : i + batch_size] + + values = [] + column_names = ["id", "text", "meta", "embedding"] + + for j, doc in enumerate(batch_docs): + doc_id = doc.metadata.get("doc_id", "") if doc.metadata else "" + pks.append(doc_id) + values.append( + [ + doc_id, + doc.page_content, + json.dumps(doc.metadata or {}), + batch_embeddings[j], + ] + ) + + table = self._client.open_table(self.table_name) + table.upsert_multi( + index_column="id", + values=values, + column_names=column_names, + update=True, + update_columns=["text", "meta", "embedding"], + ) + + return pks + + def text_exists(self, id: str) -> bool: + """Check if a text with the given doc_id exists in the collection.""" + if not self._client.check_table_exist(self.table_name): + return False + + result = self._client.execute( + psql.SQL("SELECT 1 FROM {} WHERE id = {} LIMIT 1").format( + psql.Identifier(self.table_name), psql.Literal(id) + ), + fetch_result=True, + ) + return bool(result) + + def get_ids_by_metadata_field(self, key: str, value: str) -> list[str] | None: + """Get document IDs by metadata field key and value.""" + result = self._client.execute( + psql.SQL("SELECT id FROM {} WHERE meta->>{} = {}").format( + psql.Identifier(self.table_name), psql.Literal(key), psql.Literal(value) + ), + fetch_result=True, + ) + if result: + return [row[0] for row in result] + return None + + def delete_by_ids(self, ids: list[str]): + """Delete documents by their doc_id list.""" + if not ids: + return + if not self._client.check_table_exist(self.table_name): + return + + self._client.execute( + psql.SQL("DELETE FROM {} WHERE id IN ({})").format( + psql.Identifier(self.table_name), + psql.SQL(", ").join(psql.Literal(id) for id in ids), + ) + ) + + def delete_by_metadata_field(self, key: str, value: str): + """Delete documents by metadata field key and value.""" + if not self._client.check_table_exist(self.table_name): + return + + self._client.execute( + psql.SQL("DELETE FROM {} WHERE meta->>{} = {}").format( + psql.Identifier(self.table_name), psql.Literal(key), psql.Literal(value) + ) + ) + + def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]: + """Search for documents by vector similarity.""" + if not 
self._client.check_table_exist(self.table_name): + return [] + + top_k = kwargs.get("top_k", 4) + score_threshold = float(kwargs.get("score_threshold") or 0.0) + + table = self._client.open_table(self.table_name) + query = ( + table.search_vector( + vector=query_vector, + column="embedding", + distance_method=self._config.distance_method, + output_name="distance", + ) + .select(["id", "text", "meta"]) + .limit(top_k) + ) + + # Apply document_ids_filter if provided + document_ids_filter = kwargs.get("document_ids_filter") + if document_ids_filter: + filter_sql = psql.SQL("meta->>'document_id' IN ({})").format( + psql.SQL(", ").join(psql.Literal(id) for id in document_ids_filter) + ) + query = query.where(filter_sql) + + results = query.fetchall() + return self._process_vector_results(results, score_threshold) + + def _process_vector_results(self, results: list, score_threshold: float) -> list[Document]: + """Process vector search results into Document objects.""" + docs = [] + for row in results: + # row format: (distance, id, text, meta) + # distance is first because search_vector() adds the computed column before selected columns + distance = row[0] + text = row[2] + meta = row[3] + + if isinstance(meta, str): + meta = json.loads(meta) + + # Convert distance to similarity score (consistent with pgvector) + score = 1 - distance + meta["score"] = score + + if score >= score_threshold: + docs.append(Document(page_content=text, metadata=meta)) + + return docs + + def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]: + """Search for documents by full-text search.""" + if not self._client.check_table_exist(self.table_name): + return [] + + top_k = kwargs.get("top_k", 4) + + table = self._client.open_table(self.table_name) + search_query = table.search_text( + column="text", + expression=query, + return_score=True, + return_score_name="score", + return_all_columns=True, + ).limit(top_k) + + # Apply document_ids_filter if provided + document_ids_filter = kwargs.get("document_ids_filter") + if document_ids_filter: + filter_sql = psql.SQL("meta->>'document_id' IN ({})").format( + psql.SQL(", ").join(psql.Literal(id) for id in document_ids_filter) + ) + search_query = search_query.where(filter_sql) + + results = search_query.fetchall() + return self._process_full_text_results(results) + + def _process_full_text_results(self, results: list) -> list[Document]: + """Process full-text search results into Document objects.""" + docs = [] + for row in results: + # row format: (id, text, meta, embedding, score) + text = row[1] + meta = row[2] + score = row[-1] # score is the last column from return_score + + if isinstance(meta, str): + meta = json.loads(meta) + + meta["score"] = score + docs.append(Document(page_content=text, metadata=meta)) + + return docs + + def delete(self): + """Delete the entire collection table.""" + if self._client.check_table_exist(self.table_name): + self._client.drop_table(self.table_name) + + def _create_collection(self, dimension: int): + """Create the collection table with vector and full-text indexes.""" + lock_name = f"vector_indexing_lock_{self._collection_name}" + with redis_client.lock(lock_name, timeout=20): + collection_exist_cache_key = f"vector_indexing_{self._collection_name}" + if redis_client.get(collection_exist_cache_key): + return + + if not self._client.check_table_exist(self.table_name): + # Create table via SQL with CHECK constraint for vector dimension + create_table_sql = psql.SQL(""" + CREATE TABLE IF NOT EXISTS {} ( + id TEXT 
PRIMARY KEY, + text TEXT NOT NULL, + meta JSONB NOT NULL, + embedding float4[] NOT NULL + CHECK (array_ndims(embedding) = 1 + AND array_length(embedding, 1) = {}) + ); + """).format(psql.Identifier(self.table_name), psql.Literal(dimension)) + self._client.execute(create_table_sql) + + # Wait for table to be fully ready before creating indexes + max_wait_seconds = 30 + poll_interval = 2 + for _ in range(max_wait_seconds // poll_interval): + if self._client.check_table_exist(self.table_name): + break + time.sleep(poll_interval) + else: + raise RuntimeError(f"Table {self.table_name} was not ready after {max_wait_seconds}s") + + # Open table and set vector index + table = self._client.open_table(self.table_name) + table.set_vector_index( + column="embedding", + distance_method=self._config.distance_method, + base_quantization_type=self._config.base_quantization_type, + max_degree=self._config.max_degree, + ef_construction=self._config.ef_construction, + use_reorder=self._config.base_quantization_type == "rabitq", + ) + + # Create full-text search index + table.create_text_index( + index_name=f"ft_idx_{self._collection_name}", + column="text", + tokenizer=self._config.tokenizer, + ) + + redis_client.set(collection_exist_cache_key, 1, ex=3600) + + +class HologresVectorFactory(AbstractVectorFactory): + """Factory class for creating HologresVector instances.""" + + def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings) -> HologresVector: + if dataset.index_struct_dict: + class_prefix: str = dataset.index_struct_dict["vector_store"]["class_prefix"] + collection_name = class_prefix + else: + dataset_id = dataset.id + collection_name = Dataset.gen_collection_name_by_id(dataset_id) + dataset.index_struct = json.dumps(self.gen_index_struct_dict(VectorType.HOLOGRES, collection_name)) + + return HologresVector( + collection_name=collection_name, + config=HologresVectorConfig( + host=dify_config.HOLOGRES_HOST or "", + port=dify_config.HOLOGRES_PORT, + database=dify_config.HOLOGRES_DATABASE or "", + access_key_id=dify_config.HOLOGRES_ACCESS_KEY_ID or "", + access_key_secret=dify_config.HOLOGRES_ACCESS_KEY_SECRET or "", + schema_name=dify_config.HOLOGRES_SCHEMA, + tokenizer=dify_config.HOLOGRES_TOKENIZER, + distance_method=dify_config.HOLOGRES_DISTANCE_METHOD, + base_quantization_type=dify_config.HOLOGRES_BASE_QUANTIZATION_TYPE, + max_degree=dify_config.HOLOGRES_MAX_DEGREE, + ef_construction=dify_config.HOLOGRES_EF_CONSTRUCTION, + ), + ) diff --git a/api/core/rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py b/api/core/rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py index b986c79e3a..90d9173409 100644 --- a/api/core/rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py +++ b/api/core/rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py @@ -135,8 +135,8 @@ class PGVectoRS(BaseVector): def get_ids_by_metadata_field(self, key: str, value: str): result = None with Session(self._client) as session: - select_statement = sql_text(f"SELECT id FROM {self._collection_name} WHERE meta->>'{key}' = '{value}'; ") - result = session.execute(select_statement).fetchall() + select_statement = sql_text(f"SELECT id FROM {self._collection_name} WHERE meta->>:key = :value") + result = session.execute(select_statement, {"key": key, "value": value}).fetchall() if result: return [item[0] for item in result] else: @@ -172,9 +172,9 @@ class PGVectoRS(BaseVector): def text_exists(self, id: str) -> bool: with Session(self._client) as session: select_statement = sql_text( - f"SELECT id FROM {self._collection_name} WHERE meta->>'doc_id' = 
'{id}' limit 1; " + f"SELECT id FROM {self._collection_name} WHERE meta->>'doc_id' = :doc_id limit 1" ) - result = session.execute(select_statement).fetchall() + result = session.execute(select_statement, {"doc_id": id}).fetchall() return len(result) > 0 def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]: diff --git a/api/core/rag/datasource/vdb/relyt/relyt_vector.py b/api/core/rag/datasource/vdb/relyt/relyt_vector.py index 70857b3e3c..e486375ec2 100644 --- a/api/core/rag/datasource/vdb/relyt/relyt_vector.py +++ b/api/core/rag/datasource/vdb/relyt/relyt_vector.py @@ -154,10 +154,8 @@ class RelytVector(BaseVector): def get_ids_by_metadata_field(self, key: str, value: str): result = None with Session(self.client) as session: - select_statement = sql_text( - f"""SELECT id FROM "{self._collection_name}" WHERE metadata->>'{key}' = '{value}'; """ - ) - result = session.execute(select_statement).fetchall() + select_statement = sql_text(f"""SELECT id FROM "{self._collection_name}" WHERE metadata->>:key = :value""") + result = session.execute(select_statement, {"key": key, "value": value}).fetchall() if result: return [item[0] for item in result] else: @@ -201,11 +199,10 @@ class RelytVector(BaseVector): def delete_by_ids(self, ids: list[str]): with Session(self.client) as session: - ids_str = ",".join(f"'{doc_id}'" for doc_id in ids) select_statement = sql_text( - f"""SELECT id FROM "{self._collection_name}" WHERE metadata->>'doc_id' in ({ids_str}); """ + f"""SELECT id FROM "{self._collection_name}" WHERE metadata->>'doc_id' = ANY(:doc_ids)""" ) - result = session.execute(select_statement).fetchall() + result = session.execute(select_statement, {"doc_ids": ids}).fetchall() if result: ids = [item[0] for item in result] self.delete_by_uuids(ids) @@ -218,9 +215,9 @@ class RelytVector(BaseVector): def text_exists(self, id: str) -> bool: with Session(self.client) as session: select_statement = sql_text( - f"""SELECT id FROM "{self._collection_name}" WHERE metadata->>'doc_id' = '{id}' limit 1; """ + f"""SELECT id FROM "{self._collection_name}" WHERE metadata->>'doc_id' = :doc_id limit 1""" ) - result = session.execute(select_statement).fetchall() + result = session.execute(select_statement, {"doc_id": id}).fetchall() return len(result) > 0 def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]: diff --git a/api/core/rag/datasource/vdb/vector_factory.py b/api/core/rag/datasource/vdb/vector_factory.py index 3225764693..cd12cd3fae 100644 --- a/api/core/rag/datasource/vdb/vector_factory.py +++ b/api/core/rag/datasource/vdb/vector_factory.py @@ -38,7 +38,7 @@ class AbstractVectorFactory(ABC): class Vector: def __init__(self, dataset: Dataset, attributes: list | None = None): if attributes is None: - attributes = ["doc_id", "dataset_id", "document_id", "doc_hash"] + attributes = ["doc_id", "dataset_id", "document_id", "doc_hash", "doc_type"] self._dataset = dataset self._embeddings = self._get_embeddings() self._attributes = attributes @@ -191,6 +191,10 @@ class Vector: from core.rag.datasource.vdb.iris.iris_vector import IrisVectorFactory return IrisVectorFactory + case VectorType.HOLOGRES: + from core.rag.datasource.vdb.hologres.hologres_vector import HologresVectorFactory + + return HologresVectorFactory case _: raise ValueError(f"Vector store {vector_type} is not supported.") diff --git a/api/core/rag/datasource/vdb/vector_type.py b/api/core/rag/datasource/vdb/vector_type.py index bd99a31446..9cce8e4c32 100644 --- 
a/api/core/rag/datasource/vdb/vector_type.py +++ b/api/core/rag/datasource/vdb/vector_type.py @@ -34,3 +34,4 @@ class VectorType(StrEnum): MATRIXONE = "matrixone" CLICKZETTA = "clickzetta" IRIS = "iris" + HOLOGRES = "hologres" diff --git a/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py b/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py index b48dd93f04..ae23c63b0a 100644 --- a/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py +++ b/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py @@ -196,6 +196,7 @@ class WeaviateVector(BaseVector): ), wc.Property(name="document_id", data_type=wc.DataType.TEXT), wc.Property(name="doc_id", data_type=wc.DataType.TEXT), + wc.Property(name="doc_type", data_type=wc.DataType.TEXT), wc.Property(name="chunk_index", data_type=wc.DataType.INT), ], vector_config=wc.Configure.Vectors.self_provided(), @@ -225,6 +226,8 @@ class WeaviateVector(BaseVector): to_add.append(wc.Property(name="document_id", data_type=wc.DataType.TEXT)) if "doc_id" not in existing: to_add.append(wc.Property(name="doc_id", data_type=wc.DataType.TEXT)) + if "doc_type" not in existing: + to_add.append(wc.Property(name="doc_type", data_type=wc.DataType.TEXT)) if "chunk_index" not in existing: to_add.append(wc.Property(name="chunk_index", data_type=wc.DataType.INT)) diff --git a/api/core/rag/index_processor/index_processor.py b/api/core/rag/index_processor/index_processor.py index c8f9d29012..a7c42c5a4e 100644 --- a/api/core/rag/index_processor/index_processor.py +++ b/api/core/rag/index_processor/index_processor.py @@ -9,8 +9,8 @@ from flask import current_app from sqlalchemy import delete, func, select from core.db.session_factory import session_factory -from dify_graph.nodes.knowledge_index.exc import KnowledgeIndexNodeError -from dify_graph.repositories.index_processor_protocol import Preview, PreviewItem, QaPreview +from core.workflow.nodes.knowledge_index.exc import KnowledgeIndexNodeError +from core.workflow.nodes.knowledge_index.protocols import Preview, PreviewItem, QaPreview from models.dataset import Dataset, Document, DocumentSegment from .index_processor_factory import IndexProcessorFactory diff --git a/api/core/rag/index_processor/index_processor_base.py b/api/core/rag/index_processor/index_processor_base.py index e8b3fa1508..f2191f3702 100644 --- a/api/core/rag/index_processor/index_processor_base.py +++ b/api/core/rag/index_processor/index_processor_base.py @@ -294,7 +294,7 @@ class BaseIndexProcessor(ABC): logging.warning("Error downloading image from %s: %s", image_url, str(e)) return None except Exception: - logging.exception("Unexpected error downloading image from %s", image_url) + logging.warning("Unexpected error downloading image from %s", image_url, exc_info=True) return None def _download_tool_file(self, tool_file_id: str, current_user: Account) -> str | None: diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index 8243170c62..4c96b63f25 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -56,18 +56,18 @@ from core.rag.retrieval.template_prompts import ( ) from core.tools.signature import sign_upload_file from core.tools.utils.dataset_retriever.dataset_retriever_base_tool import DatasetRetrieverBaseTool -from dify_graph.file import File, FileTransferMethod, FileType -from dify_graph.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMUsage -from dify_graph.model_runtime.entities.message_entities import PromptMessage, 
PromptMessageRole, PromptMessageTool -from dify_graph.model_runtime.entities.model_entities import ModelFeature, ModelType -from dify_graph.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel -from dify_graph.nodes.knowledge_retrieval import exc -from dify_graph.repositories.rag_retrieval_protocol import ( +from core.workflow.nodes.knowledge_retrieval import exc +from core.workflow.nodes.knowledge_retrieval.retrieval import ( KnowledgeRetrievalRequest, Source, SourceChildChunk, SourceMetadata, ) +from dify_graph.file import File, FileTransferMethod, FileType +from dify_graph.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMUsage +from dify_graph.model_runtime.entities.message_entities import PromptMessage, PromptMessageRole, PromptMessageTool +from dify_graph.model_runtime.entities.model_entities import ModelFeature, ModelType +from dify_graph.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel from extensions.ext_database import db from extensions.ext_redis import redis_client from libs.json_in_md_parser import parse_and_check_json_markdown @@ -83,6 +83,7 @@ from models.dataset import ( ) from models.dataset import Document as DatasetDocument from models.dataset import Document as DocumentModel +from models.enums import CreatorUserRole from services.external_knowledge_service import ExternalDatasetService from services.feature_service import FeatureService @@ -1009,7 +1010,7 @@ class DatasetRetrieval: content=json.dumps(contents), source="app", source_app_id=app_id, - created_by_role=user_from, + created_by_role=CreatorUserRole(user_from), created_by=user_id, ) dataset_queries.append(dataset_query) diff --git a/api/core/repositories/sqlalchemy_workflow_execution_repository.py b/api/core/repositories/sqlalchemy_workflow_execution_repository.py index 770df8b050..55e96515ac 100644 --- a/api/core/repositories/sqlalchemy_workflow_execution_repository.py +++ b/api/core/repositories/sqlalchemy_workflow_execution_repository.py @@ -146,7 +146,9 @@ class SQLAlchemyWorkflowExecutionRepository(WorkflowExecutionRepository): # No sequence number generation needed anymore - db_model.type = domain_model.workflow_type + from models.workflow import WorkflowType as ModelWorkflowType + + db_model.type = ModelWorkflowType(domain_model.workflow_type.value) db_model.version = domain_model.workflow_version db_model.graph = json.dumps(domain_model.graph) if domain_model.graph else None db_model.inputs = json.dumps(domain_model.inputs) if domain_model.inputs else None diff --git a/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py b/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py index 3fc333038d..7373ebc7cc 100644 --- a/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py +++ b/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py @@ -18,7 +18,7 @@ from tenacity import before_sleep_log, retry, retry_if_exception, stop_after_att from configs import dify_config from dify_graph.entities import WorkflowNodeExecution -from dify_graph.enums import NodeType, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus +from dify_graph.enums import WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus from dify_graph.model_runtime.utils.encoders import jsonable_encoder from dify_graph.repositories.workflow_node_execution_repository import OrderConfig, WorkflowNodeExecutionRepository from dify_graph.workflow_type_encoder import WorkflowRuntimeTypeConverter 
@@ -146,7 +146,7 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository) index=db_model.index, predecessor_node_id=db_model.predecessor_node_id, node_id=db_model.node_id, - node_type=NodeType(db_model.node_type), + node_type=db_model.node_type, title=db_model.title, inputs=inputs, process_data=process_data, diff --git a/api/core/telemetry/__init__.py b/api/core/telemetry/__init__.py new file mode 100644 index 0000000000..ae4f53f3b7 --- /dev/null +++ b/api/core/telemetry/__init__.py @@ -0,0 +1,43 @@ +"""Telemetry facade. + +Thin public API for emitting telemetry events. All routing logic +lives in ``core.telemetry.gateway`` which is shared by both CE and EE. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from core.ops.entities.trace_entity import TraceTaskName +from core.telemetry.events import TelemetryContext, TelemetryEvent +from core.telemetry.gateway import emit as gateway_emit +from core.telemetry.gateway import get_trace_task_to_case + +if TYPE_CHECKING: + from core.ops.ops_trace_manager import TraceQueueManager + + +def emit(event: TelemetryEvent, trace_manager: TraceQueueManager | None = None) -> None: + """Emit a telemetry event. + + Translates the ``TelemetryEvent`` (keyed by ``TraceTaskName``) into a + ``TelemetryCase`` and delegates to ``core.telemetry.gateway.emit()``. + """ + case = get_trace_task_to_case().get(event.name) + if case is None: + return + + context: dict[str, object] = { + "tenant_id": event.context.tenant_id, + "user_id": event.context.user_id, + "app_id": event.context.app_id, + } + gateway_emit(case, context, event.payload, trace_manager) + + +__all__ = [ + "TelemetryContext", + "TelemetryEvent", + "TraceTaskName", + "emit", +] diff --git a/api/core/telemetry/events.py b/api/core/telemetry/events.py new file mode 100644 index 0000000000..35ace47510 --- /dev/null +++ b/api/core/telemetry/events.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from core.ops.entities.trace_entity import TraceTaskName + + +@dataclass(frozen=True) +class TelemetryContext: + tenant_id: str | None = None + user_id: str | None = None + app_id: str | None = None + + +@dataclass(frozen=True) +class TelemetryEvent: + name: TraceTaskName + context: TelemetryContext + payload: dict[str, Any] diff --git a/api/core/telemetry/gateway.py b/api/core/telemetry/gateway.py new file mode 100644 index 0000000000..1ddb68ce1e --- /dev/null +++ b/api/core/telemetry/gateway.py @@ -0,0 +1,239 @@ +"""Telemetry gateway — single routing layer for all editions. + +Maps ``TelemetryCase`` → ``CaseRoute`` and dispatches events to either +the CE/EE trace pipeline (``TraceQueueManager``) or the enterprise-only +metric/log Celery queue. + +This module lives in ``core/`` so both CE and EE share one routing table +and one ``emit()`` entry point. No separate enterprise gateway module is +needed — enterprise-specific dispatch (Celery task, payload offloading) +is handled here behind lazy imports that no-op in CE. 
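+
+A minimal usage sketch (values are illustrative; in CE an enterprise-only
+case such as ``APP_CREATED`` is silently dropped)::
+
+    from core.telemetry.gateway import emit
+    from enterprise.telemetry.contracts import TelemetryCase
+
+    emit(
+        TelemetryCase.APP_CREATED,
+        context={"tenant_id": "tenant-1", "user_id": "user-1", "app_id": "app-1"},
+        payload={"app_name": "demo"},
+    )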
+""" + +from __future__ import annotations + +import json +import logging +import uuid +from typing import TYPE_CHECKING, Any + +from core.ops.entities.trace_entity import TraceTaskName +from enterprise.telemetry.contracts import SignalType +from extensions.ext_storage import storage + +if TYPE_CHECKING: + from core.ops.ops_trace_manager import TraceQueueManager + from enterprise.telemetry.contracts import TelemetryCase + +logger = logging.getLogger(__name__) + +PAYLOAD_SIZE_THRESHOLD_BYTES = 1 * 1024 * 1024 + +# --------------------------------------------------------------------------- +# Routing table — authoritative mapping for all editions +# --------------------------------------------------------------------------- + +_case_to_trace_task: dict | None = None +_case_routing: dict | None = None + + +def _get_case_to_trace_task() -> dict: + global _case_to_trace_task + if _case_to_trace_task is None: + from enterprise.telemetry.contracts import TelemetryCase + + _case_to_trace_task = { + TelemetryCase.WORKFLOW_RUN: TraceTaskName.WORKFLOW_TRACE, + TelemetryCase.MESSAGE_RUN: TraceTaskName.MESSAGE_TRACE, + TelemetryCase.NODE_EXECUTION: TraceTaskName.NODE_EXECUTION_TRACE, + TelemetryCase.DRAFT_NODE_EXECUTION: TraceTaskName.DRAFT_NODE_EXECUTION_TRACE, + TelemetryCase.PROMPT_GENERATION: TraceTaskName.PROMPT_GENERATION_TRACE, + TelemetryCase.TOOL_EXECUTION: TraceTaskName.TOOL_TRACE, + TelemetryCase.MODERATION_CHECK: TraceTaskName.MODERATION_TRACE, + TelemetryCase.SUGGESTED_QUESTION: TraceTaskName.SUGGESTED_QUESTION_TRACE, + TelemetryCase.DATASET_RETRIEVAL: TraceTaskName.DATASET_RETRIEVAL_TRACE, + TelemetryCase.GENERATE_NAME: TraceTaskName.GENERATE_NAME_TRACE, + } + return _case_to_trace_task + + +def get_trace_task_to_case() -> dict: + """Return TraceTaskName → TelemetryCase (inverse of _get_case_to_trace_task).""" + return {v: k for k, v in _get_case_to_trace_task().items()} + + +def _get_case_routing() -> dict: + global _case_routing + if _case_routing is None: + from enterprise.telemetry.contracts import CaseRoute, SignalType, TelemetryCase + + _case_routing = { + # TRACE — CE-eligible (flow in both CE and EE) + TelemetryCase.WORKFLOW_RUN: CaseRoute(signal_type=SignalType.TRACE, ce_eligible=True), + TelemetryCase.MESSAGE_RUN: CaseRoute(signal_type=SignalType.TRACE, ce_eligible=True), + TelemetryCase.TOOL_EXECUTION: CaseRoute(signal_type=SignalType.TRACE, ce_eligible=True), + TelemetryCase.MODERATION_CHECK: CaseRoute(signal_type=SignalType.TRACE, ce_eligible=True), + TelemetryCase.SUGGESTED_QUESTION: CaseRoute(signal_type=SignalType.TRACE, ce_eligible=True), + TelemetryCase.DATASET_RETRIEVAL: CaseRoute(signal_type=SignalType.TRACE, ce_eligible=True), + TelemetryCase.GENERATE_NAME: CaseRoute(signal_type=SignalType.TRACE, ce_eligible=True), + # TRACE — enterprise-only + TelemetryCase.NODE_EXECUTION: CaseRoute(signal_type=SignalType.TRACE, ce_eligible=False), + TelemetryCase.DRAFT_NODE_EXECUTION: CaseRoute(signal_type=SignalType.TRACE, ce_eligible=False), + TelemetryCase.PROMPT_GENERATION: CaseRoute(signal_type=SignalType.TRACE, ce_eligible=False), + # METRIC_LOG — enterprise-only (signal-driven, not trace) + TelemetryCase.APP_CREATED: CaseRoute(signal_type=SignalType.METRIC_LOG, ce_eligible=False), + TelemetryCase.APP_UPDATED: CaseRoute(signal_type=SignalType.METRIC_LOG, ce_eligible=False), + TelemetryCase.APP_DELETED: CaseRoute(signal_type=SignalType.METRIC_LOG, ce_eligible=False), + TelemetryCase.FEEDBACK_CREATED: CaseRoute(signal_type=SignalType.METRIC_LOG, ce_eligible=False), + } + return 
_case_routing + + +def __getattr__(name: str) -> dict: + """Lazy module-level access to routing tables.""" + if name == "CASE_ROUTING": + return _get_case_routing() + if name == "CASE_TO_TRACE_TASK": + return _get_case_to_trace_task() + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def is_enterprise_telemetry_enabled() -> bool: + try: + from enterprise.telemetry.exporter import is_enterprise_telemetry_enabled + + return is_enterprise_telemetry_enabled() + except Exception: + return False + + +def _handle_payload_sizing( + payload: dict[str, Any], + tenant_id: str, + event_id: str, +) -> tuple[dict[str, Any], str | None]: + """Inline or offload payload based on size. + + Returns ``(payload_for_envelope, storage_key | None)``. Payloads + exceeding ``PAYLOAD_SIZE_THRESHOLD_BYTES`` are written to object + storage and replaced with an empty dict in the envelope. + """ + try: + payload_json = json.dumps(payload) + payload_size = len(payload_json.encode("utf-8")) + except (TypeError, ValueError): + logger.warning("Failed to serialize payload for sizing: event_id=%s", event_id) + return payload, None + + if payload_size <= PAYLOAD_SIZE_THRESHOLD_BYTES: + return payload, None + + storage_key = f"telemetry/{tenant_id}/{event_id}.json" + try: + storage.save(storage_key, payload_json.encode("utf-8")) + logger.debug("Stored large payload to storage: key=%s, size=%d", storage_key, payload_size) + return {}, storage_key + except Exception: + logger.warning("Failed to store large payload, inlining instead: event_id=%s", event_id, exc_info=True) + return payload, None + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + + +def emit( + case: TelemetryCase, + context: dict[str, Any], + payload: dict[str, Any], + trace_manager: TraceQueueManager | None = None, +) -> None: + """Route a telemetry event to the correct pipeline. + + TRACE events are enqueued into ``TraceQueueManager`` (works in both CE + and EE). Enterprise-only traces are silently dropped when EE is + disabled. + + METRIC_LOG events are dispatched to the enterprise Celery queue; + silently dropped when enterprise telemetry is unavailable. 
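+
+    ``context`` is a loose mapping carrying ``tenant_id``, ``user_id`` and
+    ``app_id`` (each may be ``None``). ``payload`` is passed through to the
+    underlying pipeline, except that oversized metric/log payloads may be
+    offloaded to object storage first (see ``_handle_payload_sizing``).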
+ """ + route = _get_case_routing().get(case) + if route is None: + logger.warning("Unknown telemetry case: %s, dropping event", case) + return + + if not route.ce_eligible and not is_enterprise_telemetry_enabled(): + logger.debug("Dropping EE-only event: case=%s (EE disabled)", case) + return + + if route.signal_type == SignalType.TRACE: + _emit_trace(case, context, payload, trace_manager) + else: + _emit_metric_log(case, context, payload) + + +def _emit_trace( + case: TelemetryCase, + context: dict[str, Any], + payload: dict[str, Any], + trace_manager: TraceQueueManager | None, +) -> None: + from core.ops.ops_trace_manager import TraceQueueManager as LocalTraceQueueManager + from core.ops.ops_trace_manager import TraceTask + + trace_task_name = _get_case_to_trace_task().get(case) + if trace_task_name is None: + logger.warning("No TraceTaskName mapping for case: %s", case) + return + + queue_manager = trace_manager or LocalTraceQueueManager( + app_id=context.get("app_id"), + user_id=context.get("user_id"), + ) + queue_manager.add_trace_task(TraceTask(trace_task_name, user_id=context.get("user_id"), **payload)) + logger.debug("Enqueued trace task: case=%s, app_id=%s", case, context.get("app_id")) + + +def _emit_metric_log( + case: TelemetryCase, + context: dict[str, Any], + payload: dict[str, Any], +) -> None: + """Build envelope and dispatch to enterprise Celery queue. + + No-ops when the enterprise telemetry task is not importable (CE mode). + """ + try: + from tasks.enterprise_telemetry_task import process_enterprise_telemetry + except ImportError: + logger.debug("Enterprise metric/log dispatch unavailable, dropping: case=%s", case) + return + + tenant_id = context.get("tenant_id") or "" + event_id = str(uuid.uuid4()) + + payload_for_envelope, payload_ref = _handle_payload_sizing(payload, tenant_id, event_id) + + from enterprise.telemetry.contracts import TelemetryEnvelope + + envelope = TelemetryEnvelope( + case=case, + tenant_id=tenant_id, + event_id=event_id, + payload=payload_for_envelope, + metadata={"payload_ref": payload_ref} if payload_ref else None, + ) + + process_enterprise_telemetry.delay(envelope.model_dump_json()) + logger.debug( + "Enqueued metric/log event: case=%s, tenant_id=%s, event_id=%s", + case, + tenant_id, + event_id, + ) diff --git a/api/core/tools/utils/configuration.py b/api/core/tools/utils/configuration.py index 3ac487a471..37a2c957b0 100644 --- a/api/core/tools/utils/configuration.py +++ b/api/core/tools/utils/configuration.py @@ -116,6 +116,7 @@ class ToolParameterConfigurationManager: return a deep copy of parameters with decrypted values """ + parameters = self._deep_copy(parameters) cache = ToolParameterCache( tenant_id=self.tenant_id, diff --git a/api/core/tools/utils/workflow_configuration_sync.py b/api/core/tools/utils/workflow_configuration_sync.py index d8ce53083b..28f1376655 100644 --- a/api/core/tools/utils/workflow_configuration_sync.py +++ b/api/core/tools/utils/workflow_configuration_sync.py @@ -3,7 +3,7 @@ from typing import Any from core.tools.entities.tool_entities import WorkflowToolParameterConfiguration from core.tools.errors import WorkflowToolHumanInputNotSupportedError -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from dify_graph.nodes.base.entities import OutputVariableEntity from dify_graph.variables.input_entities import VariableEntity @@ -51,7 +51,7 @@ class WorkflowToolConfigurationUtils: def ensure_no_human_input_nodes(cls, graph: Mapping[str, Any]) -> None: nodes = graph.get("nodes", []) 
for node in nodes: - if node.get("data", {}).get("type") == NodeType.HUMAN_INPUT: + if node.get("data", {}).get("type") == BuiltinNodeTypes.HUMAN_INPUT: raise WorkflowToolHumanInputNotSupportedError() @classmethod diff --git a/api/core/trigger/constants.py b/api/core/trigger/constants.py new file mode 100644 index 0000000000..bfa45c3f2b --- /dev/null +++ b/api/core/trigger/constants.py @@ -0,0 +1,18 @@ +from typing import Final + +TRIGGER_WEBHOOK_NODE_TYPE: Final[str] = "trigger-webhook" +TRIGGER_SCHEDULE_NODE_TYPE: Final[str] = "trigger-schedule" +TRIGGER_PLUGIN_NODE_TYPE: Final[str] = "trigger-plugin" +TRIGGER_INFO_METADATA_KEY: Final[str] = "trigger_info" + +TRIGGER_NODE_TYPES: Final[frozenset[str]] = frozenset( + { + TRIGGER_WEBHOOK_NODE_TYPE, + TRIGGER_SCHEDULE_NODE_TYPE, + TRIGGER_PLUGIN_NODE_TYPE, + } +) + + +def is_trigger_node_type(node_type: str) -> bool: + return node_type in TRIGGER_NODE_TYPES diff --git a/api/core/trigger/debug/event_selectors.py b/api/core/trigger/debug/event_selectors.py index 442a2434d5..2a133b2b94 100644 --- a/api/core/trigger/debug/event_selectors.py +++ b/api/core/trigger/debug/event_selectors.py @@ -11,6 +11,11 @@ from typing import Any from pydantic import BaseModel from core.plugin.entities.request import TriggerInvokeEventResponse +from core.trigger.constants import ( + TRIGGER_PLUGIN_NODE_TYPE, + TRIGGER_SCHEDULE_NODE_TYPE, + TRIGGER_WEBHOOK_NODE_TYPE, +) from core.trigger.debug.event_bus import TriggerDebugEventBus from core.trigger.debug.events import ( PluginTriggerDebugEvent, @@ -19,10 +24,9 @@ from core.trigger.debug.events import ( build_plugin_pool_key, build_webhook_pool_key, ) +from core.workflow.nodes.trigger_plugin.entities import TriggerEventNodeData +from core.workflow.nodes.trigger_schedule.entities import ScheduleConfig from dify_graph.entities.graph_config import NodeConfigDict -from dify_graph.enums import NodeType -from dify_graph.nodes.trigger_plugin.entities import TriggerEventNodeData -from dify_graph.nodes.trigger_schedule.entities import ScheduleConfig from extensions.ext_redis import redis_client from libs.datetime_utils import ensure_naive_utc, naive_utc_now from libs.schedule_utils import calculate_next_run_at @@ -206,21 +210,19 @@ def create_event_poller( if not node_config: raise ValueError("Node data not found for node %s", node_id) node_type = draft_workflow.get_node_type_from_node_config(node_config) - match node_type: - case NodeType.TRIGGER_PLUGIN: - return PluginTriggerDebugEventPoller( - tenant_id=tenant_id, user_id=user_id, app_id=app_id, node_config=node_config, node_id=node_id - ) - case NodeType.TRIGGER_WEBHOOK: - return WebhookTriggerDebugEventPoller( - tenant_id=tenant_id, user_id=user_id, app_id=app_id, node_config=node_config, node_id=node_id - ) - case NodeType.TRIGGER_SCHEDULE: - return ScheduleTriggerDebugEventPoller( - tenant_id=tenant_id, user_id=user_id, app_id=app_id, node_config=node_config, node_id=node_id - ) - case _: - raise ValueError("unable to create event poller for node type %s", node_type) + if node_type == TRIGGER_PLUGIN_NODE_TYPE: + return PluginTriggerDebugEventPoller( + tenant_id=tenant_id, user_id=user_id, app_id=app_id, node_config=node_config, node_id=node_id + ) + if node_type == TRIGGER_WEBHOOK_NODE_TYPE: + return WebhookTriggerDebugEventPoller( + tenant_id=tenant_id, user_id=user_id, app_id=app_id, node_config=node_config, node_id=node_id + ) + if node_type == TRIGGER_SCHEDULE_NODE_TYPE: + return ScheduleTriggerDebugEventPoller( + tenant_id=tenant_id, user_id=user_id, 
app_id=app_id, node_config=node_config, node_id=node_id
+        )
+    raise ValueError(f"unable to create event poller for node type {node_type}")


 def select_trigger_debug_events(
diff --git a/api/core/workflow/__init__.py b/api/core/workflow/__init__.py
index 57c2ef3d10..937012dcee 100644
--- a/api/core/workflow/__init__.py
+++ b/api/core/workflow/__init__.py
@@ -1,4 +1 @@
-from .node_factory import DifyNodeFactory
-from .workflow_entry import WorkflowEntry
-
-__all__ = ["DifyNodeFactory", "WorkflowEntry"]
+"""Core workflow package."""
diff --git a/api/core/workflow/node_factory.py b/api/core/workflow/node_factory.py
index d7f2a67c06..ab34263a79 100644
--- a/api/core/workflow/node_factory.py
+++ b/api/core/workflow/node_factory.py
@@ -1,4 +1,7 @@
-from collections.abc import Callable, Mapping
+import importlib
+import pkgutil
+from collections.abc import Callable, Iterator, Mapping, MutableMapping
+from functools import lru_cache
 from typing import TYPE_CHECKING, Any, TypeAlias, cast, final

 from sqlalchemy import select
@@ -8,7 +11,6 @@ from typing_extensions import override
 from configs import dify_config
 from core.app.entities.app_invoke_entities import DifyRunContext
 from core.app.llm.model_access import build_dify_model_access
-from core.datasource.datasource_manager import DatasourceManager
 from core.helper.code_executor.code_executor import (
     CodeExecutionError,
     CodeExecutor,
@@ -17,15 +19,19 @@ from core.helper.ssrf_proxy import ssrf_proxy
 from core.memory.token_buffer_memory import TokenBufferMemory
 from core.model_manager import ModelInstance
 from core.prompt.entities.advanced_prompt_entities import MemoryConfig
-from core.rag.index_processor.index_processor import IndexProcessor
-from core.rag.retrieval.dataset_retrieval import DatasetRetrieval
-from core.rag.summary_index.summary_index import SummaryIndex
 from core.repositories.human_input_repository import HumanInputFormRepositoryImpl
 from core.tools.tool_file_manager import ToolFileManager
+from core.trigger.constants import TRIGGER_NODE_TYPES
+from core.workflow.nodes.agent.message_transformer import AgentMessageTransformer
+from core.workflow.nodes.agent.plugin_strategy_adapter import (
+    PluginAgentStrategyPresentationProvider,
+    PluginAgentStrategyResolver,
+)
+from core.workflow.nodes.agent.runtime_support import AgentRuntimeSupport
 from dify_graph.entities.base_node_data import BaseNodeData
 from dify_graph.entities.graph_config import NodeConfigDict, NodeConfigDictAdapter
 from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY
-from dify_graph.enums import NodeType, SystemVariableKey
+from dify_graph.enums import BuiltinNodeTypes, NodeType, SystemVariableKey
 from dify_graph.file.file_manager import file_manager
 from dify_graph.graph.graph import NodeFactory
 from dify_graph.model_runtime.entities.model_entities import ModelType
@@ -39,7 +45,7 @@ from dify_graph.nodes.document_extractor import UnstructuredApiConfig
 from dify_graph.nodes.http_request import build_http_request_config
 from dify_graph.nodes.llm.entities import LLMNodeData
 from dify_graph.nodes.llm.exc import LLMModeRequiredError, ModelNotExistError
-from dify_graph.nodes.node_mapping import LATEST_VERSION, NODE_TYPE_CLASSES_MAPPING
+from dify_graph.nodes.llm.protocols import TemplateRenderer
 from dify_graph.nodes.parameter_extractor.entities import ParameterExtractorNodeData
 from dify_graph.nodes.question_classifier.entities import QuestionClassifierNodeData
 from dify_graph.nodes.template_transform.template_renderer import (
@@ -53,6 +59,135 @@ if
TYPE_CHECKING: from dify_graph.entities import GraphInitParams from dify_graph.runtime import GraphRuntimeState +LATEST_VERSION = "latest" +_START_NODE_TYPES: frozenset[NodeType] = frozenset( + (BuiltinNodeTypes.START, BuiltinNodeTypes.DATASOURCE, *TRIGGER_NODE_TYPES) +) + + +def _import_node_package(package_name: str, *, excluded_modules: frozenset[str] = frozenset()) -> None: + package = importlib.import_module(package_name) + for _, module_name, _ in pkgutil.walk_packages(package.__path__, package.__name__ + "."): + if module_name in excluded_modules: + continue + importlib.import_module(module_name) + + +@lru_cache(maxsize=1) +def register_nodes() -> None: + """Import production node modules so they self-register with ``Node``.""" + _import_node_package("dify_graph.nodes") + _import_node_package("core.workflow.nodes") + + +def get_node_type_classes_mapping() -> Mapping[NodeType, Mapping[str, type[Node]]]: + """Return a read-only snapshot of the current production node registry. + + The workflow layer owns node bootstrap because it must compose built-in + `dify_graph.nodes.*` implementations with workflow-local nodes under + `core.workflow.nodes.*`. Keeping this import side effect here avoids + reintroducing registry bootstrapping into lower-level graph primitives. + """ + register_nodes() + return Node.get_node_type_classes_mapping() + + +def resolve_workflow_node_class(*, node_type: NodeType, node_version: str) -> type[Node]: + node_mapping = get_node_type_classes_mapping().get(node_type) + if not node_mapping: + raise ValueError(f"No class mapping found for node type: {node_type}") + + latest_node_class = node_mapping.get(LATEST_VERSION) + matched_node_class = node_mapping.get(node_version) + node_class = matched_node_class or latest_node_class + if not node_class: + raise ValueError(f"No latest version class found for node type: {node_type}") + return node_class + + +def is_start_node_type(node_type: NodeType) -> bool: + """Return True when the node type can serve as a workflow entry point.""" + return node_type in _START_NODE_TYPES + + +def get_default_root_node_id(graph_config: Mapping[str, Any]) -> str: + """Resolve the default entry node for a persisted top-level workflow graph. + + This workflow-layer helper depends on start-node semantics defined by + `is_start_node_type`, so it intentionally lives next to the node registry + instead of in the raw `dify_graph.entities.graph_config` schema module. 
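+
+    A minimal sketch of the expected graph shape (the ids and the ``"start"``
+    type value are hypothetical)::
+
+        {
+            "nodes": [
+                {"id": "note-1", "type": "custom-note"},
+                {"id": "start-1", "data": {"type": "start"}},
+            ]
+        }
+
+    Here ``note-1`` is skipped and ``start-1`` is returned.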
+ """ + nodes = graph_config.get("nodes") + if not isinstance(nodes, list): + raise ValueError("nodes in workflow graph must be a list") + + for node in nodes: + if not isinstance(node, Mapping): + continue + + if node.get("type") == "custom-note": + continue + + node_id = node.get("id") + data = node.get("data") + if not isinstance(node_id, str) or not isinstance(data, Mapping): + continue + + node_type = data.get("type") + if isinstance(node_type, str) and is_start_node_type(node_type): + return node_id + + raise ValueError("Unable to determine default root node ID from workflow graph") + + +class _LazyNodeTypeClassesMapping(MutableMapping[NodeType, Mapping[str, type[Node]]]): + """Mutable dict-like view over the current node registry.""" + + def __init__(self) -> None: + self._cached_snapshot: dict[NodeType, Mapping[str, type[Node]]] = {} + self._cached_version = -1 + self._deleted: set[NodeType] = set() + self._overrides: dict[NodeType, Mapping[str, type[Node]]] = {} + + def _snapshot(self) -> dict[NodeType, Mapping[str, type[Node]]]: + current_version = Node.get_registry_version() + if self._cached_version != current_version: + self._cached_snapshot = dict(get_node_type_classes_mapping()) + self._cached_version = current_version + if not self._deleted and not self._overrides: + return self._cached_snapshot + + snapshot = {key: value for key, value in self._cached_snapshot.items() if key not in self._deleted} + snapshot.update(self._overrides) + return snapshot + + def __getitem__(self, key: NodeType) -> Mapping[str, type[Node]]: + return self._snapshot()[key] + + def __setitem__(self, key: NodeType, value: Mapping[str, type[Node]]) -> None: + self._deleted.discard(key) + self._overrides[key] = value + + def __delitem__(self, key: NodeType) -> None: + if key in self._overrides: + del self._overrides[key] + return + if key in self._cached_snapshot: + self._deleted.add(key) + return + raise KeyError(key) + + def __iter__(self) -> Iterator[NodeType]: + return iter(self._snapshot()) + + def __len__(self) -> int: + return len(self._snapshot()) + + +# Keep the canonical node-class mapping in the workflow layer that also bootstraps +# legacy `core.workflow.nodes.*` registrations. +NODE_TYPE_CLASSES_MAPPING: MutableMapping[NodeType, Mapping[str, type[Node]]] = _LazyNodeTypeClassesMapping() + LLMCompatibleNodeData: TypeAlias = LLMNodeData | QuestionClassifierNodeData | ParameterExtractorNodeData @@ -94,13 +229,20 @@ class DefaultWorkflowCodeExecutor: return isinstance(error, CodeExecutionError) +class DefaultLLMTemplateRenderer(TemplateRenderer): + def render_jinja2(self, *, template: str, inputs: Mapping[str, Any]) -> str: + result = CodeExecutor.execute_workflow_code_template( + language=CodeLanguage.JINJA2, + code=template, + inputs=inputs, + ) + return str(result.get("result", "")) + + @final class DifyNodeFactory(NodeFactory): """ - Default implementation of NodeFactory that uses the traditional node mapping. - - This factory creates nodes by looking up their types in NODE_TYPE_CLASSES_MAPPING - and instantiating the appropriate node class. + Default implementation of NodeFactory that resolves node classes from the live registry. 
""" def __init__( @@ -123,11 +265,11 @@ class DifyNodeFactory(NodeFactory): max_object_array_length=dify_config.CODE_MAX_OBJECT_ARRAY_LENGTH, ) self._template_renderer = CodeExecutorJinja2TemplateRenderer(code_executor=self._code_executor) + self._llm_template_renderer: TemplateRenderer = DefaultLLMTemplateRenderer() self._template_transform_max_output_length = dify_config.TEMPLATE_TRANSFORM_MAX_LENGTH self._http_request_http_client = ssrf_proxy self._http_request_tool_file_manager_factory = ToolFileManager self._http_request_file_manager = file_manager - self._rag_retrieval = DatasetRetrieval() self._document_extractor_unstructured_api_config = UnstructuredApiConfig( api_url=dify_config.UNSTRUCTURED_API_URL, api_key=dify_config.UNSTRUCTURED_API_KEY or "", @@ -143,6 +285,10 @@ class DifyNodeFactory(NodeFactory): ) self._llm_credentials_provider, self._llm_model_factory = build_dify_model_access(self._dify_context.tenant_id) + self._agent_strategy_resolver = PluginAgentStrategyResolver() + self._agent_strategy_presentation_provider = PluginAgentStrategyPresentationProvider() + self._agent_runtime_support = AgentRuntimeSupport() + self._agent_message_transformer = AgentMessageTransformer() @staticmethod def _resolve_dify_context(run_context: Mapping[str, Any]) -> DifyRunContext: @@ -170,55 +316,51 @@ class DifyNodeFactory(NodeFactory): node_class = self._resolve_node_class(node_type=node_data.type, node_version=str(node_data.version)) node_type = node_data.type node_init_kwargs_factories: Mapping[NodeType, Callable[[], dict[str, object]]] = { - NodeType.CODE: lambda: { + BuiltinNodeTypes.CODE: lambda: { "code_executor": self._code_executor, "code_limits": self._code_limits, }, - NodeType.TEMPLATE_TRANSFORM: lambda: { + BuiltinNodeTypes.TEMPLATE_TRANSFORM: lambda: { "template_renderer": self._template_renderer, "max_output_length": self._template_transform_max_output_length, }, - NodeType.HTTP_REQUEST: lambda: { + BuiltinNodeTypes.HTTP_REQUEST: lambda: { "http_request_config": self._http_request_config, "http_client": self._http_request_http_client, "tool_file_manager_factory": self._http_request_tool_file_manager_factory, "file_manager": self._http_request_file_manager, }, - NodeType.HUMAN_INPUT: lambda: { + BuiltinNodeTypes.HUMAN_INPUT: lambda: { "form_repository": HumanInputFormRepositoryImpl(tenant_id=self._dify_context.tenant_id), }, - NodeType.KNOWLEDGE_INDEX: lambda: { - "index_processor": IndexProcessor(), - "summary_index_service": SummaryIndex(), - }, - NodeType.LLM: lambda: self._build_llm_compatible_node_init_kwargs( + BuiltinNodeTypes.LLM: lambda: self._build_llm_compatible_node_init_kwargs( node_class=node_class, node_data=node_data, include_http_client=True, ), - NodeType.DATASOURCE: lambda: { - "datasource_manager": DatasourceManager, - }, - NodeType.KNOWLEDGE_RETRIEVAL: lambda: { - "rag_retrieval": self._rag_retrieval, - }, - NodeType.DOCUMENT_EXTRACTOR: lambda: { + BuiltinNodeTypes.DOCUMENT_EXTRACTOR: lambda: { "unstructured_api_config": self._document_extractor_unstructured_api_config, "http_client": self._http_request_http_client, }, - NodeType.QUESTION_CLASSIFIER: lambda: self._build_llm_compatible_node_init_kwargs( + BuiltinNodeTypes.QUESTION_CLASSIFIER: lambda: self._build_llm_compatible_node_init_kwargs( node_class=node_class, node_data=node_data, include_http_client=True, ), - NodeType.PARAMETER_EXTRACTOR: lambda: self._build_llm_compatible_node_init_kwargs( + BuiltinNodeTypes.PARAMETER_EXTRACTOR: lambda: self._build_llm_compatible_node_init_kwargs( 
node_class=node_class, node_data=node_data, include_http_client=False, ), - NodeType.TOOL: lambda: { + BuiltinNodeTypes.TOOL: lambda: { "tool_file_manager_factory": self._http_request_tool_file_manager_factory(), }, + BuiltinNodeTypes.AGENT: lambda: { + "strategy_resolver": self._agent_strategy_resolver, + "presentation_provider": self._agent_strategy_presentation_provider, + "runtime_support": self._agent_runtime_support, + "message_transformer": self._agent_message_transformer, + }, } node_init_kwargs = node_init_kwargs_factories.get(node_type, lambda: {})() return node_class( @@ -238,16 +380,7 @@ class DifyNodeFactory(NodeFactory): @staticmethod def _resolve_node_class(*, node_type: NodeType, node_version: str) -> type[Node]: - node_mapping = NODE_TYPE_CLASSES_MAPPING.get(node_type) - if not node_mapping: - raise ValueError(f"No class mapping found for node type: {node_type}") - - latest_node_class = node_mapping.get(LATEST_VERSION) - matched_node_class = node_mapping.get(node_version) - node_class = matched_node_class or latest_node_class - if not node_class: - raise ValueError(f"No latest version class found for node type: {node_type}") - return node_class + return resolve_workflow_node_class(node_type=node_type, node_version=node_version) def _build_llm_compatible_node_init_kwargs( self, @@ -270,6 +403,8 @@ class DifyNodeFactory(NodeFactory): model_instance=model_instance, ), } + if validated_node_data.type in {BuiltinNodeTypes.LLM, BuiltinNodeTypes.QUESTION_CLASSIFIER}: + node_init_kwargs["template_renderer"] = self._llm_template_renderer if include_http_client: node_init_kwargs["http_client"] = self._http_request_http_client return node_init_kwargs diff --git a/api/core/workflow/nodes/__init__.py b/api/core/workflow/nodes/__init__.py new file mode 100644 index 0000000000..d23f80be59 --- /dev/null +++ b/api/core/workflow/nodes/__init__.py @@ -0,0 +1 @@ +"""Workflow node implementations that remain under the legacy core.workflow namespace.""" diff --git a/api/core/workflow/nodes/agent/__init__.py b/api/core/workflow/nodes/agent/__init__.py new file mode 100644 index 0000000000..ba6c667194 --- /dev/null +++ b/api/core/workflow/nodes/agent/__init__.py @@ -0,0 +1,4 @@ +from .agent_node import AgentNode +from .entities import AgentNodeData + +__all__ = ["AgentNode", "AgentNodeData"] diff --git a/api/core/workflow/nodes/agent/agent_node.py b/api/core/workflow/nodes/agent/agent_node.py new file mode 100644 index 0000000000..5699ccf404 --- /dev/null +++ b/api/core/workflow/nodes/agent/agent_node.py @@ -0,0 +1,188 @@ +from __future__ import annotations + +from collections.abc import Generator, Mapping, Sequence +from typing import TYPE_CHECKING, Any + +from dify_graph.entities.graph_config import NodeConfigDict +from dify_graph.enums import BuiltinNodeTypes, SystemVariableKey, WorkflowNodeExecutionStatus +from dify_graph.node_events import NodeEventBase, NodeRunResult, StreamCompletedEvent +from dify_graph.nodes.base.node import Node +from dify_graph.nodes.base.variable_template_parser import VariableTemplateParser + +from .entities import AgentNodeData +from .exceptions import ( + AgentInvocationError, + AgentMessageTransformError, +) +from .message_transformer import AgentMessageTransformer +from .runtime_support import AgentRuntimeSupport +from .strategy_protocols import AgentStrategyPresentationProvider, AgentStrategyResolver + +if TYPE_CHECKING: + from dify_graph.entities import GraphInitParams + from dify_graph.runtime import GraphRuntimeState + + +class AgentNode(Node[AgentNodeData]): 
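+    """Workflow node that executes a pluggable agent strategy.
+
+    Collaborators are injected by the node factory: a strategy resolver, a
+    presentation provider (strategy icons), runtime support (parameter and
+    credential building), and a message transformer that converts the
+    strategy's message stream into node events.
+    """
+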
+ node_type = BuiltinNodeTypes.AGENT + + _strategy_resolver: AgentStrategyResolver + _presentation_provider: AgentStrategyPresentationProvider + _runtime_support: AgentRuntimeSupport + _message_transformer: AgentMessageTransformer + + def __init__( + self, + id: str, + config: NodeConfigDict, + graph_init_params: GraphInitParams, + graph_runtime_state: GraphRuntimeState, + *, + strategy_resolver: AgentStrategyResolver, + presentation_provider: AgentStrategyPresentationProvider, + runtime_support: AgentRuntimeSupport, + message_transformer: AgentMessageTransformer, + ) -> None: + super().__init__( + id=id, + config=config, + graph_init_params=graph_init_params, + graph_runtime_state=graph_runtime_state, + ) + self._strategy_resolver = strategy_resolver + self._presentation_provider = presentation_provider + self._runtime_support = runtime_support + self._message_transformer = message_transformer + + @classmethod + def version(cls) -> str: + return "1" + + def populate_start_event(self, event) -> None: + dify_ctx = self.require_dify_context() + event.extras["agent_strategy"] = { + "name": self.node_data.agent_strategy_name, + "icon": self._presentation_provider.get_icon( + tenant_id=dify_ctx.tenant_id, + agent_strategy_provider_name=self.node_data.agent_strategy_provider_name, + ), + } + + def _run(self) -> Generator[NodeEventBase, None, None]: + from core.plugin.impl.exc import PluginDaemonClientSideError + + dify_ctx = self.require_dify_context() + + try: + strategy = self._strategy_resolver.resolve( + tenant_id=dify_ctx.tenant_id, + agent_strategy_provider_name=self.node_data.agent_strategy_provider_name, + agent_strategy_name=self.node_data.agent_strategy_name, + ) + except Exception as e: + yield StreamCompletedEvent( + node_run_result=NodeRunResult( + status=WorkflowNodeExecutionStatus.FAILED, + inputs={}, + error=f"Failed to get agent strategy: {str(e)}", + ), + ) + return + + agent_parameters = strategy.get_parameters() + + parameters = self._runtime_support.build_parameters( + agent_parameters=agent_parameters, + variable_pool=self.graph_runtime_state.variable_pool, + node_data=self.node_data, + strategy=strategy, + tenant_id=dify_ctx.tenant_id, + app_id=dify_ctx.app_id, + invoke_from=dify_ctx.invoke_from, + ) + parameters_for_log = self._runtime_support.build_parameters( + agent_parameters=agent_parameters, + variable_pool=self.graph_runtime_state.variable_pool, + node_data=self.node_data, + strategy=strategy, + tenant_id=dify_ctx.tenant_id, + app_id=dify_ctx.app_id, + invoke_from=dify_ctx.invoke_from, + for_log=True, + ) + credentials = self._runtime_support.build_credentials(parameters=parameters) + + conversation_id = self.graph_runtime_state.variable_pool.get(["sys", SystemVariableKey.CONVERSATION_ID]) + + try: + message_stream = strategy.invoke( + params=parameters, + user_id=dify_ctx.user_id, + app_id=dify_ctx.app_id, + conversation_id=conversation_id.text if conversation_id else None, + credentials=credentials, + ) + except Exception as e: + error = AgentInvocationError(f"Failed to invoke agent: {str(e)}", original_error=e) + yield StreamCompletedEvent( + node_run_result=NodeRunResult( + status=WorkflowNodeExecutionStatus.FAILED, + inputs=parameters_for_log, + error=str(error), + ) + ) + return + + try: + yield from self._message_transformer.transform( + messages=message_stream, + tool_info={ + "icon": self._presentation_provider.get_icon( + tenant_id=dify_ctx.tenant_id, + agent_strategy_provider_name=self.node_data.agent_strategy_provider_name, + ), + "agent_strategy": 
self.node_data.agent_strategy_name, + }, + parameters_for_log=parameters_for_log, + user_id=dify_ctx.user_id, + tenant_id=dify_ctx.tenant_id, + node_type=self.node_type, + node_id=self._node_id, + node_execution_id=self.id, + ) + except PluginDaemonClientSideError as e: + transform_error = AgentMessageTransformError( + f"Failed to transform agent message: {str(e)}", original_error=e + ) + yield StreamCompletedEvent( + node_run_result=NodeRunResult( + status=WorkflowNodeExecutionStatus.FAILED, + inputs=parameters_for_log, + error=str(transform_error), + ) + ) + + @classmethod + def _extract_variable_selector_to_variable_mapping( + cls, + *, + graph_config: Mapping[str, Any], + node_id: str, + node_data: AgentNodeData, + ) -> Mapping[str, Sequence[str]]: + _ = graph_config # Explicitly mark as unused + result: dict[str, Any] = {} + typed_node_data = node_data + for parameter_name in typed_node_data.agent_parameters: + input = typed_node_data.agent_parameters[parameter_name] + match input.type: + case "mixed" | "constant": + selectors = VariableTemplateParser(str(input.value)).extract_variable_selectors() + for selector in selectors: + result[selector.variable] = selector.value_selector + case "variable": + result[parameter_name] = input.value + + result = {node_id + "." + key: value for key, value in result.items()} + + return result diff --git a/api/dify_graph/nodes/agent/entities.py b/api/core/workflow/nodes/agent/entities.py similarity index 87% rename from api/dify_graph/nodes/agent/entities.py rename to api/core/workflow/nodes/agent/entities.py index f7b7af8fa4..91fed39795 100644 --- a/api/dify_graph/nodes/agent/entities.py +++ b/api/core/workflow/nodes/agent/entities.py @@ -6,14 +6,14 @@ from pydantic import BaseModel from core.prompt.entities.advanced_prompt_entities import MemoryConfig from core.tools.entities.tool_entities import ToolSelector from dify_graph.entities.base_node_data import BaseNodeData -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType class AgentNodeData(BaseNodeData): - type: NodeType = NodeType.AGENT - agent_strategy_provider_name: str # redundancy + type: NodeType = BuiltinNodeTypes.AGENT + agent_strategy_provider_name: str agent_strategy_name: str - agent_strategy_label: str # redundancy + agent_strategy_label: str memory: MemoryConfig | None = None # The version of the tool parameter. # If this value is None, it indicates this is a previous version diff --git a/api/dify_graph/nodes/agent/exc.py b/api/core/workflow/nodes/agent/exceptions.py similarity index 90% rename from api/dify_graph/nodes/agent/exc.py rename to api/core/workflow/nodes/agent/exceptions.py index ba2c83d8a6..944f5f0b20 100644 --- a/api/dify_graph/nodes/agent/exc.py +++ b/api/core/workflow/nodes/agent/exceptions.py @@ -119,14 +119,3 @@ class AgentVariableTypeError(AgentNodeError): self.expected_type = expected_type self.actual_type = actual_type super().__init__(message) - - -class AgentMaxIterationError(AgentNodeError): - """Exception raised when the agent exceeds the maximum iteration limit.""" - - def __init__(self, max_iteration: int): - self.max_iteration = max_iteration - super().__init__( - f"Agent exceeded the maximum iteration limit of {max_iteration}. " - f"The agent was unable to complete the task within the allowed number of iterations." 
- ) diff --git a/api/core/workflow/nodes/agent/message_transformer.py b/api/core/workflow/nodes/agent/message_transformer.py new file mode 100644 index 0000000000..f58a5665f4 --- /dev/null +++ b/api/core/workflow/nodes/agent/message_transformer.py @@ -0,0 +1,292 @@ +from __future__ import annotations + +from collections.abc import Generator, Mapping +from typing import Any, cast + +from sqlalchemy import select +from sqlalchemy.orm import Session + +from core.tools.entities.tool_entities import ToolInvokeMessage +from core.tools.utils.message_transformer import ToolFileMessageTransformer +from dify_graph.enums import BuiltinNodeTypes, NodeType, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus +from dify_graph.file import File, FileTransferMethod +from dify_graph.model_runtime.entities.llm_entities import LLMUsage, LLMUsageMetadata +from dify_graph.model_runtime.utils.encoders import jsonable_encoder +from dify_graph.node_events import ( + AgentLogEvent, + NodeEventBase, + NodeRunResult, + StreamChunkEvent, + StreamCompletedEvent, +) +from dify_graph.variables.segments import ArrayFileSegment +from extensions.ext_database import db +from factories import file_factory +from models import ToolFile +from services.tools.builtin_tools_manage_service import BuiltinToolManageService + +from .exceptions import AgentNodeError, AgentVariableTypeError, ToolFileNotFoundError + + +class AgentMessageTransformer: + def transform( + self, + *, + messages: Generator[ToolInvokeMessage, None, None], + tool_info: Mapping[str, Any], + parameters_for_log: dict[str, Any], + user_id: str, + tenant_id: str, + node_type: NodeType, + node_id: str, + node_execution_id: str, + ) -> Generator[NodeEventBase, None, None]: + from core.plugin.impl.plugin import PluginInstaller + + message_stream = ToolFileMessageTransformer.transform_tool_invoke_messages( + messages=messages, + user_id=user_id, + tenant_id=tenant_id, + conversation_id=None, + ) + + text = "" + files: list[File] = [] + json_list: list[dict | list] = [] + + agent_logs: list[AgentLogEvent] = [] + agent_execution_metadata: Mapping[WorkflowNodeExecutionMetadataKey, Any] = {} + llm_usage = LLMUsage.empty_usage() + variables: dict[str, Any] = {} + + for message in message_stream: + if message.type in { + ToolInvokeMessage.MessageType.IMAGE_LINK, + ToolInvokeMessage.MessageType.BINARY_LINK, + ToolInvokeMessage.MessageType.IMAGE, + }: + assert isinstance(message.message, ToolInvokeMessage.TextMessage) + + url = message.message.text + if message.meta: + transfer_method = message.meta.get("transfer_method", FileTransferMethod.TOOL_FILE) + else: + transfer_method = FileTransferMethod.TOOL_FILE + + tool_file_id = str(url).split("/")[-1].split(".")[0] + + with Session(db.engine) as session: + stmt = select(ToolFile).where(ToolFile.id == tool_file_id) + tool_file = session.scalar(stmt) + if tool_file is None: + raise ToolFileNotFoundError(tool_file_id) + + mapping = { + "tool_file_id": tool_file_id, + "type": file_factory.get_file_type_by_mime_type(tool_file.mimetype), + "transfer_method": transfer_method, + "url": url, + } + file = file_factory.build_from_mapping( + mapping=mapping, + tenant_id=tenant_id, + ) + files.append(file) + elif message.type == ToolInvokeMessage.MessageType.BLOB: + assert isinstance(message.message, ToolInvokeMessage.TextMessage) + assert message.meta + + tool_file_id = message.message.text.split("/")[-1].split(".")[0] + with Session(db.engine) as session: + stmt = select(ToolFile).where(ToolFile.id == tool_file_id) + tool_file = 
session.scalar(stmt) + if tool_file is None: + raise ToolFileNotFoundError(tool_file_id) + + mapping = { + "tool_file_id": tool_file_id, + "transfer_method": FileTransferMethod.TOOL_FILE, + } + files.append( + file_factory.build_from_mapping( + mapping=mapping, + tenant_id=tenant_id, + ) + ) + elif message.type == ToolInvokeMessage.MessageType.TEXT: + assert isinstance(message.message, ToolInvokeMessage.TextMessage) + text += message.message.text + yield StreamChunkEvent( + selector=[node_id, "text"], + chunk=message.message.text, + is_final=False, + ) + elif message.type == ToolInvokeMessage.MessageType.JSON: + assert isinstance(message.message, ToolInvokeMessage.JsonMessage) + if node_type == BuiltinNodeTypes.AGENT: + if isinstance(message.message.json_object, dict): + msg_metadata: dict[str, Any] = message.message.json_object.pop("execution_metadata", {}) + llm_usage = LLMUsage.from_metadata(cast(LLMUsageMetadata, msg_metadata)) + agent_execution_metadata = { + WorkflowNodeExecutionMetadataKey(key): value + for key, value in msg_metadata.items() + if key in WorkflowNodeExecutionMetadataKey.__members__.values() + } + else: + llm_usage = LLMUsage.empty_usage() + agent_execution_metadata = {} + if message.message.json_object: + json_list.append(message.message.json_object) + elif message.type == ToolInvokeMessage.MessageType.LINK: + assert isinstance(message.message, ToolInvokeMessage.TextMessage) + stream_text = f"Link: {message.message.text}\n" + text += stream_text + yield StreamChunkEvent( + selector=[node_id, "text"], + chunk=stream_text, + is_final=False, + ) + elif message.type == ToolInvokeMessage.MessageType.VARIABLE: + assert isinstance(message.message, ToolInvokeMessage.VariableMessage) + variable_name = message.message.variable_name + variable_value = message.message.variable_value + if message.message.stream: + if not isinstance(variable_value, str): + raise AgentVariableTypeError( + "When 'stream' is True, 'variable_value' must be a string.", + variable_name=variable_name, + expected_type="str", + actual_type=type(variable_value).__name__, + ) + if variable_name not in variables: + variables[variable_name] = "" + variables[variable_name] += variable_value + + yield StreamChunkEvent( + selector=[node_id, variable_name], + chunk=variable_value, + is_final=False, + ) + else: + variables[variable_name] = variable_value + elif message.type == ToolInvokeMessage.MessageType.FILE: + assert message.meta is not None + assert isinstance(message.meta, dict) + if "file" not in message.meta: + raise AgentNodeError("File message is missing 'file' key in meta") + + if not isinstance(message.meta["file"], File): + raise AgentNodeError(f"Expected File object but got {type(message.meta['file']).__name__}") + files.append(message.meta["file"]) + elif message.type == ToolInvokeMessage.MessageType.LOG: + assert isinstance(message.message, ToolInvokeMessage.LogMessage) + if message.message.metadata: + icon = tool_info.get("icon", "") + dict_metadata = dict(message.message.metadata) + if dict_metadata.get("provider"): + manager = PluginInstaller() + plugins = manager.list_plugins(tenant_id) + try: + current_plugin = next( + plugin + for plugin in plugins + if f"{plugin.plugin_id}/{plugin.name}" == dict_metadata["provider"] + ) + icon = current_plugin.declaration.icon + except StopIteration: + pass + icon_dark = None + try: + builtin_tool = next( + provider + for provider in BuiltinToolManageService.list_builtin_tools( + user_id, + tenant_id, + ) + if provider.name == dict_metadata["provider"] + ) + 
icon = builtin_tool.icon + icon_dark = builtin_tool.icon_dark + except StopIteration: + pass + + dict_metadata["icon"] = icon + dict_metadata["icon_dark"] = icon_dark + message.message.metadata = dict_metadata + agent_log = AgentLogEvent( + message_id=message.message.id, + node_execution_id=node_execution_id, + parent_id=message.message.parent_id, + error=message.message.error, + status=message.message.status.value, + data=message.message.data, + label=message.message.label, + metadata=message.message.metadata, + node_id=node_id, + ) + + for log in agent_logs: + if log.message_id == agent_log.message_id: + log.data = agent_log.data + log.status = agent_log.status + log.error = agent_log.error + log.label = agent_log.label + log.metadata = agent_log.metadata + break + else: + agent_logs.append(agent_log) + + yield agent_log + + json_output: list[dict[str, Any] | list[Any]] = [] + if agent_logs: + for log in agent_logs: + json_output.append( + { + "id": log.message_id, + "parent_id": log.parent_id, + "error": log.error, + "status": log.status, + "data": log.data, + "label": log.label, + "metadata": log.metadata, + "node_id": log.node_id, + } + ) + if json_list: + json_output.extend(json_list) + else: + json_output.append({"data": []}) + + yield StreamChunkEvent( + selector=[node_id, "text"], + chunk="", + is_final=True, + ) + + for var_name in variables: + yield StreamChunkEvent( + selector=[node_id, var_name], + chunk="", + is_final=True, + ) + + yield StreamCompletedEvent( + node_run_result=NodeRunResult( + status=WorkflowNodeExecutionStatus.SUCCEEDED, + outputs={ + "text": text, + "usage": jsonable_encoder(llm_usage), + "files": ArrayFileSegment(value=files), + "json": json_output, + **variables, + }, + metadata={ + **agent_execution_metadata, + WorkflowNodeExecutionMetadataKey.TOOL_INFO: tool_info, + WorkflowNodeExecutionMetadataKey.AGENT_LOG: agent_logs, + }, + inputs=parameters_for_log, + llm_usage=llm_usage, + ) + ) diff --git a/api/core/workflow/nodes/agent/plugin_strategy_adapter.py b/api/core/workflow/nodes/agent/plugin_strategy_adapter.py new file mode 100644 index 0000000000..1fc427ad6c --- /dev/null +++ b/api/core/workflow/nodes/agent/plugin_strategy_adapter.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from factories.agent_factory import get_plugin_agent_strategy + +from .strategy_protocols import AgentStrategyPresentationProvider, AgentStrategyResolver, ResolvedAgentStrategy + + +class PluginAgentStrategyResolver(AgentStrategyResolver): + def resolve( + self, + *, + tenant_id: str, + agent_strategy_provider_name: str, + agent_strategy_name: str, + ) -> ResolvedAgentStrategy: + return get_plugin_agent_strategy( + tenant_id=tenant_id, + agent_strategy_provider_name=agent_strategy_provider_name, + agent_strategy_name=agent_strategy_name, + ) + + +class PluginAgentStrategyPresentationProvider(AgentStrategyPresentationProvider): + def get_icon(self, *, tenant_id: str, agent_strategy_provider_name: str) -> str | None: + from core.plugin.impl.plugin import PluginInstaller + + manager = PluginInstaller() + try: + plugins = manager.list_plugins(tenant_id) + except Exception: + return None + + try: + current_plugin = next( + plugin for plugin in plugins if f"{plugin.plugin_id}/{plugin.name}" == agent_strategy_provider_name + ) + except StopIteration: + return None + + return current_plugin.declaration.icon diff --git a/api/core/workflow/nodes/agent/runtime_support.py b/api/core/workflow/nodes/agent/runtime_support.py new file mode 100644 index 0000000000..2ff7c964b9 --- 
/dev/null +++ b/api/core/workflow/nodes/agent/runtime_support.py @@ -0,0 +1,276 @@ +from __future__ import annotations + +import json +from collections.abc import Sequence +from typing import Any, cast + +from packaging.version import Version +from pydantic import ValidationError +from sqlalchemy import select +from sqlalchemy.orm import Session + +from core.agent.entities import AgentToolEntity +from core.agent.plugin_entities import AgentStrategyParameter +from core.memory.token_buffer_memory import TokenBufferMemory +from core.model_manager import ModelInstance, ModelManager +from core.plugin.entities.request import InvokeCredentials +from core.provider_manager import ProviderManager +from core.tools.entities.tool_entities import ToolIdentity, ToolParameter, ToolProviderType +from core.tools.tool_manager import ToolManager +from dify_graph.enums import SystemVariableKey +from dify_graph.model_runtime.entities.model_entities import AIModelEntity, ModelType +from dify_graph.runtime import VariablePool +from dify_graph.variables.segments import StringSegment +from extensions.ext_database import db +from models.model import Conversation + +from .entities import AgentNodeData, AgentOldVersionModelFeatures, ParamsAutoGenerated +from .exceptions import AgentInputTypeError, AgentVariableNotFoundError +from .strategy_protocols import ResolvedAgentStrategy + + +class AgentRuntimeSupport: + def build_parameters( + self, + *, + agent_parameters: Sequence[AgentStrategyParameter], + variable_pool: VariablePool, + node_data: AgentNodeData, + strategy: ResolvedAgentStrategy, + tenant_id: str, + app_id: str, + invoke_from: Any, + for_log: bool = False, + ) -> dict[str, Any]: + agent_parameters_dictionary = {parameter.name: parameter for parameter in agent_parameters} + + result: dict[str, Any] = {} + for parameter_name in node_data.agent_parameters: + parameter = agent_parameters_dictionary.get(parameter_name) + if not parameter: + result[parameter_name] = None + continue + + agent_input = node_data.agent_parameters[parameter_name] + match agent_input.type: + case "variable": + variable = variable_pool.get(agent_input.value) # type: ignore[arg-type] + if variable is None: + raise AgentVariableNotFoundError(str(agent_input.value)) + parameter_value = variable.value + case "mixed" | "constant": + try: + if not isinstance(agent_input.value, str): + parameter_value = json.dumps(agent_input.value, ensure_ascii=False) + else: + parameter_value = str(agent_input.value) + except TypeError: + parameter_value = str(agent_input.value) + + segment_group = variable_pool.convert_template(parameter_value) + parameter_value = segment_group.log if for_log else segment_group.text + try: + if not isinstance(agent_input.value, str): + parameter_value = json.loads(parameter_value) + except json.JSONDecodeError: + parameter_value = parameter_value + case _: + raise AgentInputTypeError(agent_input.type) + + value = parameter_value + if parameter.type == "array[tools]": + value = cast(list[dict[str, Any]], value) + value = [tool for tool in value if tool.get("enabled", False)] + value = self._filter_mcp_type_tool(strategy, value) + for tool in value: + if "schemas" in tool: + tool.pop("schemas") + parameters = tool.get("parameters", {}) + if all(isinstance(v, dict) for _, v in parameters.items()): + params = {} + for key, param in parameters.items(): + if param.get("auto", ParamsAutoGenerated.OPEN) in ( + ParamsAutoGenerated.CLOSE, + 0, + ): + value_param = param.get("value", {}) + if value_param and value_param.get("type", 
"") == "variable": + variable_selector = value_param.get("value") + if not variable_selector: + raise ValueError("Variable selector is missing for a variable-type parameter.") + + variable = variable_pool.get(variable_selector) + if variable is None: + raise AgentVariableNotFoundError(str(variable_selector)) + + params[key] = variable.value + else: + params[key] = value_param.get("value", "") if value_param is not None else None + else: + params[key] = None + parameters = params + tool["settings"] = {k: v.get("value", None) for k, v in tool.get("settings", {}).items()} + tool["parameters"] = parameters + + if not for_log: + if parameter.type == "array[tools]": + value = cast(list[dict[str, Any]], value) + tool_value = [] + for tool in value: + provider_type = ToolProviderType(tool.get("type", ToolProviderType.BUILT_IN)) + setting_params = tool.get("settings", {}) + parameters = tool.get("parameters", {}) + manual_input_params = [key for key, value in parameters.items() if value is not None] + + parameters = {**parameters, **setting_params} + entity = AgentToolEntity( + provider_id=tool.get("provider_name", ""), + provider_type=provider_type, + tool_name=tool.get("tool_name", ""), + tool_parameters=parameters, + plugin_unique_identifier=tool.get("plugin_unique_identifier", None), + credential_id=tool.get("credential_id", None), + ) + + extra = tool.get("extra", {}) + + runtime_variable_pool: VariablePool | None = None + if node_data.version != "1" or node_data.tool_node_version is not None: + runtime_variable_pool = variable_pool + tool_runtime = ToolManager.get_agent_tool_runtime( + tenant_id, + app_id, + entity, + invoke_from, + runtime_variable_pool, + ) + if tool_runtime.entity.description: + tool_runtime.entity.description.llm = ( + extra.get("description", "") or tool_runtime.entity.description.llm + ) + for tool_runtime_params in tool_runtime.entity.parameters: + tool_runtime_params.form = ( + ToolParameter.ToolParameterForm.FORM + if tool_runtime_params.name in manual_input_params + else tool_runtime_params.form + ) + manual_input_value = {} + if tool_runtime.entity.parameters: + manual_input_value = { + key: value for key, value in parameters.items() if key in manual_input_params + } + runtime_parameters = { + **tool_runtime.runtime.runtime_parameters, + **manual_input_value, + } + tool_value.append( + { + **tool_runtime.entity.model_dump(mode="json"), + "runtime_parameters": runtime_parameters, + "credential_id": tool.get("credential_id", None), + "provider_type": provider_type.value, + } + ) + value = tool_value + if parameter.type == AgentStrategyParameter.AgentStrategyParameterType.MODEL_SELECTOR: + value = cast(dict[str, Any], value) + model_instance, model_schema = self.fetch_model(tenant_id=tenant_id, value=value) + history_prompt_messages = [] + if node_data.memory: + memory = self.fetch_memory( + variable_pool=variable_pool, + app_id=app_id, + model_instance=model_instance, + ) + if memory: + prompt_messages = memory.get_history_prompt_messages( + message_limit=node_data.memory.window.size or None + ) + history_prompt_messages = [ + prompt_message.model_dump(mode="json") for prompt_message in prompt_messages + ] + value["history_prompt_messages"] = history_prompt_messages + if model_schema: + model_schema = self._remove_unsupported_model_features_for_old_version(model_schema) + value["entity"] = model_schema.model_dump(mode="json") + else: + value["entity"] = None + result[parameter_name] = value + + return result + + def build_credentials(self, *, parameters: dict[str, 
Any]) -> InvokeCredentials: + credentials = InvokeCredentials() + credentials.tool_credentials = {} + for tool in parameters.get("tools", []): + if not tool.get("credential_id"): + continue + try: + identity = ToolIdentity.model_validate(tool.get("identity", {})) + except ValidationError: + continue + credentials.tool_credentials[identity.provider] = tool.get("credential_id", None) + return credentials + + def fetch_memory( + self, + *, + variable_pool: VariablePool, + app_id: str, + model_instance: ModelInstance, + ) -> TokenBufferMemory | None: + conversation_id_variable = variable_pool.get(["sys", SystemVariableKey.CONVERSATION_ID]) + if not isinstance(conversation_id_variable, StringSegment): + return None + conversation_id = conversation_id_variable.value + + with Session(db.engine, expire_on_commit=False) as session: + stmt = select(Conversation).where(Conversation.app_id == app_id, Conversation.id == conversation_id) + conversation = session.scalar(stmt) + if not conversation: + return None + + return TokenBufferMemory(conversation=conversation, model_instance=model_instance) + + def fetch_model(self, *, tenant_id: str, value: dict[str, Any]) -> tuple[ModelInstance, AIModelEntity | None]: + provider_manager = ProviderManager() + provider_model_bundle = provider_manager.get_provider_model_bundle( + tenant_id=tenant_id, + provider=value.get("provider", ""), + model_type=ModelType.LLM, + ) + model_name = value.get("model", "") + model_credentials = provider_model_bundle.configuration.get_current_credentials( + model_type=ModelType.LLM, + model=model_name, + ) + provider_name = provider_model_bundle.configuration.provider.provider + model_type_instance = provider_model_bundle.model_type_instance + model_instance = ModelManager().get_model_instance( + tenant_id=tenant_id, + provider=provider_name, + model_type=ModelType(value.get("model_type", "")), + model=model_name, + ) + model_schema = model_type_instance.get_model_schema(model_name, model_credentials) + return model_instance, model_schema + + @staticmethod + def _remove_unsupported_model_features_for_old_version(model_schema: AIModelEntity) -> AIModelEntity: + if model_schema.features: + for feature in model_schema.features[:]: + try: + AgentOldVersionModelFeatures(feature.value) + except ValueError: + model_schema.features.remove(feature) + return model_schema + + @staticmethod + def _filter_mcp_type_tool( + strategy: ResolvedAgentStrategy, + tools: list[dict[str, Any]], + ) -> list[dict[str, Any]]: + meta_version = strategy.meta_version + if meta_version and Version(meta_version) > Version("0.0.1"): + return tools + return [tool for tool in tools if tool.get("type") != ToolProviderType.MCP] diff --git a/api/core/workflow/nodes/agent/strategy_protocols.py b/api/core/workflow/nodes/agent/strategy_protocols.py new file mode 100644 index 0000000000..643d916d15 --- /dev/null +++ b/api/core/workflow/nodes/agent/strategy_protocols.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +from collections.abc import Generator, Sequence +from typing import Any, Protocol + +from core.agent.plugin_entities import AgentStrategyParameter +from core.plugin.entities.request import InvokeCredentials +from core.tools.entities.tool_entities import ToolInvokeMessage + + +class ResolvedAgentStrategy(Protocol): + meta_version: str | None + + def get_parameters(self) -> Sequence[AgentStrategyParameter]: ... 
+ + def invoke( + self, + *, + params: dict[str, Any], + user_id: str, + conversation_id: str | None = None, + app_id: str | None = None, + message_id: str | None = None, + credentials: InvokeCredentials | None = None, + ) -> Generator[ToolInvokeMessage, None, None]: ... + + +class AgentStrategyResolver(Protocol): + def resolve( + self, + *, + tenant_id: str, + agent_strategy_provider_name: str, + agent_strategy_name: str, + ) -> ResolvedAgentStrategy: ... + + +class AgentStrategyPresentationProvider(Protocol): + def get_icon(self, *, tenant_id: str, agent_strategy_provider_name: str) -> str | None: ... diff --git a/api/core/workflow/nodes/datasource/__init__.py b/api/core/workflow/nodes/datasource/__init__.py new file mode 100644 index 0000000000..2e9bed5e00 --- /dev/null +++ b/api/core/workflow/nodes/datasource/__init__.py @@ -0,0 +1 @@ +"""Datasource workflow node package.""" diff --git a/api/dify_graph/nodes/datasource/datasource_node.py b/api/core/workflow/nodes/datasource/datasource_node.py similarity index 94% rename from api/dify_graph/nodes/datasource/datasource_node.py rename to api/core/workflow/nodes/datasource/datasource_node.py index b0d3e1a24e..44f4a23a5a 100644 --- a/api/dify_graph/nodes/datasource/datasource_node.py +++ b/api/core/workflow/nodes/datasource/datasource_node.py @@ -1,22 +1,17 @@ from collections.abc import Generator, Mapping, Sequence from typing import TYPE_CHECKING, Any +from core.datasource.datasource_manager import DatasourceManager from core.datasource.entities.datasource_entities import DatasourceProviderType from core.plugin.impl.exc import PluginDaemonClientSideError from dify_graph.entities.graph_config import NodeConfigDict from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus -from dify_graph.enums import NodeExecutionType, NodeType, SystemVariableKey +from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, SystemVariableKey, WorkflowNodeExecutionMetadataKey from dify_graph.node_events import NodeRunResult, StreamCompletedEvent from dify_graph.nodes.base.node import Node from dify_graph.nodes.base.variable_template_parser import VariableTemplateParser -from dify_graph.repositories.datasource_manager_protocol import ( - DatasourceManagerProtocol, - DatasourceParameter, - OnlineDriveDownloadFileParam, -) -from ...entities.workflow_node_execution import WorkflowNodeExecutionMetadataKey -from .entities import DatasourceNodeData +from .entities import DatasourceNodeData, DatasourceParameter, OnlineDriveDownloadFileParam from .exc import DatasourceNodeError if TYPE_CHECKING: @@ -29,7 +24,7 @@ class DatasourceNode(Node[DatasourceNodeData]): Datasource Node """ - node_type = NodeType.DATASOURCE + node_type = BuiltinNodeTypes.DATASOURCE execution_type = NodeExecutionType.ROOT def __init__( @@ -38,7 +33,6 @@ class DatasourceNode(Node[DatasourceNodeData]): config: NodeConfigDict, graph_init_params: "GraphInitParams", graph_runtime_state: "GraphRuntimeState", - datasource_manager: DatasourceManagerProtocol, ): super().__init__( id=id, @@ -46,7 +40,11 @@ class DatasourceNode(Node[DatasourceNodeData]): graph_init_params=graph_init_params, graph_runtime_state=graph_runtime_state, ) - self.datasource_manager = datasource_manager + self.datasource_manager = DatasourceManager + + def populate_start_event(self, event) -> None: + event.provider_id = f"{self.node_data.plugin_id}/{self.node_data.provider_name}" + event.provider_type = self.node_data.provider_type def _run(self) -> Generator: """ diff --git 
a/api/dify_graph/nodes/datasource/entities.py b/api/core/workflow/nodes/datasource/entities.py similarity index 85% rename from api/dify_graph/nodes/datasource/entities.py rename to api/core/workflow/nodes/datasource/entities.py index 38275ac158..65864474b0 100644 --- a/api/dify_graph/nodes/datasource/entities.py +++ b/api/core/workflow/nodes/datasource/entities.py @@ -4,7 +4,7 @@ from pydantic import BaseModel, field_validator from pydantic_core.core_schema import ValidationInfo from dify_graph.entities.base_node_data import BaseNodeData -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType class DatasourceEntity(BaseModel): @@ -17,7 +17,7 @@ class DatasourceEntity(BaseModel): class DatasourceNodeData(BaseNodeData, DatasourceEntity): - type: NodeType = NodeType.DATASOURCE + type: NodeType = BuiltinNodeTypes.DATASOURCE class DatasourceInput(BaseModel): # TODO: check this type @@ -42,3 +42,14 @@ class DatasourceNodeData(BaseNodeData, DatasourceEntity): return typ datasource_parameters: dict[str, DatasourceInput] | None = None + + +class DatasourceParameter(BaseModel): + workspace_id: str + page_id: str + type: str + + +class OnlineDriveDownloadFileParam(BaseModel): + id: str + bucket: str diff --git a/api/dify_graph/nodes/datasource/exc.py b/api/core/workflow/nodes/datasource/exc.py similarity index 100% rename from api/dify_graph/nodes/datasource/exc.py rename to api/core/workflow/nodes/datasource/exc.py diff --git a/api/dify_graph/repositories/datasource_manager_protocol.py b/api/core/workflow/nodes/datasource/protocols.py similarity index 79% rename from api/dify_graph/repositories/datasource_manager_protocol.py rename to api/core/workflow/nodes/datasource/protocols.py index fbe2016d3c..c006e0885c 100644 --- a/api/dify_graph/repositories/datasource_manager_protocol.py +++ b/api/core/workflow/nodes/datasource/protocols.py @@ -1,25 +1,10 @@ from collections.abc import Generator from typing import Any, Protocol -from pydantic import BaseModel - from dify_graph.file import File from dify_graph.node_events import StreamChunkEvent, StreamCompletedEvent - -class DatasourceParameter(BaseModel): - workspace_id: str - page_id: str - type: str - - -class OnlineDriveDownloadFileParam(BaseModel): - id: str - bucket: str - - -class DatasourceFinal(BaseModel): - data: dict[str, Any] | None = None +from .entities import DatasourceParameter, OnlineDriveDownloadFileParam class DatasourceManagerProtocol(Protocol): diff --git a/api/core/workflow/nodes/knowledge_index/__init__.py b/api/core/workflow/nodes/knowledge_index/__init__.py new file mode 100644 index 0000000000..efc6a57b3d --- /dev/null +++ b/api/core/workflow/nodes/knowledge_index/__init__.py @@ -0,0 +1,5 @@ +"""Knowledge index workflow node package.""" + +KNOWLEDGE_INDEX_NODE_TYPE = "knowledge-index" + +__all__ = ["KNOWLEDGE_INDEX_NODE_TYPE"] diff --git a/api/dify_graph/nodes/knowledge_index/entities.py b/api/core/workflow/nodes/knowledge_index/entities.py similarity index 96% rename from api/dify_graph/nodes/knowledge_index/entities.py rename to api/core/workflow/nodes/knowledge_index/entities.py index d88ee8e3af..8b00746268 100644 --- a/api/dify_graph/nodes/knowledge_index/entities.py +++ b/api/core/workflow/nodes/knowledge_index/entities.py @@ -3,6 +3,7 @@ from typing import Literal, Union from pydantic import BaseModel from core.rag.retrieval.retrieval_methods import RetrievalMethod +from core.workflow.nodes.knowledge_index import KNOWLEDGE_INDEX_NODE_TYPE from dify_graph.entities.base_node_data import 
BaseNodeData from dify_graph.enums import NodeType @@ -156,7 +157,7 @@ class KnowledgeIndexNodeData(BaseNodeData): Knowledge index Node Data. """ - type: NodeType = NodeType.KNOWLEDGE_INDEX + type: NodeType = KNOWLEDGE_INDEX_NODE_TYPE chunk_structure: str index_chunk_variable_selector: list[str] indexing_technique: str | None = None diff --git a/api/dify_graph/nodes/knowledge_index/exc.py b/api/core/workflow/nodes/knowledge_index/exc.py similarity index 100% rename from api/dify_graph/nodes/knowledge_index/exc.py rename to api/core/workflow/nodes/knowledge_index/exc.py diff --git a/api/dify_graph/nodes/knowledge_index/knowledge_index_node.py b/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py similarity index 91% rename from api/dify_graph/nodes/knowledge_index/knowledge_index_node.py rename to api/core/workflow/nodes/knowledge_index/knowledge_index_node.py index 3c4fe2344c..0a74847bc1 100644 --- a/api/dify_graph/nodes/knowledge_index/knowledge_index_node.py +++ b/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py @@ -2,14 +2,15 @@ import logging from collections.abc import Mapping from typing import TYPE_CHECKING, Any +from core.rag.index_processor.index_processor import IndexProcessor +from core.rag.summary_index.summary_index import SummaryIndex +from core.workflow.nodes.knowledge_index import KNOWLEDGE_INDEX_NODE_TYPE from dify_graph.entities.graph_config import NodeConfigDict from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus -from dify_graph.enums import NodeExecutionType, NodeType, SystemVariableKey +from dify_graph.enums import NodeExecutionType, SystemVariableKey from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node from dify_graph.nodes.base.template import Template -from dify_graph.repositories.index_processor_protocol import IndexProcessorProtocol -from dify_graph.repositories.summary_index_service_protocol import SummaryIndexServiceProtocol from .entities import KnowledgeIndexNodeData from .exc import ( @@ -25,7 +26,7 @@ _INVOKE_FROM_DEBUGGER = "debugger" class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]): - node_type = NodeType.KNOWLEDGE_INDEX + node_type = KNOWLEDGE_INDEX_NODE_TYPE execution_type = NodeExecutionType.RESPONSE def __init__( @@ -34,12 +35,10 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]): config: NodeConfigDict, graph_init_params: "GraphInitParams", graph_runtime_state: "GraphRuntimeState", - index_processor: IndexProcessorProtocol, - summary_index_service: SummaryIndexServiceProtocol, ) -> None: super().__init__(id, config, graph_init_params, graph_runtime_state) - self.index_processor = index_processor - self.summary_index_service = summary_index_service + self.index_processor = IndexProcessor() + self.summary_index_service = SummaryIndex() def _run(self) -> NodeRunResult: # type: ignore node_data = self.node_data diff --git a/api/dify_graph/repositories/index_processor_protocol.py b/api/core/workflow/nodes/knowledge_index/protocols.py similarity index 55% rename from api/dify_graph/repositories/index_processor_protocol.py rename to api/core/workflow/nodes/knowledge_index/protocols.py index feaa4ab5de..bb52123082 100644 --- a/api/dify_graph/repositories/index_processor_protocol.py +++ b/api/core/workflow/nodes/knowledge_index/protocols.py @@ -5,21 +5,21 @@ from pydantic import BaseModel, Field class PreviewItem(BaseModel): - content: str | None = Field(None) - child_chunks: list[str] | None = Field(None) - summary: str | None = Field(None) + 
content: str | None = Field(default=None) + child_chunks: list[str] | None = Field(default=None) + summary: str | None = Field(default=None) class QaPreview(BaseModel): - answer: str | None = Field(None) - question: str | None = Field(None) + answer: str | None = Field(default=None) + question: str | None = Field(default=None) class Preview(BaseModel): chunk_structure: str - parent_mode: str | None = Field(None) - preview: list[PreviewItem] = Field([]) - qa_preview: list[QaPreview] = Field([]) + parent_mode: str | None = Field(default=None) + preview: list[PreviewItem] = Field(default_factory=list) + qa_preview: list[QaPreview] = Field(default_factory=list) total_segments: int @@ -39,3 +39,9 @@ class IndexProcessorProtocol(Protocol): def get_preview_output( self, chunks: Any, dataset_id: str, document_id: str, chunk_structure: str, summary_index_setting: dict | None ) -> Preview: ... + + +class SummaryIndexServiceProtocol(Protocol): + def generate_and_vectorize_summary( + self, dataset_id: str, document_id: str, is_preview: bool, summary_index_setting: dict | None = None + ) -> None: ... diff --git a/api/core/workflow/nodes/knowledge_retrieval/__init__.py b/api/core/workflow/nodes/knowledge_retrieval/__init__.py new file mode 100644 index 0000000000..33ea4277b4 --- /dev/null +++ b/api/core/workflow/nodes/knowledge_retrieval/__init__.py @@ -0,0 +1 @@ +"""Knowledge retrieval workflow node package.""" diff --git a/api/dify_graph/nodes/knowledge_retrieval/entities.py b/api/core/workflow/nodes/knowledge_retrieval/entities.py similarity index 96% rename from api/dify_graph/nodes/knowledge_retrieval/entities.py rename to api/core/workflow/nodes/knowledge_retrieval/entities.py index 8f226b9785..bc5618685a 100644 --- a/api/dify_graph/nodes/knowledge_retrieval/entities.py +++ b/api/core/workflow/nodes/knowledge_retrieval/entities.py @@ -4,7 +4,7 @@ from typing import Literal from pydantic import BaseModel, Field from dify_graph.entities.base_node_data import BaseNodeData -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.nodes.llm.entities import ModelConfig, VisionConfig @@ -114,7 +114,7 @@ class KnowledgeRetrievalNodeData(BaseNodeData): Knowledge retrieval Node Data. """ - type: NodeType = NodeType.KNOWLEDGE_RETRIEVAL + type: NodeType = BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL query_variable_selector: list[str] | None | str = None query_attachment_selector: list[str] | None | str = None dataset_ids: list[str] diff --git a/api/dify_graph/nodes/knowledge_retrieval/exc.py b/api/core/workflow/nodes/knowledge_retrieval/exc.py similarity index 100% rename from api/dify_graph/nodes/knowledge_retrieval/exc.py rename to api/core/workflow/nodes/knowledge_retrieval/exc.py diff --git a/api/dify_graph/nodes/knowledge_retrieval/knowledge_retrieval_node.py b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py similarity index 97% rename from api/dify_graph/nodes/knowledge_retrieval/knowledge_retrieval_node.py rename to api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py index 61c9614340..9c3b9aacbf 100644 --- a/api/dify_graph/nodes/knowledge_retrieval/knowledge_retrieval_node.py +++ b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py @@ -1,12 +1,19 @@ +"""Knowledge retrieval workflow node implementation. + +This node now lives under ``core.workflow.nodes`` and is discovered directly by +the workflow node registry. 
+""" + import logging from collections.abc import Mapping, Sequence from typing import TYPE_CHECKING, Any, Literal from core.app.app_config.entities import DatasetRetrieveConfigEntity +from core.rag.retrieval.dataset_retrieval import DatasetRetrieval from dify_graph.entities import GraphInitParams from dify_graph.entities.graph_config import NodeConfigDict from dify_graph.enums import ( - NodeType, + BuiltinNodeTypes, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus, ) @@ -15,7 +22,6 @@ from dify_graph.model_runtime.utils.encoders import jsonable_encoder from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base import LLMUsageTrackingMixin from dify_graph.nodes.base.node import Node -from dify_graph.repositories.rag_retrieval_protocol import KnowledgeRetrievalRequest, RAGRetrievalProtocol, Source from dify_graph.variables import ( ArrayFileSegment, FileSegment, @@ -32,6 +38,7 @@ from .exc import ( KnowledgeRetrievalNodeError, RateLimitExceededError, ) +from .retrieval import KnowledgeRetrievalRequest, Source if TYPE_CHECKING: from dify_graph.file.models import File @@ -41,7 +48,7 @@ logger = logging.getLogger(__name__) class KnowledgeRetrievalNode(LLMUsageTrackingMixin, Node[KnowledgeRetrievalNodeData]): - node_type = NodeType.KNOWLEDGE_RETRIEVAL + node_type = BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL # Instance attributes specific to LLMNode. # Output variable for file @@ -53,7 +60,6 @@ class KnowledgeRetrievalNode(LLMUsageTrackingMixin, Node[KnowledgeRetrievalNodeD config: NodeConfigDict, graph_init_params: "GraphInitParams", graph_runtime_state: "GraphRuntimeState", - rag_retrieval: RAGRetrievalProtocol, ): super().__init__( id=id, @@ -63,7 +69,7 @@ class KnowledgeRetrievalNode(LLMUsageTrackingMixin, Node[KnowledgeRetrievalNodeD ) # LLM file outputs, used for MultiModal outputs. 
self._file_outputs = [] - self._rag_retrieval = rag_retrieval + self._rag_retrieval = DatasetRetrieval() @classmethod def version(cls): diff --git a/api/dify_graph/repositories/rag_retrieval_protocol.py b/api/core/workflow/nodes/knowledge_retrieval/retrieval.py similarity index 83% rename from api/dify_graph/repositories/rag_retrieval_protocol.py rename to api/core/workflow/nodes/knowledge_retrieval/retrieval.py index 5f3d38167e..f964f79582 100644 --- a/api/dify_graph/repositories/rag_retrieval_protocol.py +++ b/api/core/workflow/nodes/knowledge_retrieval/retrieval.py @@ -3,9 +3,10 @@ from typing import Any, Literal, Protocol from pydantic import BaseModel, Field from dify_graph.model_runtime.entities import LLMUsage -from dify_graph.nodes.knowledge_retrieval.entities import MetadataFilteringCondition from dify_graph.nodes.llm.entities import ModelConfig +from .entities import MetadataFilteringCondition + class SourceChildChunk(BaseModel): id: str = Field(default="", description="Child chunk ID") @@ -28,7 +29,7 @@ class SourceMetadata(BaseModel): segment_id: str | None = Field(default=None, description="Segment unique identifier") retriever_from: str = Field(default="workflow", description="Retriever source context") score: float = Field(default=0.0, description="Retrieval relevance score") - child_chunks: list[SourceChildChunk] = Field(default=[], description="List of child chunks") + child_chunks: list[SourceChildChunk] = Field(default_factory=list, description="List of child chunks") segment_hit_count: int | None = Field(default=0, description="Number of times segment was retrieved") segment_word_count: int | None = Field(default=0, description="Word count of the segment") segment_position: int | None = Field(default=0, description="Position of segment in document") @@ -81,28 +82,7 @@ class KnowledgeRetrievalRequest(BaseModel): class RAGRetrievalProtocol(Protocol): - """Protocol for RAG-based knowledge retrieval implementations. - - Implementations of this protocol handle knowledge retrieval from datasets - including rate limiting, dataset filtering, and document retrieval. - """ - @property - def llm_usage(self) -> LLMUsage: - """Return accumulated LLM usage for retrieval operations.""" - ... + def llm_usage(self) -> LLMUsage: ... - def knowledge_retrieval(self, request: KnowledgeRetrievalRequest) -> list[Source]: - """Retrieve knowledge from datasets based on the provided request. - - Args: - request: Knowledge retrieval request with search parameters - - Returns: - List of sources matching the search criteria - - Raises: - RateLimitExceededError: If rate limit is exceeded - ModelNotExistError: If specified model doesn't exist - """ - ... + def knowledge_retrieval(self, request: KnowledgeRetrievalRequest) -> list[Source]: ... 
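With `RAGRetrievalProtocol` trimmed to just the `llm_usage` property and `knowledge_retrieval`, any object exposing those two members type-checks as a retrieval backend, which keeps `KnowledgeRetrievalNode` testable even though it now constructs `DatasetRetrieval()` by default. A minimal sketch of a structural test double (the `StubRetrieval` name is invented; the import paths assume the moved module layout in this diff):

```python
from dify_graph.model_runtime.entities import LLMUsage

from core.workflow.nodes.knowledge_retrieval.retrieval import (
    KnowledgeRetrievalRequest,
    RAGRetrievalProtocol,
    Source,
)


class StubRetrieval:
    """Satisfies RAGRetrievalProtocol structurally; no explicit registration needed."""

    @property
    def llm_usage(self) -> LLMUsage:
        # The stub consumes no tokens.
        return LLMUsage.empty_usage()

    def knowledge_retrieval(self, request: KnowledgeRetrievalRequest) -> list[Source]:
        # Canned empty result set for unit tests.
        return []


# Type-checks against the Protocol without inheriting from anything.
retrieval: RAGRetrievalProtocol = StubRetrieval()
```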
diff --git a/api/dify_graph/nodes/knowledge_retrieval/template_prompts.py b/api/core/workflow/nodes/knowledge_retrieval/template_prompts.py similarity index 100% rename from api/dify_graph/nodes/knowledge_retrieval/template_prompts.py rename to api/core/workflow/nodes/knowledge_retrieval/template_prompts.py diff --git a/api/dify_graph/nodes/trigger_plugin/__init__.py b/api/core/workflow/nodes/trigger_plugin/__init__.py similarity index 100% rename from api/dify_graph/nodes/trigger_plugin/__init__.py rename to api/core/workflow/nodes/trigger_plugin/__init__.py diff --git a/api/dify_graph/nodes/trigger_plugin/entities.py b/api/core/workflow/nodes/trigger_plugin/entities.py similarity index 95% rename from api/dify_graph/nodes/trigger_plugin/entities.py rename to api/core/workflow/nodes/trigger_plugin/entities.py index 33a61c9bc8..ea7d20befe 100644 --- a/api/dify_graph/nodes/trigger_plugin/entities.py +++ b/api/core/workflow/nodes/trigger_plugin/entities.py @@ -3,16 +3,18 @@ from typing import Any, Literal, Union from pydantic import BaseModel, Field, ValidationInfo, field_validator +from core.trigger.constants import TRIGGER_PLUGIN_NODE_TYPE from core.trigger.entities.entities import EventParameter from dify_graph.entities.base_node_data import BaseNodeData from dify_graph.enums import NodeType -from dify_graph.nodes.trigger_plugin.exc import TriggerEventParameterError + +from .exc import TriggerEventParameterError class TriggerEventNodeData(BaseNodeData): """Plugin trigger node data""" - type: NodeType = NodeType.TRIGGER_PLUGIN + type: NodeType = TRIGGER_PLUGIN_NODE_TYPE class TriggerEventInput(BaseModel): value: Union[Any, list[str]] diff --git a/api/dify_graph/nodes/trigger_plugin/exc.py b/api/core/workflow/nodes/trigger_plugin/exc.py similarity index 100% rename from api/dify_graph/nodes/trigger_plugin/exc.py rename to api/core/workflow/nodes/trigger_plugin/exc.py diff --git a/api/dify_graph/nodes/trigger_plugin/trigger_event_node.py b/api/core/workflow/nodes/trigger_plugin/trigger_event_node.py similarity index 80% rename from api/dify_graph/nodes/trigger_plugin/trigger_event_node.py rename to api/core/workflow/nodes/trigger_plugin/trigger_event_node.py index b4f1116f7e..2048a53064 100644 --- a/api/dify_graph/nodes/trigger_plugin/trigger_event_node.py +++ b/api/core/workflow/nodes/trigger_plugin/trigger_event_node.py @@ -1,8 +1,10 @@ from collections.abc import Mapping +from typing import Any, cast +from core.trigger.constants import TRIGGER_INFO_METADATA_KEY, TRIGGER_PLUGIN_NODE_TYPE from dify_graph.constants import SYSTEM_VARIABLE_NODE_ID -from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus -from dify_graph.enums import NodeExecutionType, NodeType +from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus +from dify_graph.enums import NodeExecutionType, WorkflowNodeExecutionMetadataKey from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node @@ -10,7 +12,7 @@ from .entities import TriggerEventNodeData class TriggerEventNode(Node[TriggerEventNodeData]): - node_type = NodeType.TRIGGER_PLUGIN + node_type = TRIGGER_PLUGIN_NODE_TYPE execution_type = NodeExecutionType.ROOT @classmethod @@ -32,6 +34,9 @@ class TriggerEventNode(Node[TriggerEventNodeData]): def version(cls) -> str: return "1" + def populate_start_event(self, event) -> None: + event.provider_id = self.node_data.provider_id + def _run(self) -> NodeRunResult: """ Run the plugin trigger node. 
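Both relocated trigger-style nodes now implement a `populate_start_event` hook that enriches the start event in place before the engine emits it, replacing dedicated event fields with per-node stamping. A self-contained sketch of the contract (the classes here are invented stand-ins; only the hook signature and the `provider_id`/`provider_type` fields follow this diff):

```python
from dataclasses import dataclass
from datetime import datetime


# Stand-in for dify_graph's NodeRunStartedEvent; only the fields the hook
# touches are modeled (the real event carries more).
@dataclass
class FakeStartEvent:
    node_title: str
    start_at: datetime
    provider_id: str = ""
    provider_type: str = ""


class ProviderStampingNode:
    """Hypothetical node illustrating the populate_start_event contract."""

    def __init__(self, provider_id: str, provider_type: str) -> None:
        self._provider_id = provider_id
        self._provider_type = provider_type

    def populate_start_event(self, event) -> None:
        # Mirrors DatasourceNode/TriggerEventNode: mutate the start event
        # in place before the engine emits it.
        event.provider_id = self._provider_id
        event.provider_type = self._provider_type


event = FakeStartEvent(node_title="demo", start_at=datetime.now())
ProviderStampingNode("acme/source", "online_drive").populate_start_event(event)
assert event.provider_id == "acme/source"
```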
@@ -41,8 +46,8 @@ class TriggerEventNode(Node[TriggerEventNodeData]): """ # Get trigger data passed when workflow was triggered - metadata = { - WorkflowNodeExecutionMetadataKey.TRIGGER_INFO: { + metadata: dict[WorkflowNodeExecutionMetadataKey, Any] = { + cast(WorkflowNodeExecutionMetadataKey, TRIGGER_INFO_METADATA_KEY): { "provider_id": self.node_data.provider_id, "event_name": self.node_data.event_name, "plugin_unique_identifier": self.node_data.plugin_unique_identifier, diff --git a/api/core/workflow/nodes/trigger_schedule/__init__.py b/api/core/workflow/nodes/trigger_schedule/__init__.py new file mode 100644 index 0000000000..07b711a0fd --- /dev/null +++ b/api/core/workflow/nodes/trigger_schedule/__init__.py @@ -0,0 +1,3 @@ +from .trigger_schedule_node import TriggerScheduleNode + +__all__ = ["TriggerScheduleNode"] diff --git a/api/dify_graph/nodes/trigger_schedule/entities.py b/api/core/workflow/nodes/trigger_schedule/entities.py similarity index 94% rename from api/dify_graph/nodes/trigger_schedule/entities.py rename to api/core/workflow/nodes/trigger_schedule/entities.py index 2b0edcabba..95a2548678 100644 --- a/api/dify_graph/nodes/trigger_schedule/entities.py +++ b/api/core/workflow/nodes/trigger_schedule/entities.py @@ -2,6 +2,7 @@ from typing import Literal, Union from pydantic import BaseModel, Field +from core.trigger.constants import TRIGGER_SCHEDULE_NODE_TYPE from dify_graph.entities.base_node_data import BaseNodeData from dify_graph.enums import NodeType @@ -11,7 +12,7 @@ class TriggerScheduleNodeData(BaseNodeData): Trigger Schedule Node Data """ - type: NodeType = NodeType.TRIGGER_SCHEDULE + type: NodeType = TRIGGER_SCHEDULE_NODE_TYPE mode: str = Field(default="visual", description="Schedule mode: visual or cron") frequency: str | None = Field(default=None, description="Frequency for visual mode: hourly, daily, weekly, monthly") cron_expression: str | None = Field(default=None, description="Cron expression for cron mode") diff --git a/api/dify_graph/nodes/trigger_schedule/exc.py b/api/core/workflow/nodes/trigger_schedule/exc.py similarity index 100% rename from api/dify_graph/nodes/trigger_schedule/exc.py rename to api/core/workflow/nodes/trigger_schedule/exc.py diff --git a/api/dify_graph/nodes/trigger_schedule/trigger_schedule_node.py b/api/core/workflow/nodes/trigger_schedule/trigger_schedule_node.py similarity index 85% rename from api/dify_graph/nodes/trigger_schedule/trigger_schedule_node.py rename to api/core/workflow/nodes/trigger_schedule/trigger_schedule_node.py index 7e92eb3f4f..b9580e6ab1 100644 --- a/api/dify_graph/nodes/trigger_schedule/trigger_schedule_node.py +++ b/api/core/workflow/nodes/trigger_schedule/trigger_schedule_node.py @@ -1,15 +1,17 @@ from collections.abc import Mapping +from core.trigger.constants import TRIGGER_SCHEDULE_NODE_TYPE from dify_graph.constants import SYSTEM_VARIABLE_NODE_ID from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus -from dify_graph.enums import NodeExecutionType, NodeType +from dify_graph.enums import NodeExecutionType from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node -from dify_graph.nodes.trigger_schedule.entities import TriggerScheduleNodeData + +from .entities import TriggerScheduleNodeData class TriggerScheduleNode(Node[TriggerScheduleNodeData]): - node_type = NodeType.TRIGGER_SCHEDULE + node_type = TRIGGER_SCHEDULE_NODE_TYPE execution_type = NodeExecutionType.ROOT @classmethod @@ -19,7 +21,7 @@ class 
TriggerScheduleNode(Node[TriggerScheduleNodeData]): @classmethod def get_default_config(cls, filters: Mapping[str, object] | None = None) -> Mapping[str, object]: return { - "type": "trigger-schedule", + "type": TRIGGER_SCHEDULE_NODE_TYPE, "config": { "mode": "visual", "frequency": "daily", diff --git a/api/dify_graph/nodes/trigger_webhook/__init__.py b/api/core/workflow/nodes/trigger_webhook/__init__.py similarity index 100% rename from api/dify_graph/nodes/trigger_webhook/__init__.py rename to api/core/workflow/nodes/trigger_webhook/__init__.py diff --git a/api/dify_graph/nodes/trigger_webhook/entities.py b/api/core/workflow/nodes/trigger_webhook/entities.py similarity index 97% rename from api/dify_graph/nodes/trigger_webhook/entities.py rename to api/core/workflow/nodes/trigger_webhook/entities.py index a4f8745e71..242bf5ef6a 100644 --- a/api/dify_graph/nodes/trigger_webhook/entities.py +++ b/api/core/workflow/nodes/trigger_webhook/entities.py @@ -3,6 +3,7 @@ from enum import StrEnum from pydantic import BaseModel, Field, field_validator +from core.trigger.constants import TRIGGER_WEBHOOK_NODE_TYPE from dify_graph.entities.base_node_data import BaseNodeData from dify_graph.enums import NodeType from dify_graph.variables.types import SegmentType @@ -93,7 +94,7 @@ class WebhookData(BaseNodeData): class SyncMode(StrEnum): SYNC = "async" # only support - type: NodeType = NodeType.TRIGGER_WEBHOOK + type: NodeType = TRIGGER_WEBHOOK_NODE_TYPE method: Method = Method.GET content_type: ContentType = Field(default=ContentType.JSON) headers: Sequence[WebhookParameter] = Field(default_factory=list) diff --git a/api/dify_graph/nodes/trigger_webhook/exc.py b/api/core/workflow/nodes/trigger_webhook/exc.py similarity index 100% rename from api/dify_graph/nodes/trigger_webhook/exc.py rename to api/core/workflow/nodes/trigger_webhook/exc.py diff --git a/api/dify_graph/nodes/trigger_webhook/node.py b/api/core/workflow/nodes/trigger_webhook/node.py similarity index 97% rename from api/dify_graph/nodes/trigger_webhook/node.py rename to api/core/workflow/nodes/trigger_webhook/node.py index 413eda5272..317844cbda 100644 --- a/api/dify_graph/nodes/trigger_webhook/node.py +++ b/api/core/workflow/nodes/trigger_webhook/node.py @@ -2,9 +2,10 @@ import logging from collections.abc import Mapping from typing import Any +from core.trigger.constants import TRIGGER_WEBHOOK_NODE_TYPE from dify_graph.constants import SYSTEM_VARIABLE_NODE_ID from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus -from dify_graph.enums import NodeExecutionType, NodeType +from dify_graph.enums import NodeExecutionType from dify_graph.file import FileTransferMethod from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node @@ -19,7 +20,7 @@ logger = logging.getLogger(__name__) class TriggerWebhookNode(Node[WebhookData]): - node_type = NodeType.TRIGGER_WEBHOOK + node_type = TRIGGER_WEBHOOK_NODE_TYPE execution_type = NodeExecutionType.ROOT @classmethod diff --git a/api/core/workflow/workflow_entry.py b/api/core/workflow/workflow_entry.py index fef01049c5..2e51a06bab 100644 --- a/api/core/workflow/workflow_entry.py +++ b/api/core/workflow/workflow_entry.py @@ -8,7 +8,7 @@ from core.app.apps.exc import GenerateTaskStoppedError from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom, build_dify_run_context from core.app.workflow.layers.llm_quota import LLMQuotaLayer from core.app.workflow.layers.observability import ObservabilityLayer -from core.workflow.node_factory 
import DifyNodeFactory +from core.workflow.node_factory import DifyNodeFactory, resolve_workflow_node_class from dify_graph.constants import ENVIRONMENT_VARIABLE_NODE_ID from dify_graph.entities import GraphInitParams from dify_graph.entities.graph_config import NodeConfigDictAdapter @@ -21,9 +21,8 @@ from dify_graph.graph_engine.layers import DebugLoggingLayer, ExecutionLimitsLay from dify_graph.graph_engine.layers.base import GraphEngineLayer from dify_graph.graph_engine.protocols.command_channel import CommandChannel from dify_graph.graph_events import GraphEngineEvent, GraphNodeEventBase, GraphRunFailedEvent -from dify_graph.nodes import NodeType +from dify_graph.nodes import BuiltinNodeTypes from dify_graph.nodes.base.node import Node -from dify_graph.nodes.node_mapping import NODE_TYPE_CLASSES_MAPPING from dify_graph.runtime import ChildGraphNotFoundError, GraphRuntimeState, VariablePool from dify_graph.system_variable import SystemVariable from dify_graph.variable_loader import DUMMY_VARIABLE_LOADER, VariableLoader, load_into_variable_pool @@ -253,7 +252,7 @@ class WorkflowEntry: variable_mapping=variable_mapping, user_inputs=user_inputs, ) - if node_type != NodeType.DATASOURCE: + if node_type != BuiltinNodeTypes.DATASOURCE: cls.mapping_user_inputs_to_variable_pool( variable_mapping=variable_mapping, user_inputs=user_inputs, @@ -303,7 +302,7 @@ class WorkflowEntry: "height": node_height, "type": "custom", "data": { - "type": NodeType.START, + "type": BuiltinNodeTypes.START, "title": "Start", "desc": "Start", }, @@ -339,11 +338,11 @@ class WorkflowEntry: # Create a minimal graph for single node execution graph_dict = cls._create_single_node_graph(node_id, node_data) - node_type = NodeType(node_data.get("type", "")) - if node_type not in {NodeType.PARAMETER_EXTRACTOR, NodeType.QUESTION_CLASSIFIER}: + node_type = node_data.get("type", "") + if node_type not in {BuiltinNodeTypes.PARAMETER_EXTRACTOR, BuiltinNodeTypes.QUESTION_CLASSIFIER}: raise ValueError(f"Node type {node_type} not supported") - node_cls = NODE_TYPE_CLASSES_MAPPING[node_type]["1"] + node_cls = resolve_workflow_node_class(node_type=node_type, node_version="1") if not node_cls: raise ValueError(f"Node class not found for node type {node_type}") diff --git a/api/dify_graph/README.md b/api/dify_graph/README.md index 09c4f5afdc..2fc5b8b890 100644 --- a/api/dify_graph/README.md +++ b/api/dify_graph/README.md @@ -113,7 +113,7 @@ The codebase enforces strict layering via import-linter: 1. Create node class in `nodes//` 1. Inherit from `BaseNode` or appropriate base class 1. Implement `_run()` method -1. Register in `nodes/node_mapping.py` +1. Ensure the node module is importable under `nodes//` 1. 
Add tests in `tests/unit_tests/dify_graph/nodes/` ### Implementing a Custom Layer diff --git a/api/dify_graph/entities/__init__.py b/api/dify_graph/entities/__init__.py index e73c38c1d3..ef7789c49c 100644 --- a/api/dify_graph/entities/__init__.py +++ b/api/dify_graph/entities/__init__.py @@ -1,11 +1,9 @@ -from .agent import AgentNodeStrategyInit from .graph_init_params import GraphInitParams from .workflow_execution import WorkflowExecution from .workflow_node_execution import WorkflowNodeExecution from .workflow_start_reason import WorkflowStartReason __all__ = [ - "AgentNodeStrategyInit", "GraphInitParams", "WorkflowExecution", "WorkflowNodeExecution", diff --git a/api/dify_graph/entities/agent.py b/api/dify_graph/entities/agent.py deleted file mode 100644 index 2b4d6db76f..0000000000 --- a/api/dify_graph/entities/agent.py +++ /dev/null @@ -1,8 +0,0 @@ -from pydantic import BaseModel - - -class AgentNodeStrategyInit(BaseModel): - """Agent node strategy initialization data.""" - - name: str - icon: str | None = None diff --git a/api/dify_graph/entities/base_node_data.py b/api/dify_graph/entities/base_node_data.py index 58869a94c2..47b37c9daf 100644 --- a/api/dify_graph/entities/base_node_data.py +++ b/api/dify_graph/entities/base_node_data.py @@ -121,6 +121,8 @@ class DefaultValue(BaseModel): class BaseNodeData(ABC, BaseModel): # Raw graph payloads are first validated through `NodeConfigDictAdapter`, where # `node["data"]` is typed as `BaseNodeData` before the concrete node class is known. + # `type` therefore accepts downstream string node kinds; unknown node implementations + # are rejected later when the node factory resolves the node registry. # At that boundary, node-specific fields are still "extra" relative to this shared DTO, # and persisted templates/workflows also carry undeclared compatibility keys such as # `selected`, `params`, `paramSchemas`, and `datasource_label`. Keep extras permissive diff --git a/api/dify_graph/entities/workflow_node_execution.py b/api/dify_graph/entities/workflow_node_execution.py index 9dd04e331b..bc7e0d02e5 100644 --- a/api/dify_graph/entities/workflow_node_execution.py +++ b/api/dify_graph/entities/workflow_node_execution.py @@ -48,7 +48,7 @@ class WorkflowNodeExecution(BaseModel): index: int # Sequence number for ordering in trace visualization predecessor_node_id: str | None = None # ID of the node that executed before this one node_id: str # ID of the node being executed - node_type: NodeType # Type of node (e.g., start, llm, knowledge) + node_type: NodeType # Type of node (e.g., start, llm, downstream response node) title: str # Display title of the node # Execution data diff --git a/api/dify_graph/enums.py b/api/dify_graph/enums.py index bb3b13e8c6..b27454d638 100644 --- a/api/dify_graph/enums.py +++ b/api/dify_graph/enums.py @@ -1,4 +1,5 @@ from enum import StrEnum +from typing import ClassVar, TypeAlias class NodeState(StrEnum): @@ -33,56 +34,71 @@ class SystemVariableKey(StrEnum): INVOKE_FROM = "invoke_from" -class NodeType(StrEnum): - START = "start" - END = "end" - ANSWER = "answer" - LLM = "llm" - KNOWLEDGE_RETRIEVAL = "knowledge-retrieval" - KNOWLEDGE_INDEX = "knowledge-index" - IF_ELSE = "if-else" - CODE = "code" - TEMPLATE_TRANSFORM = "template-transform" - QUESTION_CLASSIFIER = "question-classifier" - HTTP_REQUEST = "http-request" - TOOL = "tool" - DATASOURCE = "datasource" - VARIABLE_AGGREGATOR = "variable-aggregator" - LEGACY_VARIABLE_AGGREGATOR = "variable-assigner" # TODO: Merge this into VARIABLE_AGGREGATOR in the database. 
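The enum removal continues below, with `NodeType` collapsing to a plain `str` alias and the replacement `BuiltinNodeTypes` namespace carrying the built-in values. The practical consequence is that downstream packages can mint node kinds without touching `dify_graph`; a small sketch assuming the post-PR `enums` module (the custom type string is invented):

```python
from dify_graph.enums import BUILT_IN_NODE_TYPES, BuiltinNodeTypes, NodeType

# Built-in kinds are now ordinary strings; no enum coercion on comparison.
assert BuiltinNodeTypes.LLM == "llm"

# A downstream package can declare its own node kind directly.
MY_NODE_TYPE: NodeType = "acme/audit-log"  # hypothetical custom type

# Membership checks distinguish built-ins from downstream extensions.
assert MY_NODE_TYPE not in BUILT_IN_NODE_TYPES
```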
- LOOP = "loop" - LOOP_START = "loop-start" - LOOP_END = "loop-end" - ITERATION = "iteration" - ITERATION_START = "iteration-start" # Fake start node for iteration. - PARAMETER_EXTRACTOR = "parameter-extractor" - VARIABLE_ASSIGNER = "assigner" - DOCUMENT_EXTRACTOR = "document-extractor" - LIST_OPERATOR = "list-operator" - AGENT = "agent" - TRIGGER_WEBHOOK = "trigger-webhook" - TRIGGER_SCHEDULE = "trigger-schedule" - TRIGGER_PLUGIN = "trigger-plugin" - HUMAN_INPUT = "human-input" +NodeType: TypeAlias = str - @property - def is_trigger_node(self) -> bool: - """Check if this node type is a trigger node.""" - return self in [ - NodeType.TRIGGER_WEBHOOK, - NodeType.TRIGGER_SCHEDULE, - NodeType.TRIGGER_PLUGIN, - ] - @property - def is_start_node(self) -> bool: - """Check if this node type can serve as a workflow entry point.""" - return self in [ - NodeType.START, - NodeType.DATASOURCE, - NodeType.TRIGGER_WEBHOOK, - NodeType.TRIGGER_SCHEDULE, - NodeType.TRIGGER_PLUGIN, - ] +class BuiltinNodeTypes: + """Built-in node type string constants. + + `node_type` values are plain strings throughout the graph runtime. This namespace + only exposes the built-in values shipped by `dify_graph`; downstream packages can + use additional strings without extending this class. + """ + + START: ClassVar[NodeType] = "start" + END: ClassVar[NodeType] = "end" + ANSWER: ClassVar[NodeType] = "answer" + LLM: ClassVar[NodeType] = "llm" + KNOWLEDGE_RETRIEVAL: ClassVar[NodeType] = "knowledge-retrieval" + IF_ELSE: ClassVar[NodeType] = "if-else" + CODE: ClassVar[NodeType] = "code" + TEMPLATE_TRANSFORM: ClassVar[NodeType] = "template-transform" + QUESTION_CLASSIFIER: ClassVar[NodeType] = "question-classifier" + HTTP_REQUEST: ClassVar[NodeType] = "http-request" + TOOL: ClassVar[NodeType] = "tool" + DATASOURCE: ClassVar[NodeType] = "datasource" + VARIABLE_AGGREGATOR: ClassVar[NodeType] = "variable-aggregator" + LEGACY_VARIABLE_AGGREGATOR: ClassVar[NodeType] = "variable-assigner" + LOOP: ClassVar[NodeType] = "loop" + LOOP_START: ClassVar[NodeType] = "loop-start" + LOOP_END: ClassVar[NodeType] = "loop-end" + ITERATION: ClassVar[NodeType] = "iteration" + ITERATION_START: ClassVar[NodeType] = "iteration-start" + PARAMETER_EXTRACTOR: ClassVar[NodeType] = "parameter-extractor" + VARIABLE_ASSIGNER: ClassVar[NodeType] = "assigner" + DOCUMENT_EXTRACTOR: ClassVar[NodeType] = "document-extractor" + LIST_OPERATOR: ClassVar[NodeType] = "list-operator" + AGENT: ClassVar[NodeType] = "agent" + HUMAN_INPUT: ClassVar[NodeType] = "human-input" + + +BUILT_IN_NODE_TYPES: tuple[NodeType, ...] 
= ( + BuiltinNodeTypes.START, + BuiltinNodeTypes.END, + BuiltinNodeTypes.ANSWER, + BuiltinNodeTypes.LLM, + BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL, + BuiltinNodeTypes.IF_ELSE, + BuiltinNodeTypes.CODE, + BuiltinNodeTypes.TEMPLATE_TRANSFORM, + BuiltinNodeTypes.QUESTION_CLASSIFIER, + BuiltinNodeTypes.HTTP_REQUEST, + BuiltinNodeTypes.TOOL, + BuiltinNodeTypes.DATASOURCE, + BuiltinNodeTypes.VARIABLE_AGGREGATOR, + BuiltinNodeTypes.LEGACY_VARIABLE_AGGREGATOR, + BuiltinNodeTypes.LOOP, + BuiltinNodeTypes.LOOP_START, + BuiltinNodeTypes.LOOP_END, + BuiltinNodeTypes.ITERATION, + BuiltinNodeTypes.ITERATION_START, + BuiltinNodeTypes.PARAMETER_EXTRACTOR, + BuiltinNodeTypes.VARIABLE_ASSIGNER, + BuiltinNodeTypes.DOCUMENT_EXTRACTOR, + BuiltinNodeTypes.LIST_OPERATOR, + BuiltinNodeTypes.AGENT, + BuiltinNodeTypes.HUMAN_INPUT, +) class NodeExecutionType(StrEnum): @@ -232,11 +248,12 @@ class WorkflowNodeExecutionMetadataKey(StrEnum): """ TOTAL_TOKENS = "total_tokens" + PROMPT_TOKENS = "prompt_tokens" + COMPLETION_TOKENS = "completion_tokens" TOTAL_PRICE = "total_price" CURRENCY = "currency" TOOL_INFO = "tool_info" AGENT_LOG = "agent_log" - TRIGGER_INFO = "trigger_info" ITERATION_ID = "iteration_id" ITERATION_INDEX = "iteration_index" LOOP_ID = "loop_id" diff --git a/api/dify_graph/graph/graph.py b/api/dify_graph/graph/graph.py index 3eb6bfc359..85117583e0 100644 --- a/api/dify_graph/graph/graph.py +++ b/api/dify_graph/graph/graph.py @@ -83,50 +83,6 @@ class Graph: return node_configs_map - @classmethod - def _find_root_node_id( - cls, - node_configs_map: Mapping[str, NodeConfigDict], - edge_configs: Sequence[Mapping[str, object]], - root_node_id: str | None = None, - ) -> str: - """ - Find the root node ID if not specified. - - :param node_configs_map: mapping of node ID to node config - :param edge_configs: list of edge configurations - :param root_node_id: explicitly specified root node ID - :return: determined root node ID - """ - if root_node_id: - if root_node_id not in node_configs_map: - raise ValueError(f"Root node id {root_node_id} not found in the graph") - return root_node_id - - # Find nodes with no incoming edges - nodes_with_incoming: set[str] = set() - for edge_config in edge_configs: - target = edge_config.get("target") - if isinstance(target, str): - nodes_with_incoming.add(target) - - root_candidates = [nid for nid in node_configs_map if nid not in nodes_with_incoming] - - # Prefer START node if available - start_node_id = None - for nid in root_candidates: - node_data = node_configs_map[nid]["data"] - if node_data.type.is_start_node: - start_node_id = nid - break - - root_node_id = start_node_id or (root_candidates[0] if root_candidates else None) - - if not root_node_id: - raise ValueError("Unable to determine root node ID") - - return root_node_id - @classmethod def _build_edges( cls, edge_configs: list[dict[str, object]] @@ -301,15 +257,15 @@ class Graph: *, graph_config: Mapping[str, object], node_factory: NodeFactory, - root_node_id: str | None = None, + root_node_id: str, skip_validation: bool = False, ) -> Graph: """ - Initialize graph + Initialize a graph with an explicit execution entry point. 
:param graph_config: graph config containing nodes and edges :param node_factory: factory for creating node instances from config data - :param root_node_id: root node id + :param root_node_id: active root node id :return: graph instance """ # Parse configs @@ -327,8 +283,8 @@ class Graph: # Parse node configurations node_configs_map = cls._parse_node_configs(node_configs) - # Find root node - root_node_id = cls._find_root_node_id(node_configs_map, edge_configs, root_node_id) + if root_node_id not in node_configs_map: + raise ValueError(f"Root node id {root_node_id} not found in the graph") # Build edges edges, in_edges, out_edges = cls._build_edges(edge_configs) diff --git a/api/dify_graph/graph/validation.py b/api/dify_graph/graph/validation.py index 6840bcfed2..50d1440b04 100644 --- a/api/dify_graph/graph/validation.py +++ b/api/dify_graph/graph/validation.py @@ -4,7 +4,7 @@ from collections.abc import Sequence from dataclasses import dataclass from typing import TYPE_CHECKING, Protocol -from dify_graph.enums import NodeExecutionType, NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, NodeType if TYPE_CHECKING: from .graph import Graph @@ -71,7 +71,7 @@ class _RootNodeValidator: """Validates root node invariants.""" invalid_root_code: str = "INVALID_ROOT" - container_entry_types: tuple[NodeType, ...] = (NodeType.ITERATION_START, NodeType.LOOP_START) + container_entry_types: tuple[NodeType, ...] = (BuiltinNodeTypes.ITERATION_START, BuiltinNodeTypes.LOOP_START) def validate(self, graph: Graph) -> Sequence[GraphValidationIssue]: root_node = graph.root_node @@ -86,7 +86,7 @@ class _RootNodeValidator: ) return issues - node_type = getattr(root_node, "node_type", None) + node_type = root_node.node_type if root_node.execution_type != NodeExecutionType.ROOT and node_type not in self.container_entry_types: issues.append( GraphValidationIssue( @@ -114,45 +114,9 @@ class GraphValidator: raise GraphValidationError(issues) -@dataclass(frozen=True, slots=True) -class _TriggerStartExclusivityValidator: - """Ensures trigger nodes do not coexist with UserInput (start) nodes.""" - - conflict_code: str = "TRIGGER_START_NODE_CONFLICT" - - def validate(self, graph: Graph) -> Sequence[GraphValidationIssue]: - start_node_id: str | None = None - trigger_node_ids: list[str] = [] - - for node in graph.nodes.values(): - node_type = getattr(node, "node_type", None) - if not isinstance(node_type, NodeType): - continue - - if node_type == NodeType.START: - start_node_id = node.id - elif node_type.is_trigger_node: - trigger_node_ids.append(node.id) - - if start_node_id and trigger_node_ids: - trigger_list = ", ".join(trigger_node_ids) - return [ - GraphValidationIssue( - code=self.conflict_code, - message=( - f"UserInput (start) node '{start_node_id}' cannot coexist with trigger nodes: {trigger_list}." - ), - node_id=start_node_id, - ) - ] - - return [] - - _DEFAULT_RULES: tuple[GraphValidationRule, ...] = ( _EdgeEndpointValidator(), _RootNodeValidator(), - _TriggerStartExclusivityValidator(), ) diff --git a/api/dify_graph/graph_engine/response_coordinator/__init__.py b/api/dify_graph/graph_engine/response_coordinator/__init__.py index e11d31199c..2a80d316e8 100644 --- a/api/dify_graph/graph_engine/response_coordinator/__init__.py +++ b/api/dify_graph/graph_engine/response_coordinator/__init__.py @@ -6,5 +6,6 @@ of responses based on upstream node outputs and constants. 
""" from .coordinator import ResponseStreamCoordinator +from .session import RESPONSE_SESSION_NODE_TYPES -__all__ = ["ResponseStreamCoordinator"] +__all__ = ["RESPONSE_SESSION_NODE_TYPES", "ResponseStreamCoordinator"] diff --git a/api/dify_graph/graph_engine/response_coordinator/session.py b/api/dify_graph/graph_engine/response_coordinator/session.py index 0548e88d93..99ac1b5edf 100644 --- a/api/dify_graph/graph_engine/response_coordinator/session.py +++ b/api/dify_graph/graph_engine/response_coordinator/session.py @@ -3,19 +3,34 @@ Internal response session management for response coordinator. This module contains the private ResponseSession class used internally by ResponseStreamCoordinator to manage streaming sessions. + +`RESPONSE_SESSION_NODE_TYPES` is intentionally mutable so downstream applications +can opt additional response-capable node types into session creation without +patching the coordinator. """ from __future__ import annotations from dataclasses import dataclass +from typing import Protocol, cast -from dify_graph.nodes.answer.answer_node import AnswerNode +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.nodes.base.template import Template -from dify_graph.nodes.end.end_node import EndNode -from dify_graph.nodes.knowledge_index import KnowledgeIndexNode from dify_graph.runtime.graph_runtime_state import NodeProtocol +class _ResponseSessionNodeProtocol(NodeProtocol, Protocol): + """Structural contract required from nodes that can open a response session.""" + + def get_streaming_template(self) -> Template: ... + + +RESPONSE_SESSION_NODE_TYPES: list[NodeType] = [ + BuiltinNodeTypes.ANSWER, + BuiltinNodeTypes.END, +] + + @dataclass class ResponseSession: """ @@ -33,10 +48,9 @@ class ResponseSession: """ Create a ResponseSession from a response-capable node. - The parameter is typed as `NodeProtocol` because the graph is exposed behind a protocol at the runtime layer, - but at runtime this must be an `AnswerNode`, `EndNode`, or `KnowledgeIndexNode` that provides: - - `id: str` - - `get_streaming_template() -> Template` + The parameter is typed as `NodeProtocol` because the graph is exposed behind a protocol at the runtime layer. + At runtime this must be a node whose `node_type` is listed in `RESPONSE_SESSION_NODE_TYPES` + and which implements `get_streaming_template()`. Args: node: Node from the materialized workflow graph. @@ -47,11 +61,22 @@ class ResponseSession: Raises: TypeError: If node is not a supported response node type. 
""" - if not isinstance(node, AnswerNode | EndNode | KnowledgeIndexNode): - raise TypeError("ResponseSession.from_node only supports AnswerNode, EndNode, or KnowledgeIndexNode") + if node.node_type not in RESPONSE_SESSION_NODE_TYPES: + supported_node_types = ", ".join(RESPONSE_SESSION_NODE_TYPES) + raise TypeError( + "ResponseSession.from_node only supports node types in " + f"RESPONSE_SESSION_NODE_TYPES: {supported_node_types}" + ) + + response_node = cast(_ResponseSessionNodeProtocol, node) + try: + template = response_node.get_streaming_template() + except AttributeError as exc: + raise TypeError("ResponseSession.from_node requires get_streaming_template() on response nodes") from exc + return cls( node_id=node.id, - template=node.get_streaming_template(), + template=template, ) def is_complete(self) -> bool: diff --git a/api/dify_graph/graph_events/node.py b/api/dify_graph/graph_events/node.py index 21ddf80b64..8552254627 100644 --- a/api/dify_graph/graph_events/node.py +++ b/api/dify_graph/graph_events/node.py @@ -4,7 +4,6 @@ from datetime import datetime from pydantic import Field from core.rag.entities.citation_metadata import RetrievalSourceMetadata -from dify_graph.entities import AgentNodeStrategyInit from dify_graph.entities.pause_reason import PauseReason from .base import GraphNodeEventBase @@ -13,8 +12,8 @@ from .base import GraphNodeEventBase class NodeRunStartedEvent(GraphNodeEventBase): node_title: str predecessor_node_id: str | None = None - agent_strategy: AgentNodeStrategyInit | None = None start_at: datetime = Field(..., description="node start time") + extras: dict[str, object] = Field(default_factory=dict) # FIXME(-LAN-): only for ToolNode provider_type: str = "" diff --git a/api/dify_graph/node_events/node.py b/api/dify_graph/node_events/node.py index 481e793267..2e3973b8fa 100644 --- a/api/dify_graph/node_events/node.py +++ b/api/dify_graph/node_events/node.py @@ -1,9 +1,9 @@ -from collections.abc import Sequence +from collections.abc import Mapping, Sequence from datetime import datetime +from typing import Any from pydantic import Field -from core.rag.entities.citation_metadata import RetrievalSourceMetadata from dify_graph.entities.pause_reason import PauseReason from dify_graph.file import File from dify_graph.model_runtime.entities.llm_entities import LLMUsage @@ -13,7 +13,7 @@ from .base import NodeEventBase class RunRetrieverResourceEvent(NodeEventBase): - retriever_resources: Sequence[RetrievalSourceMetadata] = Field(..., description="retriever resources") + retriever_resources: Sequence[Mapping[str, Any]] = Field(..., description="retriever resources") context: str = Field(..., description="context") context_files: list[File] | None = Field(default=None, description="context files") diff --git a/api/dify_graph/nodes/__init__.py b/api/dify_graph/nodes/__init__.py index d113ad5e70..0223149bb8 100644 --- a/api/dify_graph/nodes/__init__.py +++ b/api/dify_graph/nodes/__init__.py @@ -1,3 +1,3 @@ -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes -__all__ = ["NodeType"] +__all__ = ["BuiltinNodeTypes"] diff --git a/api/dify_graph/nodes/agent/__init__.py b/api/dify_graph/nodes/agent/__init__.py deleted file mode 100644 index 95e7cf895b..0000000000 --- a/api/dify_graph/nodes/agent/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .agent_node import AgentNode - -__all__ = ["AgentNode"] diff --git a/api/dify_graph/nodes/agent/agent_node.py b/api/dify_graph/nodes/agent/agent_node.py deleted file mode 100644 index d501217454..0000000000 
--- a/api/dify_graph/nodes/agent/agent_node.py +++ /dev/null @@ -1,761 +0,0 @@ -from __future__ import annotations - -import json -from collections.abc import Generator, Mapping, Sequence -from typing import TYPE_CHECKING, Any, cast - -from packaging.version import Version -from pydantic import ValidationError -from sqlalchemy import select -from sqlalchemy.orm import Session - -from core.agent.entities import AgentToolEntity -from core.agent.plugin_entities import AgentStrategyParameter -from core.memory.token_buffer_memory import TokenBufferMemory -from core.model_manager import ModelInstance, ModelManager -from core.provider_manager import ProviderManager -from core.tools.entities.tool_entities import ( - ToolIdentity, - ToolInvokeMessage, - ToolParameter, - ToolProviderType, -) -from core.tools.tool_manager import ToolManager -from core.tools.utils.message_transformer import ToolFileMessageTransformer -from dify_graph.enums import ( - NodeType, - SystemVariableKey, - WorkflowNodeExecutionMetadataKey, - WorkflowNodeExecutionStatus, -) -from dify_graph.file import File, FileTransferMethod -from dify_graph.model_runtime.entities.llm_entities import LLMUsage, LLMUsageMetadata -from dify_graph.model_runtime.entities.model_entities import AIModelEntity, ModelType -from dify_graph.model_runtime.utils.encoders import jsonable_encoder -from dify_graph.node_events import ( - AgentLogEvent, - NodeEventBase, - NodeRunResult, - StreamChunkEvent, - StreamCompletedEvent, -) -from dify_graph.nodes.agent.entities import AgentNodeData, AgentOldVersionModelFeatures, ParamsAutoGenerated -from dify_graph.nodes.base.node import Node -from dify_graph.nodes.base.variable_template_parser import VariableTemplateParser -from dify_graph.runtime import VariablePool -from dify_graph.variables.segments import ArrayFileSegment, StringSegment -from extensions.ext_database import db -from factories import file_factory -from factories.agent_factory import get_plugin_agent_strategy -from models import ToolFile -from models.model import Conversation -from services.tools.builtin_tools_manage_service import BuiltinToolManageService - -from .exc import ( - AgentInputTypeError, - AgentInvocationError, - AgentMessageTransformError, - AgentNodeError, - AgentVariableNotFoundError, - AgentVariableTypeError, - ToolFileNotFoundError, -) - -if TYPE_CHECKING: - from core.agent.strategy.plugin import PluginAgentStrategy - from core.plugin.entities.request import InvokeCredentials - - -class AgentNode(Node[AgentNodeData]): - """ - Agent Node - """ - - node_type = NodeType.AGENT - - @classmethod - def version(cls) -> str: - return "1" - - def _run(self) -> Generator[NodeEventBase, None, None]: - from core.plugin.impl.exc import PluginDaemonClientSideError - - dify_ctx = self.require_dify_context() - - try: - strategy = get_plugin_agent_strategy( - tenant_id=dify_ctx.tenant_id, - agent_strategy_provider_name=self.node_data.agent_strategy_provider_name, - agent_strategy_name=self.node_data.agent_strategy_name, - ) - except Exception as e: - yield StreamCompletedEvent( - node_run_result=NodeRunResult( - status=WorkflowNodeExecutionStatus.FAILED, - inputs={}, - error=f"Failed to get agent strategy: {str(e)}", - ), - ) - return - - agent_parameters = strategy.get_parameters() - - # get parameters - parameters = self._generate_agent_parameters( - agent_parameters=agent_parameters, - variable_pool=self.graph_runtime_state.variable_pool, - node_data=self.node_data, - strategy=strategy, - ) - parameters_for_log = self._generate_agent_parameters( 
- agent_parameters=agent_parameters, - variable_pool=self.graph_runtime_state.variable_pool, - node_data=self.node_data, - for_log=True, - strategy=strategy, - ) - credentials = self._generate_credentials(parameters=parameters) - - # get conversation id - conversation_id = self.graph_runtime_state.variable_pool.get(["sys", SystemVariableKey.CONVERSATION_ID]) - - try: - message_stream = strategy.invoke( - params=parameters, - user_id=dify_ctx.user_id, - app_id=dify_ctx.app_id, - conversation_id=conversation_id.text if conversation_id else None, - credentials=credentials, - ) - except Exception as e: - error = AgentInvocationError(f"Failed to invoke agent: {str(e)}", original_error=e) - yield StreamCompletedEvent( - node_run_result=NodeRunResult( - status=WorkflowNodeExecutionStatus.FAILED, - inputs=parameters_for_log, - error=str(error), - ) - ) - return - - try: - yield from self._transform_message( - messages=message_stream, - tool_info={ - "icon": self.agent_strategy_icon, - "agent_strategy": self.node_data.agent_strategy_name, - }, - parameters_for_log=parameters_for_log, - user_id=dify_ctx.user_id, - tenant_id=dify_ctx.tenant_id, - node_type=self.node_type, - node_id=self._node_id, - node_execution_id=self.id, - ) - except PluginDaemonClientSideError as e: - transform_error = AgentMessageTransformError( - f"Failed to transform agent message: {str(e)}", original_error=e - ) - yield StreamCompletedEvent( - node_run_result=NodeRunResult( - status=WorkflowNodeExecutionStatus.FAILED, - inputs=parameters_for_log, - error=str(transform_error), - ) - ) - - def _generate_agent_parameters( - self, - *, - agent_parameters: Sequence[AgentStrategyParameter], - variable_pool: VariablePool, - node_data: AgentNodeData, - for_log: bool = False, - strategy: PluginAgentStrategy, - ) -> dict[str, Any]: - """ - Generate parameters based on the given tool parameters, variable pool, and node data. - - Args: - agent_parameters (Sequence[AgentParameter]): The list of agent parameters. - variable_pool (VariablePool): The variable pool containing the variables. - node_data (AgentNodeData): The data associated with the agent node. - - Returns: - Mapping[str, Any]: A dictionary containing the generated parameters. 
- - """ - agent_parameters_dictionary = {parameter.name: parameter for parameter in agent_parameters} - - result: dict[str, Any] = {} - for parameter_name in node_data.agent_parameters: - parameter = agent_parameters_dictionary.get(parameter_name) - if not parameter: - result[parameter_name] = None - continue - agent_input = node_data.agent_parameters[parameter_name] - match agent_input.type: - case "variable": - variable = variable_pool.get(agent_input.value) # type: ignore - if variable is None: - raise AgentVariableNotFoundError(str(agent_input.value)) - parameter_value = variable.value - case "mixed" | "constant": - # variable_pool.convert_template expects a string template, - # but if passing a dict, convert to JSON string first before rendering - try: - if not isinstance(agent_input.value, str): - parameter_value = json.dumps(agent_input.value, ensure_ascii=False) - else: - parameter_value = str(agent_input.value) - except TypeError: - parameter_value = str(agent_input.value) - segment_group = variable_pool.convert_template(parameter_value) - parameter_value = segment_group.log if for_log else segment_group.text - # variable_pool.convert_template returns a string, - # so we need to convert it back to a dictionary - try: - if not isinstance(agent_input.value, str): - parameter_value = json.loads(parameter_value) - except json.JSONDecodeError: - parameter_value = parameter_value - case _: - raise AgentInputTypeError(agent_input.type) - value = parameter_value - if parameter.type == "array[tools]": - value = cast(list[dict[str, Any]], value) - value = [tool for tool in value if tool.get("enabled", False)] - value = self._filter_mcp_type_tool(strategy, value) - for tool in value: - if "schemas" in tool: - tool.pop("schemas") - parameters = tool.get("parameters", {}) - if all(isinstance(v, dict) for _, v in parameters.items()): - params = {} - for key, param in parameters.items(): - if param.get("auto", ParamsAutoGenerated.OPEN) in ( - ParamsAutoGenerated.CLOSE, - 0, - ): - value_param = param.get("value", {}) - if value_param and value_param.get("type", "") == "variable": - variable_selector = value_param.get("value") - if not variable_selector: - raise ValueError("Variable selector is missing for a variable-type parameter.") - - variable = variable_pool.get(variable_selector) - if variable is None: - raise AgentVariableNotFoundError(str(variable_selector)) - - params[key] = variable.value - else: - params[key] = value_param.get("value", "") if value_param is not None else None - else: - params[key] = None - parameters = params - tool["settings"] = {k: v.get("value", None) for k, v in tool.get("settings", {}).items()} - tool["parameters"] = parameters - - if not for_log: - if parameter.type == "array[tools]": - value = cast(list[dict[str, Any]], value) - tool_value = [] - for tool in value: - provider_type = ToolProviderType(tool.get("type", ToolProviderType.BUILT_IN)) - setting_params = tool.get("settings", {}) - parameters = tool.get("parameters", {}) - manual_input_params = [key for key, value in parameters.items() if value is not None] - - parameters = {**parameters, **setting_params} - entity = AgentToolEntity( - provider_id=tool.get("provider_name", ""), - provider_type=provider_type, - tool_name=tool.get("tool_name", ""), - tool_parameters=parameters, - plugin_unique_identifier=tool.get("plugin_unique_identifier", None), - credential_id=tool.get("credential_id", None), - ) - - extra = tool.get("extra", {}) - - # This is an issue that caused problems before. 
- # Logically, we shouldn't use the node_data.version field for judgment - # But for backward compatibility with historical data - # this version field judgment is still preserved here. - runtime_variable_pool: VariablePool | None = None - if node_data.version != "1" or node_data.tool_node_version is not None: - runtime_variable_pool = variable_pool - dify_ctx = self.require_dify_context() - tool_runtime = ToolManager.get_agent_tool_runtime( - dify_ctx.tenant_id, - dify_ctx.app_id, - entity, - dify_ctx.invoke_from, - runtime_variable_pool, - ) - if tool_runtime.entity.description: - tool_runtime.entity.description.llm = ( - extra.get("description", "") or tool_runtime.entity.description.llm - ) - for tool_runtime_params in tool_runtime.entity.parameters: - tool_runtime_params.form = ( - ToolParameter.ToolParameterForm.FORM - if tool_runtime_params.name in manual_input_params - else tool_runtime_params.form - ) - manual_input_value = {} - if tool_runtime.entity.parameters: - manual_input_value = { - key: value for key, value in parameters.items() if key in manual_input_params - } - runtime_parameters = { - **tool_runtime.runtime.runtime_parameters, - **manual_input_value, - } - tool_value.append( - { - **tool_runtime.entity.model_dump(mode="json"), - "runtime_parameters": runtime_parameters, - "credential_id": tool.get("credential_id", None), - "provider_type": provider_type.value, - } - ) - value = tool_value - if parameter.type == AgentStrategyParameter.AgentStrategyParameterType.MODEL_SELECTOR: - value = cast(dict[str, Any], value) - model_instance, model_schema = self._fetch_model(value) - # memory config - history_prompt_messages = [] - if node_data.memory: - memory = self._fetch_memory(model_instance) - if memory: - prompt_messages = memory.get_history_prompt_messages( - message_limit=node_data.memory.window.size or None - ) - history_prompt_messages = [ - prompt_message.model_dump(mode="json") for prompt_message in prompt_messages - ] - value["history_prompt_messages"] = history_prompt_messages - if model_schema: - # remove structured output feature to support old version agent plugin - model_schema = self._remove_unsupported_model_features_for_old_version(model_schema) - value["entity"] = model_schema.model_dump(mode="json") - else: - value["entity"] = None - result[parameter_name] = value - - return result - - def _generate_credentials( - self, - parameters: dict[str, Any], - ) -> InvokeCredentials: - """ - Generate credentials based on the given agent parameters. 
- """ - from core.plugin.entities.request import InvokeCredentials - - credentials = InvokeCredentials() - - # generate credentials for tools selector - credentials.tool_credentials = {} - for tool in parameters.get("tools", []): - if tool.get("credential_id"): - try: - identity = ToolIdentity.model_validate(tool.get("identity", {})) - credentials.tool_credentials[identity.provider] = tool.get("credential_id", None) - except ValidationError: - continue - return credentials - - @classmethod - def _extract_variable_selector_to_variable_mapping( - cls, - *, - graph_config: Mapping[str, Any], - node_id: str, - node_data: AgentNodeData, - ) -> Mapping[str, Sequence[str]]: - _ = graph_config # Explicitly mark as unused - result: dict[str, Any] = {} - typed_node_data = node_data - for parameter_name in typed_node_data.agent_parameters: - input = typed_node_data.agent_parameters[parameter_name] - match input.type: - case "mixed" | "constant": - selectors = VariableTemplateParser(str(input.value)).extract_variable_selectors() - for selector in selectors: - result[selector.variable] = selector.value_selector - case "variable": - result[parameter_name] = input.value - - result = {node_id + "." + key: value for key, value in result.items()} - - return result - - @property - def agent_strategy_icon(self) -> str | None: - """ - Get agent strategy icon - :return: - """ - from core.plugin.impl.plugin import PluginInstaller - - manager = PluginInstaller() - dify_ctx = self.require_dify_context() - plugins = manager.list_plugins(dify_ctx.tenant_id) - try: - current_plugin = next( - plugin - for plugin in plugins - if f"{plugin.plugin_id}/{plugin.name}" == self.node_data.agent_strategy_provider_name - ) - icon = current_plugin.declaration.icon - except StopIteration: - icon = None - return icon - - def _fetch_memory(self, model_instance: ModelInstance) -> TokenBufferMemory | None: - # get conversation id - conversation_id_variable = self.graph_runtime_state.variable_pool.get( - ["sys", SystemVariableKey.CONVERSATION_ID] - ) - if not isinstance(conversation_id_variable, StringSegment): - return None - conversation_id = conversation_id_variable.value - - dify_ctx = self.require_dify_context() - with Session(db.engine, expire_on_commit=False) as session: - stmt = select(Conversation).where( - Conversation.app_id == dify_ctx.app_id, Conversation.id == conversation_id - ) - conversation = session.scalar(stmt) - - if not conversation: - return None - - memory = TokenBufferMemory(conversation=conversation, model_instance=model_instance) - - return memory - - def _fetch_model(self, value: dict[str, Any]) -> tuple[ModelInstance, AIModelEntity | None]: - dify_ctx = self.require_dify_context() - provider_manager = ProviderManager() - provider_model_bundle = provider_manager.get_provider_model_bundle( - tenant_id=dify_ctx.tenant_id, provider=value.get("provider", ""), model_type=ModelType.LLM - ) - model_name = value.get("model", "") - model_credentials = provider_model_bundle.configuration.get_current_credentials( - model_type=ModelType.LLM, model=model_name - ) - provider_name = provider_model_bundle.configuration.provider.provider - model_type_instance = provider_model_bundle.model_type_instance - model_instance = ModelManager().get_model_instance( - tenant_id=dify_ctx.tenant_id, - provider=provider_name, - model_type=ModelType(value.get("model_type", "")), - model=model_name, - ) - model_schema = model_type_instance.get_model_schema(model_name, model_credentials) - return model_instance, model_schema - - def 
_remove_unsupported_model_features_for_old_version(self, model_schema: AIModelEntity) -> AIModelEntity: - if model_schema.features: - for feature in model_schema.features[:]: # Create a copy to safely modify during iteration - try: - AgentOldVersionModelFeatures(feature.value) # Try to create enum member from value - except ValueError: - model_schema.features.remove(feature) - return model_schema - - def _filter_mcp_type_tool(self, strategy: PluginAgentStrategy, tools: list[dict[str, Any]]) -> list[dict[str, Any]]: - """ - Filter MCP type tool - :param strategy: plugin agent strategy - :param tool: tool - :return: filtered tool dict - """ - meta_version = strategy.meta_version - if meta_version and Version(meta_version) > Version("0.0.1"): - return tools - else: - return [tool for tool in tools if tool.get("type") != ToolProviderType.MCP] - - def _transform_message( - self, - messages: Generator[ToolInvokeMessage, None, None], - tool_info: Mapping[str, Any], - parameters_for_log: dict[str, Any], - user_id: str, - tenant_id: str, - node_type: NodeType, - node_id: str, - node_execution_id: str, - ) -> Generator[NodeEventBase, None, None]: - """ - Convert ToolInvokeMessages into tuple[plain_text, files] - """ - # transform message and handle file storage - from core.plugin.impl.plugin import PluginInstaller - - message_stream = ToolFileMessageTransformer.transform_tool_invoke_messages( - messages=messages, - user_id=user_id, - tenant_id=tenant_id, - conversation_id=None, - ) - - text = "" - files: list[File] = [] - json_list: list[dict | list] = [] - - agent_logs: list[AgentLogEvent] = [] - agent_execution_metadata: Mapping[WorkflowNodeExecutionMetadataKey, Any] = {} - llm_usage = LLMUsage.empty_usage() - variables: dict[str, Any] = {} - - for message in message_stream: - if message.type in { - ToolInvokeMessage.MessageType.IMAGE_LINK, - ToolInvokeMessage.MessageType.BINARY_LINK, - ToolInvokeMessage.MessageType.IMAGE, - }: - assert isinstance(message.message, ToolInvokeMessage.TextMessage) - - url = message.message.text - if message.meta: - transfer_method = message.meta.get("transfer_method", FileTransferMethod.TOOL_FILE) - else: - transfer_method = FileTransferMethod.TOOL_FILE - - tool_file_id = str(url).split("/")[-1].split(".")[0] - - with Session(db.engine) as session: - stmt = select(ToolFile).where(ToolFile.id == tool_file_id) - tool_file = session.scalar(stmt) - if tool_file is None: - raise ToolFileNotFoundError(tool_file_id) - - mapping = { - "tool_file_id": tool_file_id, - "type": file_factory.get_file_type_by_mime_type(tool_file.mimetype), - "transfer_method": transfer_method, - "url": url, - } - file = file_factory.build_from_mapping( - mapping=mapping, - tenant_id=tenant_id, - ) - files.append(file) - elif message.type == ToolInvokeMessage.MessageType.BLOB: - # get tool file id - assert isinstance(message.message, ToolInvokeMessage.TextMessage) - assert message.meta - - tool_file_id = message.message.text.split("/")[-1].split(".")[0] - with Session(db.engine) as session: - stmt = select(ToolFile).where(ToolFile.id == tool_file_id) - tool_file = session.scalar(stmt) - if tool_file is None: - raise ToolFileNotFoundError(tool_file_id) - - mapping = { - "tool_file_id": tool_file_id, - "transfer_method": FileTransferMethod.TOOL_FILE, - } - - files.append( - file_factory.build_from_mapping( - mapping=mapping, - tenant_id=tenant_id, - ) - ) - elif message.type == ToolInvokeMessage.MessageType.TEXT: - assert isinstance(message.message, ToolInvokeMessage.TextMessage) - text += 
message.message.text - yield StreamChunkEvent( - selector=[node_id, "text"], - chunk=message.message.text, - is_final=False, - ) - elif message.type == ToolInvokeMessage.MessageType.JSON: - assert isinstance(message.message, ToolInvokeMessage.JsonMessage) - if node_type == NodeType.AGENT: - if isinstance(message.message.json_object, dict): - msg_metadata: dict[str, Any] = message.message.json_object.pop("execution_metadata", {}) - llm_usage = LLMUsage.from_metadata(cast(LLMUsageMetadata, msg_metadata)) - agent_execution_metadata = { - WorkflowNodeExecutionMetadataKey(key): value - for key, value in msg_metadata.items() - if key in WorkflowNodeExecutionMetadataKey.__members__.values() - } - else: - msg_metadata = {} - llm_usage = LLMUsage.empty_usage() - agent_execution_metadata = {} - if message.message.json_object: - json_list.append(message.message.json_object) - elif message.type == ToolInvokeMessage.MessageType.LINK: - assert isinstance(message.message, ToolInvokeMessage.TextMessage) - stream_text = f"Link: {message.message.text}\n" - text += stream_text - yield StreamChunkEvent( - selector=[node_id, "text"], - chunk=stream_text, - is_final=False, - ) - elif message.type == ToolInvokeMessage.MessageType.VARIABLE: - assert isinstance(message.message, ToolInvokeMessage.VariableMessage) - variable_name = message.message.variable_name - variable_value = message.message.variable_value - if message.message.stream: - if not isinstance(variable_value, str): - raise AgentVariableTypeError( - "When 'stream' is True, 'variable_value' must be a string.", - variable_name=variable_name, - expected_type="str", - actual_type=type(variable_value).__name__, - ) - if variable_name not in variables: - variables[variable_name] = "" - variables[variable_name] += variable_value - - yield StreamChunkEvent( - selector=[node_id, variable_name], - chunk=variable_value, - is_final=False, - ) - else: - variables[variable_name] = variable_value - elif message.type == ToolInvokeMessage.MessageType.FILE: - assert message.meta is not None - assert isinstance(message.meta, dict) - # Validate that meta contains a 'file' key - if "file" not in message.meta: - raise AgentNodeError("File message is missing 'file' key in meta") - - # Validate that the file is an instance of File - if not isinstance(message.meta["file"], File): - raise AgentNodeError(f"Expected File object but got {type(message.meta['file']).__name__}") - files.append(message.meta["file"]) - elif message.type == ToolInvokeMessage.MessageType.LOG: - assert isinstance(message.message, ToolInvokeMessage.LogMessage) - if message.message.metadata: - icon = tool_info.get("icon", "") - dict_metadata = dict(message.message.metadata) - if dict_metadata.get("provider"): - manager = PluginInstaller() - plugins = manager.list_plugins(tenant_id) - try: - current_plugin = next( - plugin - for plugin in plugins - if f"{plugin.plugin_id}/{plugin.name}" == dict_metadata["provider"] - ) - icon = current_plugin.declaration.icon - except StopIteration: - pass - icon_dark = None - try: - builtin_tool = next( - provider - for provider in BuiltinToolManageService.list_builtin_tools( - user_id, - tenant_id, - ) - if provider.name == dict_metadata["provider"] - ) - icon = builtin_tool.icon - icon_dark = builtin_tool.icon_dark - except StopIteration: - pass - - dict_metadata["icon"] = icon - dict_metadata["icon_dark"] = icon_dark - message.message.metadata = dict_metadata - agent_log = AgentLogEvent( - message_id=message.message.id, - node_execution_id=node_execution_id, - 
parent_id=message.message.parent_id, - error=message.message.error, - status=message.message.status.value, - data=message.message.data, - label=message.message.label, - metadata=message.message.metadata, - node_id=node_id, - ) - - # check if the agent log is already in the list - for log in agent_logs: - if log.message_id == agent_log.message_id: - # update the log - log.data = agent_log.data - log.status = agent_log.status - log.error = agent_log.error - log.label = agent_log.label - log.metadata = agent_log.metadata - break - else: - agent_logs.append(agent_log) - - yield agent_log - - # Add agent_logs to outputs['json'] to ensure frontend can access thinking process - json_output: list[dict[str, Any] | list[Any]] = [] - - # Step 1: append each agent log as its own dict. - if agent_logs: - for log in agent_logs: - json_output.append( - { - "id": log.message_id, - "parent_id": log.parent_id, - "error": log.error, - "status": log.status, - "data": log.data, - "label": log.label, - "metadata": log.metadata, - "node_id": log.node_id, - } - ) - # Step 2: normalize JSON into {"data": [...]}.change json to list[dict] - if json_list: - json_output.extend(json_list) - else: - json_output.append({"data": []}) - - # Send final chunk events for all streamed outputs - # Final chunk for text stream - yield StreamChunkEvent( - selector=[node_id, "text"], - chunk="", - is_final=True, - ) - - # Final chunks for any streamed variables - for var_name in variables: - yield StreamChunkEvent( - selector=[node_id, var_name], - chunk="", - is_final=True, - ) - - yield StreamCompletedEvent( - node_run_result=NodeRunResult( - status=WorkflowNodeExecutionStatus.SUCCEEDED, - outputs={ - "text": text, - "usage": jsonable_encoder(llm_usage), - "files": ArrayFileSegment(value=files), - "json": json_output, - **variables, - }, - metadata={ - **agent_execution_metadata, - WorkflowNodeExecutionMetadataKey.TOOL_INFO: tool_info, - WorkflowNodeExecutionMetadataKey.AGENT_LOG: agent_logs, - }, - inputs=parameters_for_log, - llm_usage=llm_usage, - ) - ) diff --git a/api/dify_graph/nodes/answer/answer_node.py b/api/dify_graph/nodes/answer/answer_node.py index c829b892cc..4286e1a492 100644 --- a/api/dify_graph/nodes/answer/answer_node.py +++ b/api/dify_graph/nodes/answer/answer_node.py @@ -1,7 +1,7 @@ from collections.abc import Mapping, Sequence from typing import Any -from dify_graph.enums import NodeExecutionType, NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult from dify_graph.nodes.answer.entities import AnswerNodeData from dify_graph.nodes.base.node import Node @@ -11,7 +11,7 @@ from dify_graph.variables import ArrayFileSegment, FileSegment, Segment class AnswerNode(Node[AnswerNodeData]): - node_type = NodeType.ANSWER + node_type = BuiltinNodeTypes.ANSWER execution_type = NodeExecutionType.RESPONSE @classmethod diff --git a/api/dify_graph/nodes/answer/entities.py b/api/dify_graph/nodes/answer/entities.py index 3cc1d6572e..cd82df1ac4 100644 --- a/api/dify_graph/nodes/answer/entities.py +++ b/api/dify_graph/nodes/answer/entities.py @@ -4,7 +4,7 @@ from enum import StrEnum, auto from pydantic import BaseModel, Field from dify_graph.entities.base_node_data import BaseNodeData -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType class AnswerNodeData(BaseNodeData): @@ -12,7 +12,7 @@ class AnswerNodeData(BaseNodeData): Answer Node Data. 
""" - type: NodeType = NodeType.ANSWER + type: NodeType = BuiltinNodeTypes.ANSWER answer: str = Field(..., description="answer template string") diff --git a/api/dify_graph/nodes/base/node.py b/api/dify_graph/nodes/base/node.py index d7702c8cb4..c6f54ce672 100644 --- a/api/dify_graph/nodes/base/node.py +++ b/api/dify_graph/nodes/base/node.py @@ -1,9 +1,7 @@ from __future__ import annotations -import importlib import logging import operator -import pkgutil from abc import abstractmethod from collections.abc import Generator, Mapping, Sequence from functools import singledispatchmethod @@ -11,7 +9,7 @@ from types import MappingProxyType from typing import Any, ClassVar, Generic, Protocol, TypeVar, cast, get_args, get_origin from uuid import uuid4 -from dify_graph.entities import AgentNodeStrategyInit, GraphInitParams +from dify_graph.entities import GraphInitParams from dify_graph.entities.base_node_data import BaseNodeData, RetryConfig from dify_graph.entities.graph_config import NodeConfigDict from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY @@ -161,7 +159,7 @@ class Node(Generic[NodeDataT]): Example: class CodeNode(Node[CodeNodeData]): # CodeNodeData is auto-extracted - node_type = NodeType.CODE + node_type = BuiltinNodeTypes.CODE # No need to implement _get_title, _get_error_strategy, etc. """ super().__init_subclass__(**kwargs) @@ -179,7 +177,8 @@ class Node(Generic[NodeDataT]): # Skip base class itself if cls is Node: return - # Only register production node implementations defined under dify_graph.nodes.* + # Only register production node implementations defined under the + # canonical workflow namespaces. # This prevents test helper subclasses from polluting the global registry and # accidentally overriding real node types (e.g., a test Answer node). 
module_name = getattr(cls, "__module__", "") @@ -187,7 +186,7 @@ class Node(Generic[NodeDataT]): node_type = cls.node_type version = cls.version() bucket = Node._registry.setdefault(node_type, {}) - if module_name.startswith("dify_graph.nodes."): + if module_name.startswith(("dify_graph.nodes.", "core.workflow.nodes.")): # Production node definitions take precedence and may override bucket[version] = cls # type: ignore[index] else: @@ -203,6 +202,7 @@ class Node(Generic[NodeDataT]): else: latest_key = max(version_keys) if version_keys else version bucket["latest"] = bucket[latest_key] + Node._registry_version += 1 @classmethod def _extract_node_data_type_from_generic(cls) -> type[BaseNodeData] | None: @@ -237,6 +237,11 @@ class Node(Generic[NodeDataT]): # Global registry populated via __init_subclass__ _registry: ClassVar[dict[NodeType, dict[str, type[Node]]]] = {} + _registry_version: ClassVar[int] = 0 + + @classmethod + def get_registry_version(cls) -> int: + return cls._registry_version def __init__( self, @@ -269,6 +274,10 @@ class Node(Generic[NodeDataT]): """Validate shared graph node payloads against the subclass-declared NodeData model.""" return cast(NodeDataT, cls._node_data_type.model_validate(node_data, from_attributes=True)) + def init_node_data(self, data: BaseNodeData | Mapping[str, Any]) -> None: + """Hydrate `_node_data` for legacy callers that bypass `__init__`.""" + self._node_data = self.validate_node_data(cast(BaseNodeData, data)) + def post_init(self) -> None: """Optional hook for subclasses requiring extra initialization.""" return @@ -349,6 +358,10 @@ class Node(Generic[NodeDataT]): """ raise NotImplementedError + def populate_start_event(self, event: NodeRunStartedEvent) -> None: + """Allow subclasses to enrich the started event without cross-node imports in the base class.""" + _ = event + def run(self) -> Generator[GraphNodeEventBase, None, None]: execution_id = self.ensure_execution_id() self._start_at = naive_utc_now() @@ -362,39 +375,10 @@ class Node(Generic[NodeDataT]): in_iteration_id=None, start_at=self._start_at, ) - - # === FIXME(-LAN-): Needs to refactor. 
- from dify_graph.nodes.tool.tool_node import ToolNode - - if isinstance(self, ToolNode): - start_event.provider_id = getattr(self.node_data, "provider_id", "") - start_event.provider_type = getattr(self.node_data, "provider_type", "") - - from dify_graph.nodes.datasource.datasource_node import DatasourceNode - - if isinstance(self, DatasourceNode): - plugin_id = getattr(self.node_data, "plugin_id", "") - provider_name = getattr(self.node_data, "provider_name", "") - - start_event.provider_id = f"{plugin_id}/{provider_name}" - start_event.provider_type = getattr(self.node_data, "provider_type", "") - - from dify_graph.nodes.trigger_plugin.trigger_event_node import TriggerEventNode - - if isinstance(self, TriggerEventNode): - start_event.provider_id = getattr(self.node_data, "provider_id", "") - start_event.provider_type = getattr(self.node_data, "provider_type", "") - - from dify_graph.nodes.agent.agent_node import AgentNode - from dify_graph.nodes.agent.entities import AgentNodeData - - if isinstance(self, AgentNode): - start_event.agent_strategy = AgentNodeStrategyInit( - name=cast(AgentNodeData, self.node_data).agent_strategy_name, - icon=self.agent_strategy_icon, - ) - - # === + try: + self.populate_start_event(start_event) + except Exception: + logger.warning("Failed to populate start event for node %s", self._node_id, exc_info=True) yield start_event try: @@ -513,30 +497,20 @@ class Node(Generic[NodeDataT]): @abstractmethod def version(cls) -> str: """`node_version` returns the version of current node type.""" - # NOTE(QuantumGhost): This should be in sync with `NODE_TYPE_CLASSES_MAPPING`. - # - # If you have introduced a new node type, please add it to `NODE_TYPE_CLASSES_MAPPING` - # in `api/dify_graph/nodes/__init__.py`. + # NOTE(QuantumGhost): Node versions must remain unique per `NodeType` so + # registry lookups can resolve numeric versions and `latest`. raise NotImplementedError("subclasses of BaseNode must implement `version` method.") @classmethod def get_node_type_classes_mapping(cls) -> Mapping[NodeType, Mapping[str, type[Node]]]: - """Return mapping of NodeType -> {version -> Node subclass} using __init_subclass__ registry. + """Return a read-only view of the currently registered node classes. - Import all modules under dify_graph.nodes so subclasses register themselves on import. - Then we return a readonly view of the registry to avoid accidental mutation. + This accessor intentionally performs no imports. The embedding layer that + owns bootstrap (for example `core.workflow.node_factory`) must import any + extension node packages before calling it so their subclasses register via + `__init_subclass__`. """ - # Import all node modules to ensure they are loaded (thus registered) - import dify_graph.nodes as _nodes_pkg - - for _, _modname, _ in pkgutil.walk_packages(_nodes_pkg.__path__, _nodes_pkg.__name__ + "."): - # Avoid importing modules that depend on the registry to prevent circular imports. 
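
A minimal sketch of how a subclass can use the new `populate_start_event` hook.
The override below mirrors the ToolNode special-casing deleted from
`Node.run()` above; writing it as an override (and the `ToolNodeData` name) is
an assumption, not code shown in this diff:

    class ToolNode(Node[ToolNodeData]):
        def populate_start_event(self, event: NodeRunStartedEvent) -> None:
            # Enrich the started event without isinstance checks in the base class
            event.provider_id = getattr(self.node_data, "provider_id", "")
            event.provider_type = getattr(self.node_data, "provider_type", "")
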
- if _modname == "dify_graph.nodes.node_mapping": - continue - importlib.import_module(_modname) - - # Return a readonly view so callers can't mutate the registry by accident - return {nt: MappingProxyType(ver_map) for nt, ver_map in cls._registry.items()} + return {node_type: MappingProxyType(version_map) for node_type, version_map in cls._registry.items()} @property def retry(self) -> bool: @@ -811,11 +785,16 @@ class Node(Generic[NodeDataT]): @_dispatch.register def _(self, event: RunRetrieverResourceEvent) -> NodeRunRetrieverResourceEvent: + from core.rag.entities.citation_metadata import RetrievalSourceMetadata + + retriever_resources = [ + RetrievalSourceMetadata.model_validate(resource) for resource in event.retriever_resources + ] return NodeRunRetrieverResourceEvent( id=self.execution_id, node_id=self._node_id, node_type=self.node_type, - retriever_resources=event.retriever_resources, + retriever_resources=retriever_resources, context=event.context, node_version=self.version(), ) diff --git a/api/dify_graph/nodes/code/code_node.py b/api/dify_graph/nodes/code/code_node.py index ac8d6463b9..82d5fced62 100644 --- a/api/dify_graph/nodes/code/code_node.py +++ b/api/dify_graph/nodes/code/code_node.py @@ -4,7 +4,7 @@ from textwrap import dedent from typing import TYPE_CHECKING, Any, Protocol, cast from dify_graph.entities.graph_config import NodeConfigDict -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node from dify_graph.nodes.code.entities import CodeLanguage, CodeNodeData @@ -72,7 +72,7 @@ _DEFAULT_CODE_BY_LANGUAGE: Mapping[CodeLanguage, str] = { class CodeNode(Node[CodeNodeData]): - node_type = NodeType.CODE + node_type = BuiltinNodeTypes.CODE _limits: CodeNodeLimits def __init__( diff --git a/api/dify_graph/nodes/code/entities.py b/api/dify_graph/nodes/code/entities.py index 25e46226e1..55b4ee4862 100644 --- a/api/dify_graph/nodes/code/entities.py +++ b/api/dify_graph/nodes/code/entities.py @@ -4,7 +4,7 @@ from typing import Annotated, Literal from pydantic import AfterValidator, BaseModel from dify_graph.entities.base_node_data import BaseNodeData -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.nodes.base.entities import VariableSelector from dify_graph.variables.types import SegmentType @@ -40,7 +40,7 @@ class CodeNodeData(BaseNodeData): Code Node Data. 
""" - type: NodeType = NodeType.CODE + type: NodeType = BuiltinNodeTypes.CODE class Output(BaseModel): type: Annotated[SegmentType, AfterValidator(_validate_type)] diff --git a/api/dify_graph/nodes/datasource/__init__.py b/api/dify_graph/nodes/datasource/__init__.py deleted file mode 100644 index f6ec44cb77..0000000000 --- a/api/dify_graph/nodes/datasource/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .datasource_node import DatasourceNode - -__all__ = ["DatasourceNode"] diff --git a/api/dify_graph/nodes/document_extractor/entities.py b/api/dify_graph/nodes/document_extractor/entities.py index 9f42d2e605..1110cc2710 100644 --- a/api/dify_graph/nodes/document_extractor/entities.py +++ b/api/dify_graph/nodes/document_extractor/entities.py @@ -2,11 +2,11 @@ from collections.abc import Sequence from dataclasses import dataclass from dify_graph.entities.base_node_data import BaseNodeData -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType class DocumentExtractorNodeData(BaseNodeData): - type: NodeType = NodeType.DOCUMENT_EXTRACTOR + type: NodeType = BuiltinNodeTypes.DOCUMENT_EXTRACTOR variable_selector: Sequence[str] diff --git a/api/dify_graph/nodes/document_extractor/node.py b/api/dify_graph/nodes/document_extractor/node.py index fe51b1963e..27196f1aca 100644 --- a/api/dify_graph/nodes/document_extractor/node.py +++ b/api/dify_graph/nodes/document_extractor/node.py @@ -22,7 +22,7 @@ from docx.table import Table from docx.text.paragraph import Paragraph from dify_graph.entities.graph_config import NodeConfigDict -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.file import File, FileTransferMethod, file_manager from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node @@ -46,7 +46,7 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]): Supports plain text, PDF, and DOC/DOCX files. """ - node_type = NodeType.DOCUMENT_EXTRACTOR + node_type = BuiltinNodeTypes.DOCUMENT_EXTRACTOR @classmethod def version(cls) -> str: diff --git a/api/dify_graph/nodes/end/end_node.py b/api/dify_graph/nodes/end/end_node.py index 7aa526b85b..1f5cfab22b 100644 --- a/api/dify_graph/nodes/end/end_node.py +++ b/api/dify_graph/nodes/end/end_node.py @@ -1,4 +1,4 @@ -from dify_graph.enums import NodeExecutionType, NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node from dify_graph.nodes.base.template import Template @@ -6,7 +6,7 @@ from dify_graph.nodes.end.entities import EndNodeData class EndNode(Node[EndNodeData]): - node_type = NodeType.END + node_type = BuiltinNodeTypes.END execution_type = NodeExecutionType.RESPONSE @classmethod diff --git a/api/dify_graph/nodes/end/entities.py b/api/dify_graph/nodes/end/entities.py index 69cd1dd8f5..be7f0c8de8 100644 --- a/api/dify_graph/nodes/end/entities.py +++ b/api/dify_graph/nodes/end/entities.py @@ -1,7 +1,7 @@ from pydantic import BaseModel, Field from dify_graph.entities.base_node_data import BaseNodeData -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.nodes.base.entities import OutputVariableEntity @@ -10,7 +10,7 @@ class EndNodeData(BaseNodeData): END Node Data. 
""" - type: NodeType = NodeType.END + type: NodeType = BuiltinNodeTypes.END outputs: list[OutputVariableEntity] diff --git a/api/dify_graph/nodes/http_request/entities.py b/api/dify_graph/nodes/http_request/entities.py index 46e08ea1a0..f594d58ae6 100644 --- a/api/dify_graph/nodes/http_request/entities.py +++ b/api/dify_graph/nodes/http_request/entities.py @@ -9,7 +9,7 @@ import httpx from pydantic import BaseModel, Field, ValidationInfo, field_validator from dify_graph.entities.base_node_data import BaseNodeData -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType HTTP_REQUEST_CONFIG_FILTER_KEY = "http_request_config" @@ -90,7 +90,7 @@ class HttpRequestNodeData(BaseNodeData): Code Node Data. """ - type: NodeType = NodeType.HTTP_REQUEST + type: NodeType = BuiltinNodeTypes.HTTP_REQUEST method: Literal[ "get", "post", diff --git a/api/dify_graph/nodes/http_request/node.py b/api/dify_graph/nodes/http_request/node.py index 3895ae92c0..b17c820a80 100644 --- a/api/dify_graph/nodes/http_request/node.py +++ b/api/dify_graph/nodes/http_request/node.py @@ -4,7 +4,7 @@ from collections.abc import Callable, Mapping, Sequence from typing import TYPE_CHECKING, Any from dify_graph.entities.graph_config import NodeConfigDict -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.file import File, FileTransferMethod from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base import variable_template_parser @@ -33,7 +33,7 @@ if TYPE_CHECKING: class HttpRequestNode(Node[HttpRequestNodeData]): - node_type = NodeType.HTTP_REQUEST + node_type = BuiltinNodeTypes.HTTP_REQUEST def __init__( self, diff --git a/api/dify_graph/nodes/human_input/entities.py b/api/dify_graph/nodes/human_input/entities.py index 642c2143e5..7936e47213 100644 --- a/api/dify_graph/nodes/human_input/entities.py +++ b/api/dify_graph/nodes/human_input/entities.py @@ -11,7 +11,7 @@ from typing import Annotated, Any, ClassVar, Literal, Self from pydantic import BaseModel, Field, field_validator, model_validator from dify_graph.entities.base_node_data import BaseNodeData -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.nodes.base.variable_template_parser import VariableTemplateParser from dify_graph.runtime import VariablePool from dify_graph.variables.consts import SELECTORS_LENGTH @@ -215,7 +215,7 @@ class UserAction(BaseModel): class HumanInputNodeData(BaseNodeData): """Human Input node data.""" - type: NodeType = NodeType.HUMAN_INPUT + type: NodeType = BuiltinNodeTypes.HUMAN_INPUT delivery_methods: list[DeliveryChannelConfig] = Field(default_factory=list) form_content: str = "" inputs: list[FormInput] = Field(default_factory=list) diff --git a/api/dify_graph/nodes/human_input/human_input_node.py b/api/dify_graph/nodes/human_input/human_input_node.py index 3a167d122b..794e33d92e 100644 --- a/api/dify_graph/nodes/human_input/human_input_node.py +++ b/api/dify_graph/nodes/human_input/human_input_node.py @@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Any from dify_graph.entities.graph_config import NodeConfigDict from dify_graph.entities.pause_reason import HumanInputRequired -from dify_graph.enums import NodeExecutionType, NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, WorkflowNodeExecutionStatus from dify_graph.node_events import ( HumanInputFormFilledEvent, 
HumanInputFormTimeoutEvent, @@ -40,7 +40,7 @@ logger = logging.getLogger(__name__) class HumanInputNode(Node[HumanInputNodeData]): - node_type = NodeType.HUMAN_INPUT + node_type = BuiltinNodeTypes.HUMAN_INPUT execution_type = NodeExecutionType.BRANCH _BRANCH_SELECTION_KEYS: tuple[str, ...] = ( diff --git a/api/dify_graph/nodes/if_else/entities.py b/api/dify_graph/nodes/if_else/entities.py index c9bb1cdc7f..ff09f3c023 100644 --- a/api/dify_graph/nodes/if_else/entities.py +++ b/api/dify_graph/nodes/if_else/entities.py @@ -3,7 +3,7 @@ from typing import Literal from pydantic import BaseModel, Field from dify_graph.entities.base_node_data import BaseNodeData -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.utils.condition.entities import Condition @@ -12,7 +12,7 @@ class IfElseNodeData(BaseNodeData): If Else Node Data. """ - type: NodeType = NodeType.IF_ELSE + type: NodeType = BuiltinNodeTypes.IF_ELSE class Case(BaseModel): """ diff --git a/api/dify_graph/nodes/if_else/if_else_node.py b/api/dify_graph/nodes/if_else/if_else_node.py index 4b6d30c279..7c0370e48c 100644 --- a/api/dify_graph/nodes/if_else/if_else_node.py +++ b/api/dify_graph/nodes/if_else/if_else_node.py @@ -3,7 +3,7 @@ from typing import Any, Literal from typing_extensions import deprecated -from dify_graph.enums import NodeExecutionType, NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node from dify_graph.nodes.if_else.entities import IfElseNodeData @@ -13,7 +13,7 @@ from dify_graph.utils.condition.processor import ConditionProcessor class IfElseNode(Node[IfElseNodeData]): - node_type = NodeType.IF_ELSE + node_type = BuiltinNodeTypes.IF_ELSE execution_type = NodeExecutionType.BRANCH @classmethod diff --git a/api/dify_graph/nodes/iteration/entities.py b/api/dify_graph/nodes/iteration/entities.py index 6d61c12352..58fd112b12 100644 --- a/api/dify_graph/nodes/iteration/entities.py +++ b/api/dify_graph/nodes/iteration/entities.py @@ -4,7 +4,7 @@ from typing import Any from pydantic import Field from dify_graph.entities.base_node_data import BaseNodeData -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.nodes.base import BaseIterationNodeData, BaseIterationState @@ -19,7 +19,7 @@ class IterationNodeData(BaseIterationNodeData): Iteration Node Data. """ - type: NodeType = NodeType.ITERATION + type: NodeType = BuiltinNodeTypes.ITERATION parent_loop_id: str | None = None # redundant field, not used currently iterator_selector: list[str] # variable selector output_selector: list[str] # output selector @@ -34,7 +34,7 @@ class IterationStartNodeData(BaseNodeData): Iteration Start Node Data. 
""" - type: NodeType = NodeType.ITERATION_START + type: NodeType = BuiltinNodeTypes.ITERATION_START class IterationState(BaseIterationState): diff --git a/api/dify_graph/nodes/iteration/iteration_node.py b/api/dify_graph/nodes/iteration/iteration_node.py index bc36f635e9..f63ba0bc48 100644 --- a/api/dify_graph/nodes/iteration/iteration_node.py +++ b/api/dify_graph/nodes/iteration/iteration_node.py @@ -9,8 +9,8 @@ from typing_extensions import TypeIs from dify_graph.constants import CONVERSATION_VARIABLE_NODE_ID from dify_graph.entities.graph_config import NodeConfigDictAdapter from dify_graph.enums import ( + BuiltinNodeTypes, NodeExecutionType, - NodeType, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus, ) @@ -62,7 +62,7 @@ class IterationNode(LLMUsageTrackingMixin, Node[IterationNodeData]): Iteration Node. """ - node_type = NodeType.ITERATION + node_type = BuiltinNodeTypes.ITERATION execution_type = NodeExecutionType.CONTAINER @classmethod @@ -485,15 +485,13 @@ class IterationNode(LLMUsageTrackingMixin, Node[IterationNodeData]): # variable selector to variable mapping try: - # Get node class - from dify_graph.nodes.node_mapping import NODE_TYPE_CLASSES_MAPPING - typed_sub_node_config = NodeConfigDictAdapter.validate_python(sub_node_config) node_type = typed_sub_node_config["data"].type - if node_type not in NODE_TYPE_CLASSES_MAPPING: + node_mapping = Node.get_node_type_classes_mapping() + if node_type not in node_mapping: continue node_version = str(typed_sub_node_config["data"].version) - node_cls = NODE_TYPE_CLASSES_MAPPING[node_type][node_version] + node_cls = node_mapping[node_type][node_version] sub_node_variable_mapping = node_cls.extract_variable_selector_to_variable_mapping( graph_config=graph_config, config=typed_sub_node_config @@ -562,7 +560,7 @@ class IterationNode(LLMUsageTrackingMixin, Node[IterationNodeData]): raise IterationIndexNotFoundError(f"iteration {self._node_id} current index not found") current_index = index_variable.value for event in rst: - if isinstance(event, GraphNodeEventBase) and event.node_type == NodeType.ITERATION_START: + if isinstance(event, GraphNodeEventBase) and event.node_type == BuiltinNodeTypes.ITERATION_START: continue if isinstance(event, GraphNodeEventBase): diff --git a/api/dify_graph/nodes/iteration/iteration_start_node.py b/api/dify_graph/nodes/iteration/iteration_start_node.py index 2e1f555ed2..a8ecf3d83b 100644 --- a/api/dify_graph/nodes/iteration/iteration_start_node.py +++ b/api/dify_graph/nodes/iteration/iteration_start_node.py @@ -1,4 +1,4 @@ -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node from dify_graph.nodes.iteration.entities import IterationStartNodeData @@ -9,7 +9,7 @@ class IterationStartNode(Node[IterationStartNodeData]): Iteration Start Node. 
""" - node_type = NodeType.ITERATION_START + node_type = BuiltinNodeTypes.ITERATION_START @classmethod def version(cls) -> str: diff --git a/api/dify_graph/nodes/knowledge_index/__init__.py b/api/dify_graph/nodes/knowledge_index/__init__.py deleted file mode 100644 index 23897a1e42..0000000000 --- a/api/dify_graph/nodes/knowledge_index/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .knowledge_index_node import KnowledgeIndexNode - -__all__ = ["KnowledgeIndexNode"] diff --git a/api/dify_graph/nodes/knowledge_retrieval/__init__.py b/api/dify_graph/nodes/knowledge_retrieval/__init__.py deleted file mode 100644 index 4d4a4cbd9f..0000000000 --- a/api/dify_graph/nodes/knowledge_retrieval/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .knowledge_retrieval_node import KnowledgeRetrievalNode - -__all__ = ["KnowledgeRetrievalNode"] diff --git a/api/dify_graph/nodes/list_operator/entities.py b/api/dify_graph/nodes/list_operator/entities.py index a91cfab8de..41b3a40b78 100644 --- a/api/dify_graph/nodes/list_operator/entities.py +++ b/api/dify_graph/nodes/list_operator/entities.py @@ -4,7 +4,7 @@ from enum import StrEnum from pydantic import BaseModel, Field from dify_graph.entities.base_node_data import BaseNodeData -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType class FilterOperator(StrEnum): @@ -63,7 +63,7 @@ class ExtractConfig(BaseModel): class ListOperatorNodeData(BaseNodeData): - type: NodeType = NodeType.LIST_OPERATOR + type: NodeType = BuiltinNodeTypes.LIST_OPERATOR variable: Sequence[str] = Field(default_factory=list) filter_by: FilterBy order_by: OrderByConfig diff --git a/api/dify_graph/nodes/list_operator/node.py b/api/dify_graph/nodes/list_operator/node.py index d2fdadc29c..dc8b8904f7 100644 --- a/api/dify_graph/nodes/list_operator/node.py +++ b/api/dify_graph/nodes/list_operator/node.py @@ -1,7 +1,7 @@ from collections.abc import Callable, Sequence from typing import Any, TypeAlias, TypeVar -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.file import File from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node @@ -35,7 +35,7 @@ def _negation(filter_: Callable[[_T], bool]) -> Callable[[_T], bool]: class ListOperatorNode(Node[ListOperatorNodeData]): - node_type = NodeType.LIST_OPERATOR + node_type = BuiltinNodeTypes.LIST_OPERATOR @classmethod def version(cls) -> str: diff --git a/api/dify_graph/nodes/llm/entities.py b/api/dify_graph/nodes/llm/entities.py index 71728aa227..6ca01a21da 100644 --- a/api/dify_graph/nodes/llm/entities.py +++ b/api/dify_graph/nodes/llm/entities.py @@ -5,7 +5,7 @@ from pydantic import BaseModel, Field, field_validator from core.prompt.entities.advanced_prompt_entities import ChatModelMessage, CompletionModelPromptTemplate, MemoryConfig from dify_graph.entities.base_node_data import BaseNodeData -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.model_runtime.entities import ImagePromptMessageContent, LLMMode from dify_graph.nodes.base.entities import VariableSelector @@ -60,7 +60,7 @@ class LLMNodeCompletionModelPromptTemplate(CompletionModelPromptTemplate): class LLMNodeData(BaseNodeData): - type: NodeType = NodeType.LLM + type: NodeType = BuiltinNodeTypes.LLM model: ModelConfig prompt_template: Sequence[LLMNodeChatModelMessage] | LLMNodeCompletionModelPromptTemplate prompt_config: PromptConfig = 
Field(default_factory=PromptConfig) diff --git a/api/dify_graph/nodes/llm/llm_utils.py b/api/dify_graph/nodes/llm/llm_utils.py index ca478a09f8..073dce232f 100644 --- a/api/dify_graph/nodes/llm/llm_utils.py +++ b/api/dify_graph/nodes/llm/llm_utils.py @@ -1,34 +1,53 @@ +from __future__ import annotations + from collections.abc import Sequence -from typing import cast +from typing import Any, cast from core.model_manager import ModelInstance +from dify_graph.file import FileType, file_manager from dify_graph.file.models import File -from dify_graph.model_runtime.entities import PromptMessageRole -from dify_graph.model_runtime.entities.message_entities import ( +from dify_graph.model_runtime.entities import ( ImagePromptMessageContent, PromptMessage, + PromptMessageContentType, + PromptMessageRole, TextPromptMessageContent, ) -from dify_graph.model_runtime.entities.model_entities import AIModelEntity +from dify_graph.model_runtime.entities.message_entities import ( + AssistantPromptMessage, + PromptMessageContentUnionTypes, + SystemPromptMessage, + UserPromptMessage, +) +from dify_graph.model_runtime.entities.model_entities import AIModelEntity, ModelFeature, ModelPropertyKey from dify_graph.model_runtime.memory import PromptMessageMemory from dify_graph.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel +from dify_graph.nodes.base.entities import VariableSelector from dify_graph.runtime import VariablePool -from dify_graph.variables.segments import ArrayAnySegment, ArrayFileSegment, FileSegment, NoneSegment +from dify_graph.variables import ArrayFileSegment, FileSegment +from dify_graph.variables.segments import ArrayAnySegment, NoneSegment -from .exc import InvalidVariableTypeError +from .entities import LLMNodeChatModelMessage, LLMNodeCompletionModelPromptTemplate, MemoryConfig +from .exc import ( + InvalidVariableTypeError, + MemoryRolePrefixRequiredError, + NoPromptFoundError, + TemplateTypeNotSupportError, +) +from .protocols import TemplateRenderer def fetch_model_schema(*, model_instance: ModelInstance) -> AIModelEntity: model_schema = cast(LargeLanguageModel, model_instance.model_type_instance).get_model_schema( model_instance.model_name, - model_instance.credentials, + dict(model_instance.credentials), ) if not model_schema: raise ValueError(f"Model schema not found for {model_instance.model_name}") return model_schema -def fetch_files(variable_pool: VariablePool, selector: Sequence[str]) -> Sequence["File"]: +def fetch_files(variable_pool: VariablePool, selector: Sequence[str]) -> Sequence[File]: variable = variable_pool.get(selector) if variable is None: return [] @@ -89,3 +108,366 @@ def fetch_memory_text( human_prefix=human_prefix, ai_prefix=ai_prefix, ) + + +def fetch_prompt_messages( + *, + sys_query: str | None = None, + sys_files: Sequence[File], + context: str | None = None, + memory: PromptMessageMemory | None = None, + model_instance: ModelInstance, + prompt_template: Sequence[LLMNodeChatModelMessage] | LLMNodeCompletionModelPromptTemplate, + stop: Sequence[str] | None = None, + memory_config: MemoryConfig | None = None, + vision_enabled: bool = False, + vision_detail: ImagePromptMessageContent.DETAIL, + variable_pool: VariablePool, + jinja2_variables: Sequence[VariableSelector], + context_files: list[File] | None = None, + template_renderer: TemplateRenderer | None = None, +) -> tuple[Sequence[PromptMessage], Sequence[str] | None]: + prompt_messages: list[PromptMessage] = [] + model_schema = 
fetch_model_schema(model_instance=model_instance) + + if isinstance(prompt_template, list): + prompt_messages.extend( + handle_list_messages( + messages=prompt_template, + context=context, + jinja2_variables=jinja2_variables, + variable_pool=variable_pool, + vision_detail_config=vision_detail, + template_renderer=template_renderer, + ) + ) + + prompt_messages.extend( + handle_memory_chat_mode( + memory=memory, + memory_config=memory_config, + model_instance=model_instance, + ) + ) + + if sys_query: + prompt_messages.extend( + handle_list_messages( + messages=[ + LLMNodeChatModelMessage( + text=sys_query, + role=PromptMessageRole.USER, + edition_type="basic", + ) + ], + context="", + jinja2_variables=[], + variable_pool=variable_pool, + vision_detail_config=vision_detail, + template_renderer=template_renderer, + ) + ) + elif isinstance(prompt_template, LLMNodeCompletionModelPromptTemplate): + prompt_messages.extend( + handle_completion_template( + template=prompt_template, + context=context, + jinja2_variables=jinja2_variables, + variable_pool=variable_pool, + template_renderer=template_renderer, + ) + ) + + memory_text = handle_memory_completion_mode( + memory=memory, + memory_config=memory_config, + model_instance=model_instance, + ) + prompt_content = prompt_messages[0].content + if isinstance(prompt_content, str): + prompt_content = str(prompt_content) + if "#histories#" in prompt_content: + prompt_content = prompt_content.replace("#histories#", memory_text) + else: + prompt_content = memory_text + "\n" + prompt_content + prompt_messages[0].content = prompt_content + elif isinstance(prompt_content, list): + for content_item in prompt_content: + if isinstance(content_item, TextPromptMessageContent): + if "#histories#" in content_item.data: + content_item.data = content_item.data.replace("#histories#", memory_text) + else: + content_item.data = memory_text + "\n" + content_item.data + else: + raise ValueError("Invalid prompt content type") + + if sys_query: + if isinstance(prompt_content, str): + prompt_messages[0].content = str(prompt_messages[0].content).replace("#sys.query#", sys_query) + elif isinstance(prompt_content, list): + for content_item in prompt_content: + if isinstance(content_item, TextPromptMessageContent): + content_item.data = sys_query + "\n" + content_item.data + else: + raise ValueError("Invalid prompt content type") + else: + raise TemplateTypeNotSupportError(type_name=str(type(prompt_template))) + + _append_file_prompts( + prompt_messages=prompt_messages, + files=sys_files, + vision_enabled=vision_enabled, + vision_detail=vision_detail, + ) + _append_file_prompts( + prompt_messages=prompt_messages, + files=context_files or [], + vision_enabled=vision_enabled, + vision_detail=vision_detail, + ) + + filtered_prompt_messages: list[PromptMessage] = [] + for prompt_message in prompt_messages: + if isinstance(prompt_message.content, list): + prompt_message_content: list[PromptMessageContentUnionTypes] = [] + for content_item in prompt_message.content: + if not model_schema.features: + if content_item.type == PromptMessageContentType.TEXT: + prompt_message_content.append(content_item) + continue + + if ( + ( + content_item.type == PromptMessageContentType.IMAGE + and ModelFeature.VISION not in model_schema.features + ) + or ( + content_item.type == PromptMessageContentType.DOCUMENT + and ModelFeature.DOCUMENT not in model_schema.features + ) + or ( + content_item.type == PromptMessageContentType.VIDEO + and ModelFeature.VIDEO not in model_schema.features + ) + or ( + 
content_item.type == PromptMessageContentType.AUDIO + and ModelFeature.AUDIO not in model_schema.features + ) + ): + continue + prompt_message_content.append(content_item) + if prompt_message_content: + prompt_message.content = prompt_message_content + filtered_prompt_messages.append(prompt_message) + elif not prompt_message.is_empty(): + filtered_prompt_messages.append(prompt_message) + + if len(filtered_prompt_messages) == 0: + raise NoPromptFoundError( + "No prompt found in the LLM configuration. Please ensure a prompt is properly configured before proceeding." + ) + + return filtered_prompt_messages, stop + + +def handle_list_messages( + *, + messages: Sequence[LLMNodeChatModelMessage], + context: str | None, + jinja2_variables: Sequence[VariableSelector], + variable_pool: VariablePool, + vision_detail_config: ImagePromptMessageContent.DETAIL, + template_renderer: TemplateRenderer | None = None, +) -> Sequence[PromptMessage]: + prompt_messages: list[PromptMessage] = [] + for message in messages: + if message.edition_type == "jinja2": + result_text = render_jinja2_message( + template=message.jinja2_text or "", + jinja2_variables=jinja2_variables, + variable_pool=variable_pool, + template_renderer=template_renderer, + ) + prompt_messages.append( + combine_message_content_with_role( + contents=[TextPromptMessageContent(data=result_text)], + role=message.role, + ) + ) + continue + + template = message.text.replace("{#context#}", context) if context else message.text + segment_group = variable_pool.convert_template(template) + file_contents: list[PromptMessageContentUnionTypes] = [] + for segment in segment_group.value: + if isinstance(segment, ArrayFileSegment): + for file in segment.value: + if file.type in {FileType.IMAGE, FileType.VIDEO, FileType.AUDIO, FileType.DOCUMENT}: + file_contents.append( + file_manager.to_prompt_message_content(file, image_detail_config=vision_detail_config) + ) + elif isinstance(segment, FileSegment): + file = segment.value + if file.type in {FileType.IMAGE, FileType.VIDEO, FileType.AUDIO, FileType.DOCUMENT}: + file_contents.append( + file_manager.to_prompt_message_content(file, image_detail_config=vision_detail_config) + ) + + if segment_group.text: + prompt_messages.append( + combine_message_content_with_role( + contents=[TextPromptMessageContent(data=segment_group.text)], + role=message.role, + ) + ) + if file_contents: + prompt_messages.append(combine_message_content_with_role(contents=file_contents, role=message.role)) + + return prompt_messages + + +def render_jinja2_message( + *, + template: str, + jinja2_variables: Sequence[VariableSelector], + variable_pool: VariablePool, + template_renderer: TemplateRenderer | None = None, +) -> str: + if not template: + return "" + if template_renderer is None: + raise ValueError("template_renderer is required for jinja2 prompt rendering") + + jinja2_inputs: dict[str, Any] = {} + for jinja2_variable in jinja2_variables: + variable = variable_pool.get(jinja2_variable.value_selector) + jinja2_inputs[jinja2_variable.variable] = variable.to_object() if variable else "" + return template_renderer.render_jinja2(template=template, inputs=jinja2_inputs) + + +def handle_completion_template( + *, + template: LLMNodeCompletionModelPromptTemplate, + context: str | None, + jinja2_variables: Sequence[VariableSelector], + variable_pool: VariablePool, + template_renderer: TemplateRenderer | None = None, +) -> Sequence[PromptMessage]: + if template.edition_type == "jinja2": + result_text = render_jinja2_message( + 
template=template.jinja2_text or "", + jinja2_variables=jinja2_variables, + variable_pool=variable_pool, + template_renderer=template_renderer, + ) + else: + template_text = template.text.replace("{#context#}", context) if context else template.text + result_text = variable_pool.convert_template(template_text).text + return [ + combine_message_content_with_role( + contents=[TextPromptMessageContent(data=result_text)], + role=PromptMessageRole.USER, + ) + ] + + +def combine_message_content_with_role( + *, + contents: str | list[PromptMessageContentUnionTypes] | None = None, + role: PromptMessageRole, +) -> PromptMessage: + match role: + case PromptMessageRole.USER: + return UserPromptMessage(content=contents) + case PromptMessageRole.ASSISTANT: + return AssistantPromptMessage(content=contents) + case PromptMessageRole.SYSTEM: + return SystemPromptMessage(content=contents) + case _: + raise NotImplementedError(f"Role {role} is not supported") + + +def calculate_rest_token(*, prompt_messages: list[PromptMessage], model_instance: ModelInstance) -> int: + rest_tokens = 2000 + runtime_model_schema = fetch_model_schema(model_instance=model_instance) + runtime_model_parameters = model_instance.parameters + + model_context_tokens = runtime_model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE) + if model_context_tokens: + curr_message_tokens = model_instance.get_llm_num_tokens(prompt_messages) + + max_tokens = 0 + for parameter_rule in runtime_model_schema.parameter_rules: + if parameter_rule.name == "max_tokens" or ( + parameter_rule.use_template and parameter_rule.use_template == "max_tokens" + ): + max_tokens = ( + runtime_model_parameters.get(parameter_rule.name) + or runtime_model_parameters.get(str(parameter_rule.use_template)) + or 0 + ) + + rest_tokens = model_context_tokens - max_tokens - curr_message_tokens + rest_tokens = max(rest_tokens, 0) + + return rest_tokens + + +def handle_memory_chat_mode( + *, + memory: PromptMessageMemory | None, + memory_config: MemoryConfig | None, + model_instance: ModelInstance, +) -> Sequence[PromptMessage]: + if not memory or not memory_config: + return [] + rest_tokens = calculate_rest_token(prompt_messages=[], model_instance=model_instance) + return memory.get_history_prompt_messages( + max_token_limit=rest_tokens, + message_limit=memory_config.window.size if memory_config.window.enabled else None, + ) + + +def handle_memory_completion_mode( + *, + memory: PromptMessageMemory | None, + memory_config: MemoryConfig | None, + model_instance: ModelInstance, +) -> str: + if not memory or not memory_config: + return "" + + rest_tokens = calculate_rest_token(prompt_messages=[], model_instance=model_instance) + if not memory_config.role_prefix: + raise MemoryRolePrefixRequiredError("Memory role prefix is required for completion model.") + + return fetch_memory_text( + memory=memory, + max_token_limit=rest_tokens, + message_limit=memory_config.window.size if memory_config.window.enabled else None, + human_prefix=memory_config.role_prefix.user, + ai_prefix=memory_config.role_prefix.assistant, + ) + + +def _append_file_prompts( + *, + prompt_messages: list[PromptMessage], + files: Sequence[File], + vision_enabled: bool, + vision_detail: ImagePromptMessageContent.DETAIL, +) -> None: + if not vision_enabled or not files: + return + + file_prompts = [file_manager.to_prompt_message_content(file, image_detail_config=vision_detail) for file in files] + if ( + prompt_messages + and isinstance(prompt_messages[-1], UserPromptMessage) + and 
isinstance(prompt_messages[-1].content, list) + ): + existing_contents = prompt_messages[-1].content + assert isinstance(existing_contents, list) + prompt_messages[-1] = UserPromptMessage(content=file_prompts + existing_contents) + else: + prompt_messages.append(UserPromptMessage(content=file_prompts)) diff --git a/api/dify_graph/nodes/llm/node.py b/api/dify_graph/nodes/llm/node.py index b88ff404c0..5ed90ed7e3 100644 --- a/api/dify_graph/nodes/llm/node.py +++ b/api/dify_graph/nodes/llm/node.py @@ -11,28 +11,26 @@ from typing import TYPE_CHECKING, Any, Literal from sqlalchemy import select -from core.helper.code_executor import CodeExecutor, CodeLanguage from core.llm_generator.output_parser.errors import OutputParserError from core.llm_generator.output_parser.structured_output import invoke_llm_with_structured_output from core.model_manager import ModelInstance from core.prompt.entities.advanced_prompt_entities import CompletionModelPromptTemplate, MemoryConfig from core.prompt.utils.prompt_message_util import PromptMessageUtil -from core.rag.entities.citation_metadata import RetrievalSourceMetadata from core.tools.signature import sign_upload_file from dify_graph.constants import SYSTEM_VARIABLE_NODE_ID from dify_graph.entities import GraphInitParams from dify_graph.entities.graph_config import NodeConfigDict from dify_graph.enums import ( + BuiltinNodeTypes, NodeType, SystemVariableKey, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus, ) -from dify_graph.file import File, FileTransferMethod, FileType, file_manager +from dify_graph.file import File, FileTransferMethod, FileType from dify_graph.model_runtime.entities import ( ImagePromptMessageContent, PromptMessage, - PromptMessageContentType, TextPromptMessageContent, ) from dify_graph.model_runtime.entities.llm_entities import ( @@ -43,14 +41,7 @@ from dify_graph.model_runtime.entities.llm_entities import ( LLMStructuredOutput, LLMUsage, ) -from dify_graph.model_runtime.entities.message_entities import ( - AssistantPromptMessage, - PromptMessageContentUnionTypes, - PromptMessageRole, - SystemPromptMessage, - UserPromptMessage, -) -from dify_graph.model_runtime.entities.model_entities import ModelFeature, ModelPropertyKey +from dify_graph.model_runtime.entities.message_entities import PromptMessageContentUnionTypes from dify_graph.model_runtime.memory import PromptMessageMemory from dify_graph.model_runtime.utils.encoders import jsonable_encoder from dify_graph.node_events import ( @@ -64,13 +55,12 @@ from dify_graph.node_events import ( from dify_graph.nodes.base.entities import VariableSelector from dify_graph.nodes.base.node import Node from dify_graph.nodes.base.variable_template_parser import VariableTemplateParser -from dify_graph.nodes.llm.protocols import CredentialsProvider, ModelFactory +from dify_graph.nodes.llm.protocols import CredentialsProvider, ModelFactory, TemplateRenderer from dify_graph.nodes.protocols import HttpClientProtocol from dify_graph.runtime import VariablePool from dify_graph.variables import ( ArrayFileSegment, ArraySegment, - FileSegment, NoneSegment, ObjectSegment, StringSegment, @@ -89,9 +79,6 @@ from .exc import ( InvalidContextStructureError, InvalidVariableTypeError, LLMNodeError, - MemoryRolePrefixRequiredError, - NoPromptFoundError, - TemplateTypeNotSupportError, VariableNotFoundError, ) from .file_saver import FileSaverImpl, LLMFileSaver @@ -104,7 +91,7 @@ logger = logging.getLogger(__name__) class LLMNode(Node[LLMNodeData]): - node_type = NodeType.LLM + node_type = 
BuiltinNodeTypes.LLM # Compiled regex for extracting <think> blocks (with compatibility for attributes) _THINK_PATTERN = re.compile(r"<think[^>]*>(.*?)</think>", re.IGNORECASE | re.DOTALL) @@ -118,6 +105,7 @@ class LLMNode(Node[LLMNodeData]): _model_factory: ModelFactory _model_instance: ModelInstance _memory: PromptMessageMemory | None + _template_renderer: TemplateRenderer def __init__( self, @@ -130,6 +118,7 @@ class LLMNode(Node[LLMNodeData]): model_factory: ModelFactory, model_instance: ModelInstance, http_client: HttpClientProtocol, + template_renderer: TemplateRenderer, memory: PromptMessageMemory | None = None, llm_file_saver: LLMFileSaver | None = None, ): @@ -146,6 +135,7 @@ class LLMNode(Node[LLMNodeData]): self._model_factory = model_factory self._model_instance = model_instance self._memory = memory + self._template_renderer = template_renderer if llm_file_saver is None: dify_ctx = self.require_dify_context() @@ -240,6 +230,7 @@ class LLMNode(Node[LLMNodeData]): variable_pool=variable_pool, jinja2_variables=self.node_data.prompt_config.jinja2_variables, context_files=context_files, + template_renderer=self._template_renderer, ) # handle invoke result @@ -677,7 +668,7 @@ class LLMNode(Node[LLMNodeData]): ) elif isinstance(context_value_variable, ArraySegment): context_str = "" - original_retriever_resource: list[RetrievalSourceMetadata] = [] + original_retriever_resource: list[dict[str, Any]] = [] context_files: list[File] = [] for item in context_value_variable.value: if isinstance(item, str): @@ -693,11 +684,14 @@ class LLMNode(Node[LLMNodeData]): retriever_resource = self._convert_to_original_retriever_resource(item) if retriever_resource: original_retriever_resource.append(retriever_resource) + segment_id = retriever_resource.get("segment_id") + if not segment_id: + continue attachments_with_bindings = db.session.execute( select(SegmentAttachmentBinding, UploadFile) .join(UploadFile, UploadFile.id == SegmentAttachmentBinding.attachment_id) .where( - SegmentAttachmentBinding.segment_id == retriever_resource.segment_id, + SegmentAttachmentBinding.segment_id == segment_id, ) ).all() if attachments_with_bindings: @@ -723,7 +717,7 @@ class LLMNode(Node[LLMNodeData]): context_files=context_files, ) - def _convert_to_original_retriever_resource(self, context_dict: dict) -> RetrievalSourceMetadata | None: + def _convert_to_original_retriever_resource(self, context_dict: dict) -> dict[str, Any] | None: if ( "metadata" in context_dict and "_source" in context_dict["metadata"] @@ -731,28 +725,26 @@ ): metadata = context_dict.get("metadata", {}) - source = RetrievalSourceMetadata( - position=metadata.get("position"), - dataset_id=metadata.get("dataset_id"), - dataset_name=metadata.get("dataset_name"), - document_id=metadata.get("document_id"), - document_name=metadata.get("document_name"), - data_source_type=metadata.get("data_source_type"), - segment_id=metadata.get("segment_id"), - retriever_from=metadata.get("retriever_from"), - score=metadata.get("score"), - hit_count=metadata.get("segment_hit_count"), - word_count=metadata.get("segment_word_count"), - segment_position=metadata.get("segment_position"), - index_node_hash=metadata.get("segment_index_node_hash"), - content=context_dict.get("content"), - page=metadata.get("page"), - doc_metadata=metadata.get("doc_metadata"), - files=context_dict.get("files"), - summary=context_dict.get("summary"), - ) - - return source + return { + "position": metadata.get("position"), + "dataset_id": metadata.get("dataset_id"), +
"dataset_name": metadata.get("dataset_name"), + "document_id": metadata.get("document_id"), + "document_name": metadata.get("document_name"), + "data_source_type": metadata.get("data_source_type"), + "segment_id": metadata.get("segment_id"), + "retriever_from": metadata.get("retriever_from"), + "score": metadata.get("score"), + "hit_count": metadata.get("segment_hit_count"), + "word_count": metadata.get("segment_word_count"), + "segment_position": metadata.get("segment_position"), + "index_node_hash": metadata.get("segment_index_node_hash"), + "content": context_dict.get("content"), + "page": metadata.get("page"), + "doc_metadata": metadata.get("doc_metadata"), + "files": context_dict.get("files"), + "summary": context_dict.get("summary"), + } return None @@ -772,182 +764,24 @@ class LLMNode(Node[LLMNodeData]): variable_pool: VariablePool, jinja2_variables: Sequence[VariableSelector], context_files: list[File] | None = None, + template_renderer: TemplateRenderer | None = None, ) -> tuple[Sequence[PromptMessage], Sequence[str] | None]: - prompt_messages: list[PromptMessage] = [] - model_schema = llm_utils.fetch_model_schema(model_instance=model_instance) - - if isinstance(prompt_template, list): - # For chat model - prompt_messages.extend( - LLMNode.handle_list_messages( - messages=prompt_template, - context=context, - jinja2_variables=jinja2_variables, - variable_pool=variable_pool, - vision_detail_config=vision_detail, - ) - ) - - # Get memory messages for chat mode - memory_messages = _handle_memory_chat_mode( - memory=memory, - memory_config=memory_config, - model_instance=model_instance, - ) - # Extend prompt_messages with memory messages - prompt_messages.extend(memory_messages) - - # Add current query to the prompt messages - if sys_query: - message = LLMNodeChatModelMessage( - text=sys_query, - role=PromptMessageRole.USER, - edition_type="basic", - ) - prompt_messages.extend( - LLMNode.handle_list_messages( - messages=[message], - context="", - jinja2_variables=[], - variable_pool=variable_pool, - vision_detail_config=vision_detail, - ) - ) - - elif isinstance(prompt_template, LLMNodeCompletionModelPromptTemplate): - # For completion model - prompt_messages.extend( - _handle_completion_template( - template=prompt_template, - context=context, - jinja2_variables=jinja2_variables, - variable_pool=variable_pool, - ) - ) - - # Get memory text for completion model - memory_text = _handle_memory_completion_mode( - memory=memory, - memory_config=memory_config, - model_instance=model_instance, - ) - # Insert histories into the prompt - prompt_content = prompt_messages[0].content - # For issue #11247 - Check if prompt content is a string or a list - if isinstance(prompt_content, str): - prompt_content = str(prompt_content) - if "#histories#" in prompt_content: - prompt_content = prompt_content.replace("#histories#", memory_text) - else: - prompt_content = memory_text + "\n" + prompt_content - prompt_messages[0].content = prompt_content - elif isinstance(prompt_content, list): - for content_item in prompt_content: - if isinstance(content_item, TextPromptMessageContent): - if "#histories#" in content_item.data: - content_item.data = content_item.data.replace("#histories#", memory_text) - else: - content_item.data = memory_text + "\n" + content_item.data - else: - raise ValueError("Invalid prompt content type") - - # Add current query to the prompt message - if sys_query: - if isinstance(prompt_content, str): - prompt_content = str(prompt_messages[0].content).replace("#sys.query#", sys_query) - 
prompt_messages[0].content = prompt_content - elif isinstance(prompt_content, list): - for content_item in prompt_content: - if isinstance(content_item, TextPromptMessageContent): - content_item.data = sys_query + "\n" + content_item.data - else: - raise ValueError("Invalid prompt content type") - else: - raise TemplateTypeNotSupportError(type_name=str(type(prompt_template))) - - # The sys_files will be deprecated later - if vision_enabled and sys_files: - file_prompts = [] - for file in sys_files: - file_prompt = file_manager.to_prompt_message_content(file, image_detail_config=vision_detail) - file_prompts.append(file_prompt) - # If last prompt is a user prompt, add files into its contents, - # otherwise append a new user prompt - if ( - len(prompt_messages) > 0 - and isinstance(prompt_messages[-1], UserPromptMessage) - and isinstance(prompt_messages[-1].content, list) - ): - prompt_messages[-1] = UserPromptMessage(content=file_prompts + prompt_messages[-1].content) - else: - prompt_messages.append(UserPromptMessage(content=file_prompts)) - - # The context_files - if vision_enabled and context_files: - file_prompts = [] - for file in context_files: - file_prompt = file_manager.to_prompt_message_content(file, image_detail_config=vision_detail) - file_prompts.append(file_prompt) - # If last prompt is a user prompt, add files into its contents, - # otherwise append a new user prompt - if ( - len(prompt_messages) > 0 - and isinstance(prompt_messages[-1], UserPromptMessage) - and isinstance(prompt_messages[-1].content, list) - ): - prompt_messages[-1] = UserPromptMessage(content=file_prompts + prompt_messages[-1].content) - else: - prompt_messages.append(UserPromptMessage(content=file_prompts)) - - # Remove empty messages and filter unsupported content - filtered_prompt_messages = [] - for prompt_message in prompt_messages: - if isinstance(prompt_message.content, list): - prompt_message_content: list[PromptMessageContentUnionTypes] = [] - for content_item in prompt_message.content: - # Skip content if features are not defined - if not model_schema.features: - if content_item.type != PromptMessageContentType.TEXT: - continue - prompt_message_content.append(content_item) - continue - - # Skip content if corresponding feature is not supported - if ( - ( - content_item.type == PromptMessageContentType.IMAGE - and ModelFeature.VISION not in model_schema.features - ) - or ( - content_item.type == PromptMessageContentType.DOCUMENT - and ModelFeature.DOCUMENT not in model_schema.features - ) - or ( - content_item.type == PromptMessageContentType.VIDEO - and ModelFeature.VIDEO not in model_schema.features - ) - or ( - content_item.type == PromptMessageContentType.AUDIO - and ModelFeature.AUDIO not in model_schema.features - ) - ): - continue - prompt_message_content.append(content_item) - if len(prompt_message_content) == 1 and prompt_message_content[0].type == PromptMessageContentType.TEXT: - prompt_message.content = prompt_message_content[0].data - else: - prompt_message.content = prompt_message_content - if prompt_message.is_empty(): - continue - filtered_prompt_messages.append(prompt_message) - - if len(filtered_prompt_messages) == 0: - raise NoPromptFoundError( - "No prompt found in the LLM configuration. " - "Please ensure a prompt is properly configured before proceeding." 
- ) - - return filtered_prompt_messages, stop + return llm_utils.fetch_prompt_messages( + sys_query=sys_query, + sys_files=sys_files, + context=context, + memory=memory, + model_instance=model_instance, + prompt_template=prompt_template, + stop=stop, + memory_config=memory_config, + vision_enabled=vision_enabled, + vision_detail=vision_detail, + variable_pool=variable_pool, + jinja2_variables=jinja2_variables, + context_files=context_files, + template_renderer=template_renderer, + ) @classmethod def _extract_variable_selector_to_variable_mapping( @@ -1047,59 +881,16 @@ class LLMNode(Node[LLMNodeData]): jinja2_variables: Sequence[VariableSelector], variable_pool: VariablePool, vision_detail_config: ImagePromptMessageContent.DETAIL, + template_renderer: TemplateRenderer | None = None, ) -> Sequence[PromptMessage]: - prompt_messages: list[PromptMessage] = [] - for message in messages: - if message.edition_type == "jinja2": - result_text = _render_jinja2_message( - template=message.jinja2_text or "", - jinja2_variables=jinja2_variables, - variable_pool=variable_pool, - ) - prompt_message = _combine_message_content_with_role( - contents=[TextPromptMessageContent(data=result_text)], role=message.role - ) - prompt_messages.append(prompt_message) - else: - # Get segment group from basic message - if context: - template = message.text.replace("{#context#}", context) - else: - template = message.text - segment_group = variable_pool.convert_template(template) - - # Process segments for images - file_contents = [] - for segment in segment_group.value: - if isinstance(segment, ArrayFileSegment): - for file in segment.value: - if file.type in {FileType.IMAGE, FileType.VIDEO, FileType.AUDIO, FileType.DOCUMENT}: - file_content = file_manager.to_prompt_message_content( - file, image_detail_config=vision_detail_config - ) - file_contents.append(file_content) - elif isinstance(segment, FileSegment): - file = segment.value - if file.type in {FileType.IMAGE, FileType.VIDEO, FileType.AUDIO, FileType.DOCUMENT}: - file_content = file_manager.to_prompt_message_content( - file, image_detail_config=vision_detail_config - ) - file_contents.append(file_content) - - # Create message with text from all segments - plain_text = segment_group.text - if plain_text: - prompt_message = _combine_message_content_with_role( - contents=[TextPromptMessageContent(data=plain_text)], role=message.role - ) - prompt_messages.append(prompt_message) - - if file_contents: - # Create message with image contents - prompt_message = _combine_message_content_with_role(contents=file_contents, role=message.role) - prompt_messages.append(prompt_message) - - return prompt_messages + return llm_utils.handle_list_messages( + messages=messages, + context=context, + jinja2_variables=jinja2_variables, + variable_pool=variable_pool, + vision_detail_config=vision_detail_config, + template_renderer=template_renderer, + ) @staticmethod def handle_blocking_result( @@ -1238,152 +1029,3 @@ class LLMNode(Node[LLMNodeData]): @property def model_instance(self) -> ModelInstance: return self._model_instance - - -def _combine_message_content_with_role( - *, contents: str | list[PromptMessageContentUnionTypes] | None = None, role: PromptMessageRole -): - match role: - case PromptMessageRole.USER: - return UserPromptMessage(content=contents) - case PromptMessageRole.ASSISTANT: - return AssistantPromptMessage(content=contents) - case PromptMessageRole.SYSTEM: - return SystemPromptMessage(content=contents) - case _: - raise NotImplementedError(f"Role {role} is not 
supported") - - -def _render_jinja2_message( - *, - template: str, - jinja2_variables: Sequence[VariableSelector], - variable_pool: VariablePool, -): - if not template: - return "" - - jinja2_inputs = {} - for jinja2_variable in jinja2_variables: - variable = variable_pool.get(jinja2_variable.value_selector) - jinja2_inputs[jinja2_variable.variable] = variable.to_object() if variable else "" - code_execute_resp = CodeExecutor.execute_workflow_code_template( - language=CodeLanguage.JINJA2, - code=template, - inputs=jinja2_inputs, - ) - result_text = code_execute_resp["result"] - return result_text - - -def _calculate_rest_token( - *, - prompt_messages: list[PromptMessage], - model_instance: ModelInstance, -) -> int: - rest_tokens = 2000 - runtime_model_schema = llm_utils.fetch_model_schema(model_instance=model_instance) - runtime_model_parameters = model_instance.parameters - - model_context_tokens = runtime_model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE) - if model_context_tokens: - curr_message_tokens = model_instance.get_llm_num_tokens(prompt_messages) - - max_tokens = 0 - for parameter_rule in runtime_model_schema.parameter_rules: - if parameter_rule.name == "max_tokens" or ( - parameter_rule.use_template and parameter_rule.use_template == "max_tokens" - ): - max_tokens = ( - runtime_model_parameters.get(parameter_rule.name) - or runtime_model_parameters.get(str(parameter_rule.use_template)) - or 0 - ) - - rest_tokens = model_context_tokens - max_tokens - curr_message_tokens - rest_tokens = max(rest_tokens, 0) - - return rest_tokens - - -def _handle_memory_chat_mode( - *, - memory: PromptMessageMemory | None, - memory_config: MemoryConfig | None, - model_instance: ModelInstance, -) -> Sequence[PromptMessage]: - memory_messages: Sequence[PromptMessage] = [] - # Get messages from memory for chat model - if memory and memory_config: - rest_tokens = _calculate_rest_token( - prompt_messages=[], - model_instance=model_instance, - ) - memory_messages = memory.get_history_prompt_messages( - max_token_limit=rest_tokens, - message_limit=memory_config.window.size if memory_config.window.enabled else None, - ) - return memory_messages - - -def _handle_memory_completion_mode( - *, - memory: PromptMessageMemory | None, - memory_config: MemoryConfig | None, - model_instance: ModelInstance, -) -> str: - memory_text = "" - # Get history text from memory for completion model - if memory and memory_config: - rest_tokens = _calculate_rest_token( - prompt_messages=[], - model_instance=model_instance, - ) - if not memory_config.role_prefix: - raise MemoryRolePrefixRequiredError("Memory role prefix is required for completion model.") - memory_text = llm_utils.fetch_memory_text( - memory=memory, - max_token_limit=rest_tokens, - message_limit=memory_config.window.size if memory_config.window.enabled else None, - human_prefix=memory_config.role_prefix.user, - ai_prefix=memory_config.role_prefix.assistant, - ) - return memory_text - - -def _handle_completion_template( - *, - template: LLMNodeCompletionModelPromptTemplate, - context: str | None, - jinja2_variables: Sequence[VariableSelector], - variable_pool: VariablePool, -) -> Sequence[PromptMessage]: - """Handle completion template processing outside of LLMNode class. 
- - Args: - template: The completion model prompt template - context: Optional context string - jinja2_variables: Variables for jinja2 template rendering - variable_pool: Variable pool for template conversion - - Returns: - Sequence of prompt messages - """ - prompt_messages = [] - if template.edition_type == "jinja2": - result_text = _render_jinja2_message( - template=template.jinja2_text or "", - jinja2_variables=jinja2_variables, - variable_pool=variable_pool, - ) - else: - if context: - template_text = template.text.replace("{#context#}", context) - else: - template_text = template.text - result_text = variable_pool.convert_template(template_text).text - prompt_message = _combine_message_content_with_role( - contents=[TextPromptMessageContent(data=result_text)], role=PromptMessageRole.USER - ) - prompt_messages.append(prompt_message) - return prompt_messages diff --git a/api/dify_graph/nodes/llm/protocols.py b/api/dify_graph/nodes/llm/protocols.py index 8e0365299d..9e95d341c9 100644 --- a/api/dify_graph/nodes/llm/protocols.py +++ b/api/dify_graph/nodes/llm/protocols.py @@ -1,5 +1,6 @@ from __future__ import annotations +from collections.abc import Mapping from typing import Any, Protocol from core.model_manager import ModelInstance @@ -19,3 +20,11 @@ class ModelFactory(Protocol): def init_model_instance(self, provider_name: str, model_name: str) -> ModelInstance: """Create a model instance that is ready for schema lookup and invocation.""" ... + + +class TemplateRenderer(Protocol): + """Port for rendering prompt templates used by LLM-compatible nodes.""" + + def render_jinja2(self, *, template: str, inputs: Mapping[str, Any]) -> str: + """Render the given Jinja2 template into plain text.""" + ... diff --git a/api/dify_graph/nodes/loop/entities.py b/api/dify_graph/nodes/loop/entities.py index 8a3df5c234..f0bfad5a0f 100644 --- a/api/dify_graph/nodes/loop/entities.py +++ b/api/dify_graph/nodes/loop/entities.py @@ -4,7 +4,7 @@ from typing import Annotated, Any, Literal from pydantic import AfterValidator, BaseModel, Field, field_validator from dify_graph.entities.base_node_data import BaseNodeData -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.nodes.base import BaseLoopNodeData, BaseLoopState from dify_graph.utils.condition.entities import Condition from dify_graph.variables.types import SegmentType @@ -41,7 +41,7 @@ class LoopVariableData(BaseModel): class LoopNodeData(BaseLoopNodeData): - type: NodeType = NodeType.LOOP + type: NodeType = BuiltinNodeTypes.LOOP loop_count: int # Maximum number of loops break_conditions: list[Condition] # Conditions to break the loop logical_operator: Literal["and", "or"] @@ -61,7 +61,7 @@ class LoopStartNodeData(BaseNodeData): Loop Start Node Data. """ - type: NodeType = NodeType.LOOP_START + type: NodeType = BuiltinNodeTypes.LOOP_START class LoopEndNodeData(BaseNodeData): @@ -69,7 +69,7 @@ class LoopEndNodeData(BaseNodeData): Loop End Node Data. 
""" - type: NodeType = NodeType.LOOP_END + type: NodeType = BuiltinNodeTypes.LOOP_END class LoopState(BaseLoopState): diff --git a/api/dify_graph/nodes/loop/loop_end_node.py b/api/dify_graph/nodes/loop/loop_end_node.py index 73ac5da927..0287708fb3 100644 --- a/api/dify_graph/nodes/loop/loop_end_node.py +++ b/api/dify_graph/nodes/loop/loop_end_node.py @@ -1,4 +1,4 @@ -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node from dify_graph.nodes.loop.entities import LoopEndNodeData @@ -9,7 +9,7 @@ class LoopEndNode(Node[LoopEndNodeData]): Loop End Node. """ - node_type = NodeType.LOOP_END + node_type = BuiltinNodeTypes.LOOP_END @classmethod def version(cls) -> str: diff --git a/api/dify_graph/nodes/loop/loop_node.py b/api/dify_graph/nodes/loop/loop_node.py index 27b78eaa3f..3c546ffa23 100644 --- a/api/dify_graph/nodes/loop/loop_node.py +++ b/api/dify_graph/nodes/loop/loop_node.py @@ -7,8 +7,8 @@ from typing import TYPE_CHECKING, Any, Literal, cast from dify_graph.entities.graph_config import NodeConfigDictAdapter from dify_graph.enums import ( + BuiltinNodeTypes, NodeExecutionType, - NodeType, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus, ) @@ -46,7 +46,7 @@ class LoopNode(LLMUsageTrackingMixin, Node[LoopNodeData]): Loop Node. """ - node_type = NodeType.LOOP + node_type = BuiltinNodeTypes.LOOP execution_type = NodeExecutionType.CONTAINER @classmethod @@ -250,11 +250,11 @@ class LoopNode(LLMUsageTrackingMixin, Node[LoopNodeData]): if isinstance(event, GraphNodeEventBase): self._append_loop_info_to_event(event=event, loop_run_index=current_index) - if isinstance(event, GraphNodeEventBase) and event.node_type == NodeType.LOOP_START: + if isinstance(event, GraphNodeEventBase) and event.node_type == BuiltinNodeTypes.LOOP_START: continue if isinstance(event, GraphNodeEventBase): yield event - if isinstance(event, NodeRunSucceededEvent) and event.node_type == NodeType.LOOP_END: + if isinstance(event, NodeRunSucceededEvent) and event.node_type == BuiltinNodeTypes.LOOP_END: reach_break_node = True if isinstance(event, GraphRunFailedEvent): raise Exception(event.error) @@ -315,15 +315,13 @@ class LoopNode(LLMUsageTrackingMixin, Node[LoopNodeData]): # variable selector to variable mapping try: - # Get node class - from dify_graph.nodes.node_mapping import NODE_TYPE_CLASSES_MAPPING - typed_sub_node_config = NodeConfigDictAdapter.validate_python(sub_node_config) node_type = typed_sub_node_config["data"].type - if node_type not in NODE_TYPE_CLASSES_MAPPING: + node_mapping = Node.get_node_type_classes_mapping() + if node_type not in node_mapping: continue node_version = str(typed_sub_node_config["data"].version) - node_cls = NODE_TYPE_CLASSES_MAPPING[node_type][node_version] + node_cls = node_mapping[node_type][node_version] sub_node_variable_mapping = node_cls.extract_variable_selector_to_variable_mapping( graph_config=graph_config, config=typed_sub_node_config diff --git a/api/dify_graph/nodes/loop/loop_start_node.py b/api/dify_graph/nodes/loop/loop_start_node.py index f469c8286e..e171b4df2f 100644 --- a/api/dify_graph/nodes/loop/loop_start_node.py +++ b/api/dify_graph/nodes/loop/loop_start_node.py @@ -1,4 +1,4 @@ -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult from 
dify_graph.nodes.base.node import Node from dify_graph.nodes.loop.entities import LoopStartNodeData @@ -9,7 +9,7 @@ class LoopStartNode(Node[LoopStartNodeData]): Loop Start Node. """ - node_type = NodeType.LOOP_START + node_type = BuiltinNodeTypes.LOOP_START @classmethod def version(cls) -> str: diff --git a/api/dify_graph/nodes/node_mapping.py b/api/dify_graph/nodes/node_mapping.py deleted file mode 100644 index 8e5405f1aa..0000000000 --- a/api/dify_graph/nodes/node_mapping.py +++ /dev/null @@ -1,9 +0,0 @@ -from collections.abc import Mapping - -from dify_graph.enums import NodeType -from dify_graph.nodes.base.node import Node - -LATEST_VERSION = "latest" - -# Mapping is built by Node.get_node_type_classes_mapping(), which imports and walks dify_graph.nodes -NODE_TYPE_CLASSES_MAPPING: Mapping[NodeType, Mapping[str, type[Node]]] = Node.get_node_type_classes_mapping() diff --git a/api/dify_graph/nodes/parameter_extractor/entities.py b/api/dify_graph/nodes/parameter_extractor/entities.py index 8f8a278d5b..2fb042c16c 100644 --- a/api/dify_graph/nodes/parameter_extractor/entities.py +++ b/api/dify_graph/nodes/parameter_extractor/entities.py @@ -9,7 +9,7 @@ from pydantic import ( from core.prompt.entities.advanced_prompt_entities import MemoryConfig from dify_graph.entities.base_node_data import BaseNodeData -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.nodes.llm.entities import ModelConfig, VisionConfig from dify_graph.variables.types import SegmentType @@ -84,7 +84,7 @@ class ParameterExtractorNodeData(BaseNodeData): Parameter Extractor Node Data. """ - type: NodeType = NodeType.PARAMETER_EXTRACTOR + type: NodeType = BuiltinNodeTypes.PARAMETER_EXTRACTOR model: ModelConfig query: list[str] parameters: list[ParameterConfig] diff --git a/api/dify_graph/nodes/parameter_extractor/parameter_extractor_node.py b/api/dify_graph/nodes/parameter_extractor/parameter_extractor_node.py index 68bd15db30..3913a27697 100644 --- a/api/dify_graph/nodes/parameter_extractor/parameter_extractor_node.py +++ b/api/dify_graph/nodes/parameter_extractor/parameter_extractor_node.py @@ -12,7 +12,7 @@ from core.prompt.simple_prompt_transform import ModelMode from core.prompt.utils.prompt_message_util import PromptMessageUtil from dify_graph.entities.graph_config import NodeConfigDict from dify_graph.enums import ( - NodeType, + BuiltinNodeTypes, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus, ) @@ -97,7 +97,7 @@ class ParameterExtractorNode(Node[ParameterExtractorNodeData]): Parameter Extractor Node. 
""" - node_type = NodeType.PARAMETER_EXTRACTOR + node_type = BuiltinNodeTypes.PARAMETER_EXTRACTOR _model_instance: ModelInstance _credentials_provider: "CredentialsProvider" diff --git a/api/dify_graph/nodes/question_classifier/entities.py b/api/dify_graph/nodes/question_classifier/entities.py index 77a6c70c28..0c1601d439 100644 --- a/api/dify_graph/nodes/question_classifier/entities.py +++ b/api/dify_graph/nodes/question_classifier/entities.py @@ -2,7 +2,7 @@ from pydantic import BaseModel, Field from core.prompt.entities.advanced_prompt_entities import MemoryConfig from dify_graph.entities.base_node_data import BaseNodeData -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.nodes.llm import ModelConfig, VisionConfig @@ -12,7 +12,7 @@ class ClassConfig(BaseModel): class QuestionClassifierNodeData(BaseNodeData): - type: NodeType = NodeType.QUESTION_CLASSIFIER + type: NodeType = BuiltinNodeTypes.QUESTION_CLASSIFIER query_variable_selector: list[str] model: ModelConfig classes: list[ClassConfig] diff --git a/api/dify_graph/nodes/question_classifier/question_classifier_node.py b/api/dify_graph/nodes/question_classifier/question_classifier_node.py index a61bca4ea9..59d0a2a4d8 100644 --- a/api/dify_graph/nodes/question_classifier/question_classifier_node.py +++ b/api/dify_graph/nodes/question_classifier/question_classifier_node.py @@ -9,8 +9,8 @@ from core.prompt.utils.prompt_message_util import PromptMessageUtil from dify_graph.entities import GraphInitParams from dify_graph.entities.graph_config import NodeConfigDict from dify_graph.enums import ( + BuiltinNodeTypes, NodeExecutionType, - NodeType, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus, ) @@ -28,7 +28,7 @@ from dify_graph.nodes.llm import ( llm_utils, ) from dify_graph.nodes.llm.file_saver import FileSaverImpl, LLMFileSaver -from dify_graph.nodes.llm.protocols import CredentialsProvider, ModelFactory +from dify_graph.nodes.llm.protocols import CredentialsProvider, ModelFactory, TemplateRenderer from dify_graph.nodes.protocols import HttpClientProtocol from libs.json_in_md_parser import parse_and_check_json_markdown @@ -50,7 +50,7 @@ if TYPE_CHECKING: class QuestionClassifierNode(Node[QuestionClassifierNodeData]): - node_type = NodeType.QUESTION_CLASSIFIER + node_type = BuiltinNodeTypes.QUESTION_CLASSIFIER execution_type = NodeExecutionType.BRANCH _file_outputs: list["File"] @@ -59,6 +59,7 @@ class QuestionClassifierNode(Node[QuestionClassifierNodeData]): _model_factory: "ModelFactory" _model_instance: ModelInstance _memory: PromptMessageMemory | None + _template_renderer: TemplateRenderer def __init__( self, @@ -71,6 +72,7 @@ class QuestionClassifierNode(Node[QuestionClassifierNodeData]): model_factory: "ModelFactory", model_instance: ModelInstance, http_client: HttpClientProtocol, + template_renderer: TemplateRenderer, memory: PromptMessageMemory | None = None, llm_file_saver: LLMFileSaver | None = None, ): @@ -87,6 +89,7 @@ class QuestionClassifierNode(Node[QuestionClassifierNodeData]): self._model_factory = model_factory self._model_instance = model_instance self._memory = memory + self._template_renderer = template_renderer if llm_file_saver is None: dify_ctx = self.require_dify_context() @@ -142,7 +145,7 @@ class QuestionClassifierNode(Node[QuestionClassifierNodeData]): # If both self._get_prompt_template and self._fetch_prompt_messages append a user prompt, # two consecutive user prompts will be generated, causing model's error. 
# To avoid this, set sys_query to an empty string so that only one user prompt is appended at the end. - prompt_messages, stop = LLMNode.fetch_prompt_messages( + prompt_messages, stop = llm_utils.fetch_prompt_messages( prompt_template=prompt_template, sys_query="", memory=memory, @@ -153,6 +156,7 @@ class QuestionClassifierNode(Node[QuestionClassifierNodeData]): vision_detail=node_data.vision.configs.detail, variable_pool=variable_pool, jinja2_variables=[], + template_renderer=self._template_renderer, ) result_text = "" @@ -287,7 +291,7 @@ class QuestionClassifierNode(Node[QuestionClassifierNodeData]): model_schema = llm_utils.fetch_model_schema(model_instance=model_instance) prompt_template = self._get_prompt_template(node_data, query, None, 2000) - prompt_messages, _ = LLMNode.fetch_prompt_messages( + prompt_messages, _ = llm_utils.fetch_prompt_messages( prompt_template=prompt_template, sys_query="", sys_files=[], @@ -300,6 +304,7 @@ class QuestionClassifierNode(Node[QuestionClassifierNodeData]): vision_detail=node_data.vision.configs.detail, variable_pool=self.graph_runtime_state.variable_pool, jinja2_variables=[], + template_renderer=self._template_renderer, ) rest_tokens = 2000 diff --git a/api/dify_graph/nodes/start/entities.py b/api/dify_graph/nodes/start/entities.py index cbf7348360..92ebd1a2ec 100644 --- a/api/dify_graph/nodes/start/entities.py +++ b/api/dify_graph/nodes/start/entities.py @@ -3,7 +3,7 @@ from collections.abc import Sequence from pydantic import Field from dify_graph.entities.base_node_data import BaseNodeData -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.variables.input_entities import VariableEntity @@ -12,5 +12,5 @@ class StartNodeData(BaseNodeData): Start Node Data """ - type: NodeType = NodeType.START + type: NodeType = BuiltinNodeTypes.START variables: Sequence[VariableEntity] = Field(default_factory=list) diff --git a/api/dify_graph/nodes/start/start_node.py b/api/dify_graph/nodes/start/start_node.py index c09ead0124..5e6055ea34 100644 --- a/api/dify_graph/nodes/start/start_node.py +++ b/api/dify_graph/nodes/start/start_node.py @@ -3,7 +3,7 @@ from typing import Any from jsonschema import Draft7Validator, ValidationError from dify_graph.constants import SYSTEM_VARIABLE_NODE_ID -from dify_graph.enums import NodeExecutionType, NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node from dify_graph.nodes.start.entities import StartNodeData @@ -11,7 +11,7 @@ from dify_graph.variables.input_entities import VariableEntityType class StartNode(Node[StartNodeData]): - node_type = NodeType.START + node_type = BuiltinNodeTypes.START execution_type = NodeExecutionType.ROOT @classmethod diff --git a/api/dify_graph/nodes/template_transform/entities.py b/api/dify_graph/nodes/template_transform/entities.py index 2a79a82870..ac29239958 100644 --- a/api/dify_graph/nodes/template_transform/entities.py +++ b/api/dify_graph/nodes/template_transform/entities.py @@ -1,5 +1,5 @@ from dify_graph.entities.base_node_data import BaseNodeData -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.nodes.base.entities import VariableSelector @@ -8,6 +8,6 @@ class TemplateTransformNodeData(BaseNodeData): Template Transform Node Data. 
""" - type: NodeType = NodeType.TEMPLATE_TRANSFORM + type: NodeType = BuiltinNodeTypes.TEMPLATE_TRANSFORM variables: list[VariableSelector] template: str diff --git a/api/dify_graph/nodes/template_transform/template_transform_node.py b/api/dify_graph/nodes/template_transform/template_transform_node.py index 9dfb535342..dc6fce2b0a 100644 --- a/api/dify_graph/nodes/template_transform/template_transform_node.py +++ b/api/dify_graph/nodes/template_transform/template_transform_node.py @@ -2,7 +2,7 @@ from collections.abc import Mapping, Sequence from typing import TYPE_CHECKING, Any from dify_graph.entities.graph_config import NodeConfigDict -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node from dify_graph.nodes.template_transform.entities import TemplateTransformNodeData @@ -19,7 +19,7 @@ DEFAULT_TEMPLATE_TRANSFORM_MAX_OUTPUT_LENGTH = 400_000 class TemplateTransformNode(Node[TemplateTransformNodeData]): - node_type = NodeType.TEMPLATE_TRANSFORM + node_type = BuiltinNodeTypes.TEMPLATE_TRANSFORM _template_renderer: Jinja2TemplateRenderer _max_output_length: int diff --git a/api/dify_graph/nodes/tool/entities.py b/api/dify_graph/nodes/tool/entities.py index 4ba8c16e85..b041ee66fd 100644 --- a/api/dify_graph/nodes/tool/entities.py +++ b/api/dify_graph/nodes/tool/entities.py @@ -5,7 +5,7 @@ from pydantic_core.core_schema import ValidationInfo from core.tools.entities.tool_entities import ToolProviderType from dify_graph.entities.base_node_data import BaseNodeData -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType class ToolEntity(BaseModel): @@ -33,7 +33,7 @@ class ToolEntity(BaseModel): class ToolNodeData(BaseNodeData, ToolEntity): - type: NodeType = NodeType.TOOL + type: NodeType = BuiltinNodeTypes.TOOL class ToolInput(BaseModel): # TODO: check this type diff --git a/api/dify_graph/nodes/tool/tool_node.py b/api/dify_graph/nodes/tool/tool_node.py index 44d0ca885d..2a205f2f8e 100644 --- a/api/dify_graph/nodes/tool/tool_node.py +++ b/api/dify_graph/nodes/tool/tool_node.py @@ -9,7 +9,7 @@ from core.tools.tool_engine import ToolEngine from core.tools.utils.message_transformer import ToolFileMessageTransformer from dify_graph.entities.graph_config import NodeConfigDict from dify_graph.enums import ( - NodeType, + BuiltinNodeTypes, SystemVariableKey, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus, @@ -42,7 +42,7 @@ class ToolNode(Node[ToolNodeData]): Tool Node """ - node_type = NodeType.TOOL + node_type = BuiltinNodeTypes.TOOL def __init__( self, @@ -65,6 +65,10 @@ class ToolNode(Node[ToolNodeData]): def version(cls) -> str: return "1" + def populate_start_event(self, event) -> None: + event.provider_id = self.node_data.provider_id + event.provider_type = self.node_data.provider_type + def _run(self) -> Generator[NodeEventBase, None, None]: """ Run the tool node @@ -77,7 +81,9 @@ class ToolNode(Node[ToolNodeData]): tool_info = { "provider_type": self.node_data.provider_type.value, "provider_id": self.node_data.provider_id, + "tool_name": self.node_data.tool_name, "plugin_unique_identifier": self.node_data.plugin_unique_identifier, + "credential_id": self.node_data.credential_id, } # get tool runtime diff --git a/api/dify_graph/nodes/trigger_schedule/__init__.py b/api/dify_graph/nodes/trigger_schedule/__init__.py deleted file mode 100644 index 
c9b3ae6a0d..0000000000 --- a/api/dify_graph/nodes/trigger_schedule/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from dify_graph.nodes.trigger_schedule.trigger_schedule_node import TriggerScheduleNode - -__all__ = ["TriggerScheduleNode"] diff --git a/api/dify_graph/nodes/variable_aggregator/entities.py b/api/dify_graph/nodes/variable_aggregator/entities.py index fec4c4474c..4779ebd9a9 100644 --- a/api/dify_graph/nodes/variable_aggregator/entities.py +++ b/api/dify_graph/nodes/variable_aggregator/entities.py @@ -1,7 +1,7 @@ from pydantic import BaseModel from dify_graph.entities.base_node_data import BaseNodeData -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.variables.types import SegmentType @@ -29,7 +29,7 @@ class VariableAggregatorNodeData(BaseNodeData): Variable Aggregator Node Data. """ - type: NodeType = NodeType.VARIABLE_AGGREGATOR + type: NodeType = BuiltinNodeTypes.VARIABLE_AGGREGATOR output_type: str variables: list[list[str]] advanced_settings: AdvancedSettings | None = None diff --git a/api/dify_graph/nodes/variable_aggregator/variable_aggregator_node.py b/api/dify_graph/nodes/variable_aggregator/variable_aggregator_node.py index 98ab8105fe..7d26de6232 100644 --- a/api/dify_graph/nodes/variable_aggregator/variable_aggregator_node.py +++ b/api/dify_graph/nodes/variable_aggregator/variable_aggregator_node.py @@ -1,6 +1,6 @@ from collections.abc import Mapping -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node from dify_graph.nodes.variable_aggregator.entities import VariableAggregatorNodeData @@ -8,7 +8,7 @@ from dify_graph.variables.segments import Segment class VariableAggregatorNode(Node[VariableAggregatorNodeData]): - node_type = NodeType.VARIABLE_AGGREGATOR + node_type = BuiltinNodeTypes.VARIABLE_AGGREGATOR @classmethod def version(cls) -> str: diff --git a/api/dify_graph/nodes/variable_assigner/v1/node.py b/api/dify_graph/nodes/variable_assigner/v1/node.py index 1d17b981ba..f9b261b191 100644 --- a/api/dify_graph/nodes/variable_assigner/v1/node.py +++ b/api/dify_graph/nodes/variable_assigner/v1/node.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, Any from dify_graph.constants import CONVERSATION_VARIABLE_NODE_ID from dify_graph.entities import GraphInitParams from dify_graph.entities.graph_config import NodeConfigDict -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node from dify_graph.nodes.variable_assigner.common import helpers as common_helpers @@ -18,7 +18,7 @@ if TYPE_CHECKING: class VariableAssignerNode(Node[VariableAssignerData]): - node_type = NodeType.VARIABLE_ASSIGNER + node_type = BuiltinNodeTypes.VARIABLE_ASSIGNER def __init__( self, diff --git a/api/dify_graph/nodes/variable_assigner/v1/node_data.py b/api/dify_graph/nodes/variable_assigner/v1/node_data.py index a75a2397ba..57acb29535 100644 --- a/api/dify_graph/nodes/variable_assigner/v1/node_data.py +++ b/api/dify_graph/nodes/variable_assigner/v1/node_data.py @@ -2,7 +2,7 @@ from collections.abc import Sequence from enum import StrEnum from dify_graph.entities.base_node_data import BaseNodeData -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, 
NodeType class WriteMode(StrEnum): @@ -12,7 +12,7 @@ class WriteMode(StrEnum): class VariableAssignerData(BaseNodeData): - type: NodeType = NodeType.VARIABLE_ASSIGNER + type: NodeType = BuiltinNodeTypes.VARIABLE_ASSIGNER assigned_variable_selector: Sequence[str] write_mode: WriteMode input_variable_selector: Sequence[str] diff --git a/api/dify_graph/nodes/variable_assigner/v2/entities.py b/api/dify_graph/nodes/variable_assigner/v2/entities.py index ca3a94b777..2b2bbe85de 100644 --- a/api/dify_graph/nodes/variable_assigner/v2/entities.py +++ b/api/dify_graph/nodes/variable_assigner/v2/entities.py @@ -4,7 +4,7 @@ from typing import Any from pydantic import BaseModel, Field from dify_graph.entities.base_node_data import BaseNodeData -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType from .enums import InputType, Operation @@ -23,6 +23,6 @@ class VariableOperationItem(BaseModel): class VariableAssignerNodeData(BaseNodeData): - type: NodeType = NodeType.VARIABLE_ASSIGNER + type: NodeType = BuiltinNodeTypes.VARIABLE_ASSIGNER version: str = "2" items: Sequence[VariableOperationItem] = Field(default_factory=list) diff --git a/api/dify_graph/nodes/variable_assigner/v2/node.py b/api/dify_graph/nodes/variable_assigner/v2/node.py index 771609ceb6..f04a6b3b80 100644 --- a/api/dify_graph/nodes/variable_assigner/v2/node.py +++ b/api/dify_graph/nodes/variable_assigner/v2/node.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, Any from dify_graph.constants import CONVERSATION_VARIABLE_NODE_ID from dify_graph.entities.graph_config import NodeConfigDict -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node from dify_graph.nodes.variable_assigner.common import helpers as common_helpers @@ -52,7 +52,7 @@ def _source_mapping_from_item(mapping: MutableMapping[str, Sequence[str]], node_ class VariableAssignerNode(Node[VariableAssignerNodeData]): - node_type = NodeType.VARIABLE_ASSIGNER + node_type = BuiltinNodeTypes.VARIABLE_ASSIGNER def __init__( self, diff --git a/api/dify_graph/repositories/summary_index_service_protocol.py b/api/dify_graph/repositories/summary_index_service_protocol.py deleted file mode 100644 index cbcfdd2a77..0000000000 --- a/api/dify_graph/repositories/summary_index_service_protocol.py +++ /dev/null @@ -1,7 +0,0 @@ -from typing import Protocol - - -class SummaryIndexServiceProtocol(Protocol): - def generate_and_vectorize_summary( - self, dataset_id: str, document_id: str, is_preview: bool, summary_index_setting: dict | None = None - ): ... diff --git a/api/enterprise/__init__.py b/api/enterprise/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/enterprise/telemetry/DATA_DICTIONARY.md b/api/enterprise/telemetry/DATA_DICTIONARY.md new file mode 100644 index 0000000000..60d482cd1c --- /dev/null +++ b/api/enterprise/telemetry/DATA_DICTIONARY.md @@ -0,0 +1,525 @@ +# Dify Enterprise Telemetry Data Dictionary + +Quick reference for all telemetry signals emitted by Dify Enterprise. For configuration and architecture details, see [README.md](./README.md). + +## Resource Attributes + +Attached to every signal (Span, Metric, Log). 
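+For integrators wiring this into their own pipelines, these are plain OTEL resource attributes. A minimal sketch of the equivalent SDK call (values are illustrative; `service.name` comes from `ENTERPRISE_SERVICE_NAME`):
+
+```python
+from opentelemetry.sdk.resources import Resource
+
+# Illustrative values; the real exporter fills these from configuration
+# and the runtime host. See README.md for the environment variables.
+resource = Resource.create({
+    "service.name": "dify",
+    "host.name": "dify-api-7f8b",
+})
+```
+
+The attributes: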
+ +| Attribute | Type | Example | +|-----------|------|---------| +| `service.name` | string | `dify` | +| `host.name` | string | `dify-api-7f8b` | + +## Traces (Spans) + +### `dify.workflow.run` + +| Attribute | Type | Description | +|-----------|------|-------------| +| `dify.trace_id` | string | Business trace ID (Workflow Run ID) | +| `dify.tenant_id` | string | Tenant identifier | +| `dify.app_id` | string | Application identifier | +| `dify.workflow.id` | string | Workflow definition ID | +| `dify.workflow.run_id` | string | Unique ID for this run | +| `dify.workflow.status` | string | `succeeded`, `failed`, `stopped`, etc. | +| `dify.workflow.error` | string | Error message if failed | +| `dify.workflow.elapsed_time` | float | Total execution time (seconds) | +| `dify.invoke_from` | string | `api`, `webapp`, `debug` | +| `dify.conversation.id` | string | Conversation ID (optional) | +| `dify.message.id` | string | Message ID (optional) | +| `dify.invoked_by` | string | User ID who triggered the run | +| `gen_ai.usage.total_tokens` | int | Total tokens across all nodes (optional) | +| `gen_ai.user.id` | string | End-user identifier (optional) | +| `dify.parent.trace_id` | string | Parent workflow trace ID (optional) | +| `dify.parent.workflow.run_id` | string | Parent workflow run ID (optional) | +| `dify.parent.node.execution_id` | string | Parent node execution ID (optional) | +| `dify.parent.app.id` | string | Parent app ID (optional) | + +### `dify.node.execution` + +| Attribute | Type | Description | +|-----------|------|-------------| +| `dify.trace_id` | string | Business trace ID | +| `dify.tenant_id` | string | Tenant identifier | +| `dify.app_id` | string | Application identifier | +| `dify.workflow.id` | string | Workflow definition ID | +| `dify.workflow.run_id` | string | Workflow Run ID | +| `dify.message.id` | string | Message ID (optional) | +| `dify.conversation.id` | string | Conversation ID (optional) | +| `dify.node.execution_id` | string | Unique node execution ID | +| `dify.node.id` | string | Node ID in workflow graph | +| `dify.node.type` | string | Node type (see appendix) | +| `dify.node.title` | string | Display title | +| `dify.node.status` | string | `succeeded`, `failed` | +| `dify.node.error` | string | Error message if failed | +| `dify.node.elapsed_time` | float | Execution time (seconds) | +| `dify.node.index` | int | Execution order index | +| `dify.node.predecessor_node_id` | string | Triggering node ID | +| `dify.node.iteration_id` | string | Iteration ID (optional) | +| `dify.node.loop_id` | string | Loop ID (optional) | +| `dify.node.parallel_id` | string | Parallel branch ID (optional) | +| `dify.node.invoked_by` | string | User ID who triggered execution | +| `gen_ai.usage.input_tokens` | int | Prompt tokens (LLM nodes only) | +| `gen_ai.usage.output_tokens` | int | Completion tokens (LLM nodes only) | +| `gen_ai.usage.total_tokens` | int | Total tokens (LLM nodes only) | +| `gen_ai.request.model` | string | LLM model name (LLM nodes only) | +| `gen_ai.provider.name` | string | LLM provider name (LLM nodes only) | +| `gen_ai.user.id` | string | End-user identifier (optional) | + +### `dify.node.execution.draft` + +Same attributes as `dify.node.execution`. Emitted during Preview/Debug runs. + +## Counters + +All counters are cumulative and emitted at 100% accuracy. 
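+In OTEL SDK terms, every metric below is a monotonic counter recorded with the listed labels as attributes. A minimal sketch of how a single increment maps onto the standard `opentelemetry-api` (meter name and label values are illustrative):
+
+```python
+from opentelemetry import metrics
+
+meter = metrics.get_meter("dify.enterprise.telemetry")  # illustrative meter name
+tokens_total = meter.create_counter("dify.tokens.total", unit="{token}")
+
+# One workflow run that consumed 1234 tokens; unused labels are empty
+# strings by convention (see the label tables below).
+tokens_total.add(
+    1234,
+    attributes={
+        "tenant_id": "t-1",
+        "app_id": "a-1",
+        "operation_type": "workflow",
+        "model_provider": "",
+        "model_name": "",
+        "node_type": "",
+    },
+)
+```
+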
### Token Counters
+
+| Metric | Unit | Description |
+|--------|------|-------------|
+| `dify.tokens.total` | `{token}` | Total tokens consumed |
+| `dify.tokens.input` | `{token}` | Input (prompt) tokens |
+| `dify.tokens.output` | `{token}` | Output (completion) tokens |
+
+**Labels:**
+
+- `tenant_id`, `app_id`, `operation_type`, `model_provider`, `model_name`, `node_type` (if node_execution)
+
+⚠️ **Warning:** `dify.tokens.total` at workflow level includes all node tokens. Filter by `operation_type` to avoid double-counting.
+
+#### Token Hierarchy & Query Patterns
+
+Token metrics are emitted at multiple layers. Understanding the hierarchy prevents double-counting:
+
+```
+App-level total
+├── workflow            ← sum of all node_execution tokens (DO NOT add both)
+│   └── node_execution  ← per-node breakdown
+├── message             ← independent (non-workflow chat apps only)
+├── rule_generate       ← independent helper LLM call
+├── code_generate       ← independent helper LLM call
+├── structured_output   ← independent helper LLM call
+└── instruction_modify  ← independent helper LLM call
+```
+
+**Key rule:** `workflow` tokens already include all `node_execution` tokens. Never sum both.
+
+**Available labels on token metrics:** `tenant_id`, `app_id`, `operation_type`, `model_provider`, `model_name`, `node_type`.
+App name is only available on span attributes (`dify.app.name`), not metric labels — use `app_id` for metric queries.
+
+**Common queries** (PromQL; replace `<app_id>` with a concrete application ID):
+
+```promql
+# ── Totals ──────────────────────────────────────────────────
+# App-level total (exclude node_execution to avoid double-counting)
+sum by (app_id) (dify_tokens_total{operation_type!="node_execution"})
+
+# Single app total
+sum (dify_tokens_total{app_id="<app_id>", operation_type!="node_execution"})
+
+# Per-tenant totals
+sum by (tenant_id) (dify_tokens_total{operation_type!="node_execution"})
+
+# ── Drill-down ──────────────────────────────────────────────
+# Workflow-level tokens for an app
+sum (dify_tokens_total{app_id="<app_id>", operation_type="workflow"})
+
+# Node-level breakdown within an app
+sum by (node_type) (dify_tokens_total{app_id="<app_id>", operation_type="node_execution"})
+
+# Model breakdown for an app
+sum by (model_provider, model_name) (dify_tokens_total{app_id="<app_id>"})
+
+# Input vs output per model
+sum by (model_name) (dify_tokens_input_total{app_id="<app_id>"})
+sum by (model_name) (dify_tokens_output_total{app_id="<app_id>"})
+
+# ── Rates ───────────────────────────────────────────────────
+# Token consumption rate (per hour)
+sum(rate(dify_tokens_total{operation_type!="node_execution"}[1h]))
+
+# Per-app consumption rate
+sum by (app_id) (rate(dify_tokens_total{operation_type!="node_execution"}[1h]))
+```
+
+**Finding `app_id` from app name** (TraceQL trace query — Tempo):
+
+```
+{ resource.dify.app.name = "My Chatbot" } | select(resource.dify.app.id)
+```
+
+### Request Counters
+
+| Metric | Unit | Description |
+|--------|------|-------------|
+| `dify.requests.total` | `{request}` | Total operations count |
+
+**Labels by type:**
+
+| `type` | Additional Labels |
+|--------|-------------------|
+| `workflow` | `tenant_id`, `app_id`, `status`, `invoke_from` |
+| `node` | `tenant_id`, `app_id`, `node_type`, `model_provider`, `model_name`, `status` |
+| `draft_node` | `tenant_id`, `app_id`, `node_type`, `model_provider`, `model_name`, `status` |
+| `message` | `tenant_id`, `app_id`, `model_provider`, `model_name`, `status`, `invoke_from` |
+| `tool` | `tenant_id`, `app_id`, `tool_name` |
+| `moderation` | `tenant_id`, `app_id` |
+|
`suggested_question` | `tenant_id`, `app_id`, `model_provider`, `model_name` | +| `dataset_retrieval` | `tenant_id`, `app_id` | +| `generate_name` | `tenant_id`, `app_id` | +| `prompt_generation` | `tenant_id`, `app_id`, `operation_type`, `model_provider`, `model_name`, `status` | + +### Error Counters + +| Metric | Unit | Description | +|--------|------|-------------| +| `dify.errors.total` | `{error}` | Total failed operations | + +**Labels by type:** + +| `type` | Additional Labels | +|--------|-------------------| +| `workflow` | `tenant_id`, `app_id` | +| `node` | `tenant_id`, `app_id`, `node_type`, `model_provider`, `model_name` | +| `draft_node` | `tenant_id`, `app_id`, `node_type`, `model_provider`, `model_name` | +| `message` | `tenant_id`, `app_id`, `model_provider`, `model_name` | +| `tool` | `tenant_id`, `app_id`, `tool_name` | +| `prompt_generation` | `tenant_id`, `app_id`, `operation_type`, `model_provider`, `model_name` | + +### Other Counters + +| Metric | Unit | Labels | +|--------|------|--------| +| `dify.feedback.total` | `{feedback}` | `tenant_id`, `app_id`, `rating` | +| `dify.dataset.retrievals.total` | `{retrieval}` | `tenant_id`, `app_id`, `dataset_id`, `embedding_model_provider`, `embedding_model`, `rerank_model_provider`, `rerank_model` | +| `dify.app.created.total` | `{app}` | `tenant_id`, `app_id`, `mode` | +| `dify.app.updated.total` | `{app}` | `tenant_id`, `app_id` | +| `dify.app.deleted.total` | `{app}` | `tenant_id`, `app_id` | + +## Histograms + +| Metric | Unit | Labels | +|--------|------|--------| +| `dify.workflow.duration` | `s` | `tenant_id`, `app_id`, `status` | +| `dify.node.duration` | `s` | `tenant_id`, `app_id`, `node_type`, `model_provider`, `model_name`, `plugin_name` | +| `dify.message.duration` | `s` | `tenant_id`, `app_id`, `model_provider`, `model_name` | +| `dify.message.time_to_first_token` | `s` | `tenant_id`, `app_id`, `model_provider`, `model_name` | +| `dify.tool.duration` | `s` | `tenant_id`, `app_id`, `tool_name` | +| `dify.prompt_generation.duration` | `s` | `tenant_id`, `app_id`, `operation_type`, `model_provider`, `model_name` | + +## Structured Logs + +### Span Companion Logs + +Logs that accompany spans. 
Signal type: `span_detail` + +#### `dify.workflow.run` Companion Log + +**Common attributes:** All span attributes (see Traces section) plus: + +| Additional Attribute | Type | Always Present | Description | +|---------------------|------|----------------|-------------| +| `dify.app.name` | string | No | Application display name | +| `dify.workspace.name` | string | No | Workspace display name | +| `dify.workflow.version` | string | Yes | Workflow definition version | +| `dify.workflow.inputs` | string/JSON | Yes | Input parameters (content-gated) | +| `dify.workflow.outputs` | string/JSON | Yes | Output results (content-gated) | +| `dify.workflow.query` | string | No | User query text (content-gated) | + +**Event attributes:** + +- `dify.event.name`: `"dify.workflow.run"` +- `dify.event.signal`: `"span_detail"` +- `trace_id`, `span_id`, `tenant_id`, `user_id` + +#### `dify.node.execution` and `dify.node.execution.draft` Companion Logs + +**Common attributes:** All span attributes (see Traces section) plus: + +| Additional Attribute | Type | Always Present | Description | +|---------------------|------|----------------|-------------| +| `dify.app.name` | string | No | Application display name | +| `dify.workspace.name` | string | No | Workspace display name | +| `dify.invoke_from` | string | No | Invocation source | +| `gen_ai.tool.name` | string | No | Tool name (tool nodes only) | +| `dify.node.total_price` | float | No | Cost (LLM nodes only) | +| `dify.node.currency` | string | No | Currency code (LLM nodes only) | +| `dify.node.iteration_index` | int | No | Iteration index (iteration nodes) | +| `dify.node.loop_index` | int | No | Loop index (loop nodes) | +| `dify.plugin.name` | string | No | Plugin name (tool/knowledge nodes) | +| `dify.credential.name` | string | No | Credential name (plugin nodes) | +| `dify.credential.id` | string | No | Credential ID (plugin nodes) | +| `dify.dataset.ids` | JSON array | No | Dataset IDs (knowledge nodes) | +| `dify.dataset.names` | JSON array | No | Dataset names (knowledge nodes) | +| `dify.node.inputs` | string/JSON | Yes | Node inputs (content-gated) | +| `dify.node.outputs` | string/JSON | Yes | Node outputs (content-gated) | +| `dify.node.process_data` | string/JSON | No | Processing data (content-gated) | + +**Event attributes:** + +- `dify.event.name`: `"dify.node.execution"` or `"dify.node.execution.draft"` +- `dify.event.signal`: `"span_detail"` +- `trace_id`, `span_id`, `tenant_id`, `user_id` + +### Standalone Logs + +Logs without structural spans. 
Signal type: `metric_only` + +#### `dify.message.run` + +| Attribute | Type | Description | +|-----------|------|-------------| +| `dify.event.name` | string | `"dify.message.run"` | +| `dify.event.signal` | string | `"metric_only"` | +| `trace_id` | string | OTEL trace ID (32-char hex) | +| `span_id` | string | OTEL span ID (16-char hex) | +| `tenant_id` | string | Tenant identifier | +| `user_id` | string | User identifier (optional) | +| `dify.app_id` | string | Application identifier | +| `dify.message.id` | string | Message identifier | +| `dify.conversation.id` | string | Conversation ID (optional) | +| `dify.workflow.run_id` | string | Workflow run ID (optional) | +| `dify.invoke_from` | string | `service-api`, `web-app`, `debugger`, `explore` | +| `gen_ai.provider.name` | string | LLM provider | +| `gen_ai.request.model` | string | LLM model | +| `gen_ai.usage.input_tokens` | int | Input tokens | +| `gen_ai.usage.output_tokens` | int | Output tokens | +| `gen_ai.usage.total_tokens` | int | Total tokens | +| `dify.message.status` | string | `succeeded`, `failed` | +| `dify.message.error` | string | Error message (if failed) | +| `dify.message.duration` | float | Duration (seconds) | +| `dify.message.time_to_first_token` | float | TTFT (seconds) | +| `dify.message.inputs` | string/JSON | Inputs (content-gated) | +| `dify.message.outputs` | string/JSON | Outputs (content-gated) | + +#### `dify.tool.execution` + +| Attribute | Type | Description | +|-----------|------|-------------| +| `dify.event.name` | string | `"dify.tool.execution"` | +| `dify.event.signal` | string | `"metric_only"` | +| `trace_id` | string | OTEL trace ID | +| `span_id` | string | OTEL span ID | +| `tenant_id` | string | Tenant identifier | +| `dify.app_id` | string | Application identifier | +| `dify.message.id` | string | Message identifier | +| `dify.tool.name` | string | Tool name | +| `dify.tool.duration` | float | Duration (seconds) | +| `dify.tool.status` | string | `succeeded`, `failed` | +| `dify.tool.error` | string | Error message (if failed) | +| `dify.tool.inputs` | string/JSON | Inputs (content-gated) | +| `dify.tool.outputs` | string/JSON | Outputs (content-gated) | +| `dify.tool.parameters` | string/JSON | Parameters (content-gated) | +| `dify.tool.config` | string/JSON | Configuration (content-gated) | + +#### `dify.moderation.check` + +| Attribute | Type | Description | +|-----------|------|-------------| +| `dify.event.name` | string | `"dify.moderation.check"` | +| `dify.event.signal` | string | `"metric_only"` | +| `trace_id` | string | OTEL trace ID | +| `span_id` | string | OTEL span ID | +| `tenant_id` | string | Tenant identifier | +| `dify.app_id` | string | Application identifier | +| `dify.message.id` | string | Message identifier | +| `dify.moderation.type` | string | `input`, `output` | +| `dify.moderation.action` | string | `pass`, `block`, `flag` | +| `dify.moderation.flagged` | boolean | Whether flagged | +| `dify.moderation.categories` | JSON array | Flagged categories | +| `dify.moderation.query` | string | Content (content-gated) | + +#### `dify.suggested_question.generation` + +| Attribute | Type | Description | +|-----------|------|-------------| +| `dify.event.name` | string | `"dify.suggested_question.generation"` | +| `dify.event.signal` | string | `"metric_only"` | +| `trace_id` | string | OTEL trace ID | +| `span_id` | string | OTEL span ID | +| `tenant_id` | string | Tenant identifier | +| `dify.app_id` | string | Application identifier | +| `dify.message.id` | string 
| Message identifier | +| `dify.suggested_question.count` | int | Number of questions | +| `dify.suggested_question.duration` | float | Duration (seconds) | +| `dify.suggested_question.status` | string | `succeeded`, `failed` | +| `dify.suggested_question.error` | string | Error message (if failed) | +| `dify.suggested_question.questions` | JSON array | Questions (content-gated) | + +#### `dify.dataset.retrieval` + +| Attribute | Type | Description | +|-----------|------|-------------| +| `dify.event.name` | string | `"dify.dataset.retrieval"` | +| `dify.event.signal` | string | `"metric_only"` | +| `trace_id` | string | OTEL trace ID | +| `span_id` | string | OTEL span ID | +| `tenant_id` | string | Tenant identifier | +| `dify.app_id` | string | Application identifier | +| `dify.message.id` | string | Message identifier | +| `dify.dataset.id` | string | Dataset identifier | +| `dify.dataset.name` | string | Dataset name | +| `dify.dataset.embedding_providers` | JSON array | Embedding model providers (one per dataset) | +| `dify.dataset.embedding_models` | JSON array | Embedding models (one per dataset) | +| `dify.retrieval.rerank_provider` | string | Rerank model provider | +| `dify.retrieval.rerank_model` | string | Rerank model name | +| `dify.retrieval.query` | string | Search query (content-gated) | +| `dify.retrieval.document_count` | int | Documents retrieved | +| `dify.retrieval.duration` | float | Duration (seconds) | +| `dify.retrieval.status` | string | `succeeded`, `failed` | +| `dify.retrieval.error` | string | Error message (if failed) | +| `dify.dataset.documents` | JSON array | Documents (content-gated) | + +#### `dify.generate_name.execution` + +| Attribute | Type | Description | +|-----------|------|-------------| +| `dify.event.name` | string | `"dify.generate_name.execution"` | +| `dify.event.signal` | string | `"metric_only"` | +| `trace_id` | string | OTEL trace ID | +| `span_id` | string | OTEL span ID | +| `tenant_id` | string | Tenant identifier | +| `dify.app_id` | string | Application identifier | +| `dify.conversation.id` | string | Conversation identifier | +| `dify.generate_name.duration` | float | Duration (seconds) | +| `dify.generate_name.status` | string | `succeeded`, `failed` | +| `dify.generate_name.error` | string | Error message (if failed) | +| `dify.generate_name.inputs` | string/JSON | Inputs (content-gated) | +| `dify.generate_name.outputs` | string | Generated name (content-gated) | + +#### `dify.prompt_generation.execution` + +| Attribute | Type | Description | +|-----------|------|-------------| +| `dify.event.name` | string | `"dify.prompt_generation.execution"` | +| `dify.event.signal` | string | `"metric_only"` | +| `trace_id` | string | OTEL trace ID | +| `span_id` | string | OTEL span ID | +| `tenant_id` | string | Tenant identifier | +| `dify.app_id` | string | Application identifier | +| `dify.prompt_generation.operation_type` | string | Operation type (see appendix) | +| `gen_ai.provider.name` | string | LLM provider | +| `gen_ai.request.model` | string | LLM model | +| `gen_ai.usage.input_tokens` | int | Input tokens | +| `gen_ai.usage.output_tokens` | int | Output tokens | +| `gen_ai.usage.total_tokens` | int | Total tokens | +| `dify.prompt_generation.duration` | float | Duration (seconds) | +| `dify.prompt_generation.status` | string | `succeeded`, `failed` | +| `dify.prompt_generation.error` | string | Error message (if failed) | +| `dify.prompt_generation.instruction` | string | Instruction (content-gated) | +| 
`dify.prompt_generation.output` | string/JSON | Output (content-gated) | + +#### `dify.app.created` + +| Attribute | Type | Description | +|-----------|------|-------------| +| `dify.event.name` | string | `"dify.app.created"` | +| `dify.event.signal` | string | `"metric_only"` | +| `tenant_id` | string | Tenant identifier | +| `dify.app_id` | string | Application identifier | +| `dify.app.mode` | string | `chat`, `completion`, `agent-chat`, `workflow` | +| `dify.app.created_at` | string | Timestamp (ISO 8601) | + +#### `dify.app.updated` + +| Attribute | Type | Description | +|-----------|------|-------------| +| `dify.event.name` | string | `"dify.app.updated"` | +| `dify.event.signal` | string | `"metric_only"` | +| `tenant_id` | string | Tenant identifier | +| `dify.app_id` | string | Application identifier | +| `dify.app.updated_at` | string | Timestamp (ISO 8601) | + +#### `dify.app.deleted` + +| Attribute | Type | Description | +|-----------|------|-------------| +| `dify.event.name` | string | `"dify.app.deleted"` | +| `dify.event.signal` | string | `"metric_only"` | +| `tenant_id` | string | Tenant identifier | +| `dify.app_id` | string | Application identifier | +| `dify.app.deleted_at` | string | Timestamp (ISO 8601) | + +#### `dify.feedback.created` + +| Attribute | Type | Description | +|-----------|------|-------------| +| `dify.event.name` | string | `"dify.feedback.created"` | +| `dify.event.signal` | string | `"metric_only"` | +| `trace_id` | string | OTEL trace ID | +| `span_id` | string | OTEL span ID | +| `tenant_id` | string | Tenant identifier | +| `dify.app_id` | string | Application identifier | +| `dify.message.id` | string | Message identifier | +| `dify.feedback.rating` | string | `like`, `dislike`, `null` | +| `dify.feedback.content` | string | Feedback text (content-gated) | +| `dify.feedback.created_at` | string | Timestamp (ISO 8601) | + +#### `dify.telemetry.rehydration_failed` + +Diagnostic event for telemetry system health monitoring. + +| Attribute | Type | Description | +|-----------|------|-------------| +| `dify.event.name` | string | `"dify.telemetry.rehydration_failed"` | +| `dify.event.signal` | string | `"metric_only"` | +| `tenant_id` | string | Tenant identifier | +| `dify.telemetry.error` | string | Error message | +| `dify.telemetry.payload_type` | string | Payload type (see appendix) | +| `dify.telemetry.correlation_id` | string | Correlation ID | + +## Content-Gated Attributes + +When `ENTERPRISE_INCLUDE_CONTENT=false`, these attributes are replaced with reference strings (`ref:{id_type}={uuid}`). 
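+The reference format is mechanical to parse on the consumer side; a minimal sketch (the `parse_ref` helper is illustrative, not part of Dify's API):
+
+```python
+def parse_ref(value: str) -> tuple[str, str] | None:
+    """Split 'ref:message_id=770e8400-...' into ('message_id', '770e8400-...')."""
+    if not isinstance(value, str) or not value.startswith("ref:"):
+        return None  # content was exported inline, i.e. not gated
+    id_type, sep, ref_id = value[len("ref:"):].partition("=")
+    return (id_type, ref_id) if sep and id_type and ref_id else None
+```
+
+The returned UUID can then be used to look up the original content in the Dify database (see README.md). The gated attributes:
+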
+ +| Attribute | Signal | +|-----------|--------| +| `dify.workflow.inputs` | `dify.workflow.run` | +| `dify.workflow.outputs` | `dify.workflow.run` | +| `dify.workflow.query` | `dify.workflow.run` | +| `dify.node.inputs` | `dify.node.execution` | +| `dify.node.outputs` | `dify.node.execution` | +| `dify.node.process_data` | `dify.node.execution` | +| `dify.message.inputs` | `dify.message.run` | +| `dify.message.outputs` | `dify.message.run` | +| `dify.tool.inputs` | `dify.tool.execution` | +| `dify.tool.outputs` | `dify.tool.execution` | +| `dify.tool.parameters` | `dify.tool.execution` | +| `dify.tool.config` | `dify.tool.execution` | +| `dify.moderation.query` | `dify.moderation.check` | +| `dify.suggested_question.questions` | `dify.suggested_question.generation` | +| `dify.retrieval.query` | `dify.dataset.retrieval` | +| `dify.dataset.documents` | `dify.dataset.retrieval` | +| `dify.generate_name.inputs` | `dify.generate_name.execution` | +| `dify.generate_name.outputs` | `dify.generate_name.execution` | +| `dify.prompt_generation.instruction` | `dify.prompt_generation.execution` | +| `dify.prompt_generation.output` | `dify.prompt_generation.execution` | +| `dify.feedback.content` | `dify.feedback.created` | + +## Appendix + +### Operation Types + +- `workflow`, `node_execution`, `message`, `rule_generate`, `code_generate`, `structured_output`, `instruction_modify` + +### Node Types + +- `start`, `end`, `answer`, `llm`, `knowledge-retrieval`, `knowledge-index`, `if-else`, `code`, `template-transform`, `question-classifier`, `http-request`, `tool`, `datasource`, `variable-aggregator`, `loop`, `iteration`, `parameter-extractor`, `assigner`, `document-extractor`, `list-operator`, `agent`, `trigger-webhook`, `trigger-schedule`, `trigger-plugin`, `human-input` + +### Workflow Statuses + +- `running`, `succeeded`, `failed`, `stopped`, `partial-succeeded`, `paused` + +### Payload Types + +- `workflow`, `node`, `message`, `tool`, `moderation`, `suggested_question`, `dataset_retrieval`, `generate_name`, `prompt_generation`, `app`, `feedback` + +### Null Value Behavior + +**Spans:** Attributes with `null` values are omitted. + +**Logs:** Attributes with `null` values appear as `null` in JSON. + +**Content-Gated:** Replaced with reference strings, not set to `null`. diff --git a/api/enterprise/telemetry/README.md b/api/enterprise/telemetry/README.md new file mode 100644 index 0000000000..298509a8c1 --- /dev/null +++ b/api/enterprise/telemetry/README.md @@ -0,0 +1,121 @@ +# Dify Enterprise Telemetry + +This document provides an overview of the Dify Enterprise OpenTelemetry (OTEL) exporter and how to configure it for integration with observability stacks like Prometheus, Grafana, Jaeger, or Honeycomb. + +## Overview + +Dify Enterprise uses a "slim span + rich companion log" architecture to provide high-fidelity observability without overwhelming trace storage. + +- **Traces (Spans)**: Capture the structure, identity, and timing of high-level operations (Workflows and Nodes). +- **Structured Logs**: Provide deep context (inputs, outputs, metadata) for every event, correlated to spans via `trace_id` and `span_id`. +- **Metrics**: Provide 100% accurate counters and histograms for usage, performance, and error tracking. 
+ +### Signal Architecture + +```mermaid +graph TD + A[Workflow Run] -->|Span| B(dify.workflow.run) + A -->|Log| C(dify.workflow.run detail) + B ---|trace_id| C + + D[Node Execution] -->|Span| E(dify.node.execution) + D -->|Log| F(dify.node.execution detail) + E ---|span_id| F + + G[Message/Tool/etc] -->|Log| H(dify.* event) + G -->|Metric| I(dify.* counter/histogram) +``` + +## Configuration + +The Enterprise OTEL exporter is configured via environment variables. + +| Variable | Description | Default | +|----------|-------------|---------| +| `ENTERPRISE_ENABLED` | Master switch for all enterprise features. | `false` | +| `ENTERPRISE_TELEMETRY_ENABLED` | Master switch for enterprise telemetry. | `false` | +| `ENTERPRISE_OTLP_ENDPOINT` | OTLP collector endpoint (e.g., `http://otel-collector:4318`). | - | +| `ENTERPRISE_OTLP_HEADERS` | Custom headers for OTLP requests (e.g., `x-scope-orgid=tenant1`). | - | +| `ENTERPRISE_OTLP_PROTOCOL` | OTLP transport protocol (`http` or `grpc`). | `http` | +| `ENTERPRISE_OTLP_API_KEY` | Bearer token for authentication. | - | +| `ENTERPRISE_INCLUDE_CONTENT` | Whether to include sensitive content (inputs/outputs) in logs. | `true` | +| `ENTERPRISE_SERVICE_NAME` | Service name reported to OTEL. | `dify` | +| `ENTERPRISE_OTEL_SAMPLING_RATE` | Sampling rate for traces (0.0 to 1.0). Metrics are always 100%. | `1.0` | + +## Correlation Model + +Dify uses deterministic ID generation to ensure signals are correlated across different services and asynchronous tasks. + +### ID Generation Rules + +- `trace_id`: Derived from the correlation ID (workflow_run_id or node_execution_id for drafts) using `int(UUID(correlation_id))` +- `span_id`: Derived from the source ID using the lower 64 bits of `UUID(source_id)` + +### Scenario A: Simple Workflow + +A single workflow run with multiple nodes. All spans and logs share the same `trace_id` (derived from `workflow_run_id`). + +``` +trace_id = UUID(workflow_run_id) +├── [root span] dify.workflow.run (span_id = hash(workflow_run_id)) +│ ├── [child] dify.node.execution - "Start" (span_id = hash(node_exec_id_1)) +│ ├── [child] dify.node.execution - "LLM" (span_id = hash(node_exec_id_2)) +│ └── [child] dify.node.execution - "End" (span_id = hash(node_exec_id_3)) +``` + +### Scenario B: Nested Sub-Workflow + +A workflow calling another workflow via a Tool or Sub-workflow node. The child workflow's spans are linked to the parent via `parent_span_id`. Both workflows share the same trace_id. + +``` +trace_id = UUID(outer_workflow_run_id) ← shared across both workflows +├── [root] dify.workflow.run (outer) (span_id = hash(outer_workflow_run_id)) +│ ├── dify.node.execution - "Start Node" +│ ├── dify.node.execution - "Tool Node" (triggers sub-workflow) +│ │ └── [child] dify.workflow.run (inner) (span_id = hash(inner_workflow_run_id)) +│ │ ├── dify.node.execution - "Inner Start" +│ │ └── dify.node.execution - "Inner End" +│ └── dify.node.execution - "End Node" +``` + +**Key attributes for nested workflows:** + +- Inner workflow's `dify.parent.trace_id` = outer `workflow_run_id` +- Inner workflow's `dify.parent.node.execution_id` = tool node's `execution_id` +- Inner workflow's `dify.parent.workflow.run_id` = outer `workflow_run_id` +- Inner workflow's `dify.parent.app.id` = outer `app_id` + +### Scenario C: Draft Node Execution + +A single node run in isolation (debugger/preview mode). It creates its own trace where the node span is the root. 
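+This scenario is also the simplest place to see the ID generation rules above in action. A minimal sketch of the deterministic derivation (helper names are illustrative; the actual logic lives in the exporter):
+
+```python
+import uuid
+
+SPAN_ID_MASK = (1 << 64) - 1  # OTEL span ids are 64-bit
+
+def derive_trace_id(correlation_id: str) -> int:
+    # The full 128-bit UUID value becomes the OTEL trace id.
+    return uuid.UUID(correlation_id).int
+
+def derive_span_id(source_id: str) -> int:
+    # The lower 64 bits of the UUID become the OTEL span id.
+    return uuid.UUID(source_id).int & SPAN_ID_MASK
+
+# For a draft execution, both derive from the same node_execution_id:
+trace_id = derive_trace_id("660e8400-e29b-41d4-a716-446655440001")
+span_id = derive_span_id("660e8400-e29b-41d4-a716-446655440001")
+```
+
+The resulting single-span trace: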
+ +``` +trace_id = UUID(node_execution_id) ← own trace, NOT part of any workflow +└── dify.node.execution.draft (span_id = hash(node_execution_id)) +``` + +**Key difference:** Draft executions use `node_execution_id` as the correlation_id, so they are NOT children of any workflow trace. + +## Content Gating + +When `ENTERPRISE_INCLUDE_CONTENT` is set to `false`, sensitive content attributes (inputs, outputs, queries) are replaced with reference strings (e.g., `ref:workflow_run_id=...`) to prevent data leakage to the OTEL collector. + +**Reference String Format:** + +``` +ref:{id_type}={uuid} +``` + +**Examples:** + +``` +ref:workflow_run_id=550e8400-e29b-41d4-a716-446655440000 +ref:node_execution_id=660e8400-e29b-41d4-a716-446655440001 +ref:message_id=770e8400-e29b-41d4-a716-446655440002 +``` + +To retrieve actual content when gating is enabled, query the Dify database using the provided UUID. + +## Reference + +For a complete list of telemetry signals, attributes, and data structures, see [DATA_DICTIONARY.md](./DATA_DICTIONARY.md). diff --git a/api/enterprise/telemetry/__init__.py b/api/enterprise/telemetry/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/enterprise/telemetry/contracts.py b/api/enterprise/telemetry/contracts.py new file mode 100644 index 0000000000..91398cb8cb --- /dev/null +++ b/api/enterprise/telemetry/contracts.py @@ -0,0 +1,73 @@ +"""Telemetry gateway contracts and data structures. + +This module defines the envelope format for telemetry events and the routing +configuration that determines how each event type is processed. +""" + +from __future__ import annotations + +from enum import StrEnum +from typing import Any + +from pydantic import BaseModel, ConfigDict + + +class TelemetryCase(StrEnum): + """Enumeration of all known telemetry event cases.""" + + WORKFLOW_RUN = "workflow_run" + NODE_EXECUTION = "node_execution" + DRAFT_NODE_EXECUTION = "draft_node_execution" + MESSAGE_RUN = "message_run" + TOOL_EXECUTION = "tool_execution" + MODERATION_CHECK = "moderation_check" + SUGGESTED_QUESTION = "suggested_question" + DATASET_RETRIEVAL = "dataset_retrieval" + GENERATE_NAME = "generate_name" + PROMPT_GENERATION = "prompt_generation" + APP_CREATED = "app_created" + APP_UPDATED = "app_updated" + APP_DELETED = "app_deleted" + FEEDBACK_CREATED = "feedback_created" + + +class SignalType(StrEnum): + """Signal routing type for telemetry cases.""" + + TRACE = "trace" + METRIC_LOG = "metric_log" + + +class CaseRoute(BaseModel): + """Routing configuration for a telemetry case. + + Attributes: + signal_type: The type of signal (trace or metric_log). + ce_eligible: Whether this case is eligible for community edition tracing. + """ + + signal_type: SignalType + ce_eligible: bool + + +class TelemetryEnvelope(BaseModel): + """Envelope for telemetry events. + + Attributes: + case: The telemetry case type. + tenant_id: The tenant identifier. + event_id: Unique event identifier for deduplication. + payload: The main event payload (inline for small payloads, + empty when offloaded to storage via ``payload_ref``). + metadata: Optional metadata dictionary. When the gateway + offloads a large payload to object storage, this contains + ``{"payload_ref": ""}``. 
+ """ + + model_config = ConfigDict(extra="forbid", use_enum_values=False) + + case: TelemetryCase + tenant_id: str + event_id: str + payload: dict[str, Any] + metadata: dict[str, Any] | None = None diff --git a/api/enterprise/telemetry/draft_trace.py b/api/enterprise/telemetry/draft_trace.py new file mode 100644 index 0000000000..4a9b4a2c70 --- /dev/null +++ b/api/enterprise/telemetry/draft_trace.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +from collections.abc import Mapping +from typing import Any + +from core.telemetry import TelemetryContext, TelemetryEvent, TraceTaskName +from core.telemetry import emit as telemetry_emit +from dify_graph.enums import WorkflowNodeExecutionMetadataKey +from models.workflow import WorkflowNodeExecutionModel + + +def enqueue_draft_node_execution_trace( + *, + execution: WorkflowNodeExecutionModel, + outputs: Mapping[str, Any] | None, + workflow_execution_id: str | None, + user_id: str, +) -> None: + node_data = _build_node_execution_data( + execution=execution, + outputs=outputs, + workflow_execution_id=workflow_execution_id, + ) + telemetry_emit( + TelemetryEvent( + name=TraceTaskName.DRAFT_NODE_EXECUTION_TRACE, + context=TelemetryContext( + tenant_id=execution.tenant_id, + user_id=user_id, + app_id=execution.app_id, + ), + payload={"node_execution_data": node_data}, + ) + ) + + +def _build_node_execution_data( + *, + execution: WorkflowNodeExecutionModel, + outputs: Mapping[str, Any] | None, + workflow_execution_id: str | None, +) -> dict[str, Any]: + metadata = execution.execution_metadata_dict + node_outputs = outputs if outputs is not None else execution.outputs_dict + execution_id = workflow_execution_id or execution.workflow_run_id or execution.id + + return { + "workflow_id": execution.workflow_id, + "workflow_execution_id": execution_id, + "tenant_id": execution.tenant_id, + "app_id": execution.app_id, + "node_execution_id": execution.id, + "node_id": execution.node_id, + "node_type": execution.node_type, + "title": execution.title, + "status": execution.status, + "error": execution.error, + "elapsed_time": execution.elapsed_time, + "index": execution.index, + "predecessor_node_id": execution.predecessor_node_id, + "created_at": execution.created_at, + "finished_at": execution.finished_at, + "total_tokens": metadata.get(WorkflowNodeExecutionMetadataKey.TOTAL_TOKENS, 0), + "total_price": metadata.get(WorkflowNodeExecutionMetadataKey.TOTAL_PRICE, 0.0), + "currency": metadata.get(WorkflowNodeExecutionMetadataKey.CURRENCY), + "tool_name": (metadata.get(WorkflowNodeExecutionMetadataKey.TOOL_INFO) or {}).get("tool_name") + if isinstance(metadata.get(WorkflowNodeExecutionMetadataKey.TOOL_INFO), dict) + else None, + "iteration_id": metadata.get(WorkflowNodeExecutionMetadataKey.ITERATION_ID), + "iteration_index": metadata.get(WorkflowNodeExecutionMetadataKey.ITERATION_INDEX), + "loop_id": metadata.get(WorkflowNodeExecutionMetadataKey.LOOP_ID), + "loop_index": metadata.get(WorkflowNodeExecutionMetadataKey.LOOP_INDEX), + "parallel_id": metadata.get(WorkflowNodeExecutionMetadataKey.PARALLEL_ID), + "node_inputs": execution.inputs_dict, + "node_outputs": node_outputs, + "process_data": execution.process_data_dict, + } diff --git a/api/enterprise/telemetry/enterprise_trace.py b/api/enterprise/telemetry/enterprise_trace.py new file mode 100644 index 0000000000..50f1bc5a45 --- /dev/null +++ b/api/enterprise/telemetry/enterprise_trace.py @@ -0,0 +1,938 @@ +"""Enterprise trace handler — duck-typed, NOT a BaseTraceInstance subclass. 
+ +Invoked directly in the Celery task, not through OpsTraceManager dispatch. +Only requires a matching ``trace(trace_info)`` method signature. + +Signal strategy: +- **Traces (spans)**: workflow run, node execution, draft node execution only. +- **Metrics + structured logs**: all other event types. + +Token metric labels (unified structure): +All token metrics (dify.tokens.input, dify.tokens.output, dify.tokens.total) use the +same label set for consistent filtering and aggregation: +- tenant_id: Tenant identifier +- app_id: Application identifier +- operation_type: Source of token usage (workflow | node_execution | message | rule_generate | etc.) +- model_provider: LLM provider name (empty string if not applicable) +- model_name: LLM model name (empty string if not applicable) +- node_type: Workflow node type (empty string if not node_execution) + +This unified structure allows filtering by operation_type to separate: +- Workflow-level aggregates (operation_type=workflow) +- Individual node executions (operation_type=node_execution) +- Direct message calls (operation_type=message) +- Prompt generation operations (operation_type=rule_generate, code_generate, etc.) + +Without this, tokens are double-counted when querying totals (workflow totals include +node totals, since workflow.total_tokens is the sum of all node tokens). +""" + +from __future__ import annotations + +import json +import logging +from typing import Any, cast + +from opentelemetry.util.types import AttributeValue + +from core.ops.entities.trace_entity import ( + BaseTraceInfo, + DatasetRetrievalTraceInfo, + DraftNodeExecutionTrace, + GenerateNameTraceInfo, + MessageTraceInfo, + ModerationTraceInfo, + OperationType, + PromptGenerationTraceInfo, + SuggestedQuestionTraceInfo, + ToolTraceInfo, + WorkflowNodeTraceInfo, + WorkflowTraceInfo, +) +from enterprise.telemetry.entities import ( + EnterpriseTelemetryCounter, + EnterpriseTelemetryEvent, + EnterpriseTelemetryHistogram, + EnterpriseTelemetrySpan, + TokenMetricLabels, +) +from enterprise.telemetry.telemetry_log import emit_metric_only_event, emit_telemetry_log + +logger = logging.getLogger(__name__) + + +class EnterpriseOtelTrace: + """Duck-typed enterprise trace handler. + + ``*_trace`` methods emit spans (workflow/node only) or structured logs + (all other events), plus metrics at 100 % accuracy. 
+ """ + + def __init__(self) -> None: + from extensions.ext_enterprise_telemetry import get_enterprise_exporter + + exporter = get_enterprise_exporter() + if exporter is None: + raise RuntimeError("EnterpriseOtelTrace instantiated but exporter is not initialized") + self._exporter = exporter + + def trace(self, trace_info: BaseTraceInfo) -> None: + if isinstance(trace_info, WorkflowTraceInfo): + self._workflow_trace(trace_info) + elif isinstance(trace_info, MessageTraceInfo): + self._message_trace(trace_info) + elif isinstance(trace_info, ToolTraceInfo): + self._tool_trace(trace_info) + elif isinstance(trace_info, DraftNodeExecutionTrace): + self._draft_node_execution_trace(trace_info) + elif isinstance(trace_info, WorkflowNodeTraceInfo): + self._node_execution_trace(trace_info) + elif isinstance(trace_info, ModerationTraceInfo): + self._moderation_trace(trace_info) + elif isinstance(trace_info, SuggestedQuestionTraceInfo): + self._suggested_question_trace(trace_info) + elif isinstance(trace_info, DatasetRetrievalTraceInfo): + self._dataset_retrieval_trace(trace_info) + elif isinstance(trace_info, GenerateNameTraceInfo): + self._generate_name_trace(trace_info) + elif isinstance(trace_info, PromptGenerationTraceInfo): + self._prompt_generation_trace(trace_info) + + def _common_attrs(self, trace_info: BaseTraceInfo) -> dict[str, Any]: + metadata = self._metadata(trace_info) + tenant_id, app_id, user_id = self._context_ids(trace_info, metadata) + return { + "dify.trace_id": trace_info.resolved_trace_id, + "dify.tenant_id": tenant_id, + "dify.app_id": app_id, + "dify.app.name": metadata.get("app_name"), + "dify.workspace.name": metadata.get("workspace_name"), + "gen_ai.user.id": user_id, + "dify.message.id": trace_info.message_id, + } + + def _metadata(self, trace_info: BaseTraceInfo) -> dict[str, Any]: + return trace_info.metadata + + def _context_ids( + self, + trace_info: BaseTraceInfo, + metadata: dict[str, Any], + ) -> tuple[str | None, str | None, str | None]: + tenant_id = getattr(trace_info, "tenant_id", None) or metadata.get("tenant_id") + app_id = getattr(trace_info, "app_id", None) or metadata.get("app_id") + user_id = getattr(trace_info, "user_id", None) or metadata.get("user_id") + return tenant_id, app_id, user_id + + def _labels(self, **values: AttributeValue) -> dict[str, AttributeValue]: + return dict(values) + + def _safe_payload_value(self, value: Any) -> str | dict[str, Any] | list[object] | None: + if isinstance(value, str): + return value + if isinstance(value, dict): + return cast(dict[str, Any], value) + if isinstance(value, list): + items: list[object] = [] + for item in cast(list[object], value): + items.append(item) + return items + return None + + def _content_or_ref(self, value: Any, ref: str) -> Any: + if self._exporter.include_content: + return self._maybe_json(value) + return ref + + def _maybe_json(self, value: Any) -> str | None: + if value is None: + return None + if isinstance(value, str): + return value + try: + return json.dumps(value, default=str) + except (TypeError, ValueError): + return str(value) + + # ------------------------------------------------------------------ + # SPAN-emitting handlers (workflow, node execution, draft node) + # ------------------------------------------------------------------ + + def _workflow_trace(self, info: WorkflowTraceInfo) -> None: + metadata = self._metadata(info) + tenant_id, app_id, user_id = self._context_ids(info, metadata) + # -- Span attrs: identity + structure + status + timing + gen_ai scalars -- + 
span_attrs: dict[str, Any] = { + "dify.trace_id": info.resolved_trace_id, + "dify.tenant_id": tenant_id, + "dify.app_id": app_id, + "dify.workflow.id": info.workflow_id, + "dify.workflow.run_id": info.workflow_run_id, + "dify.workflow.status": info.workflow_run_status, + "dify.workflow.error": info.error, + "dify.workflow.elapsed_time": info.workflow_run_elapsed_time, + "dify.invoke_from": metadata.get("triggered_from"), + "dify.conversation.id": info.conversation_id, + "dify.message.id": info.message_id, + "dify.invoked_by": info.invoked_by, + "gen_ai.usage.total_tokens": info.total_tokens, + "gen_ai.user.id": user_id, + } + + trace_correlation_override, parent_span_id_source = info.resolved_parent_context + + parent_ctx = metadata.get("parent_trace_context") + if isinstance(parent_ctx, dict): + parent_ctx_dict = cast(dict[str, Any], parent_ctx) + span_attrs["dify.parent.trace_id"] = parent_ctx_dict.get("trace_id") + span_attrs["dify.parent.node.execution_id"] = parent_ctx_dict.get("parent_node_execution_id") + span_attrs["dify.parent.workflow.run_id"] = parent_ctx_dict.get("parent_workflow_run_id") + span_attrs["dify.parent.app.id"] = parent_ctx_dict.get("parent_app_id") + + self._exporter.export_span( + EnterpriseTelemetrySpan.WORKFLOW_RUN, + span_attrs, + correlation_id=info.workflow_run_id, + span_id_source=info.workflow_run_id, + start_time=info.start_time, + end_time=info.end_time, + trace_correlation_override=trace_correlation_override, + parent_span_id_source=parent_span_id_source, + ) + + # -- Companion log: ALL attrs (span + detail) for full picture -- + log_attrs: dict[str, Any] = {**span_attrs} + log_attrs.update( + { + "dify.app.name": metadata.get("app_name"), + "dify.workspace.name": metadata.get("workspace_name"), + "gen_ai.user.id": user_id, + "gen_ai.usage.total_tokens": info.total_tokens, + "dify.workflow.version": info.workflow_run_version, + } + ) + + ref = f"ref:workflow_run_id={info.workflow_run_id}" + log_attrs["dify.workflow.inputs"] = self._content_or_ref(info.workflow_run_inputs, ref) + log_attrs["dify.workflow.outputs"] = self._content_or_ref(info.workflow_run_outputs, ref) + log_attrs["dify.workflow.query"] = self._content_or_ref(info.query, ref) + + emit_telemetry_log( + event_name=EnterpriseTelemetryEvent.WORKFLOW_RUN, + attributes=log_attrs, + signal="span_detail", + trace_id_source=info.workflow_run_id, + span_id_source=info.workflow_run_id, + tenant_id=tenant_id, + user_id=user_id, + ) + + # -- Metrics -- + labels = self._labels( + tenant_id=tenant_id or "", + app_id=app_id or "", + ) + token_labels = TokenMetricLabels( + tenant_id=tenant_id or "", + app_id=app_id or "", + operation_type=OperationType.WORKFLOW, + model_provider="", + model_name="", + node_type="", + ).to_dict() + self._exporter.increment_counter(EnterpriseTelemetryCounter.TOKENS, info.total_tokens, token_labels) + if info.prompt_tokens is not None and info.prompt_tokens > 0: + self._exporter.increment_counter(EnterpriseTelemetryCounter.INPUT_TOKENS, info.prompt_tokens, token_labels) + if info.completion_tokens is not None and info.completion_tokens > 0: + self._exporter.increment_counter( + EnterpriseTelemetryCounter.OUTPUT_TOKENS, info.completion_tokens, token_labels + ) + invoke_from = metadata.get("triggered_from", "") + self._exporter.increment_counter( + EnterpriseTelemetryCounter.REQUESTS, + 1, + self._labels( + **labels, + type="workflow", + status=info.workflow_run_status, + invoke_from=invoke_from, + ), + ) + # Prefer wall-clock timestamps over the elapsed_time field: 
elapsed_time defaults + # to 0 in the DB and can be stale if the Celery write races with the trace task. + # start_time = workflow_run.created_at, end_time = workflow_run.finished_at. + if info.start_time and info.end_time: + workflow_duration = (info.end_time - info.start_time).total_seconds() + elif info.workflow_run_elapsed_time: + workflow_duration = float(info.workflow_run_elapsed_time) + else: + workflow_duration = 0.0 + self._exporter.record_histogram( + EnterpriseTelemetryHistogram.WORKFLOW_DURATION, + workflow_duration, + self._labels( + **labels, + status=info.workflow_run_status, + ), + ) + + if info.error: + self._exporter.increment_counter( + EnterpriseTelemetryCounter.ERRORS, + 1, + self._labels( + **labels, + type="workflow", + ), + ) + + def _node_execution_trace(self, info: WorkflowNodeTraceInfo) -> None: + self._emit_node_execution_trace(info, EnterpriseTelemetrySpan.NODE_EXECUTION, "node") + + def _draft_node_execution_trace(self, info: DraftNodeExecutionTrace) -> None: + self._emit_node_execution_trace( + info, + EnterpriseTelemetrySpan.DRAFT_NODE_EXECUTION, + "draft_node", + correlation_id_override=info.node_execution_id, + trace_correlation_override_param=info.workflow_run_id, + ) + + def _emit_node_execution_trace( + self, + info: WorkflowNodeTraceInfo, + span_name: EnterpriseTelemetrySpan, + request_type: str, + correlation_id_override: str | None = None, + trace_correlation_override_param: str | None = None, + ) -> None: + metadata = self._metadata(info) + tenant_id, app_id, user_id = self._context_ids(info, metadata) + # -- Span attrs: identity + structure + status + timing + gen_ai scalars -- + span_attrs: dict[str, Any] = { + "dify.trace_id": info.resolved_trace_id, + "dify.tenant_id": tenant_id, + "dify.app_id": app_id, + "dify.workflow.id": info.workflow_id, + "dify.workflow.run_id": info.workflow_run_id, + "dify.message.id": info.message_id, + "dify.conversation.id": metadata.get("conversation_id"), + "dify.node.execution_id": info.node_execution_id, + "dify.node.id": info.node_id, + "dify.node.type": info.node_type, + "dify.node.title": info.title, + "dify.node.status": info.status, + "dify.node.error": info.error, + "dify.node.elapsed_time": info.elapsed_time, + "dify.node.index": info.index, + "dify.node.predecessor_node_id": info.predecessor_node_id, + "dify.node.iteration_id": info.iteration_id, + "dify.node.loop_id": info.loop_id, + "dify.node.parallel_id": info.parallel_id, + "dify.node.invoked_by": info.invoked_by, + "gen_ai.usage.input_tokens": info.prompt_tokens, + "gen_ai.usage.output_tokens": info.completion_tokens, + "gen_ai.usage.total_tokens": info.total_tokens, + "gen_ai.request.model": info.model_name, + "gen_ai.provider.name": info.model_provider, + "gen_ai.user.id": user_id, + } + + resolved_override, _ = info.resolved_parent_context + trace_correlation_override = trace_correlation_override_param or resolved_override + + effective_correlation_id = correlation_id_override or info.workflow_run_id + self._exporter.export_span( + span_name, + span_attrs, + correlation_id=effective_correlation_id, + span_id_source=info.node_execution_id, + start_time=info.start_time, + end_time=info.end_time, + trace_correlation_override=trace_correlation_override, + ) + + # -- Companion log: ALL attrs (span + detail) -- + log_attrs: dict[str, Any] = {**span_attrs} + log_attrs.update( + { + "dify.app.name": metadata.get("app_name"), + "dify.workspace.name": metadata.get("workspace_name"), + "dify.invoke_from": metadata.get("invoke_from"), + "gen_ai.user.id": 
user_id, + "gen_ai.usage.total_tokens": info.total_tokens, + "dify.node.total_price": info.total_price, + "dify.node.currency": info.currency, + "gen_ai.provider.name": info.model_provider, + "gen_ai.request.model": info.model_name, + "gen_ai.tool.name": info.tool_name, + "dify.node.iteration_index": info.iteration_index, + "dify.node.loop_index": info.loop_index, + "dify.plugin.name": metadata.get("plugin_name"), + "dify.credential.name": metadata.get("credential_name"), + "dify.credential.id": metadata.get("credential_id"), + "dify.dataset.ids": self._maybe_json(metadata.get("dataset_ids")), + "dify.dataset.names": self._maybe_json(metadata.get("dataset_names")), + } + ) + + ref = f"ref:node_execution_id={info.node_execution_id}" + log_attrs["dify.node.inputs"] = self._content_or_ref(info.node_inputs, ref) + log_attrs["dify.node.outputs"] = self._content_or_ref(info.node_outputs, ref) + log_attrs["dify.node.process_data"] = self._content_or_ref(info.process_data, ref) + + emit_telemetry_log( + event_name=span_name.value, + attributes=log_attrs, + signal="span_detail", + trace_id_source=info.workflow_run_id, + span_id_source=info.node_execution_id, + tenant_id=tenant_id, + user_id=user_id, + ) + + # -- Metrics -- + labels = self._labels( + tenant_id=tenant_id or "", + app_id=app_id or "", + node_type=info.node_type, + model_provider=info.model_provider or "", + ) + if info.total_tokens: + token_labels = TokenMetricLabels( + tenant_id=tenant_id or "", + app_id=app_id or "", + operation_type=OperationType.NODE_EXECUTION, + model_provider=info.model_provider or "", + model_name=info.model_name or "", + node_type=info.node_type, + ).to_dict() + self._exporter.increment_counter(EnterpriseTelemetryCounter.TOKENS, info.total_tokens, token_labels) + if info.prompt_tokens is not None and info.prompt_tokens > 0: + self._exporter.increment_counter( + EnterpriseTelemetryCounter.INPUT_TOKENS, info.prompt_tokens, token_labels + ) + if info.completion_tokens is not None and info.completion_tokens > 0: + self._exporter.increment_counter( + EnterpriseTelemetryCounter.OUTPUT_TOKENS, info.completion_tokens, token_labels + ) + self._exporter.increment_counter( + EnterpriseTelemetryCounter.REQUESTS, + 1, + self._labels( + **labels, + type=request_type, + status=info.status, + model_name=info.model_name or "", + ), + ) + duration_labels = dict(labels) + duration_labels["model_name"] = info.model_name or "" + plugin_name = metadata.get("plugin_name") + if plugin_name and info.node_type in {"tool", "knowledge-retrieval"}: + duration_labels["plugin_name"] = plugin_name + self._exporter.record_histogram(EnterpriseTelemetryHistogram.NODE_DURATION, info.elapsed_time, duration_labels) + + if info.error: + self._exporter.increment_counter( + EnterpriseTelemetryCounter.ERRORS, + 1, + self._labels( + **labels, + type=request_type, + model_name=info.model_name or "", + ), + ) + + # ------------------------------------------------------------------ + # METRIC-ONLY handlers (structured log + counters/histograms) + # ------------------------------------------------------------------ + + def _message_trace(self, info: MessageTraceInfo) -> None: + metadata = self._metadata(info) + tenant_id, app_id, user_id = self._context_ids(info, metadata) + attrs = self._common_attrs(info) + attrs.update( + { + "dify.invoke_from": metadata.get("from_source"), + "dify.conversation.id": metadata.get("conversation_id"), + "dify.conversation.mode": info.conversation_mode, + "gen_ai.provider.name": metadata.get("ls_provider"), + 
"gen_ai.request.model": metadata.get("ls_model_name"), + "gen_ai.usage.input_tokens": info.message_tokens, + "gen_ai.usage.output_tokens": info.answer_tokens, + "gen_ai.usage.total_tokens": info.total_tokens, + "dify.message.status": metadata.get("status"), + "dify.message.error": info.error, + "dify.message.from_source": metadata.get("from_source"), + "dify.message.from_end_user_id": metadata.get("from_end_user_id"), + "dify.message.from_account_id": metadata.get("from_account_id"), + "dify.streaming": info.is_streaming_request, + "dify.message.time_to_first_token": info.gen_ai_server_time_to_first_token, + "dify.message.streaming_duration": info.llm_streaming_time_to_generate, + "dify.workflow.run_id": metadata.get("workflow_run_id"), + } + ) + node_execution_id = metadata.get("node_execution_id") + if node_execution_id: + attrs["dify.node.execution_id"] = node_execution_id + + ref = f"ref:message_id={info.message_id}" + inputs = self._safe_payload_value(info.inputs) + outputs = self._safe_payload_value(info.outputs) + attrs["dify.message.inputs"] = self._content_or_ref(inputs, ref) + attrs["dify.message.outputs"] = self._content_or_ref(outputs, ref) + + emit_metric_only_event( + event_name=EnterpriseTelemetryEvent.MESSAGE_RUN, + attributes=attrs, + trace_id_source=metadata.get("workflow_run_id") or (str(info.message_id) if info.message_id else None), + span_id_source=node_execution_id, + tenant_id=tenant_id, + user_id=user_id, + ) + + labels = self._labels( + tenant_id=tenant_id or "", + app_id=app_id or "", + model_provider=metadata.get("ls_provider") or "", + model_name=metadata.get("ls_model_name") or "", + ) + token_labels = TokenMetricLabels( + tenant_id=tenant_id or "", + app_id=app_id or "", + operation_type=OperationType.MESSAGE, + model_provider=metadata.get("ls_provider") or "", + model_name=metadata.get("ls_model_name") or "", + node_type="", + ).to_dict() + self._exporter.increment_counter(EnterpriseTelemetryCounter.TOKENS, info.total_tokens, token_labels) + if info.message_tokens > 0: + self._exporter.increment_counter(EnterpriseTelemetryCounter.INPUT_TOKENS, info.message_tokens, token_labels) + if info.answer_tokens > 0: + self._exporter.increment_counter(EnterpriseTelemetryCounter.OUTPUT_TOKENS, info.answer_tokens, token_labels) + invoke_from = metadata.get("from_source", "") + self._exporter.increment_counter( + EnterpriseTelemetryCounter.REQUESTS, + 1, + self._labels( + **labels, + type="message", + status=metadata.get("status", ""), + invoke_from=invoke_from, + ), + ) + + if info.start_time and info.end_time: + duration = (info.end_time - info.start_time).total_seconds() + self._exporter.record_histogram(EnterpriseTelemetryHistogram.MESSAGE_DURATION, duration, labels) + + if info.gen_ai_server_time_to_first_token is not None: + self._exporter.record_histogram( + EnterpriseTelemetryHistogram.MESSAGE_TTFT, info.gen_ai_server_time_to_first_token, labels + ) + + if info.error: + self._exporter.increment_counter( + EnterpriseTelemetryCounter.ERRORS, + 1, + self._labels( + **labels, + type="message", + ), + ) + + def _tool_trace(self, info: ToolTraceInfo) -> None: + metadata = self._metadata(info) + tenant_id, app_id, user_id = self._context_ids(info, metadata) + attrs = self._common_attrs(info) + attrs.update( + { + "gen_ai.tool.name": info.tool_name, + "dify.tool.time_cost": info.time_cost, + "dify.tool.error": info.error, + "dify.workflow.run_id": metadata.get("workflow_run_id"), + } + ) + node_execution_id = metadata.get("node_execution_id") + if node_execution_id: + 
attrs["dify.node.execution_id"] = node_execution_id + + ref = f"ref:message_id={info.message_id}" + attrs["dify.tool.inputs"] = self._content_or_ref(info.tool_inputs, ref) + attrs["dify.tool.outputs"] = self._content_or_ref(info.tool_outputs, ref) + attrs["dify.tool.parameters"] = self._content_or_ref(info.tool_parameters, ref) + attrs["dify.tool.config"] = self._content_or_ref(info.tool_config, ref) + + emit_metric_only_event( + event_name=EnterpriseTelemetryEvent.TOOL_EXECUTION, + attributes=attrs, + trace_id_source=info.resolved_trace_id, + span_id_source=node_execution_id, + tenant_id=tenant_id, + user_id=user_id, + ) + + labels = self._labels( + tenant_id=tenant_id or "", + app_id=app_id or "", + tool_name=info.tool_name, + ) + self._exporter.increment_counter( + EnterpriseTelemetryCounter.REQUESTS, + 1, + self._labels( + **labels, + type="tool", + ), + ) + self._exporter.record_histogram(EnterpriseTelemetryHistogram.TOOL_DURATION, float(info.time_cost), labels) + + if info.error: + self._exporter.increment_counter( + EnterpriseTelemetryCounter.ERRORS, + 1, + self._labels( + **labels, + type="tool", + ), + ) + + def _moderation_trace(self, info: ModerationTraceInfo) -> None: + metadata = self._metadata(info) + tenant_id, app_id, user_id = self._context_ids(info, metadata) + attrs = self._common_attrs(info) + attrs.update( + { + "dify.moderation.flagged": info.flagged, + "dify.moderation.action": info.action, + "dify.moderation.preset_response": info.preset_response, + "dify.workflow.run_id": metadata.get("workflow_run_id"), + } + ) + node_execution_id = metadata.get("node_execution_id") + if node_execution_id: + attrs["dify.node.execution_id"] = node_execution_id + + attrs["dify.moderation.query"] = self._content_or_ref( + info.query, + f"ref:message_id={info.message_id}", + ) + + emit_metric_only_event( + event_name=EnterpriseTelemetryEvent.MODERATION_CHECK, + attributes=attrs, + trace_id_source=info.resolved_trace_id, + span_id_source=node_execution_id, + tenant_id=tenant_id, + user_id=user_id, + ) + + labels = self._labels( + tenant_id=tenant_id or "", + app_id=app_id or "", + ) + self._exporter.increment_counter( + EnterpriseTelemetryCounter.REQUESTS, + 1, + self._labels( + **labels, + type="moderation", + ), + ) + + def _suggested_question_trace(self, info: SuggestedQuestionTraceInfo) -> None: + metadata = self._metadata(info) + tenant_id, app_id, user_id = self._context_ids(info, metadata) + attrs = self._common_attrs(info) + attrs.update( + { + "gen_ai.usage.total_tokens": info.total_tokens, + "dify.suggested_question.status": info.status, + "dify.suggested_question.error": info.error, + "gen_ai.provider.name": info.model_provider, + "gen_ai.request.model": info.model_id, + "dify.suggested_question.count": len(info.suggested_question), + "dify.workflow.run_id": metadata.get("workflow_run_id"), + } + ) + node_execution_id = metadata.get("node_execution_id") + if node_execution_id: + attrs["dify.node.execution_id"] = node_execution_id + + attrs["dify.suggested_question.questions"] = self._content_or_ref( + info.suggested_question, + f"ref:message_id={info.message_id}", + ) + + emit_metric_only_event( + event_name=EnterpriseTelemetryEvent.SUGGESTED_QUESTION_GENERATION, + attributes=attrs, + trace_id_source=info.resolved_trace_id, + span_id_source=node_execution_id, + tenant_id=tenant_id, + user_id=user_id, + ) + + labels = self._labels( + tenant_id=tenant_id or "", + app_id=app_id or "", + ) + self._exporter.increment_counter( + EnterpriseTelemetryCounter.REQUESTS, + 1, + 
self._labels( + **labels, + type="suggested_question", + model_provider=info.model_provider or "", + model_name=info.model_id or "", + ), + ) + + def _dataset_retrieval_trace(self, info: DatasetRetrievalTraceInfo) -> None: + metadata = self._metadata(info) + tenant_id, app_id, user_id = self._context_ids(info, metadata) + attrs = self._common_attrs(info) + attrs["dify.dataset.error"] = info.error + attrs["dify.workflow.run_id"] = metadata.get("workflow_run_id") + node_execution_id = metadata.get("node_execution_id") + if node_execution_id: + attrs["dify.node.execution_id"] = node_execution_id + + docs: list[dict[str, Any]] = [] + documents_any: Any = info.documents + documents_list: list[Any] = cast(list[Any], documents_any) if isinstance(documents_any, list) else [] + for entry in documents_list: + if isinstance(entry, dict): + entry_dict: dict[str, Any] = cast(dict[str, Any], entry) + docs.append(entry_dict) + dataset_ids: list[str] = [] + dataset_names: list[str] = [] + structured_docs: list[dict[str, Any]] = [] + for doc in docs: + meta_raw = doc.get("metadata") + meta: dict[str, Any] = cast(dict[str, Any], meta_raw) if isinstance(meta_raw, dict) else {} + did = meta.get("dataset_id") + dname = meta.get("dataset_name") + if did and did not in dataset_ids: + dataset_ids.append(did) + if dname and dname not in dataset_names: + dataset_names.append(dname) + structured_docs.append( + { + "dataset_id": did, + "document_id": meta.get("document_id"), + "segment_id": meta.get("segment_id"), + "score": meta.get("score"), + } + ) + + attrs["dify.dataset.ids"] = self._maybe_json(dataset_ids) + attrs["dify.dataset.names"] = self._maybe_json(dataset_names) + attrs["dify.retrieval.document_count"] = len(docs) + + embedding_models_raw: Any = metadata.get("embedding_models") + embedding_models: dict[str, Any] = ( + cast(dict[str, Any], embedding_models_raw) if isinstance(embedding_models_raw, dict) else {} + ) + if embedding_models: + providers: list[str] = [] + models: list[str] = [] + for ds_info in embedding_models.values(): + if isinstance(ds_info, dict): + ds_info_dict: dict[str, Any] = cast(dict[str, Any], ds_info) + p = ds_info_dict.get("embedding_model_provider", "") + m = ds_info_dict.get("embedding_model", "") + if p and p not in providers: + providers.append(p) + if m and m not in models: + models.append(m) + attrs["dify.dataset.embedding_providers"] = self._maybe_json(providers) + attrs["dify.dataset.embedding_models"] = self._maybe_json(models) + + # Add rerank model to logs + rerank_provider = metadata.get("rerank_model_provider", "") + rerank_model = metadata.get("rerank_model_name", "") + if rerank_provider or rerank_model: + attrs["dify.retrieval.rerank_provider"] = rerank_provider + attrs["dify.retrieval.rerank_model"] = rerank_model + + ref = f"ref:message_id={info.message_id}" + retrieval_inputs = self._safe_payload_value(info.inputs) + attrs["dify.retrieval.query"] = self._content_or_ref(retrieval_inputs, ref) + attrs["dify.dataset.documents"] = self._content_or_ref(structured_docs, ref) + + emit_metric_only_event( + event_name=EnterpriseTelemetryEvent.DATASET_RETRIEVAL, + attributes=attrs, + trace_id_source=metadata.get("workflow_run_id") or (str(info.message_id) if info.message_id else None), + span_id_source=node_execution_id or (str(info.message_id) if info.message_id else None), + tenant_id=tenant_id, + user_id=user_id, + ) + + labels = self._labels( + tenant_id=tenant_id or "", + app_id=app_id or "", + ) + self._exporter.increment_counter( + 
EnterpriseTelemetryCounter.REQUESTS, + 1, + self._labels( + **labels, + type="dataset_retrieval", + ), + ) + + for did in dataset_ids: + # Get embedding model for this specific dataset + ds_embedding_info = embedding_models.get(did, {}) + embedding_provider = ds_embedding_info.get("embedding_model_provider", "") + embedding_model = ds_embedding_info.get("embedding_model", "") + + # Get rerank model (same for all datasets in this retrieval) + rerank_provider = metadata.get("rerank_model_provider", "") + rerank_model = metadata.get("rerank_model_name", "") + + self._exporter.increment_counter( + EnterpriseTelemetryCounter.DATASET_RETRIEVALS, + 1, + self._labels( + **labels, + dataset_id=did, + embedding_model_provider=embedding_provider, + embedding_model=embedding_model, + rerank_model_provider=rerank_provider, + rerank_model=rerank_model, + ), + ) + + def _generate_name_trace(self, info: GenerateNameTraceInfo) -> None: + metadata = self._metadata(info) + tenant_id, app_id, user_id = self._context_ids(info, metadata) + attrs = self._common_attrs(info) + attrs["dify.conversation.id"] = info.conversation_id + node_execution_id = metadata.get("node_execution_id") + if node_execution_id: + attrs["dify.node.execution_id"] = node_execution_id + + ref = f"ref:conversation_id={info.conversation_id}" + inputs = self._safe_payload_value(info.inputs) + outputs = self._safe_payload_value(info.outputs) + attrs["dify.generate_name.inputs"] = self._content_or_ref(inputs, ref) + attrs["dify.generate_name.outputs"] = self._content_or_ref(outputs, ref) + + emit_metric_only_event( + event_name=EnterpriseTelemetryEvent.GENERATE_NAME_EXECUTION, + attributes=attrs, + trace_id_source=info.resolved_trace_id, + span_id_source=node_execution_id, + tenant_id=tenant_id, + user_id=user_id, + ) + + labels = self._labels( + tenant_id=tenant_id or "", + app_id=app_id or "", + ) + self._exporter.increment_counter( + EnterpriseTelemetryCounter.REQUESTS, + 1, + self._labels( + **labels, + type="generate_name", + ), + ) + + def _prompt_generation_trace(self, info: PromptGenerationTraceInfo) -> None: + metadata = self._metadata(info) + tenant_id, app_id, user_id = self._context_ids(info, metadata) + attrs = { + "dify.trace_id": info.resolved_trace_id, + "dify.tenant_id": tenant_id, + "dify.user.id": user_id, + "dify.app.id": app_id or "", + "dify.app.name": metadata.get("app_name"), + "dify.workspace.name": metadata.get("workspace_name"), + "dify.operation.type": info.operation_type, + "gen_ai.provider.name": info.model_provider, + "gen_ai.request.model": info.model_name, + "gen_ai.usage.input_tokens": info.prompt_tokens, + "gen_ai.usage.output_tokens": info.completion_tokens, + "gen_ai.usage.total_tokens": info.total_tokens, + "dify.prompt_generation.latency": info.latency, + "dify.prompt_generation.error": info.error, + } + node_execution_id = metadata.get("node_execution_id") + if node_execution_id: + attrs["dify.node.execution_id"] = node_execution_id + + if info.total_price is not None: + attrs["dify.prompt_generation.total_price"] = info.total_price + attrs["dify.prompt_generation.currency"] = info.currency + + ref = f"ref:trace_id={info.trace_id}" + outputs = self._safe_payload_value(info.outputs) + attrs["dify.prompt_generation.instruction"] = self._content_or_ref(info.instruction, ref) + attrs["dify.prompt_generation.output"] = self._content_or_ref(outputs, ref) + + emit_metric_only_event( + event_name=EnterpriseTelemetryEvent.PROMPT_GENERATION_EXECUTION, + attributes=attrs, + trace_id_source=info.resolved_trace_id, + 
span_id_source=node_execution_id, + tenant_id=tenant_id, + user_id=user_id, + ) + + token_labels = TokenMetricLabels( + tenant_id=tenant_id or "", + app_id=app_id or "", + operation_type=info.operation_type, + model_provider=info.model_provider, + model_name=info.model_name, + node_type="", + ).to_dict() + + labels = self._labels( + tenant_id=tenant_id or "", + app_id=app_id or "", + operation_type=info.operation_type, + model_provider=info.model_provider, + model_name=info.model_name, + ) + + self._exporter.increment_counter(EnterpriseTelemetryCounter.TOKENS, info.total_tokens, token_labels) + if info.prompt_tokens > 0: + self._exporter.increment_counter(EnterpriseTelemetryCounter.INPUT_TOKENS, info.prompt_tokens, token_labels) + if info.completion_tokens > 0: + self._exporter.increment_counter( + EnterpriseTelemetryCounter.OUTPUT_TOKENS, info.completion_tokens, token_labels + ) + + status = "failed" if info.error else "success" + self._exporter.increment_counter( + EnterpriseTelemetryCounter.REQUESTS, + 1, + self._labels( + **labels, + type="prompt_generation", + status=status, + ), + ) + + self._exporter.record_histogram( + EnterpriseTelemetryHistogram.PROMPT_GENERATION_DURATION, + info.latency, + labels, + ) + + if info.error: + self._exporter.increment_counter( + EnterpriseTelemetryCounter.ERRORS, + 1, + self._labels( + **labels, + type="prompt_generation", + ), + ) diff --git a/api/enterprise/telemetry/entities/__init__.py b/api/enterprise/telemetry/entities/__init__.py new file mode 100644 index 0000000000..4a9bd3dbf8 --- /dev/null +++ b/api/enterprise/telemetry/entities/__init__.py @@ -0,0 +1,121 @@ +from enum import StrEnum +from typing import cast + +from opentelemetry.util.types import AttributeValue +from pydantic import BaseModel, ConfigDict + + +class EnterpriseTelemetrySpan(StrEnum): + WORKFLOW_RUN = "dify.workflow.run" + NODE_EXECUTION = "dify.node.execution" + DRAFT_NODE_EXECUTION = "dify.node.execution.draft" + + +class EnterpriseTelemetryEvent(StrEnum): + """Event names for enterprise telemetry logs.""" + + APP_CREATED = "dify.app.created" + APP_UPDATED = "dify.app.updated" + APP_DELETED = "dify.app.deleted" + FEEDBACK_CREATED = "dify.feedback.created" + WORKFLOW_RUN = "dify.workflow.run" + MESSAGE_RUN = "dify.message.run" + TOOL_EXECUTION = "dify.tool.execution" + MODERATION_CHECK = "dify.moderation.check" + SUGGESTED_QUESTION_GENERATION = "dify.suggested_question.generation" + DATASET_RETRIEVAL = "dify.dataset.retrieval" + GENERATE_NAME_EXECUTION = "dify.generate_name.execution" + PROMPT_GENERATION_EXECUTION = "dify.prompt_generation.execution" + REHYDRATION_FAILED = "dify.telemetry.rehydration_failed" + + +class EnterpriseTelemetryCounter(StrEnum): + TOKENS = "tokens" + INPUT_TOKENS = "input_tokens" + OUTPUT_TOKENS = "output_tokens" + REQUESTS = "requests" + ERRORS = "errors" + FEEDBACK = "feedback" + DATASET_RETRIEVALS = "dataset_retrievals" + APP_CREATED = "app_created" + APP_UPDATED = "app_updated" + APP_DELETED = "app_deleted" + + +class EnterpriseTelemetryHistogram(StrEnum): + WORKFLOW_DURATION = "workflow_duration" + NODE_DURATION = "node_duration" + MESSAGE_DURATION = "message_duration" + MESSAGE_TTFT = "message_ttft" + TOOL_DURATION = "tool_duration" + PROMPT_GENERATION_DURATION = "prompt_generation_duration" + + +class TokenMetricLabels(BaseModel): + """Unified label structure for all dify.tokens.* metrics.
+ + All token counters (dify.tokens.input, dify.tokens.output, dify.tokens.total) MUST + use this exact label set to ensure consistent filtering and aggregation across + different operation types. + + Attributes: + tenant_id: Tenant identifier. + app_id: Application identifier. + operation_type: Source of token usage (workflow | node_execution | message | + rule_generate | code_generate | structured_output | instruction_modify). + model_provider: LLM provider name. Empty string if not applicable (e.g., workflow-level). + model_name: LLM model name. Empty string if not applicable (e.g., workflow-level). + node_type: Workflow node type. Empty string unless operation_type=node_execution. + + Usage: + labels = TokenMetricLabels( + tenant_id="tenant-123", + app_id="app-456", + operation_type=OperationType.WORKFLOW, + model_provider="", + model_name="", + node_type="", + ) + exporter.increment_counter( + EnterpriseTelemetryCounter.INPUT_TOKENS, + 100, + labels.to_dict() + ) + + Design rationale: + Without this unified structure, tokens get double-counted when querying totals + because workflow.total_tokens is already the sum of all node tokens. The + operation_type label allows filtering to separate workflow-level aggregates from + node-level detail, while keeping the same label cardinality for consistent queries. + """ + + tenant_id: str + app_id: str + operation_type: str + model_provider: str + model_name: str + node_type: str + + model_config = ConfigDict(extra="forbid", frozen=True) + + def to_dict(self) -> dict[str, AttributeValue]: + return cast( + dict[str, AttributeValue], + { + "tenant_id": self.tenant_id, + "app_id": self.app_id, + "operation_type": self.operation_type, + "model_provider": self.model_provider, + "model_name": self.model_name, + "node_type": self.node_type, + }, + ) + + +__all__ = [ + "EnterpriseTelemetryCounter", + "EnterpriseTelemetryEvent", + "EnterpriseTelemetryHistogram", + "EnterpriseTelemetrySpan", + "TokenMetricLabels", +] diff --git a/api/enterprise/telemetry/event_handlers.py b/api/enterprise/telemetry/event_handlers.py new file mode 100644 index 0000000000..167cde2cd8 --- /dev/null +++ b/api/enterprise/telemetry/event_handlers.py @@ -0,0 +1,99 @@ +"""Blinker signal handlers for enterprise telemetry. + +Registered at import time via ``@signal.connect`` decorators. +Import must happen during ``ext_enterprise_telemetry.init_app()`` to +ensure handlers fire. Each handler delegates to ``core.telemetry.gateway`` +which handles routing, EE-gating, and dispatch. + +All handlers are best-effort: exceptions are caught and logged so that +telemetry failures never break user-facing operations. 
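+
+Usage (a hedged sketch; this mirrors what ``ext_enterprise_telemetry.init_app``
+already does, and importing this module for its side effects is all that is
+needed to register the handlers below):
+
+    # The @signal.connect decorators run at import time, so a bare
+    # import is sufficient to wire everything up.
+    import enterprise.telemetry.event_handlers  # noqa: F401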
+""" + +from __future__ import annotations + +import logging + +from events.app_event import app_was_created, app_was_deleted, app_was_updated +from events.feedback_event import feedback_was_created + +logger = logging.getLogger(__name__) + +__all__ = [ + "_handle_app_created", + "_handle_app_deleted", + "_handle_app_updated", + "_handle_feedback_created", +] + + +@app_was_created.connect +def _handle_app_created(sender: object, **kwargs: object) -> None: + try: + from core.telemetry.gateway import emit as gateway_emit + from enterprise.telemetry.contracts import TelemetryCase + + gateway_emit( + case=TelemetryCase.APP_CREATED, + context={"tenant_id": str(getattr(sender, "tenant_id", "") or "")}, + payload={ + "app_id": getattr(sender, "id", None), + "mode": getattr(sender, "mode", None), + }, + ) + except Exception: + logger.warning("Failed to emit app_created telemetry", exc_info=True) + + +@app_was_deleted.connect +def _handle_app_deleted(sender: object, **kwargs: object) -> None: + try: + from core.telemetry.gateway import emit as gateway_emit + from enterprise.telemetry.contracts import TelemetryCase + + gateway_emit( + case=TelemetryCase.APP_DELETED, + context={"tenant_id": str(getattr(sender, "tenant_id", "") or "")}, + payload={"app_id": getattr(sender, "id", None)}, + ) + except Exception: + logger.warning("Failed to emit app_deleted telemetry", exc_info=True) + + +@app_was_updated.connect +def _handle_app_updated(sender: object, **kwargs: object) -> None: + try: + from core.telemetry.gateway import emit as gateway_emit + from enterprise.telemetry.contracts import TelemetryCase + + gateway_emit( + case=TelemetryCase.APP_UPDATED, + context={"tenant_id": str(getattr(sender, "tenant_id", "") or "")}, + payload={"app_id": getattr(sender, "id", None)}, + ) + except Exception: + logger.warning("Failed to emit app_updated telemetry", exc_info=True) + + +@feedback_was_created.connect +def _handle_feedback_created(sender: object, **kwargs: object) -> None: + try: + from core.telemetry.gateway import emit as gateway_emit + from enterprise.telemetry.contracts import TelemetryCase + + tenant_id = str(kwargs.get("tenant_id", "") or "") + gateway_emit( + case=TelemetryCase.FEEDBACK_CREATED, + context={"tenant_id": tenant_id}, + payload={ + "message_id": getattr(sender, "message_id", None), + "app_id": getattr(sender, "app_id", None), + "conversation_id": getattr(sender, "conversation_id", None), + "from_end_user_id": getattr(sender, "from_end_user_id", None), + "from_account_id": getattr(sender, "from_account_id", None), + "rating": getattr(sender, "rating", None), + "from_source": getattr(sender, "from_source", None), + "content": getattr(sender, "content", None), + }, + ) + except Exception: + logger.warning("Failed to emit feedback_created telemetry", exc_info=True) diff --git a/api/enterprise/telemetry/exporter.py b/api/enterprise/telemetry/exporter.py new file mode 100644 index 0000000000..f7b7301b8e --- /dev/null +++ b/api/enterprise/telemetry/exporter.py @@ -0,0 +1,289 @@ +"""Enterprise OTEL exporter — shared by EnterpriseOtelTrace, event handlers, and direct instrumentation. + +Uses dedicated TracerProvider and MeterProvider instances (configurable sampling, +independent from ext_otel.py infrastructure). + +Initialized once during Flask extension init (single-threaded via ext_enterprise_telemetry.py). +Accessed via ``ext_enterprise_telemetry.get_enterprise_exporter()`` from any thread/process. 
+""" + +import logging +import socket +import uuid +from datetime import UTC, datetime +from typing import Any, cast + +from opentelemetry import trace +from opentelemetry.context import Context +from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter as GRPCMetricExporter +from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter as GRPCSpanExporter +from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter as HTTPMetricExporter +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HTTPSpanExporter +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor +from opentelemetry.sdk.trace.sampling import ParentBasedTraceIdRatio +from opentelemetry.semconv.resource import ResourceAttributes +from opentelemetry.trace import SpanContext, TraceFlags +from opentelemetry.util.types import Attributes, AttributeValue + +from configs import dify_config +from enterprise.telemetry.entities import EnterpriseTelemetryCounter, EnterpriseTelemetryHistogram +from enterprise.telemetry.id_generator import ( + CorrelationIdGenerator, + compute_deterministic_span_id, + set_correlation_id, + set_span_id_source, +) + +logger = logging.getLogger(__name__) + + +def is_enterprise_telemetry_enabled() -> bool: + return bool(dify_config.ENTERPRISE_ENABLED and dify_config.ENTERPRISE_TELEMETRY_ENABLED) + + +def _parse_otlp_headers(raw: str) -> dict[str, str]: + """Parse ``key=value,key2=value2`` into a dict.""" + if not raw: + return {} + headers: dict[str, str] = {} + for pair in raw.split(","): + if "=" not in pair: + continue + k, v = pair.split("=", 1) + headers[k.strip().lower()] = v.strip() + return headers + + +def _datetime_to_ns(dt: datetime) -> int: + """Convert a datetime to nanoseconds since epoch (OTEL convention).""" + # Ensure we always interpret naive datetimes as UTC instead of local time. + if dt.tzinfo is None: + dt = dt.replace(tzinfo=UTC) + else: + dt = dt.astimezone(UTC) + return int(dt.timestamp() * 1_000_000_000) + + +class _ExporterFactory: + def __init__(self, protocol: str, endpoint: str, headers: dict[str, str], insecure: bool): + self._protocol = protocol + self._endpoint = endpoint + self._headers = headers + self._grpc_headers = tuple(headers.items()) if headers else None + self._http_headers = headers or None + self._insecure = insecure + + def create_trace_exporter(self) -> HTTPSpanExporter | GRPCSpanExporter: + if self._protocol == "grpc": + return GRPCSpanExporter( + endpoint=self._endpoint or None, + headers=self._grpc_headers, + insecure=self._insecure, + ) + trace_endpoint = f"{self._endpoint}/v1/traces" if self._endpoint else "" + return HTTPSpanExporter(endpoint=trace_endpoint or None, headers=self._http_headers) + + def create_metric_exporter(self) -> HTTPMetricExporter | GRPCMetricExporter: + if self._protocol == "grpc": + return GRPCMetricExporter( + endpoint=self._endpoint or None, + headers=self._grpc_headers, + insecure=self._insecure, + ) + metric_endpoint = f"{self._endpoint}/v1/metrics" if self._endpoint else "" + return HTTPMetricExporter(endpoint=metric_endpoint or None, headers=self._http_headers) + + +class EnterpriseExporter: + """Shared OTEL exporter for all enterprise telemetry. 
+ + ``export_span`` creates spans with optional real timestamps, deterministic + span/trace IDs, and cross-workflow parent linking. + ``increment_counter`` / ``record_histogram`` emit OTEL metrics at 100% accuracy. + """ + + def __init__(self, config: object) -> None: + endpoint: str = getattr(config, "ENTERPRISE_OTLP_ENDPOINT", "") + headers_raw: str = getattr(config, "ENTERPRISE_OTLP_HEADERS", "") + protocol: str = (getattr(config, "ENTERPRISE_OTLP_PROTOCOL", "http") or "http").lower() + service_name: str = getattr(config, "ENTERPRISE_SERVICE_NAME", "dify") + sampling_rate: float = getattr(config, "ENTERPRISE_OTEL_SAMPLING_RATE", 1.0) + self.include_content: bool = getattr(config, "ENTERPRISE_INCLUDE_CONTENT", True) + api_key: str = getattr(config, "ENTERPRISE_OTLP_API_KEY", "") + + # Auto-detect TLS: https:// uses secure, everything else is insecure + insecure = not endpoint.startswith("https://") + + resource = Resource( + attributes={ + ResourceAttributes.SERVICE_NAME: service_name, + ResourceAttributes.HOST_NAME: socket.gethostname(), + } + ) + sampler = ParentBasedTraceIdRatio(sampling_rate) + id_generator = CorrelationIdGenerator() + self._tracer_provider = TracerProvider(resource=resource, sampler=sampler, id_generator=id_generator) + + headers = _parse_otlp_headers(headers_raw) + if api_key: + if "authorization" in headers: + logger.warning( + "ENTERPRISE_OTLP_API_KEY is set but ENTERPRISE_OTLP_HEADERS also contains " + "'authorization'; the API key will take precedence." + ) + headers["authorization"] = f"Bearer {api_key}" + factory = _ExporterFactory(protocol, endpoint, headers, insecure=insecure) + + trace_exporter = factory.create_trace_exporter() + self._tracer_provider.add_span_processor(BatchSpanProcessor(trace_exporter)) + self._tracer = self._tracer_provider.get_tracer("dify.enterprise") + + metric_exporter = factory.create_metric_exporter() + self._meter_provider = MeterProvider( + resource=resource, + metric_readers=[PeriodicExportingMetricReader(metric_exporter)], + ) + meter = self._meter_provider.get_meter("dify.enterprise") + self._counters = { + EnterpriseTelemetryCounter.TOKENS: meter.create_counter("dify.tokens.total", unit="{token}"), + EnterpriseTelemetryCounter.INPUT_TOKENS: meter.create_counter("dify.tokens.input", unit="{token}"), + EnterpriseTelemetryCounter.OUTPUT_TOKENS: meter.create_counter("dify.tokens.output", unit="{token}"), + EnterpriseTelemetryCounter.REQUESTS: meter.create_counter("dify.requests.total", unit="{request}"), + EnterpriseTelemetryCounter.ERRORS: meter.create_counter("dify.errors.total", unit="{error}"), + EnterpriseTelemetryCounter.FEEDBACK: meter.create_counter("dify.feedback.total", unit="{feedback}"), + EnterpriseTelemetryCounter.DATASET_RETRIEVALS: meter.create_counter( + "dify.dataset.retrievals.total", unit="{retrieval}" + ), + EnterpriseTelemetryCounter.APP_CREATED: meter.create_counter("dify.app.created.total", unit="{app}"), + EnterpriseTelemetryCounter.APP_UPDATED: meter.create_counter("dify.app.updated.total", unit="{app}"), + EnterpriseTelemetryCounter.APP_DELETED: meter.create_counter("dify.app.deleted.total", unit="{app}"), + } + self._histograms = { + EnterpriseTelemetryHistogram.WORKFLOW_DURATION: meter.create_histogram("dify.workflow.duration", unit="s"), + EnterpriseTelemetryHistogram.NODE_DURATION: meter.create_histogram("dify.node.duration", unit="s"), + EnterpriseTelemetryHistogram.MESSAGE_DURATION: meter.create_histogram("dify.message.duration", unit="s"), + EnterpriseTelemetryHistogram.MESSAGE_TTFT: 
meter.create_histogram( + "dify.message.time_to_first_token", unit="s" + ), + EnterpriseTelemetryHistogram.TOOL_DURATION: meter.create_histogram("dify.tool.duration", unit="s"), + EnterpriseTelemetryHistogram.PROMPT_GENERATION_DURATION: meter.create_histogram( + "dify.prompt_generation.duration", unit="s" + ), + } + + def export_span( + self, + name: str, + attributes: dict[str, Any], + correlation_id: str | None = None, + span_id_source: str | None = None, + start_time: datetime | None = None, + end_time: datetime | None = None, + trace_correlation_override: str | None = None, + parent_span_id_source: str | None = None, + ) -> None: + """Export an OTEL span with optional deterministic IDs and real timestamps. + + Args: + name: Span operation name. + attributes: Span attributes dict. + correlation_id: Source for trace_id derivation (groups spans in one trace). + span_id_source: Source for deterministic span_id (e.g. workflow_run_id or node_execution_id). + start_time: Real span start time. When None, uses current time. + end_time: Real span end time. When None, span ends immediately. + trace_correlation_override: Override trace_id source (for cross-workflow linking). + When set, trace_id is derived from this instead of ``correlation_id``. + parent_span_id_source: Override parent span_id source (for cross-workflow linking). + When set, parent span_id is derived from this value. When None and + ``correlation_id`` is set, parent is the workflow root span. + """ + effective_trace_correlation = trace_correlation_override or correlation_id + set_correlation_id(effective_trace_correlation) + set_span_id_source(span_id_source) + + try: + parent_context: Context | None = None + # A span is the "root" of its correlation group when span_id_source == correlation_id + # (i.e. a workflow root span). All other spans are children. + if parent_span_id_source: + # Cross-workflow linking: parent is an explicit span (e.g. 
tool node in outer workflow) + parent_span_id = compute_deterministic_span_id(parent_span_id_source) + try: + parent_trace_id = int(uuid.UUID(effective_trace_correlation)) if effective_trace_correlation else 0 + except (ValueError, AttributeError): + logger.warning( + "Invalid trace correlation UUID for cross-workflow link: %s, span=%s", + effective_trace_correlation, + name, + ) + parent_trace_id = 0 + if parent_trace_id: + parent_span_context = SpanContext( + trace_id=parent_trace_id, + span_id=parent_span_id, + is_remote=True, + trace_flags=TraceFlags(TraceFlags.SAMPLED), + ) + parent_context = trace.set_span_in_context(trace.NonRecordingSpan(parent_span_context)) + elif correlation_id and correlation_id != span_id_source: + # Child span: parent is the correlation-group root (workflow root span) + parent_span_id = compute_deterministic_span_id(correlation_id) + try: + parent_trace_id = int(uuid.UUID(effective_trace_correlation or correlation_id)) + except (ValueError, AttributeError): + logger.warning( + "Invalid trace correlation UUID for child span link: %s, span=%s", + effective_trace_correlation or correlation_id, + name, + ) + parent_trace_id = 0 + if parent_trace_id: + parent_span_context = SpanContext( + trace_id=parent_trace_id, + span_id=parent_span_id, + is_remote=True, + trace_flags=TraceFlags(TraceFlags.SAMPLED), + ) + parent_context = trace.set_span_in_context(trace.NonRecordingSpan(parent_span_context)) + + span_start_time = _datetime_to_ns(start_time) if start_time is not None else None + span_end_on_exit = end_time is None + + with self._tracer.start_as_current_span( + name, + context=parent_context, + start_time=span_start_time, + end_on_exit=span_end_on_exit, + ) as span: + for key, value in attributes.items(): + if value is not None: + span.set_attribute(key, value) + if end_time is not None: + span.end(end_time=_datetime_to_ns(end_time)) + except Exception: + logger.exception("Failed to export span %s", name) + finally: + set_correlation_id(None) + set_span_id_source(None) + + def increment_counter( + self, name: EnterpriseTelemetryCounter, value: int, labels: dict[str, AttributeValue] + ) -> None: + counter = self._counters.get(name) + if counter: + counter.add(value, cast(Attributes, labels)) + + def record_histogram( + self, name: EnterpriseTelemetryHistogram, value: float, labels: dict[str, AttributeValue] + ) -> None: + histogram = self._histograms.get(name) + if histogram: + histogram.record(value, cast(Attributes, labels)) + + def shutdown(self) -> None: + self._tracer_provider.shutdown() + self._meter_provider.shutdown() diff --git a/api/enterprise/telemetry/id_generator.py b/api/enterprise/telemetry/id_generator.py new file mode 100644 index 0000000000..f3e5d6d0d6 --- /dev/null +++ b/api/enterprise/telemetry/id_generator.py @@ -0,0 +1,75 @@ +"""Custom OTEL ID Generator for correlation-based trace/span ID derivation. + +Uses contextvars for thread-safe correlation_id -> trace_id mapping. +When a span_id_source is set, the span_id is derived deterministically +from that value, enabling any span to reference another as parent +without depending on span creation order. 
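+
+Example (a hedged sketch of the derivation; the same source UUID always
+yields the same IDs, so a parent span can be referenced from a business ID
+alone, and the UUID below is hypothetical):
+
+    import uuid
+
+    run_id = "8c7d0f2e-1b3a-4c5d-9e6f-7a8b9c0d1e2f"  # e.g. a workflow_run_id
+    trace_id = uuid.UUID(run_id).int                 # full 128 bits
+    span_id = compute_deterministic_span_id(run_id)  # lower 64 bits, never 0
+    assert span_id == compute_deterministic_span_id(run_id)  # stable across calls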
+""" + +import random +import uuid +from contextvars import ContextVar + +from opentelemetry.sdk.trace.id_generator import IdGenerator + +_correlation_id_context: ContextVar[str | None] = ContextVar("correlation_id", default=None) +_span_id_source_context: ContextVar[str | None] = ContextVar("span_id_source", default=None) + + +def set_correlation_id(correlation_id: str | None) -> None: + _correlation_id_context.set(correlation_id) + + +def get_correlation_id() -> str | None: + return _correlation_id_context.get() + + +def set_span_id_source(source_id: str | None) -> None: + """Set the source for deterministic span_id generation. + + When set, ``generate_span_id()`` derives the span_id from this value + (lower 64 bits of the UUID). Pass the ``workflow_run_id`` for workflow + root spans or ``node_execution_id`` for node spans. + """ + _span_id_source_context.set(source_id) + + +def compute_deterministic_span_id(source_id: str) -> int: + """Derive a deterministic span_id from any UUID string. + + Uses the lower 64 bits of the UUID, guaranteeing non-zero output + (OTEL requires span_id != 0). + """ + span_id = uuid.UUID(source_id).int & ((1 << 64) - 1) + return span_id if span_id != 0 else 1 + + +class CorrelationIdGenerator(IdGenerator): + """ID generator that derives trace_id and optionally span_id from context. + + - trace_id: always derived from correlation_id (groups all spans in one trace) + - span_id: derived from span_id_source when set (enables deterministic + parent-child linking), otherwise random + """ + + def generate_trace_id(self) -> int: + correlation_id = _correlation_id_context.get() + if correlation_id: + try: + return uuid.UUID(correlation_id).int + except (ValueError, AttributeError): + pass + return random.getrandbits(128) + + def generate_span_id(self) -> int: + source = _span_id_source_context.get() + if source: + try: + return compute_deterministic_span_id(source) + except (ValueError, AttributeError): + pass + + span_id = random.getrandbits(64) + while span_id == 0: + span_id = random.getrandbits(64) + return span_id diff --git a/api/enterprise/telemetry/metric_handler.py b/api/enterprise/telemetry/metric_handler.py new file mode 100644 index 0000000000..25bec993b7 --- /dev/null +++ b/api/enterprise/telemetry/metric_handler.py @@ -0,0 +1,381 @@ +"""Enterprise metric/log event handler. + +This module processes metric and log telemetry events after they've been +dequeued from the enterprise_telemetry Celery queue. It handles case routing, +idempotency checking, and payload rehydration. +""" + +from __future__ import annotations + +import json +import logging +from typing import Any + +from enterprise.telemetry.contracts import TelemetryCase, TelemetryEnvelope +from extensions.ext_redis import redis_client +from extensions.ext_storage import storage + +logger = logging.getLogger(__name__) + + +class EnterpriseMetricHandler: + """Handler for enterprise metric and log telemetry events. + + Processes envelopes from the enterprise_telemetry queue, routing each + case to the appropriate handler method. Implements idempotency checking + and payload rehydration with fallback. + """ + + def _increment_diagnostic_counter(self, counter_name: str, labels: dict[str, str] | None = None) -> None: + """Increment a diagnostic counter for operational monitoring. + + Args: + counter_name: Name of the counter (e.g., 'processed_total', 'deduped_total'). + labels: Optional labels for the counter. 
+ """ + try: + from extensions.ext_enterprise_telemetry import get_enterprise_exporter + + exporter = get_enterprise_exporter() + if not exporter: + return + + full_counter_name = f"enterprise_telemetry.handler.{counter_name}" + logger.debug( + "Diagnostic counter: %s, labels=%s", + full_counter_name, + labels or {}, + ) + except Exception: + logger.debug("Failed to increment diagnostic counter: %s", counter_name, exc_info=True) + + def handle(self, envelope: TelemetryEnvelope) -> None: + """Main entry point for processing telemetry envelopes. + + Args: + envelope: The telemetry envelope to process. + """ + # Check for duplicate events + if self._is_duplicate(envelope): + logger.debug( + "Skipping duplicate event: tenant_id=%s, event_id=%s", + envelope.tenant_id, + envelope.event_id, + ) + self._increment_diagnostic_counter("deduped_total") + return + + # Route to appropriate handler based on case + case = envelope.case + if case == TelemetryCase.APP_CREATED: + self._on_app_created(envelope) + self._increment_diagnostic_counter("processed_total", {"case": "app_created"}) + elif case == TelemetryCase.APP_UPDATED: + self._on_app_updated(envelope) + self._increment_diagnostic_counter("processed_total", {"case": "app_updated"}) + elif case == TelemetryCase.APP_DELETED: + self._on_app_deleted(envelope) + self._increment_diagnostic_counter("processed_total", {"case": "app_deleted"}) + elif case == TelemetryCase.FEEDBACK_CREATED: + self._on_feedback_created(envelope) + self._increment_diagnostic_counter("processed_total", {"case": "feedback_created"}) + elif case == TelemetryCase.MESSAGE_RUN: + self._on_message_run(envelope) + self._increment_diagnostic_counter("processed_total", {"case": "message_run"}) + elif case == TelemetryCase.TOOL_EXECUTION: + self._on_tool_execution(envelope) + self._increment_diagnostic_counter("processed_total", {"case": "tool_execution"}) + elif case == TelemetryCase.MODERATION_CHECK: + self._on_moderation_check(envelope) + self._increment_diagnostic_counter("processed_total", {"case": "moderation_check"}) + elif case == TelemetryCase.SUGGESTED_QUESTION: + self._on_suggested_question(envelope) + self._increment_diagnostic_counter("processed_total", {"case": "suggested_question"}) + elif case == TelemetryCase.DATASET_RETRIEVAL: + self._on_dataset_retrieval(envelope) + self._increment_diagnostic_counter("processed_total", {"case": "dataset_retrieval"}) + elif case == TelemetryCase.GENERATE_NAME: + self._on_generate_name(envelope) + self._increment_diagnostic_counter("processed_total", {"case": "generate_name"}) + elif case == TelemetryCase.PROMPT_GENERATION: + self._on_prompt_generation(envelope) + self._increment_diagnostic_counter("processed_total", {"case": "prompt_generation"}) + else: + logger.warning( + "Unknown telemetry case: %s (tenant_id=%s, event_id=%s)", + case, + envelope.tenant_id, + envelope.event_id, + ) + + def _is_duplicate(self, envelope: TelemetryEnvelope) -> bool: + """Check if this event has already been processed. + + Uses Redis with TTL for deduplication. Returns True if duplicate, + False if first time seeing this event. + + Args: + envelope: The telemetry envelope to check. + + Returns: + True if this event_id has been seen before, False otherwise. 
+ """ + dedup_key = f"telemetry:dedup:{envelope.tenant_id}:{envelope.event_id}" + + try: + # Atomic set-if-not-exists with 1h TTL + # Returns True if key was set (first time), None if already exists (duplicate) + was_set = redis_client.set(dedup_key, b"1", nx=True, ex=3600) + return was_set is None + except Exception: + # Fail open: if Redis is unavailable, process the event + # (prefer occasional duplicate over lost data) + logger.warning( + "Redis unavailable for deduplication check, processing event anyway: %s", + envelope.event_id, + exc_info=True, + ) + return False + + def _rehydrate(self, envelope: TelemetryEnvelope) -> dict[str, Any]: + """Rehydrate payload from storage reference or inline data. + + If the envelope payload is empty and metadata contains a + ``payload_ref``, the full payload is loaded from object storage + (where the gateway wrote it as JSON). When both the inline + payload and storage resolution fail, a degraded-event marker + is emitted so the gap is observable. + + Args: + envelope: The telemetry envelope containing payload data. + + Returns: + The rehydrated payload dictionary, or ``{}`` on total failure. + """ + payload = envelope.payload + + # Resolve from object storage when the gateway offloaded a large payload. + if not payload and envelope.metadata: + payload_ref = envelope.metadata.get("payload_ref") + if payload_ref: + try: + payload_bytes = storage.load(payload_ref) + payload = json.loads(payload_bytes.decode("utf-8")) + logger.debug("Loaded payload from storage: key=%s", payload_ref) + except Exception: + logger.warning( + "Failed to load payload from storage: key=%s, event_id=%s", + payload_ref, + envelope.event_id, + exc_info=True, + ) + + if not payload: + # Storage resolution failed or no data available — emit degraded event. 
+ logger.error( + "Payload rehydration failed for event_id=%s, tenant_id=%s, case=%s", + envelope.event_id, + envelope.tenant_id, + envelope.case, + ) + from enterprise.telemetry.entities import EnterpriseTelemetryEvent + from enterprise.telemetry.telemetry_log import emit_metric_only_event + + emit_metric_only_event( + event_name=EnterpriseTelemetryEvent.REHYDRATION_FAILED, + attributes={ + "dify.tenant_id": envelope.tenant_id, + "dify.event_id": envelope.event_id, + "dify.case": envelope.case, + "rehydration_failed": True, + }, + tenant_id=envelope.tenant_id, + ) + self._increment_diagnostic_counter("rehydration_failed_total") + return {} + + return payload + + # Stub methods for each metric/log case + # These will be implemented in later tasks with actual emission logic + + def _on_app_created(self, envelope: TelemetryEnvelope) -> None: + """Handle app created event.""" + from enterprise.telemetry.entities import EnterpriseTelemetryCounter, EnterpriseTelemetryEvent + from enterprise.telemetry.telemetry_log import emit_metric_only_event + from extensions.ext_enterprise_telemetry import get_enterprise_exporter + + exporter = get_enterprise_exporter() + if not exporter: + logger.debug("No exporter available for APP_CREATED: event_id=%s", envelope.event_id) + return + + payload = self._rehydrate(envelope) + if not payload: + return + + attrs = { + "dify.app.id": payload.get("app_id"), + "dify.tenant_id": envelope.tenant_id, + "dify.event.id": envelope.event_id, + "dify.app.mode": payload.get("mode"), + } + + emit_metric_only_event( + event_name=EnterpriseTelemetryEvent.APP_CREATED, + attributes=attrs, + tenant_id=envelope.tenant_id, + ) + exporter.increment_counter( + EnterpriseTelemetryCounter.APP_CREATED, + 1, + { + "tenant_id": envelope.tenant_id, + "app_id": str(payload.get("app_id", "")), + "mode": str(payload.get("mode", "")), + }, + ) + + def _on_app_updated(self, envelope: TelemetryEnvelope) -> None: + """Handle app updated event.""" + from enterprise.telemetry.entities import EnterpriseTelemetryCounter, EnterpriseTelemetryEvent + from enterprise.telemetry.telemetry_log import emit_metric_only_event + from extensions.ext_enterprise_telemetry import get_enterprise_exporter + + exporter = get_enterprise_exporter() + if not exporter: + logger.debug("No exporter available for APP_UPDATED: event_id=%s", envelope.event_id) + return + + payload = self._rehydrate(envelope) + if not payload: + return + + attrs = { + "dify.app.id": payload.get("app_id"), + "dify.tenant_id": envelope.tenant_id, + "dify.event.id": envelope.event_id, + } + + emit_metric_only_event( + event_name=EnterpriseTelemetryEvent.APP_UPDATED, + attributes=attrs, + tenant_id=envelope.tenant_id, + ) + exporter.increment_counter( + EnterpriseTelemetryCounter.APP_UPDATED, + 1, + { + "tenant_id": envelope.tenant_id, + "app_id": str(payload.get("app_id", "")), + }, + ) + + def _on_app_deleted(self, envelope: TelemetryEnvelope) -> None: + """Handle app deleted event.""" + from enterprise.telemetry.entities import EnterpriseTelemetryCounter, EnterpriseTelemetryEvent + from enterprise.telemetry.telemetry_log import emit_metric_only_event + from extensions.ext_enterprise_telemetry import get_enterprise_exporter + + exporter = get_enterprise_exporter() + if not exporter: + logger.debug("No exporter available for APP_DELETED: event_id=%s", envelope.event_id) + return + + payload = self._rehydrate(envelope) + if not payload: + return + + attrs = { + "dify.app.id": payload.get("app_id"), + "dify.tenant_id": envelope.tenant_id, + 
"dify.event.id": envelope.event_id, + } + + emit_metric_only_event( + event_name=EnterpriseTelemetryEvent.APP_DELETED, + attributes=attrs, + tenant_id=envelope.tenant_id, + ) + exporter.increment_counter( + EnterpriseTelemetryCounter.APP_DELETED, + 1, + { + "tenant_id": envelope.tenant_id, + "app_id": str(payload.get("app_id", "")), + }, + ) + + def _on_feedback_created(self, envelope: TelemetryEnvelope) -> None: + """Handle feedback created event.""" + from enterprise.telemetry.entities import EnterpriseTelemetryCounter, EnterpriseTelemetryEvent + from enterprise.telemetry.telemetry_log import emit_metric_only_event + from extensions.ext_enterprise_telemetry import get_enterprise_exporter + + exporter = get_enterprise_exporter() + if not exporter: + logger.debug("No exporter available for FEEDBACK_CREATED: event_id=%s", envelope.event_id) + return + + payload = self._rehydrate(envelope) + if not payload: + return + + include_content = exporter.include_content + attrs: dict = { + "dify.message.id": payload.get("message_id"), + "dify.tenant_id": envelope.tenant_id, + "dify.event.id": envelope.event_id, + "dify.app_id": payload.get("app_id"), + "dify.conversation.id": payload.get("conversation_id"), + "gen_ai.user.id": payload.get("from_end_user_id") or payload.get("from_account_id"), + "dify.feedback.rating": payload.get("rating"), + "dify.feedback.from_source": payload.get("from_source"), + } + if include_content: + attrs["dify.feedback.content"] = payload.get("content") + + user_id = payload.get("from_end_user_id") or payload.get("from_account_id") + emit_metric_only_event( + event_name=EnterpriseTelemetryEvent.FEEDBACK_CREATED, + attributes=attrs, + tenant_id=envelope.tenant_id, + user_id=str(user_id or ""), + ) + exporter.increment_counter( + EnterpriseTelemetryCounter.FEEDBACK, + 1, + { + "tenant_id": envelope.tenant_id, + "app_id": str(payload.get("app_id", "")), + "rating": str(payload.get("rating", "")), + }, + ) + + def _on_message_run(self, envelope: TelemetryEnvelope) -> None: + """Handle message run event (stub).""" + logger.debug("Processing MESSAGE_RUN: event_id=%s", envelope.event_id) + + def _on_tool_execution(self, envelope: TelemetryEnvelope) -> None: + """Handle tool execution event (stub).""" + logger.debug("Processing TOOL_EXECUTION: event_id=%s", envelope.event_id) + + def _on_moderation_check(self, envelope: TelemetryEnvelope) -> None: + """Handle moderation check event (stub).""" + logger.debug("Processing MODERATION_CHECK: event_id=%s", envelope.event_id) + + def _on_suggested_question(self, envelope: TelemetryEnvelope) -> None: + """Handle suggested question event (stub).""" + logger.debug("Processing SUGGESTED_QUESTION: event_id=%s", envelope.event_id) + + def _on_dataset_retrieval(self, envelope: TelemetryEnvelope) -> None: + """Handle dataset retrieval event (stub).""" + logger.debug("Processing DATASET_RETRIEVAL: event_id=%s", envelope.event_id) + + def _on_generate_name(self, envelope: TelemetryEnvelope) -> None: + """Handle generate name event (stub).""" + logger.debug("Processing GENERATE_NAME: event_id=%s", envelope.event_id) + + def _on_prompt_generation(self, envelope: TelemetryEnvelope) -> None: + """Handle prompt generation event (stub).""" + logger.debug("Processing PROMPT_GENERATION: event_id=%s", envelope.event_id) diff --git a/api/enterprise/telemetry/telemetry_log.py b/api/enterprise/telemetry/telemetry_log.py new file mode 100644 index 0000000000..8cce4a9fcd --- /dev/null +++ b/api/enterprise/telemetry/telemetry_log.py @@ -0,0 +1,122 @@ 
+"""Structured-log emitter for enterprise telemetry events. + +Emits structured JSON log lines correlated with OTEL traces via trace_id. +Picked up by ``StructuredJSONFormatter`` → stdout/Loki/Elastic. +""" + +from __future__ import annotations + +import logging +import uuid +from functools import lru_cache +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from enterprise.telemetry.entities import EnterpriseTelemetryEvent + +logger = logging.getLogger("dify.telemetry") + + +@lru_cache(maxsize=4096) +def compute_trace_id_hex(uuid_str: str | None) -> str: + """Convert a business UUID string to a 32-hex OTEL-compatible trace_id. + + Returns empty string when *uuid_str* is ``None`` or invalid. + """ + if not uuid_str: + return "" + normalized = uuid_str.strip().lower() + if len(normalized) == 32 and all(ch in "0123456789abcdef" for ch in normalized): + return normalized + try: + return f"{uuid.UUID(normalized).int:032x}" + except (ValueError, AttributeError): + return "" + + +@lru_cache(maxsize=4096) +def compute_span_id_hex(uuid_str: str | None) -> str: + if not uuid_str: + return "" + normalized = uuid_str.strip().lower() + if len(normalized) == 16 and all(ch in "0123456789abcdef" for ch in normalized): + return normalized + try: + from enterprise.telemetry.id_generator import compute_deterministic_span_id + + return f"{compute_deterministic_span_id(normalized):016x}" + except (ValueError, AttributeError): + return "" + + +def emit_telemetry_log( + *, + event_name: str | EnterpriseTelemetryEvent, + attributes: dict[str, Any], + signal: str = "metric_only", + trace_id_source: str | None = None, + span_id_source: str | None = None, + tenant_id: str | None = None, + user_id: str | None = None, +) -> None: + """Emit a structured log line for a telemetry event. + + Parameters + ---------- + event_name: + Canonical event name, e.g. ``"dify.workflow.run"``. + attributes: + All event-specific attributes (already built by the caller). + signal: + ``"metric_only"`` for events with no span, ``"span_detail"`` + for detail logs accompanying a slim span. + trace_id_source: + A UUID string (e.g. ``workflow_run_id``) used to derive a 32-hex + trace_id for cross-signal correlation. + tenant_id: + Tenant identifier (for the ``IdentityContextFilter``). + user_id: + User identifier (for the ``IdentityContextFilter``). 
+ """ + if not logger.isEnabledFor(logging.INFO): + return + attrs = { + "dify.event.name": event_name, + "dify.event.signal": signal, + **attributes, + } + + extra: dict[str, Any] = {"attributes": attrs} + + trace_id_hex = compute_trace_id_hex(trace_id_source) + if trace_id_hex: + extra["trace_id"] = trace_id_hex + span_id_hex = compute_span_id_hex(span_id_source) + if span_id_hex: + extra["span_id"] = span_id_hex + if tenant_id: + extra["tenant_id"] = tenant_id + if user_id: + extra["user_id"] = user_id + + logger.info("telemetry.%s", signal, extra=extra) + + +def emit_metric_only_event( + *, + event_name: str | EnterpriseTelemetryEvent, + attributes: dict[str, Any], + trace_id_source: str | None = None, + span_id_source: str | None = None, + tenant_id: str | None = None, + user_id: str | None = None, +) -> None: + emit_telemetry_log( + event_name=event_name, + attributes=attributes, + signal="metric_only", + trace_id_source=trace_id_source, + span_id_source=span_id_source, + tenant_id=tenant_id, + user_id=user_id, + ) diff --git a/api/events/app_event.py b/api/events/app_event.py index f2ce71bbbb..3a0094b77c 100644 --- a/api/events/app_event.py +++ b/api/events/app_event.py @@ -3,6 +3,12 @@ from blinker import signal # sender: app app_was_created = signal("app-was-created") +# sender: app +app_was_deleted = signal("app-was-deleted") + +# sender: app +app_was_updated = signal("app-was-updated") + # sender: app, kwargs: app_model_config app_model_config_was_updated = signal("app-model-config-was-updated") diff --git a/api/events/event_handlers/delete_tool_parameters_cache_when_sync_draft_workflow.py b/api/events/event_handlers/delete_tool_parameters_cache_when_sync_draft_workflow.py index 5c02a16a7d..c43e99f0f4 100644 --- a/api/events/event_handlers/delete_tool_parameters_cache_when_sync_draft_workflow.py +++ b/api/events/event_handlers/delete_tool_parameters_cache_when_sync_draft_workflow.py @@ -2,7 +2,7 @@ import logging from core.tools.tool_manager import ToolManager from core.tools.utils.configuration import ToolParameterConfigurationManager -from dify_graph.nodes import NodeType +from dify_graph.nodes import BuiltinNodeTypes from dify_graph.nodes.tool.entities import ToolEntity from events.app_event import app_draft_workflow_was_synced @@ -16,7 +16,7 @@ def handle(sender, **kwargs): if synced_draft_workflow is None: return for node_data in synced_draft_workflow.graph_dict.get("nodes", []): - if node_data.get("data", {}).get("type") == NodeType.TOOL: + if node_data.get("data", {}).get("type") == BuiltinNodeTypes.TOOL: try: tool_entity = ToolEntity.model_validate(node_data["data"]) tool_runtime = ToolManager.get_tool_runtime( diff --git a/api/events/event_handlers/sync_workflow_schedule_when_app_published.py b/api/events/event_handlers/sync_workflow_schedule_when_app_published.py index 90f562d167..168513fc04 100644 --- a/api/events/event_handlers/sync_workflow_schedule_when_app_published.py +++ b/api/events/event_handlers/sync_workflow_schedule_when_app_published.py @@ -4,7 +4,7 @@ from typing import cast from sqlalchemy import select from sqlalchemy.orm import Session -from dify_graph.nodes.trigger_schedule.entities import SchedulePlanUpdate +from core.workflow.nodes.trigger_schedule.entities import SchedulePlanUpdate from events.app_event import app_published_workflow_was_updated from extensions.ext_database import db from models import AppMode, Workflow, WorkflowSchedulePlan diff --git a/api/events/event_handlers/update_app_dataset_join_when_app_published_workflow_updated.py 
b/api/events/event_handlers/update_app_dataset_join_when_app_published_workflow_updated.py index 8da33d03b9..92bc9db075 100644 --- a/api/events/event_handlers/update_app_dataset_join_when_app_published_workflow_updated.py +++ b/api/events/event_handlers/update_app_dataset_join_when_app_published_workflow_updated.py @@ -2,8 +2,8 @@ from typing import cast from sqlalchemy import select -from dify_graph.nodes import NodeType -from dify_graph.nodes.knowledge_retrieval.entities import KnowledgeRetrievalNodeData +from core.workflow.nodes.knowledge_retrieval.entities import KnowledgeRetrievalNodeData +from dify_graph.nodes import BuiltinNodeTypes from events.app_event import app_published_workflow_was_updated from extensions.ext_database import db from models.dataset import AppDatasetJoin @@ -53,7 +53,7 @@ def get_dataset_ids_from_workflow(published_workflow: Workflow) -> set[str]: # fetch all knowledge retrieval nodes knowledge_retrieval_nodes = [ - node for node in nodes if node.get("data", {}).get("type") == NodeType.KNOWLEDGE_RETRIEVAL + node for node in nodes if node.get("data", {}).get("type") == BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL ] if not knowledge_retrieval_nodes: diff --git a/api/events/event_handlers/update_app_triggers_when_app_published_workflow_updated.py b/api/events/event_handlers/update_app_triggers_when_app_published_workflow_updated.py index fd211a3e55..b3917d5622 100644 --- a/api/events/event_handlers/update_app_triggers_when_app_published_workflow_updated.py +++ b/api/events/event_handlers/update_app_triggers_when_app_published_workflow_updated.py @@ -3,7 +3,7 @@ from typing import cast from sqlalchemy import select from sqlalchemy.orm import Session -from dify_graph.nodes import NodeType +from core.trigger.constants import TRIGGER_NODE_TYPES from events.app_event import app_published_workflow_was_updated from extensions.ext_database import db from models import AppMode @@ -98,7 +98,7 @@ def get_trigger_infos_from_workflow(published_workflow: Workflow) -> list[dict]: return [] nodes = graph.get("nodes", []) - trigger_types = {NodeType.TRIGGER_WEBHOOK.value, NodeType.TRIGGER_SCHEDULE.value, NodeType.TRIGGER_PLUGIN.value} + trigger_types = TRIGGER_NODE_TYPES trigger_infos = [ { diff --git a/api/events/feedback_event.py b/api/events/feedback_event.py new file mode 100644 index 0000000000..8d91d5c5e5 --- /dev/null +++ b/api/events/feedback_event.py @@ -0,0 +1,4 @@ +from blinker import signal + +# sender: MessageFeedback, kwargs: tenant_id +feedback_was_created = signal("feedback-was-created") diff --git a/api/extensions/ext_celery.py b/api/extensions/ext_celery.py index 7b6a73af52..b08d94d1f2 100644 --- a/api/extensions/ext_celery.py +++ b/api/extensions/ext_celery.py @@ -204,6 +204,8 @@ def init_app(app: DifyApp) -> Celery: "schedule": timedelta(minutes=dify_config.API_TOKEN_LAST_USED_UPDATE_INTERVAL), } + if dify_config.ENTERPRISE_ENABLED and dify_config.ENTERPRISE_TELEMETRY_ENABLED: + imports.append("tasks.enterprise_telemetry_task") celery_app.conf.update(beat_schedule=beat_schedule, imports=imports) return celery_app diff --git a/api/extensions/ext_enterprise_telemetry.py b/api/extensions/ext_enterprise_telemetry.py new file mode 100644 index 0000000000..f785c00ae0 --- /dev/null +++ b/api/extensions/ext_enterprise_telemetry.py @@ -0,0 +1,50 @@ +"""Flask extension for enterprise telemetry lifecycle management. + +Initializes the EnterpriseExporter singleton during ``create_app()`` +(single-threaded), registers blinker event handlers, and hooks atexit +for graceful shutdown. 
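+
+Configuration (a hedged sketch; the flags are real config fields, shown here
+as illustrative environment-variable settings):
+
+    ENTERPRISE_ENABLED=true
+    ENTERPRISE_TELEMETRY_ENABLED=true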
+
+Skipped entirely unless both ``ENTERPRISE_ENABLED`` and ``ENTERPRISE_TELEMETRY_ENABLED``
+are true (the ``is_enabled()`` gate).
+"""
+
+from __future__ import annotations
+
+import atexit
+import logging
+from typing import TYPE_CHECKING
+
+from configs import dify_config
+
+if TYPE_CHECKING:
+    from dify_app import DifyApp
+    from enterprise.telemetry.exporter import EnterpriseExporter
+
+logger = logging.getLogger(__name__)
+
+_exporter: EnterpriseExporter | None = None
+
+
+def is_enabled() -> bool:
+    return bool(dify_config.ENTERPRISE_ENABLED and dify_config.ENTERPRISE_TELEMETRY_ENABLED)
+
+
+def init_app(app: DifyApp) -> None:
+    global _exporter
+
+    if not is_enabled():
+        return
+
+    from enterprise.telemetry.exporter import EnterpriseExporter
+
+    _exporter = EnterpriseExporter(dify_config)
+    atexit.register(_exporter.shutdown)
+
+    # Import to trigger @signal.connect decorator registration
+    import enterprise.telemetry.event_handlers  # noqa: F401 # type: ignore[reportUnusedImport]
+
+    logger.info("Enterprise telemetry initialized")
+
+
+def get_enterprise_exporter() -> EnterpriseExporter | None:
+    return _exporter
diff --git a/api/extensions/ext_otel.py b/api/extensions/ext_otel.py
index a5baa21018..3573aec7ce 100644
--- a/api/extensions/ext_otel.py
+++ b/api/extensions/ext_otel.py
@@ -78,16 +78,24 @@ def init_app(app: DifyApp):
     protocol = (dify_config.OTEL_EXPORTER_OTLP_PROTOCOL or "").lower()
     if dify_config.OTEL_EXPORTER_TYPE == "otlp":
         if protocol == "grpc":
+            # Auto-detect TLS: https:// uses secure, everything else is insecure
+            endpoint = dify_config.OTLP_BASE_ENDPOINT
+            insecure = not endpoint.startswith("https://")
+
             exporter = GRPCSpanExporter(
-                endpoint=dify_config.OTLP_BASE_ENDPOINT,
+                endpoint=endpoint,
                 # Header field names must consist of lowercase letters, check RFC7540
-                headers=(("authorization", f"Bearer {dify_config.OTLP_API_KEY}"),),
-                insecure=True,
+                headers=(
+                    (("authorization", f"Bearer {dify_config.OTLP_API_KEY}"),) if dify_config.OTLP_API_KEY else None
+                ),
+                insecure=insecure,
             )
             metric_exporter = GRPCMetricExporter(
-                endpoint=dify_config.OTLP_BASE_ENDPOINT,
-                headers=(("authorization", f"Bearer {dify_config.OTLP_API_KEY}"),),
-                insecure=True,
+                endpoint=endpoint,
+                headers=(
+                    (("authorization", f"Bearer {dify_config.OTLP_API_KEY}"),) if dify_config.OTLP_API_KEY else None
+                ),
+                insecure=insecure,
             )
         else:
             headers = {"Authorization": f"Bearer {dify_config.OTLP_API_KEY}"} if dify_config.OTLP_API_KEY else None
diff --git a/api/extensions/logstore/repositories/logstore_api_workflow_node_execution_repository.py b/api/extensions/logstore/repositories/logstore_api_workflow_node_execution_repository.py
index 7ee4638e77..a94d75ec76 100644
--- a/api/extensions/logstore/repositories/logstore_api_workflow_node_execution_repository.py
+++ b/api/extensions/logstore/repositories/logstore_api_workflow_node_execution_repository.py
@@ -17,7 +17,8 @@ from dify_graph.enums import WorkflowNodeExecutionStatus
 from extensions.logstore.aliyun_logstore import AliyunLogStore
 from extensions.logstore.repositories import safe_float, safe_int
 from extensions.logstore.sql_escape import escape_identifier, escape_logstore_query_value
-from models.workflow import WorkflowNodeExecutionModel
+from models.enums import CreatorUserRole
+from models.workflow import WorkflowNodeExecutionModel, WorkflowNodeExecutionTriggeredFrom
 from repositories.api_workflow_node_execution_repository import DifyAPIWorkflowNodeExecutionRepository

 logger = logging.getLogger(__name__)

@@ -47,12 +48,28 @@ def
_dict_to_workflow_node_execution_model(data: dict[str, Any]) -> WorkflowNode model.tenant_id = data.get("tenant_id") or "" model.app_id = data.get("app_id") or "" model.workflow_id = data.get("workflow_id") or "" - model.triggered_from = data.get("triggered_from") or "" + triggered_from_val = data.get("triggered_from") + try: + model.triggered_from = ( + WorkflowNodeExecutionTriggeredFrom(str(triggered_from_val)) + if triggered_from_val + else WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN + ) + except ValueError: + logger.warning("Invalid triggered_from value: %s, falling back to WORKFLOW_RUN", triggered_from_val) + model.triggered_from = WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN model.node_id = data.get("node_id") or "" model.node_type = data.get("node_type") or "" model.status = data.get("status") or "running" # Default status if missing model.title = data.get("title") or "" - model.created_by_role = data.get("created_by_role") or "" + created_by_role_val = data.get("created_by_role") + try: + model.created_by_role = ( + CreatorUserRole(str(created_by_role_val)) if created_by_role_val else CreatorUserRole.ACCOUNT + ) + except ValueError: + logger.warning("Invalid created_by_role value: %s, falling back to ACCOUNT", created_by_role_val) + model.created_by_role = CreatorUserRole.ACCOUNT model.created_by = data.get("created_by") or "" model.index = safe_int(data.get("index", 0)) diff --git a/api/extensions/logstore/repositories/logstore_api_workflow_run_repository.py b/api/extensions/logstore/repositories/logstore_api_workflow_run_repository.py index 14382ed876..bdfc81bd1c 100644 --- a/api/extensions/logstore/repositories/logstore_api_workflow_run_repository.py +++ b/api/extensions/logstore/repositories/logstore_api_workflow_run_repository.py @@ -22,12 +22,13 @@ from typing import Any, cast from sqlalchemy.orm import sessionmaker +from dify_graph.enums import WorkflowExecutionStatus from extensions.logstore.aliyun_logstore import AliyunLogStore from extensions.logstore.repositories import safe_float, safe_int from extensions.logstore.sql_escape import escape_identifier, escape_logstore_query_value, escape_sql_string from libs.infinite_scroll_pagination import InfiniteScrollPagination -from models.enums import WorkflowRunTriggeredFrom -from models.workflow import WorkflowRun +from models.enums import CreatorUserRole, WorkflowRunTriggeredFrom +from models.workflow import WorkflowRun, WorkflowType from repositories.api_workflow_run_repository import APIWorkflowRunRepository from repositories.types import ( AverageInteractionStats, @@ -59,11 +60,37 @@ def _dict_to_workflow_run(data: dict[str, Any]) -> WorkflowRun: model.tenant_id = data.get("tenant_id") or "" model.app_id = data.get("app_id") or "" model.workflow_id = data.get("workflow_id") or "" - model.type = data.get("type") or "" - model.triggered_from = data.get("triggered_from") or "" + type_val = data.get("type") + try: + model.type = WorkflowType(str(type_val)) if type_val else WorkflowType.WORKFLOW + except ValueError: + logger.warning("Invalid type value: %s, falling back to WORKFLOW", type_val) + model.type = WorkflowType.WORKFLOW + triggered_from_val = data.get("triggered_from") + try: + model.triggered_from = ( + WorkflowRunTriggeredFrom(str(triggered_from_val)) + if triggered_from_val + else WorkflowRunTriggeredFrom.APP_RUN + ) + except ValueError: + logger.warning("Invalid triggered_from value: %s, falling back to APP_RUN", triggered_from_val) + model.triggered_from = WorkflowRunTriggeredFrom.APP_RUN model.version = 
data.get("version") or "" - model.status = data.get("status") or "running" # Default status if missing - model.created_by_role = data.get("created_by_role") or "" + status_val = data.get("status") + try: + model.status = WorkflowExecutionStatus(str(status_val)) if status_val else WorkflowExecutionStatus.RUNNING + except ValueError: + logger.warning("Invalid status value: %s, falling back to RUNNING", status_val) + model.status = WorkflowExecutionStatus.RUNNING + created_by_role_val = data.get("created_by_role") + try: + model.created_by_role = ( + CreatorUserRole(str(created_by_role_val)) if created_by_role_val else CreatorUserRole.ACCOUNT + ) + except ValueError: + logger.warning("Invalid created_by_role value: %s, falling back to ACCOUNT", created_by_role_val) + model.created_by_role = CreatorUserRole.ACCOUNT model.created_by = data.get("created_by") or "" model.total_tokens = safe_int(data.get("total_tokens", 0)) diff --git a/api/extensions/logstore/repositories/logstore_workflow_node_execution_repository.py b/api/extensions/logstore/repositories/logstore_workflow_node_execution_repository.py index bd1c08d96e..d84c0bc432 100644 --- a/api/extensions/logstore/repositories/logstore_workflow_node_execution_repository.py +++ b/api/extensions/logstore/repositories/logstore_workflow_node_execution_repository.py @@ -19,7 +19,6 @@ from sqlalchemy.orm import sessionmaker from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository from dify_graph.entities import WorkflowNodeExecution from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus -from dify_graph.enums import NodeType from dify_graph.model_runtime.utils.encoders import jsonable_encoder from dify_graph.repositories.workflow_node_execution_repository import OrderConfig, WorkflowNodeExecutionRepository from dify_graph.workflow_type_encoder import WorkflowRuntimeTypeConverter @@ -78,7 +77,7 @@ def _dict_to_workflow_node_execution(data: dict[str, Any]) -> WorkflowNodeExecut index=safe_int(data.get("index", 0)), predecessor_node_id=data.get("predecessor_node_id"), node_id=data.get("node_id", ""), - node_type=NodeType(data.get("node_type", "start")), + node_type=data.get("node_type", "start"), title=data.get("title", ""), inputs=inputs, process_data=process_data, @@ -185,7 +184,7 @@ class LogstoreWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository): ("predecessor_node_id", domain_model.predecessor_node_id or ""), ("node_execution_id", domain_model.node_execution_id or ""), ("node_id", domain_model.node_id), - ("node_type", domain_model.node_type.value), + ("node_type", domain_model.node_type), ("title", domain_model.title), ( "inputs", diff --git a/api/extensions/otel/parser/__init__.py b/api/extensions/otel/parser/__init__.py index 164db7c275..c671e8b409 100644 --- a/api/extensions/otel/parser/__init__.py +++ b/api/extensions/otel/parser/__init__.py @@ -5,7 +5,7 @@ This module provides parsers that extract node-specific metadata and set OpenTelemetry span attributes according to semantic conventions. 
""" -from extensions.otel.parser.base import DefaultNodeOTelParser, NodeOTelParser, safe_json_dumps +from extensions.otel.parser.base import DefaultNodeOTelParser, NodeOTelParser, safe_json_dumps, should_include_content from extensions.otel.parser.llm import LLMNodeOTelParser from extensions.otel.parser.retrieval import RetrievalNodeOTelParser from extensions.otel.parser.tool import ToolNodeOTelParser @@ -17,4 +17,5 @@ __all__ = [ "RetrievalNodeOTelParser", "ToolNodeOTelParser", "safe_json_dumps", + "should_include_content", ] diff --git a/api/extensions/otel/parser/base.py b/api/extensions/otel/parser/base.py index fc84147e01..92281968b1 100644 --- a/api/extensions/otel/parser/base.py +++ b/api/extensions/otel/parser/base.py @@ -1,5 +1,10 @@ """ Base parser interface and utilities for OpenTelemetry node parsers. + +Content gating: ``should_include_content()`` controls whether content-bearing +span attributes (inputs, outputs, prompts, completions, documents) are written. +Gate is only active in EE (``ENTERPRISE_ENABLED=True``) when +``ENTERPRISE_INCLUDE_CONTENT=False``; CE behaviour is unchanged. """ import json @@ -9,7 +14,8 @@ from opentelemetry.trace import Span from opentelemetry.trace.status import Status, StatusCode from pydantic import BaseModel -from dify_graph.enums import NodeType +from configs import dify_config +from dify_graph.enums import BuiltinNodeTypes from dify_graph.file.models import File from dify_graph.graph_events import GraphNodeEventBase from dify_graph.nodes.base.node import Node @@ -17,6 +23,17 @@ from dify_graph.variables import Segment from extensions.otel.semconv.gen_ai import ChainAttributes, GenAIAttributes +def should_include_content() -> bool: + """Return True if content should be written to spans. + + CE (ENTERPRISE_ENABLED=False): always True — no behaviour change. + EE: follows ENTERPRISE_INCLUDE_CONTENT (default True). + """ + if not dify_config.ENTERPRISE_ENABLED: + return True + return dify_config.ENTERPRISE_INCLUDE_CONTENT + + def safe_json_dumps(obj: Any, ensure_ascii: bool = False) -> str: """ Safely serialize objects to JSON, handling non-serializable types. 
@@ -84,31 +101,28 @@ class DefaultNodeOTelParser: span.set_attribute("node.id", node.id) if node.execution_id: span.set_attribute("node.execution_id", node.execution_id) - if hasattr(node, "node_type") and node.node_type: - span.set_attribute("node.type", node.node_type.value) + span.set_attribute("node.type", node.node_type) span.set_attribute(GenAIAttributes.FRAMEWORK, "dify") - node_type = getattr(node, "node_type", None) - if isinstance(node_type, NodeType): - if node_type == NodeType.LLM: - span.set_attribute(GenAIAttributes.SPAN_KIND, "LLM") - elif node_type == NodeType.KNOWLEDGE_RETRIEVAL: - span.set_attribute(GenAIAttributes.SPAN_KIND, "RETRIEVER") - elif node_type == NodeType.TOOL: - span.set_attribute(GenAIAttributes.SPAN_KIND, "TOOL") - else: - span.set_attribute(GenAIAttributes.SPAN_KIND, "TASK") + node_type = node.node_type + if node_type == BuiltinNodeTypes.LLM: + span.set_attribute(GenAIAttributes.SPAN_KIND, "LLM") + elif node_type == BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: + span.set_attribute(GenAIAttributes.SPAN_KIND, "RETRIEVER") + elif node_type == BuiltinNodeTypes.TOOL: + span.set_attribute(GenAIAttributes.SPAN_KIND, "TOOL") else: span.set_attribute(GenAIAttributes.SPAN_KIND, "TASK") # Extract inputs and outputs from result_event if result_event and result_event.node_run_result: node_run_result = result_event.node_run_result - if node_run_result.inputs: - span.set_attribute(ChainAttributes.INPUT_VALUE, safe_json_dumps(node_run_result.inputs)) - if node_run_result.outputs: - span.set_attribute(ChainAttributes.OUTPUT_VALUE, safe_json_dumps(node_run_result.outputs)) + if should_include_content(): + if node_run_result.inputs: + span.set_attribute(ChainAttributes.INPUT_VALUE, safe_json_dumps(node_run_result.inputs)) + if node_run_result.outputs: + span.set_attribute(ChainAttributes.OUTPUT_VALUE, safe_json_dumps(node_run_result.outputs)) if error: span.record_exception(error) diff --git a/api/extensions/otel/semconv/dify.py b/api/extensions/otel/semconv/dify.py index a20b9b358d..301ddd11aa 100644 --- a/api/extensions/otel/semconv/dify.py +++ b/api/extensions/otel/semconv/dify.py @@ -21,3 +21,15 @@ class DifySpanAttributes: INVOKE_FROM = "dify.invoke_from" """Invocation source, e.g. SERVICE_API, WEB_APP, DEBUGGER.""" + + INVOKED_BY = "dify.invoked_by" + """Invoked by, e.g. end_user, account, user.""" + + USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens" + """Number of input tokens (prompt tokens) used.""" + + USAGE_OUTPUT_TOKENS = "gen_ai.usage.output_tokens" + """Number of output tokens (completion tokens) generated.""" + + USAGE_TOTAL_TOKENS = "gen_ai.usage.total_tokens" + """Total number of tokens used.""" diff --git a/api/migrations/versions/2026_03_02_1805-0ec65df55790_add_indexes_for_human_input_forms.py b/api/migrations/versions/2026_03_02_1805-0ec65df55790_add_indexes_for_human_input_forms.py new file mode 100644 index 0000000000..63fd58b1bf --- /dev/null +++ b/api/migrations/versions/2026_03_02_1805-0ec65df55790_add_indexes_for_human_input_forms.py @@ -0,0 +1,68 @@ +"""add indexes for human_input_forms query patterns + +Revision ID: 0ec65df55790 +Revises: e288952f2994 +Create Date: 2026-03-02 18:05:00.000000 + +""" + +from alembic import op + + +# revision identifiers, used by Alembic. 
+revision = "0ec65df55790" +down_revision = "e288952f2994" +branch_labels = None +depends_on = None + + +def upgrade(): + with op.batch_alter_table("human_input_forms", schema=None) as batch_op: + batch_op.create_index( + "human_input_forms_workflow_run_id_node_id_idx", + ["workflow_run_id", "node_id"], + unique=False, + ) + batch_op.create_index( + "human_input_forms_status_created_at_idx", + ["status", "created_at"], + unique=False, + ) + batch_op.create_index( + "human_input_forms_status_expiration_time_idx", + ["status", "expiration_time"], + unique=False, + ) + + with op.batch_alter_table("human_input_form_deliveries", schema=None) as batch_op: + batch_op.create_index( + batch_op.f("human_input_form_deliveries_form_id_idx"), + ["form_id"], + unique=False, + ) + + with op.batch_alter_table("human_input_form_recipients", schema=None) as batch_op: + batch_op.create_index( + batch_op.f("human_input_form_recipients_delivery_id_idx"), + ["delivery_id"], + unique=False, + ) + batch_op.create_index( + batch_op.f("human_input_form_recipients_form_id_idx"), + ["form_id"], + unique=False, + ) + + +def downgrade(): + with op.batch_alter_table("human_input_forms", schema=None) as batch_op: + batch_op.drop_index("human_input_forms_workflow_run_id_node_id_idx") + batch_op.drop_index("human_input_forms_status_expiration_time_idx") + batch_op.drop_index("human_input_forms_status_created_at_idx") + + with op.batch_alter_table("human_input_form_recipients", schema=None) as batch_op: + batch_op.drop_index(batch_op.f("human_input_form_recipients_form_id_idx")) + batch_op.drop_index(batch_op.f("human_input_form_recipients_delivery_id_idx")) + + with op.batch_alter_table("human_input_form_deliveries", schema=None) as batch_op: + batch_op.drop_index(batch_op.f("human_input_form_deliveries_form_id_idx")) diff --git a/api/models/account.py b/api/models/account.py index f7a9c20026..1a43c9ca17 100644 --- a/api/models/account.py +++ b/api/models/account.py @@ -8,12 +8,12 @@ from uuid import uuid4 import sqlalchemy as sa from flask_login import UserMixin from sqlalchemy import DateTime, String, func, select -from sqlalchemy.orm import Mapped, Session, mapped_column, validates +from sqlalchemy.orm import Mapped, Session, mapped_column from typing_extensions import deprecated from .base import TypeBase from .engine import db -from .types import LongText, StringUUID +from .types import EnumText, LongText, StringUUID class TenantAccountRole(enum.StrEnum): @@ -104,7 +104,9 @@ class Account(UserMixin, TypeBase): last_active_at: Mapped[datetime] = mapped_column( DateTime, server_default=func.current_timestamp(), nullable=False, init=False ) - status: Mapped[str] = mapped_column(String(16), server_default=sa.text("'active'"), default="active") + status: Mapped[AccountStatus] = mapped_column( + EnumText(AccountStatus, length=16), server_default=sa.text("'active'"), default=AccountStatus.ACTIVE + ) initialized_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True, default=None) created_at: Mapped[datetime] = mapped_column( DateTime, server_default=func.current_timestamp(), nullable=False, init=False @@ -116,12 +118,6 @@ class Account(UserMixin, TypeBase): role: TenantAccountRole | None = field(default=None, init=False) _current_tenant: "Tenant | None" = field(default=None, init=False) - @validates("status") - def _normalize_status(self, _key: str, value: str | AccountStatus) -> str: - if isinstance(value, AccountStatus): - return value.value - return value - @property def is_password_set(self): return 
self.password is not None @@ -177,8 +173,7 @@ class Account(UserMixin, TypeBase): return self.role def get_status(self) -> AccountStatus: - status_str = self.status - return AccountStatus(status_str) + return self.status @classmethod def get_by_openid(cls, provider: str, open_id: str): @@ -249,7 +244,9 @@ class Tenant(TypeBase): name: Mapped[str] = mapped_column(String(255)) encrypt_public_key: Mapped[str | None] = mapped_column(LongText, default=None) plan: Mapped[str] = mapped_column(String(255), server_default=sa.text("'basic'"), default="basic") - status: Mapped[str] = mapped_column(String(255), server_default=sa.text("'normal'"), default="normal") + status: Mapped[TenantStatus] = mapped_column( + EnumText(TenantStatus, length=255), server_default=sa.text("'normal'"), default=TenantStatus.NORMAL + ) custom_config: Mapped[str | None] = mapped_column(LongText, default=None) created_at: Mapped[datetime] = mapped_column( DateTime, server_default=func.current_timestamp(), nullable=False, init=False @@ -291,7 +288,9 @@ class TenantAccountJoin(TypeBase): tenant_id: Mapped[str] = mapped_column(StringUUID) account_id: Mapped[str] = mapped_column(StringUUID) current: Mapped[bool] = mapped_column(sa.Boolean, server_default=sa.text("false"), default=False) - role: Mapped[str] = mapped_column(String(16), server_default="normal", default="normal") + role: Mapped[TenantAccountRole] = mapped_column( + EnumText(TenantAccountRole, length=16), server_default="normal", default=TenantAccountRole.NORMAL + ) invited_by: Mapped[str | None] = mapped_column(StringUUID, nullable=True, default=None) created_at: Mapped[datetime] = mapped_column( DateTime, server_default=func.current_timestamp(), nullable=False, init=False @@ -324,6 +323,11 @@ class AccountIntegrate(TypeBase): ) +class InvitationCodeStatus(enum.StrEnum): + UNUSED = "unused" + USED = "used" + + class InvitationCode(TypeBase): __tablename__ = "invitation_codes" __table_args__ = ( @@ -335,7 +339,11 @@ class InvitationCode(TypeBase): id: Mapped[int] = mapped_column(sa.Integer, init=False) batch: Mapped[str] = mapped_column(String(255)) code: Mapped[str] = mapped_column(String(32)) - status: Mapped[str] = mapped_column(String(16), server_default=sa.text("'unused'"), default="unused") + status: Mapped[InvitationCodeStatus] = mapped_column( + EnumText(InvitationCodeStatus, length=16), + server_default=sa.text("'unused'"), + default=InvitationCodeStatus.UNUSED, + ) used_at: Mapped[datetime | None] = mapped_column(DateTime, default=None) used_by_tenant_id: Mapped[str | None] = mapped_column(StringUUID, default=None) used_by_account_id: Mapped[str | None] = mapped_column(StringUUID, default=None) @@ -367,10 +375,13 @@ class TenantPluginPermission(TypeBase): ) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) install_permission: Mapped[InstallPermission] = mapped_column( - String(16), nullable=False, server_default="everyone", default=InstallPermission.EVERYONE + EnumText(InstallPermission, length=16), + nullable=False, + server_default="everyone", + default=InstallPermission.EVERYONE, ) debug_permission: Mapped[DebugPermission] = mapped_column( - String(16), nullable=False, server_default="noone", default=DebugPermission.NOBODY + EnumText(DebugPermission, length=16), nullable=False, server_default="noone", default=DebugPermission.NOBODY ) @@ -396,10 +407,13 @@ class TenantPluginAutoUpgradeStrategy(TypeBase): ) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) strategy_setting: Mapped[StrategySetting] = mapped_column( - 
String(16), nullable=False, server_default="fix_only", default=StrategySetting.FIX_ONLY + EnumText(StrategySetting, length=16), + nullable=False, + server_default="fix_only", + default=StrategySetting.FIX_ONLY, ) upgrade_mode: Mapped[UpgradeMode] = mapped_column( - String(16), nullable=False, server_default="exclude", default=UpgradeMode.EXCLUDE + EnumText(UpgradeMode, length=16), nullable=False, server_default="exclude", default=UpgradeMode.EXCLUDE ) exclude_plugins: Mapped[list[str]] = mapped_column(sa.JSON, nullable=False, default_factory=list) include_plugins: Mapped[list[str]] = mapped_column(sa.JSON, nullable=False, default_factory=list) diff --git a/api/models/dataset.py b/api/models/dataset.py index 4ef39fcde1..b3fa11a58c 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -30,8 +30,9 @@ from services.entities.knowledge_entities.knowledge_entities import ParentMode, from .account import Account from .base import Base, TypeBase from .engine import db +from .enums import CreatorUserRole from .model import App, Tag, TagBinding, UploadFile -from .types import AdjustedJSON, BinaryData, LongText, StringUUID, adjusted_json_index +from .types import AdjustedJSON, BinaryData, EnumText, LongText, StringUUID, adjusted_json_index logger = logging.getLogger(__name__) @@ -59,7 +60,11 @@ class Dataset(Base): name: Mapped[str] = mapped_column(String(255)) description = mapped_column(LongText, nullable=True) provider: Mapped[str] = mapped_column(String(255), server_default=sa.text("'vendor'")) - permission: Mapped[str] = mapped_column(String(255), server_default=sa.text("'only_me'")) + permission: Mapped[DatasetPermissionEnum] = mapped_column( + EnumText(DatasetPermissionEnum, length=255), + server_default=sa.text("'only_me'"), + default=DatasetPermissionEnum.ONLY_ME, + ) data_source_type = mapped_column(String(255)) indexing_technique: Mapped[str | None] = mapped_column(String(255)) index_struct = mapped_column(LongText, nullable=True) @@ -1003,7 +1008,7 @@ class DatasetQuery(TypeBase): content: Mapped[str] = mapped_column(LongText, nullable=False) source: Mapped[str] = mapped_column(String(255), nullable=False) source_app_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True) - created_by_role: Mapped[str] = mapped_column(String(255), nullable=False) + created_by_role: Mapped[CreatorUserRole] = mapped_column(EnumText(CreatorUserRole, length=255), nullable=False) created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) created_at: Mapped[datetime] = mapped_column( DateTime, nullable=False, server_default=sa.func.current_timestamp(), init=False diff --git a/api/models/enums.py b/api/models/enums.py index ed6236209f..eb478fe02c 100644 --- a/api/models/enums.py +++ b/api/models/enums.py @@ -1,6 +1,10 @@ from enum import StrEnum -from dify_graph.enums import NodeType +from core.trigger.constants import ( + TRIGGER_PLUGIN_NODE_TYPE, + TRIGGER_SCHEDULE_NODE_TYPE, + TRIGGER_WEBHOOK_NODE_TYPE, +) class CreatorUserRole(StrEnum): @@ -66,9 +70,29 @@ class AppTriggerStatus(StrEnum): class AppTriggerType(StrEnum): """App Trigger Type Enum""" - TRIGGER_WEBHOOK = NodeType.TRIGGER_WEBHOOK.value - TRIGGER_SCHEDULE = NodeType.TRIGGER_SCHEDULE.value - TRIGGER_PLUGIN = NodeType.TRIGGER_PLUGIN.value + TRIGGER_WEBHOOK = TRIGGER_WEBHOOK_NODE_TYPE + TRIGGER_SCHEDULE = TRIGGER_SCHEDULE_NODE_TYPE + TRIGGER_PLUGIN = TRIGGER_PLUGIN_NODE_TYPE # for backward compatibility UNKNOWN = "unknown" + + +class AppStatus(StrEnum): + """App Status Enum""" + + NORMAL = "normal" + + +class 
AppMCPServerStatus(StrEnum): + """AppMCPServer Status Enum""" + + NORMAL = "normal" + ACTIVE = "active" + INACTIVE = "inactive" + + +class ConversationStatus(StrEnum): + """Conversation Status Enum""" + + NORMAL = "normal" diff --git a/api/models/human_input.py b/api/models/human_input.py index 709cc8fe61..48e7fbb9ea 100644 --- a/api/models/human_input.py +++ b/api/models/human_input.py @@ -30,6 +30,15 @@ def _generate_token() -> str: class HumanInputForm(DefaultFieldsMixin, Base): __tablename__ = "human_input_forms" + __table_args__ = ( + sa.Index( + "human_input_forms_workflow_run_id_node_id_idx", + "workflow_run_id", + "node_id", + ), + sa.Index("human_input_forms_status_expiration_time_idx", "status", "expiration_time"), + sa.Index("human_input_forms_status_created_at_idx", "status", "created_at"), + ) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) app_id: Mapped[str] = mapped_column(StringUUID, nullable=False) @@ -84,6 +93,12 @@ class HumanInputForm(DefaultFieldsMixin, Base): class HumanInputDelivery(DefaultFieldsMixin, Base): __tablename__ = "human_input_form_deliveries" + __table_args__ = ( + sa.Index( + None, + "form_id", + ), + ) form_id: Mapped[str] = mapped_column( StringUUID, @@ -181,6 +196,10 @@ RecipientPayload = Annotated[ class HumanInputFormRecipient(DefaultFieldsMixin, Base): __tablename__ = "human_input_form_recipients" + __table_args__ = ( + sa.Index(None, "form_id"), + sa.Index(None, "delivery_id"), + ) form_id: Mapped[str] = mapped_column( StringUUID, diff --git a/api/models/model.py b/api/models/model.py index ed0614c195..2e747df2c7 100644 --- a/api/models/model.py +++ b/api/models/model.py @@ -29,9 +29,9 @@ from libs.uuid_utils import uuidv7 from .account import Account, Tenant from .base import Base, TypeBase, gen_uuidv4_string from .engine import db -from .enums import CreatorUserRole +from .enums import AppMCPServerStatus, AppStatus, ConversationStatus, CreatorUserRole, MessageStatus from .provider_ids import GenericProviderID -from .types import LongText, StringUUID +from .types import EnumText, LongText, StringUUID if TYPE_CHECKING: from .workflow import Workflow @@ -337,13 +337,15 @@ class App(Base): tenant_id: Mapped[str] = mapped_column(StringUUID) name: Mapped[str] = mapped_column(String(255)) description: Mapped[str] = mapped_column(LongText, default=sa.text("''")) - mode: Mapped[str] = mapped_column(String(255)) - icon_type: Mapped[str | None] = mapped_column(String(255)) # image, emoji, link + mode: Mapped[AppMode] = mapped_column(EnumText(AppMode, length=255)) + icon_type: Mapped[IconType | None] = mapped_column(EnumText(IconType, length=255)) icon = mapped_column(String(255)) icon_background: Mapped[str | None] = mapped_column(String(255)) app_model_config_id = mapped_column(StringUUID, nullable=True) workflow_id = mapped_column(StringUUID, nullable=True) - status: Mapped[str] = mapped_column(String(255), server_default=sa.text("'normal'")) + status: Mapped[AppStatus] = mapped_column( + EnumText(AppStatus, length=255), server_default=sa.text("'normal'"), default=AppStatus.NORMAL + ) enable_site: Mapped[bool] = mapped_column(sa.Boolean) enable_api: Mapped[bool] = mapped_column(sa.Boolean) api_rpm: Mapped[int] = mapped_column(sa.Integer, server_default=sa.text("0")) @@ -1000,14 +1002,16 @@ class Conversation(Base): model_provider = mapped_column(String(255), nullable=True) override_model_configs = mapped_column(LongText) model_id = mapped_column(String(255), nullable=True) - mode: Mapped[str] = mapped_column(String(255)) + mode: 
Mapped[AppMode] = mapped_column(EnumText(AppMode, length=255)) name: Mapped[str] = mapped_column(String(255), nullable=False) summary = mapped_column(LongText) _inputs: Mapped[dict[str, Any]] = mapped_column("inputs", sa.JSON) introduction = mapped_column(LongText) system_instruction = mapped_column(LongText) system_instruction_tokens: Mapped[int] = mapped_column(sa.Integer, nullable=False, server_default=sa.text("0")) - status: Mapped[str] = mapped_column(String(255), nullable=False) + status: Mapped[ConversationStatus] = mapped_column( + EnumText(ConversationStatus, length=255), nullable=False, default=ConversationStatus.NORMAL + ) # The `invoke_from` records how the conversation is created. # @@ -1351,7 +1355,12 @@ class Message(Base): provider_response_latency: Mapped[float] = mapped_column(sa.Float, nullable=False, server_default=sa.text("0")) total_price: Mapped[Decimal | None] = mapped_column(sa.Numeric(10, 7)) currency: Mapped[str] = mapped_column(String(255), nullable=False) - status: Mapped[str] = mapped_column(String(255), nullable=False, server_default=sa.text("'normal'")) + status: Mapped[MessageStatus] = mapped_column( + EnumText(MessageStatus, length=255), + nullable=False, + server_default=sa.text("'normal'"), + default=MessageStatus.NORMAL, + ) error: Mapped[str | None] = mapped_column(LongText) message_metadata: Mapped[str | None] = mapped_column(LongText) invoke_from: Mapped[str | None] = mapped_column(String(255), nullable=True) @@ -1364,7 +1373,7 @@ class Message(Base): ) agent_based: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false")) workflow_run_id: Mapped[str | None] = mapped_column(StringUUID) - app_mode: Mapped[str | None] = mapped_column(String(255), nullable=True) + app_mode: Mapped[AppMode | None] = mapped_column(EnumText(AppMode, length=255), nullable=True) @property def inputs(self) -> dict[str, Any]: @@ -1766,8 +1775,10 @@ class MessageFile(TypeBase): ) message_id: Mapped[str] = mapped_column(StringUUID, nullable=False) type: Mapped[str] = mapped_column(String(255), nullable=False) - transfer_method: Mapped[FileTransferMethod] = mapped_column(String(255), nullable=False) - created_by_role: Mapped[CreatorUserRole] = mapped_column(String(255), nullable=False) + transfer_method: Mapped[FileTransferMethod] = mapped_column( + EnumText(FileTransferMethod, length=255), nullable=False + ) + created_by_role: Mapped[CreatorUserRole] = mapped_column(EnumText(CreatorUserRole, length=255), nullable=False) created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) belongs_to: Mapped[Literal["user", "assistant"] | None] = mapped_column(String(255), nullable=True, default=None) url: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None) @@ -1976,7 +1987,9 @@ class AppMCPServer(TypeBase): name: Mapped[str] = mapped_column(String(255), nullable=False) description: Mapped[str] = mapped_column(String(255), nullable=False) server_code: Mapped[str] = mapped_column(String(255), nullable=False) - status: Mapped[str] = mapped_column(String(255), nullable=False, server_default=sa.text("'normal'")) + status: Mapped[AppMCPServerStatus] = mapped_column( + EnumText(AppMCPServerStatus, length=255), nullable=False, server_default=sa.text("'normal'") + ) parameters: Mapped[str] = mapped_column(LongText, nullable=False) created_at: Mapped[datetime] = mapped_column( @@ -2015,7 +2028,7 @@ class Site(Base): id = mapped_column(StringUUID, default=lambda: str(uuid4())) app_id = mapped_column(StringUUID, nullable=False) title: 
Mapped[str] = mapped_column(String(255), nullable=False) - icon_type = mapped_column(String(255), nullable=True) + icon_type: Mapped[IconType | None] = mapped_column(EnumText(IconType, length=255), nullable=True) icon = mapped_column(String(255)) icon_background = mapped_column(String(255)) description = mapped_column(LongText) @@ -2030,7 +2043,9 @@ class Site(Base): customize_domain = mapped_column(String(255)) customize_token_strategy: Mapped[str] = mapped_column(String(255), nullable=False) prompt_public: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false")) - status = mapped_column(String(255), nullable=False, server_default=sa.text("'normal'")) + status: Mapped[AppStatus] = mapped_column( + EnumText(AppStatus, length=255), nullable=False, server_default=sa.text("'normal'"), default=AppStatus.NORMAL + ) created_by = mapped_column(StringUUID, nullable=True) created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) updated_by = mapped_column(StringUUID, nullable=True) @@ -2110,7 +2125,12 @@ class UploadFile(Base): # The `created_by_role` field indicates whether the file was created by an `Account` or an `EndUser`. # Its value is derived from the `CreatorUserRole` enumeration. - created_by_role: Mapped[str] = mapped_column(String(255), nullable=False, server_default=sa.text("'account'")) + created_by_role: Mapped[CreatorUserRole] = mapped_column( + EnumText(CreatorUserRole, length=255), + nullable=False, + server_default=sa.text("'account'"), + default=CreatorUserRole.ACCOUNT, + ) # The `created_by` field stores the ID of the entity that created this upload file. # @@ -2163,7 +2183,7 @@ class UploadFile(Base): self.size = size self.extension = extension self.mime_type = mime_type - self.created_by_role = created_by_role.value + self.created_by_role = created_by_role self.created_by = created_by self.created_at = created_at self.used = used @@ -2226,7 +2246,7 @@ class MessageAgentThought(TypeBase): ) message_id: Mapped[str] = mapped_column(StringUUID, nullable=False) position: Mapped[int] = mapped_column(sa.Integer, nullable=False) - created_by_role: Mapped[str] = mapped_column(String(255), nullable=False) + created_by_role: Mapped[CreatorUserRole] = mapped_column(EnumText(CreatorUserRole, length=255), nullable=False) created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) message_chain_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True, default=None) thought: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None) diff --git a/api/models/provider.py b/api/models/provider.py index 6175a3ae88..18a0fe92c8 100644 --- a/api/models/provider.py +++ b/api/models/provider.py @@ -13,7 +13,7 @@ from libs.uuid_utils import uuidv7 from .base import TypeBase from .engine import db -from .types import LongText, StringUUID +from .types import EnumText, LongText, StringUUID class ProviderType(StrEnum): @@ -69,8 +69,8 @@ class Provider(TypeBase): ) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) provider_name: Mapped[str] = mapped_column(String(255), nullable=False) - provider_type: Mapped[str] = mapped_column( - String(40), nullable=False, server_default=text("'custom'"), default="custom" + provider_type: Mapped[ProviderType] = mapped_column( + EnumText(ProviderType, length=40), nullable=False, server_default=text("'custom'"), default=ProviderType.CUSTOM ) is_valid: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=text("false"), default=False) 
last_used: Mapped[datetime | None] = mapped_column(DateTime, nullable=True, init=False) diff --git a/api/models/trigger.py b/api/models/trigger.py index 209345eb84..43d7fc5b24 100644 --- a/api/models/trigger.py +++ b/api/models/trigger.py @@ -227,7 +227,7 @@ class WorkflowTriggerLog(TypeBase): queue_name: Mapped[str] = mapped_column(String(100), nullable=False) celery_task_id: Mapped[str | None] = mapped_column(String(255), nullable=True) - created_by_role: Mapped[str] = mapped_column(String(255), nullable=False) + created_by_role: Mapped[CreatorUserRole] = mapped_column(EnumText(CreatorUserRole, length=255), nullable=False) created_by: Mapped[str] = mapped_column(String(255), nullable=False) retry_count: Mapped[int] = mapped_column(sa.Integer, nullable=False, default=0) elapsed_time: Mapped[float | None] = mapped_column(sa.Float, nullable=True, default=None) diff --git a/api/models/web.py b/api/models/web.py index 5f6a7b40bf..a1cc11c375 100644 --- a/api/models/web.py +++ b/api/models/web.py @@ -2,13 +2,14 @@ from datetime import datetime from uuid import uuid4 import sqlalchemy as sa -from sqlalchemy import DateTime, String, func +from sqlalchemy import DateTime, func from sqlalchemy.orm import Mapped, mapped_column from .base import TypeBase from .engine import db +from .enums import CreatorUserRole from .model import Message -from .types import StringUUID +from .types import EnumText, StringUUID class SavedMessage(TypeBase): @@ -24,7 +25,9 @@ class SavedMessage(TypeBase): ) app_id: Mapped[str] = mapped_column(StringUUID, nullable=False) message_id: Mapped[str] = mapped_column(StringUUID, nullable=False) - created_by_role: Mapped[str] = mapped_column(String(255), nullable=False, server_default=sa.text("'end_user'")) + created_by_role: Mapped[CreatorUserRole] = mapped_column( + EnumText(CreatorUserRole, length=255), nullable=False, server_default=sa.text("'end_user'") + ) created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) created_at: Mapped[datetime] = mapped_column( DateTime, @@ -50,8 +53,8 @@ class PinnedConversation(TypeBase): ) app_id: Mapped[str] = mapped_column(StringUUID, nullable=False) conversation_id: Mapped[str] = mapped_column(StringUUID) - created_by_role: Mapped[str] = mapped_column( - String(255), + created_by_role: Mapped[CreatorUserRole] = mapped_column( + EnumText(CreatorUserRole, length=255), nullable=False, server_default=sa.text("'end_user'"), ) diff --git a/api/models/workflow.py b/api/models/workflow.py index 21b899eeda..fdb8de0653 100644 --- a/api/models/workflow.py +++ b/api/models/workflow.py @@ -22,13 +22,14 @@ from sqlalchemy import ( from sqlalchemy.orm import Mapped, declared_attr, mapped_column from typing_extensions import deprecated +from core.trigger.constants import TRIGGER_INFO_METADATA_KEY, TRIGGER_PLUGIN_NODE_TYPE from dify_graph.constants import ( CONVERSATION_VARIABLE_NODE_ID, SYSTEM_VARIABLE_NODE_ID, ) from dify_graph.entities.graph_config import NodeConfigDict, NodeConfigDictAdapter from dify_graph.entities.pause_reason import HumanInputRequired, PauseReason, PauseReasonType, SchedulingPause -from dify_graph.enums import NodeType, WorkflowExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, NodeType, WorkflowExecutionStatus from dify_graph.file.constants import maybe_file_object from dify_graph.file.models import File from dify_graph.variables import utils as variable_utils @@ -53,7 +54,7 @@ from libs import helper from .account import Account from .base import Base, DefaultFieldsMixin, TypeBase from .engine import db 
-from .enums import CreatorUserRole, DraftVariableType, ExecutionOffLoadType +from .enums import CreatorUserRole, DraftVariableType, ExecutionOffLoadType, WorkflowRunTriggeredFrom from .types import EnumText, LongText, StringUUID logger = logging.getLogger(__name__) @@ -141,7 +142,7 @@ class Workflow(Base): # bug id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4())) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) app_id: Mapped[str] = mapped_column(StringUUID, nullable=False) - type: Mapped[str] = mapped_column(String(255), nullable=False) + type: Mapped[WorkflowType] = mapped_column(EnumText(WorkflowType, length=255), nullable=False) version: Mapped[str] = mapped_column(String(255), nullable=False) marked_name: Mapped[str] = mapped_column(String(255), default="", server_default="") marked_comment: Mapped[str] = mapped_column(String(255), default="", server_default="") @@ -188,7 +189,7 @@ class Workflow(Base): # bug workflow.id = str(uuid4()) workflow.tenant_id = tenant_id workflow.app_id = app_id - workflow.type = type + workflow.type = WorkflowType(type) workflow.version = version workflow.graph = graph workflow.features = features @@ -269,12 +270,12 @@ class Workflow(Base): # bug loop_id = node_config.get("loop_id") if loop_id is None: raise _InvalidGraphDefinitionError("invalid graph") - return NodeType.LOOP, loop_id + return BuiltinNodeTypes.LOOP, loop_id elif in_iteration: iteration_id = node_config.get("iteration_id") if iteration_id is None: raise _InvalidGraphDefinitionError("invalid graph") - return NodeType.ITERATION, iteration_id + return BuiltinNodeTypes.ITERATION, iteration_id else: return None @@ -353,9 +354,7 @@ class Workflow(Base): # bug if specific_node_type: yield from ( - (node["id"], node["data"]) - for node in graph_dict["nodes"] - if node["data"]["type"] == specific_node_type.value + (node["id"], node["data"]) for node in graph_dict["nodes"] if node["data"]["type"] == specific_node_type ) else: yield from ((node["id"], node["data"]) for node in graph_dict["nodes"]) @@ -608,8 +607,8 @@ class WorkflowRun(Base): app_id: Mapped[str] = mapped_column(StringUUID) workflow_id: Mapped[str] = mapped_column(StringUUID) - type: Mapped[str] = mapped_column(String(255)) - triggered_from: Mapped[str] = mapped_column(String(255)) + type: Mapped[WorkflowType] = mapped_column(EnumText(WorkflowType, length=255)) + triggered_from: Mapped[WorkflowRunTriggeredFrom] = mapped_column(EnumText(WorkflowRunTriggeredFrom, length=255)) version: Mapped[str] = mapped_column(String(255)) graph: Mapped[str | None] = mapped_column(LongText) inputs: Mapped[str | None] = mapped_column(LongText) @@ -830,7 +829,9 @@ class WorkflowNodeExecutionModel(Base): # This model is expected to have `offlo tenant_id: Mapped[str] = mapped_column(StringUUID) app_id: Mapped[str] = mapped_column(StringUUID) workflow_id: Mapped[str] = mapped_column(StringUUID) - triggered_from: Mapped[str] = mapped_column(String(255)) + triggered_from: Mapped[WorkflowNodeExecutionTriggeredFrom] = mapped_column( + EnumText(WorkflowNodeExecutionTriggeredFrom, length=255) + ) workflow_run_id: Mapped[str | None] = mapped_column(StringUUID) index: Mapped[int] = mapped_column(sa.Integer) predecessor_node_id: Mapped[str | None] = mapped_column(String(255)) @@ -846,7 +847,7 @@ class WorkflowNodeExecutionModel(Base): # This model is expected to have `offlo elapsed_time: Mapped[float] = mapped_column(sa.Float, server_default=sa.text("0")) execution_metadata: Mapped[str | None] = mapped_column(LongText) 
created_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.current_timestamp()) - created_by_role: Mapped[str] = mapped_column(String(255)) + created_by_role: Mapped[CreatorUserRole] = mapped_column(EnumText(CreatorUserRole, length=255)) created_by: Mapped[str] = mapped_column(StringUUID) finished_at: Mapped[datetime | None] = mapped_column(DateTime) @@ -921,18 +922,18 @@ class WorkflowNodeExecutionModel(Base): # This model is expected to have `offlo extras: dict[str, Any] = {} execution_metadata = self.execution_metadata_dict if execution_metadata: - if self.node_type == NodeType.TOOL and "tool_info" in execution_metadata: + if self.node_type == BuiltinNodeTypes.TOOL and "tool_info" in execution_metadata: tool_info: dict[str, Any] = execution_metadata["tool_info"] extras["icon"] = ToolManager.get_tool_icon( tenant_id=self.tenant_id, provider_type=tool_info["provider_type"], provider_id=tool_info["provider_id"], ) - elif self.node_type == NodeType.DATASOURCE and "datasource_info" in execution_metadata: + elif self.node_type == BuiltinNodeTypes.DATASOURCE and "datasource_info" in execution_metadata: datasource_info = execution_metadata["datasource_info"] extras["icon"] = datasource_info.get("icon") - elif self.node_type == NodeType.TRIGGER_PLUGIN and "trigger_info" in execution_metadata: - trigger_info = execution_metadata["trigger_info"] or {} + elif self.node_type == TRIGGER_PLUGIN_NODE_TYPE and TRIGGER_INFO_METADATA_KEY in execution_metadata: + trigger_info = execution_metadata[TRIGGER_INFO_METADATA_KEY] or {} provider_id = trigger_info.get("provider_id") if provider_id: extras["icon"] = TriggerManager.get_trigger_plugin_icon( @@ -1130,7 +1131,7 @@ class WorkflowAppLog(TypeBase): workflow_id: Mapped[str] = mapped_column(StringUUID, nullable=False) workflow_run_id: Mapped[str] = mapped_column(StringUUID) created_from: Mapped[str] = mapped_column(String(255), nullable=False) - created_by_role: Mapped[str] = mapped_column(String(255), nullable=False) + created_by_role: Mapped[CreatorUserRole] = mapped_column(EnumText(CreatorUserRole, length=255), nullable=False) created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) created_at: Mapped[datetime] = mapped_column( DateTime, nullable=False, server_default=func.current_timestamp(), init=False @@ -1204,7 +1205,7 @@ class WorkflowArchiveLog(TypeBase): app_id: Mapped[str] = mapped_column(StringUUID, nullable=False) workflow_id: Mapped[str] = mapped_column(StringUUID, nullable=False) workflow_run_id: Mapped[str] = mapped_column(StringUUID, nullable=False) - created_by_role: Mapped[str] = mapped_column(String(255), nullable=False) + created_by_role: Mapped[CreatorUserRole] = mapped_column(EnumText(CreatorUserRole, length=255), nullable=False) created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) log_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True) @@ -1213,7 +1214,9 @@ class WorkflowArchiveLog(TypeBase): run_version: Mapped[str] = mapped_column(String(255), nullable=False) run_status: Mapped[str] = mapped_column(String(255), nullable=False) - run_triggered_from: Mapped[str] = mapped_column(String(255), nullable=False) + run_triggered_from: Mapped[WorkflowRunTriggeredFrom] = mapped_column( + EnumText(WorkflowRunTriggeredFrom, length=255), nullable=False + ) run_error: Mapped[str | None] = mapped_column(LongText, nullable=True) run_elapsed_time: Mapped[float] = mapped_column(sa.Float, nullable=False, server_default=sa.text("0")) run_total_tokens: Mapped[int] = mapped_column(sa.BigInteger, 
server_default=sa.text("0")) diff --git a/api/pyproject.toml b/api/pyproject.toml index b3202b49c4..ecd3e4a3c9 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -6,12 +6,12 @@ requires-python = ">=3.11,<3.13" dependencies = [ "aliyun-log-python-sdk~=0.9.37", "arize-phoenix-otel~=0.15.0", - "azure-identity==1.25.2", - "beautifulsoup4==4.12.2", - "boto3==1.42.65", + "azure-identity==1.25.3", + "beautifulsoup4==4.14.3", + "boto3==1.42.68", "bs4~=0.0.1", "cachetools~=5.3.0", - "celery~=5.5.2", + "celery~=5.6.2", "charset-normalizer>=3.4.4", "flask~=3.1.2", "flask-compress>=1.17,<1.24", @@ -35,12 +35,12 @@ dependencies = [ "jsonschema>=4.25.1", "langfuse~=2.51.3", "langsmith~=0.7.16", - "markdown~=3.8.1", + "markdown~=3.10.2", "mlflow-skinny>=3.0.0", "numpy~=1.26.4", "openpyxl~=3.1.5", "opik~=1.10.37", - "litellm==1.82.1", # Pinned to avoid madoka dependency issue + "litellm==1.82.2", # Pinned to avoid madoka dependency issue "opentelemetry-api==1.28.0", "opentelemetry-distro==0.49b0", "opentelemetry-exporter-otlp==1.28.0", @@ -58,30 +58,30 @@ dependencies = [ "opentelemetry-sdk==1.28.0", "opentelemetry-semantic-conventions==0.49b0", "opentelemetry-util-http==0.49b0", - "pandas[excel,output-formatting,performance]~=2.2.2", + "pandas[excel,output-formatting,performance]~=3.0.1", "psycogreen~=1.0.2", "psycopg2-binary~=2.9.6", "pycryptodome==3.23.0", "pydantic~=2.12.5", "pydantic-extra-types~=2.11.0", "pydantic-settings~=2.13.1", - "pyjwt~=2.11.0", - "pypdfium2==5.2.0", + "pyjwt~=2.12.0", + "pypdfium2==5.6.0", "python-docx~=1.2.0", - "python-dotenv==1.0.1", + "python-dotenv==1.2.2", "pyyaml~=6.0.1", "readabilipy~=0.3.0", "redis[hiredis]~=7.3.0", - "resend~=2.9.0", - "sentry-sdk[flask]~=2.28.0", + "resend~=2.23.0", + "sentry-sdk[flask]~=2.54.0", "sqlalchemy~=2.0.29", - "starlette==0.49.1", + "starlette==0.52.1", "tiktoken~=0.12.0", "transformers~=5.3.0", - "unstructured[docx,epub,md,ppt,pptx]~=0.18.18", - "yarl~=1.18.3", + "unstructured[docx,epub,md,ppt,pptx]~=0.21.5", + "yarl~=1.23.0", "webvtt-py~=0.5.1", - "sseclient-py~=1.8.0", + "sseclient-py~=1.9.0", "httpx-sse~=0.4.0", "sendgrid~=6.12.3", "flask-restx~=1.3.2", @@ -111,7 +111,7 @@ package = false dev = [ "coverage~=7.13.4", "dotenv-linter~=0.7.0", - "faker~=40.8.0", + "faker~=40.11.0", "lxml-stubs~=0.5.1", "basedpyright~=1.38.2", "ruff~=0.15.5", @@ -120,7 +120,7 @@ dev = [ "pytest-cov~=7.0.0", "pytest-env~=1.1.3", "pytest-mock~=3.15.1", - "testcontainers~=4.13.2", + "testcontainers~=4.14.1", "types-aiofiles~=25.1.0", "types-beautifulsoup4~=4.12.0", "types-cachetools~=6.2.0", @@ -226,6 +226,7 @@ vdb = [ "xinference-client~=1.2.2", "mo-vector~=0.1.13", "mysql-connector-python>=9.3.0", + "holo-search-sdk>=0.4.1", ] [tool.mypy] diff --git a/api/pyrefly-local-excludes.txt b/api/pyrefly-local-excludes.txt index d3b2ede745..c044824a82 100644 --- a/api/pyrefly-local-excludes.txt +++ b/api/pyrefly-local-excludes.txt @@ -123,7 +123,7 @@ dify_graph/nodes/human_input/human_input_node.py dify_graph/nodes/if_else/if_else_node.py dify_graph/nodes/iteration/iteration_node.py dify_graph/nodes/knowledge_index/knowledge_index_node.py -dify_graph/nodes/knowledge_retrieval/knowledge_retrieval_node.py +core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py dify_graph/nodes/list_operator/node.py dify_graph/nodes/llm/node.py dify_graph/nodes/loop/loop_node.py diff --git a/api/pyrightconfig.json b/api/pyrightconfig.json index 007c49ddb0..48271aab61 100644 --- a/api/pyrightconfig.json +++ b/api/pyrightconfig.json @@ -35,7 +35,8 @@ "tos", "gmpy2", 
"sendgrid", - "sendgrid.helpers.mail" + "sendgrid.helpers.mail", + "holo_search_sdk.types" ], "reportUnknownMemberType": "hint", "reportUnknownParameterType": "hint", diff --git a/api/services/account_service.py b/api/services/account_service.py index f0eac2a522..bd520f54cf 100644 --- a/api/services/account_service.py +++ b/api/services/account_service.py @@ -1089,9 +1089,9 @@ class TenantService: ta = db.session.query(TenantAccountJoin).filter_by(tenant_id=tenant.id, account_id=account.id).first() if ta: - ta.role = role + ta.role = TenantAccountRole(role) else: - ta = TenantAccountJoin(tenant_id=tenant.id, account_id=account.id, role=role) + ta = TenantAccountJoin(tenant_id=tenant.id, account_id=account.id, role=TenantAccountRole(role)) db.session.add(ta) db.session.commit() @@ -1319,10 +1319,10 @@ class TenantService: db.session.query(TenantAccountJoin).filter_by(tenant_id=tenant.id, role="owner").first() ) if current_owner_join: - current_owner_join.role = "admin" + current_owner_join.role = TenantAccountRole.ADMIN # Update the role of the target member - target_member_join.role = new_role + target_member_join.role = TenantAccountRole(new_role) db.session.commit() @staticmethod diff --git a/api/services/app_dsl_service.py b/api/services/app_dsl_service.py index 06f4ccb90e..43bf6374b1 100644 --- a/api/services/app_dsl_service.py +++ b/api/services/app_dsl_service.py @@ -20,14 +20,19 @@ from sqlalchemy.orm import Session from configs import dify_config from core.helper import ssrf_proxy from core.plugin.entities.plugin import PluginDependency -from dify_graph.enums import NodeType +from core.trigger.constants import ( + TRIGGER_PLUGIN_NODE_TYPE, + TRIGGER_SCHEDULE_NODE_TYPE, + TRIGGER_WEBHOOK_NODE_TYPE, +) +from core.workflow.nodes.knowledge_retrieval.entities import KnowledgeRetrievalNodeData +from core.workflow.nodes.trigger_schedule.trigger_schedule_node import TriggerScheduleNode +from dify_graph.enums import BuiltinNodeTypes from dify_graph.model_runtime.utils.encoders import jsonable_encoder -from dify_graph.nodes.knowledge_retrieval.entities import KnowledgeRetrievalNodeData from dify_graph.nodes.llm.entities import LLMNodeData from dify_graph.nodes.parameter_extractor.entities import ParameterExtractorNodeData from dify_graph.nodes.question_classifier.entities import QuestionClassifierNodeData from dify_graph.nodes.tool.entities import ToolNodeData -from dify_graph.nodes.trigger_schedule.trigger_schedule_node import TriggerScheduleNode from events.app_event import app_model_config_was_updated, app_was_created from extensions.ext_redis import redis_client from factories import variable_factory @@ -429,17 +434,18 @@ class AppDslService: # Set icon type icon_type_value = icon_type or app_data.get("icon_type") + resolved_icon_type: IconType if icon_type_value in [IconType.EMOJI, IconType.IMAGE, IconType.LINK]: - icon_type = icon_type_value + resolved_icon_type = IconType(icon_type_value) else: - icon_type = IconType.EMOJI + resolved_icon_type = IconType.EMOJI icon = icon or str(app_data.get("icon", "")) if app: # Update existing app app.name = name or app_data.get("name", app.name) app.description = description or app_data.get("description", app.description) - app.icon_type = icon_type + app.icon_type = resolved_icon_type app.icon = icon app.icon_background = icon_background or app_data.get("icon_background", app.icon_background) app.updated_by = account.id @@ -452,10 +458,10 @@ class AppDslService: app = App() app.id = str(uuid4()) app.tenant_id = account.current_tenant_id - 
app.mode = app_mode.value + app.mode = app_mode app.name = name or app_data.get("name", "") app.description = description or app_data.get("description", "") - app.icon_type = icon_type + app.icon_type = resolved_icon_type app.icon = icon app.icon_background = icon_background or app_data.get("icon_background", "#FFFFFF") app.enable_site = True @@ -499,7 +505,7 @@ class AppDslService: unique_hash = None graph = workflow_data.get("graph", {}) for node in graph.get("nodes", []): - if node.get("data", {}).get("type", "") == NodeType.KNOWLEDGE_RETRIEVAL: + if node.get("data", {}).get("type", "") == BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: dataset_ids = node["data"].get("dataset_ids", []) node["data"]["dataset_ids"] = [ decrypted_id @@ -549,9 +555,12 @@ class AppDslService: "kind": "app", "app": { "name": app_model.name, - "mode": app_model.mode, - "icon": app_model.icon if app_model.icon_type == "image" else "🤖", - "icon_background": "#FFEAD5" if app_model.icon_type == "image" else app_model.icon_background, + "mode": app_model.mode.value if isinstance(app_model.mode, AppMode) else app_model.mode, + "icon": app_model.icon, + "icon_type": ( + app_model.icon_type.value if isinstance(app_model.icon_type, IconType) else app_model.icon_type + ), + "icon_background": app_model.icon_background, "description": app_model.description, "use_icon_as_answer_icon": app_model.use_icon_as_answer_icon, }, @@ -587,27 +596,27 @@ class AppDslService: if not node_data: continue data_type = node_data.get("type", "") - if data_type == NodeType.KNOWLEDGE_RETRIEVAL: + if data_type == BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: dataset_ids = node_data.get("dataset_ids", []) node_data["dataset_ids"] = [ cls.encrypt_dataset_id(dataset_id=dataset_id, tenant_id=app_model.tenant_id) for dataset_id in dataset_ids ] # filter credential id from tool node - if not include_secret and data_type == NodeType.TOOL: + if not include_secret and data_type == BuiltinNodeTypes.TOOL: node_data.pop("credential_id", None) # filter credential id from agent node - if not include_secret and data_type == NodeType.AGENT: + if not include_secret and data_type == BuiltinNodeTypes.AGENT: for tool in node_data.get("agent_parameters", {}).get("tools", {}).get("value", []): tool.pop("credential_id", None) - if data_type == NodeType.TRIGGER_SCHEDULE.value: + if data_type == TRIGGER_SCHEDULE_NODE_TYPE: # override the config with the default config node_data["config"] = TriggerScheduleNode.get_default_config()["config"] - if data_type == NodeType.TRIGGER_WEBHOOK.value: + if data_type == TRIGGER_WEBHOOK_NODE_TYPE: # clear the webhook_url node_data["webhook_url"] = "" node_data["webhook_debug_url"] = "" - if data_type == NodeType.TRIGGER_PLUGIN.value: + if data_type == TRIGGER_PLUGIN_NODE_TYPE: # clear the subscription_id node_data["subscription_id"] = "" @@ -671,31 +680,31 @@ class AppDslService: try: typ = node.get("data", {}).get("type") match typ: - case NodeType.TOOL: + case BuiltinNodeTypes.TOOL: tool_entity = ToolNodeData.model_validate(node["data"]) dependencies.append( DependenciesAnalysisService.analyze_tool_dependency(tool_entity.provider_id), ) - case NodeType.LLM: + case BuiltinNodeTypes.LLM: llm_entity = LLMNodeData.model_validate(node["data"]) dependencies.append( DependenciesAnalysisService.analyze_model_provider_dependency(llm_entity.model.provider), ) - case NodeType.QUESTION_CLASSIFIER: + case BuiltinNodeTypes.QUESTION_CLASSIFIER: question_classifier_entity = QuestionClassifierNodeData.model_validate(node["data"]) dependencies.append( 
DependenciesAnalysisService.analyze_model_provider_dependency( question_classifier_entity.model.provider ), ) - case NodeType.PARAMETER_EXTRACTOR: + case BuiltinNodeTypes.PARAMETER_EXTRACTOR: parameter_extractor_entity = ParameterExtractorNodeData.model_validate(node["data"]) dependencies.append( DependenciesAnalysisService.analyze_model_provider_dependency( parameter_extractor_entity.model.provider ), ) - case NodeType.KNOWLEDGE_RETRIEVAL: + case BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: knowledge_retrieval_entity = KnowledgeRetrievalNodeData.model_validate(node["data"]) if knowledge_retrieval_entity.retrieval_mode == "multiple": if knowledge_retrieval_entity.multiple_retrieval_config: diff --git a/api/services/app_service.py b/api/services/app_service.py index aba8954f1a..c5d1479a20 100644 --- a/api/services/app_service.py +++ b/api/services/app_service.py @@ -19,7 +19,7 @@ from extensions.ext_database import db from libs.datetime_utils import naive_utc_now from libs.login import current_user from models import Account -from models.model import App, AppMode, AppModelConfig, Site +from models.model import App, AppMode, AppModelConfig, IconType, Site from models.tools import ApiToolProvider from services.billing_service import BillingService from services.enterprise.enterprise_service import EnterpriseService @@ -187,7 +187,10 @@ class AppService: for tool in agent_mode.get("tools") or []: if not isinstance(tool, dict) or len(tool.keys()) <= 3: continue - agent_tool_entity = AgentToolEntity(**cast(dict[str, Any], tool)) + typed_tool = {key: value for key, value in tool.items() if isinstance(key, str)} + if len(typed_tool) != len(tool): + continue + agent_tool_entity = AgentToolEntity.model_validate(typed_tool) # get tool try: tool_runtime = ToolManager.get_agent_tool_runtime( @@ -254,7 +257,7 @@ class AppService: assert current_user is not None app.name = args["name"] app.description = args["description"] - app.icon_type = args["icon_type"] + app.icon_type = IconType(args["icon_type"]) if args["icon_type"] else None app.icon = args["icon"] app.icon_background = args["icon_background"] app.use_icon_as_answer_icon = args.get("use_icon_as_answer_icon", False) diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index 3a7d483a9d..c527c71d7b 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -254,7 +254,7 @@ class DatasetService: dataset.embedding_model_provider = embedding_model.provider if embedding_model else None dataset.embedding_model = embedding_model.model_name if embedding_model else None dataset.retrieval_model = retrieval_model.model_dump() if retrieval_model else None - dataset.permission = permission or DatasetPermissionEnum.ONLY_ME + dataset.permission = DatasetPermissionEnum(permission) if permission else DatasetPermissionEnum.ONLY_ME dataset.provider = provider if summary_index_setting is not None: dataset.summary_index_setting = summary_index_setting diff --git a/api/services/file_service.py b/api/services/file_service.py index e08b78bf4c..ecb30faaa8 100644 --- a/api/services/file_service.py +++ b/api/services/file_service.py @@ -58,8 +58,9 @@ class FileService: # get file extension extension = os.path.splitext(filename)[1].lstrip(".").lower() - # check if filename contains invalid characters - if any(c in filename for c in ["/", "\\", ":", "*", "?", '"', "<", ">", "|"]): + # Only reject path separators here. The original filename is stored as metadata, + # while the storage key is UUID-based. 
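Side note on the relaxed filename check above: characters such as ':' or '?' were previously rejected, but since FileService keeps the original filename only as metadata and derives the storage key from a UUID, only path separators can do harm. A minimal, self-contained sketch of that invariant (the key layout and helper name are assumptions, not the actual FileService code):

import os
import uuid


def build_storage_key(filename: str) -> str:
    # Only path separators are rejected: the user-supplied filename is kept as
    # metadata, while the on-disk key below is UUID-based, so punctuation in
    # the name can no longer escape or collide in storage.
    if any(c in filename for c in ["/", "\\"]):
        raise ValueError("Filename contains invalid characters")
    extension = os.path.splitext(filename)[1].lstrip(".").lower()
    return f"upload_files/{uuid.uuid4()}.{extension}"  # hypothetical key layout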
+ if any(c in filename for c in ["/", "\\"]): raise ValueError("Filename contains invalid characters") if len(filename) > 200: diff --git a/api/services/hit_testing_service.py b/api/services/hit_testing_service.py index c00c76a826..d85b290534 100644 --- a/api/services/hit_testing_service.py +++ b/api/services/hit_testing_service.py @@ -13,6 +13,7 @@ from dify_graph.model_runtime.entities import LLMMode from extensions.ext_database import db from models import Account from models.dataset import Dataset, DatasetQuery +from models.enums import CreatorUserRole logger = logging.getLogger(__name__) @@ -98,7 +99,7 @@ class HitTestingService: content=json.dumps(dataset_queries), source="hit_testing", source_app_id=None, - created_by_role="account", + created_by_role=CreatorUserRole.ACCOUNT, created_by=account.id, ) db.session.add(dataset_query) @@ -138,7 +139,7 @@ class HitTestingService: content=query, source="hit_testing", source_app_id=None, - created_by_role="account", + created_by_role=CreatorUserRole.ACCOUNT, created_by=account.id, ) diff --git a/api/services/rag_pipeline/rag_pipeline.py b/api/services/rag_pipeline/rag_pipeline.py index ce745a4679..899a6ba378 100644 --- a/api/services/rag_pipeline/rag_pipeline.py +++ b/api/services/rag_pipeline/rag_pipeline.py @@ -36,19 +36,19 @@ from core.rag.entities.event import ( ) from core.repositories.factory import DifyCoreRepositoryFactory from core.repositories.sqlalchemy_workflow_node_execution_repository import SQLAlchemyWorkflowNodeExecutionRepository +from core.workflow.node_factory import LATEST_VERSION, get_node_type_classes_mapping from core.workflow.workflow_entry import WorkflowEntry from dify_graph.entities.workflow_node_execution import ( WorkflowNodeExecution, WorkflowNodeExecutionStatus, ) -from dify_graph.enums import ErrorStrategy, NodeType, SystemVariableKey +from dify_graph.enums import BuiltinNodeTypes, ErrorStrategy, NodeType, SystemVariableKey from dify_graph.errors import WorkflowNodeRunFailedError from dify_graph.graph_events import NodeRunFailedEvent, NodeRunSucceededEvent from dify_graph.graph_events.base import GraphNodeEventBase from dify_graph.node_events.base import NodeRunResult from dify_graph.nodes.base.node import Node from dify_graph.nodes.http_request import HTTP_REQUEST_CONFIG_FILTER_KEY, build_http_request_config -from dify_graph.nodes.node_mapping import LATEST_VERSION, NODE_TYPE_CLASSES_MAPPING from dify_graph.repositories.workflow_node_execution_repository import OrderConfig from dify_graph.runtime import VariablePool from dify_graph.system_variable import SystemVariable @@ -381,10 +381,10 @@ class RagPipelineService: """ # return default block config default_block_configs: list[dict[str, Any]] = [] - for node_type, node_class_mapping in NODE_TYPE_CLASSES_MAPPING.items(): + for node_type, node_class_mapping in get_node_type_classes_mapping().items(): node_class = node_class_mapping[LATEST_VERSION] filters = None - if node_type is NodeType.HTTP_REQUEST: + if node_type == BuiltinNodeTypes.HTTP_REQUEST: filters = { HTTP_REQUEST_CONFIG_FILTER_KEY: build_http_request_config( max_connect_timeout=dify_config.HTTP_REQUEST_MAX_CONNECT_TIMEOUT, @@ -410,14 +410,15 @@ class RagPipelineService: :return: """ node_type_enum = NodeType(node_type) + node_mapping = get_node_type_classes_mapping() # return default block config - if node_type_enum not in NODE_TYPE_CLASSES_MAPPING: + if node_type_enum not in node_mapping: return None - node_class = NODE_TYPE_CLASSES_MAPPING[node_type_enum][LATEST_VERSION] + node_class = 
node_mapping[node_type_enum][LATEST_VERSION] final_filters = dict(filters) if filters else {} - if node_type_enum is NodeType.HTTP_REQUEST and HTTP_REQUEST_CONFIG_FILTER_KEY not in final_filters: + if node_type_enum == BuiltinNodeTypes.HTTP_REQUEST and HTTP_REQUEST_CONFIG_FILTER_KEY not in final_filters: final_filters[HTTP_REQUEST_CONFIG_FILTER_KEY] = build_http_request_config( max_connect_timeout=dify_config.HTTP_REQUEST_MAX_CONNECT_TIMEOUT, max_read_timeout=dify_config.HTTP_REQUEST_MAX_READ_TIMEOUT, @@ -499,7 +500,7 @@ class RagPipelineService: session=session, app_id=pipeline.id, node_id=workflow_node_execution.node_id, - node_type=NodeType(workflow_node_execution.node_type), + node_type=workflow_node_execution.node_type, enclosing_node_id=enclosing_node_id, node_execution_id=workflow_node_execution.id, user=account, @@ -1261,7 +1262,7 @@ class RagPipelineService: session=session, app_id=pipeline.id, node_id=workflow_node_execution_db_model.node_id, - node_type=NodeType(workflow_node_execution_db_model.node_type), + node_type=workflow_node_execution_db_model.node_type, enclosing_node_id=enclosing_node_id, node_execution_id=workflow_node_execution.id, user=current_user, diff --git a/api/services/rag_pipeline/rag_pipeline_dsl_service.py b/api/services/rag_pipeline/rag_pipeline_dsl_service.py index 58bb4b7c90..c7da1afe1b 100644 --- a/api/services/rag_pipeline/rag_pipeline_dsl_service.py +++ b/api/services/rag_pipeline/rag_pipeline_dsl_service.py @@ -22,10 +22,11 @@ from sqlalchemy.orm import Session from core.helper import ssrf_proxy from core.helper.name_generator import generate_incremental_name from core.plugin.entities.plugin import PluginDependency -from dify_graph.enums import NodeType +from core.workflow.nodes.datasource.entities import DatasourceNodeData +from core.workflow.nodes.knowledge_index import KNOWLEDGE_INDEX_NODE_TYPE +from core.workflow.nodes.knowledge_retrieval.entities import KnowledgeRetrievalNodeData +from dify_graph.enums import BuiltinNodeTypes from dify_graph.model_runtime.utils.encoders import jsonable_encoder -from dify_graph.nodes.datasource.entities import DatasourceNodeData -from dify_graph.nodes.knowledge_retrieval.entities import KnowledgeRetrievalNodeData from dify_graph.nodes.llm.entities import LLMNodeData from dify_graph.nodes.parameter_extractor.entities import ParameterExtractorNodeData from dify_graph.nodes.question_classifier.entities import QuestionClassifierNodeData @@ -287,7 +288,7 @@ class RagPipelineDslService: nodes = graph.get("nodes", []) dataset_id = None for node in nodes: - if node.get("data", {}).get("type") == "knowledge-index": + if node.get("data", {}).get("type") == KNOWLEDGE_INDEX_NODE_TYPE: knowledge_configuration = KnowledgeConfiguration.model_validate(node.get("data", {})) if ( dataset @@ -428,7 +429,7 @@ class RagPipelineDslService: nodes = graph.get("nodes", []) dataset_id = None for node in nodes: - if node.get("data", {}).get("type") == "knowledge-index": + if node.get("data", {}).get("type") == KNOWLEDGE_INDEX_NODE_TYPE: knowledge_configuration = KnowledgeConfiguration.model_validate(node.get("data", {})) if not dataset: dataset = Dataset( @@ -562,7 +563,7 @@ class RagPipelineDslService: graph = workflow_data.get("graph", {}) for node in graph.get("nodes", []): - if node.get("data", {}).get("type", "") == NodeType.KNOWLEDGE_RETRIEVAL: + if node.get("data", {}).get("type", "") == BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: dataset_ids = node["data"].get("dataset_ids", []) node["data"]["dataset_ids"] = [ decrypted_id @@ -696,17 
+697,17 @@ class RagPipelineDslService: if not node_data: continue data_type = node_data.get("type", "") - if data_type == NodeType.KNOWLEDGE_RETRIEVAL: + if data_type == BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: dataset_ids = node_data.get("dataset_ids", []) node["data"]["dataset_ids"] = [ self.encrypt_dataset_id(dataset_id=dataset_id, tenant_id=pipeline.tenant_id) for dataset_id in dataset_ids ] # filter credential id from tool node - if not include_secret and data_type == NodeType.TOOL: + if not include_secret and data_type == BuiltinNodeTypes.TOOL: node_data.pop("credential_id", None) # filter credential id from agent node - if not include_secret and data_type == NodeType.AGENT: + if not include_secret and data_type == BuiltinNodeTypes.AGENT: for tool in node_data.get("agent_parameters", {}).get("tools", {}).get("value", []): tool.pop("credential_id", None) @@ -740,35 +741,35 @@ class RagPipelineDslService: try: typ = node.get("data", {}).get("type") match typ: - case NodeType.TOOL: + case BuiltinNodeTypes.TOOL: tool_entity = ToolNodeData.model_validate(node["data"]) dependencies.append( DependenciesAnalysisService.analyze_tool_dependency(tool_entity.provider_id), ) - case NodeType.DATASOURCE: + case BuiltinNodeTypes.DATASOURCE: datasource_entity = DatasourceNodeData.model_validate(node["data"]) if datasource_entity.provider_type != "local_file": dependencies.append(datasource_entity.plugin_id) - case NodeType.LLM: + case BuiltinNodeTypes.LLM: llm_entity = LLMNodeData.model_validate(node["data"]) dependencies.append( DependenciesAnalysisService.analyze_model_provider_dependency(llm_entity.model.provider), ) - case NodeType.QUESTION_CLASSIFIER: + case BuiltinNodeTypes.QUESTION_CLASSIFIER: question_classifier_entity = QuestionClassifierNodeData.model_validate(node["data"]) dependencies.append( DependenciesAnalysisService.analyze_model_provider_dependency( question_classifier_entity.model.provider ), ) - case NodeType.PARAMETER_EXTRACTOR: + case BuiltinNodeTypes.PARAMETER_EXTRACTOR: parameter_extractor_entity = ParameterExtractorNodeData.model_validate(node["data"]) dependencies.append( DependenciesAnalysisService.analyze_model_provider_dependency( parameter_extractor_entity.model.provider ), ) - case NodeType.KNOWLEDGE_INDEX: + case _ if typ == KNOWLEDGE_INDEX_NODE_TYPE: knowledge_index_entity = KnowledgeConfiguration.model_validate(node["data"]) if knowledge_index_entity.indexing_technique == "high_quality": if knowledge_index_entity.embedding_model_provider: @@ -789,7 +790,7 @@ class RagPipelineDslService: knowledge_index_entity.retrieval_model.reranking_model.reranking_provider_name ), ) - case NodeType.KNOWLEDGE_RETRIEVAL: + case BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: knowledge_retrieval_entity = KnowledgeRetrievalNodeData.model_validate(node["data"]) if knowledge_retrieval_entity.retrieval_mode == "multiple": if knowledge_retrieval_entity.multiple_retrieval_config: diff --git a/api/services/retention/workflow_run/restore_archived_workflow_run.py b/api/services/retention/workflow_run/restore_archived_workflow_run.py index d4a6e87585..64dad7ba52 100644 --- a/api/services/retention/workflow_run/restore_archived_workflow_run.py +++ b/api/services/retention/workflow_run/restore_archived_workflow_run.py @@ -358,21 +358,19 @@ class WorkflowRunRestore: self, model: type[DeclarativeBase] | Any, ) -> tuple[set[str], set[str], set[str]]: - columns = list(model.__table__.columns) + table = model.__table__ + columns = list(table.columns) + autoincrement_column = getattr(table, "autoincrement_column", 
None) + + def has_insert_default(column: Any) -> bool: + # SQLAlchemy may set column.autoincrement to "auto" on non-PK columns. + # Only treat the resolved autoincrement column as DB-generated. + return column.default is not None or column.server_default is not None or column is autoincrement_column + column_names = {column.key for column in columns} - required_columns = { - column.key - for column in columns - if not column.nullable - and column.default is None - and column.server_default is None - and not column.autoincrement - } + required_columns = {column.key for column in columns if not column.nullable and not has_insert_default(column)} non_nullable_with_default = { - column.key - for column in columns - if not column.nullable - and (column.default is not None or column.server_default is not None or column.autoincrement) + column.key for column in columns if not column.nullable and has_insert_default(column) } return column_names, required_columns, non_nullable_with_default diff --git a/api/services/saved_message_service.py b/api/services/saved_message_service.py index 4dd6c8107b..d0f4f27968 100644 --- a/api/services/saved_message_service.py +++ b/api/services/saved_message_service.py @@ -3,6 +3,7 @@ from typing import Union from extensions.ext_database import db from libs.infinite_scroll_pagination import InfiniteScrollPagination from models import Account +from models.enums import CreatorUserRole from models.model import App, EndUser from models.web import SavedMessage from services.message_service import MessageService @@ -54,7 +55,7 @@ class SavedMessageService: saved_message = SavedMessage( app_id=app_model.id, message_id=message.id, - created_by_role="account" if isinstance(user, Account) else "end_user", + created_by_role=CreatorUserRole.ACCOUNT if isinstance(user, Account) else CreatorUserRole.END_USER, created_by=user.id, ) diff --git a/api/services/trigger/schedule_service.py b/api/services/trigger/schedule_service.py index 88b640305d..7e9d010d2f 100644 --- a/api/services/trigger/schedule_service.py +++ b/api/services/trigger/schedule_service.py @@ -5,15 +5,15 @@ from datetime import datetime from sqlalchemy import select from sqlalchemy.orm import Session -from dify_graph.entities.graph_config import NodeConfigDict -from dify_graph.nodes import NodeType -from dify_graph.nodes.trigger_schedule.entities import ( +from core.trigger.constants import TRIGGER_SCHEDULE_NODE_TYPE +from core.workflow.nodes.trigger_schedule.entities import ( ScheduleConfig, SchedulePlanUpdate, TriggerScheduleNodeData, VisualConfig, ) -from dify_graph.nodes.trigger_schedule.exc import ScheduleConfigError, ScheduleNotFoundError +from core.workflow.nodes.trigger_schedule.exc import ScheduleConfigError, ScheduleNotFoundError +from dify_graph.entities.graph_config import NodeConfigDict from libs.schedule_utils import calculate_next_run_at, convert_12h_to_24h from models.account import Account, TenantAccountJoin from models.trigger import WorkflowSchedulePlan @@ -240,7 +240,7 @@ class ScheduleService: for node in nodes: node_data = node.get("data", {}) - if node_data.get("type") != NodeType.TRIGGER_SCHEDULE.value: + if node_data.get("type") != TRIGGER_SCHEDULE_NODE_TYPE: continue node_id = node.get("id", "start") diff --git a/api/services/trigger/trigger_service.py b/api/services/trigger/trigger_service.py index 2343bbbd3d..24bbeda329 100644 --- a/api/services/trigger/trigger_service.py +++ b/api/services/trigger/trigger_service.py @@ -12,13 +12,13 @@ from sqlalchemy.orm import Session from 
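For the column classification in has_insert_default above, a toy SQLAlchemy model makes the three buckets concrete. This is a sketch under an assumed table shape, not the archiver's real models; it leans on the same getattr(table, "autoincrement_column", None) resolution the restore code uses:

from datetime import datetime

from sqlalchemy import DateTime, Integer, String, func
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column


class Base(DeclarativeBase):
    pass


class Run(Base):  # hypothetical model, for illustration only
    __tablename__ = "runs"
    seq: Mapped[int] = mapped_column(Integer, primary_key=True)  # resolved autoincrement column
    name: Mapped[str] = mapped_column(String, nullable=False)  # no default anywhere -> required
    created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.now())


table = Run.__table__
auto_col = getattr(table, "autoincrement_column", None)


def has_insert_default(col) -> bool:
    # Mirrors the restore logic: only the resolved autoincrement column counts
    # as DB-generated; column.autoincrement alone may be "auto" on non-PK columns.
    return col.default is not None or col.server_default is not None or col is auto_col


required = {c.key for c in table.columns if not c.nullable and not has_insert_default(c)}
assert required == {"name"}  # seq and created_at are supplied by the database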
core.plugin.entities.plugin_daemon import CredentialType from core.plugin.entities.request import TriggerDispatchResponse, TriggerInvokeEventResponse from core.plugin.impl.exc import PluginNotFoundError +from core.trigger.constants import TRIGGER_PLUGIN_NODE_TYPE from core.trigger.debug.events import PluginTriggerDebugEvent from core.trigger.provider import PluginTriggerProviderController from core.trigger.trigger_manager import TriggerManager from core.trigger.utils.encryption import create_trigger_provider_encrypter_for_subscription +from core.workflow.nodes.trigger_plugin.entities import TriggerEventNodeData from dify_graph.entities.graph_config import NodeConfigDict -from dify_graph.enums import NodeType -from dify_graph.nodes.trigger_plugin.entities import TriggerEventNodeData from extensions.ext_database import db from extensions.ext_redis import redis_client from models.model import App @@ -179,7 +179,7 @@ class TriggerService: # Walk nodes to find plugin triggers nodes_in_graph: list[Mapping[str, Any]] = [] - for node_id, node_config in workflow.walk_nodes(NodeType.TRIGGER_PLUGIN): + for node_id, node_config in workflow.walk_nodes(TRIGGER_PLUGIN_NODE_TYPE): # Extract plugin trigger configuration from node plugin_id = node_config.get("plugin_id", "") provider_id = node_config.get("provider_id", "") diff --git a/api/services/trigger/webhook_service.py b/api/services/trigger/webhook_service.py index 02977b934c..3c1a4cc747 100644 --- a/api/services/trigger/webhook_service.py +++ b/api/services/trigger/webhook_service.py @@ -16,15 +16,15 @@ from werkzeug.exceptions import RequestEntityTooLarge from configs import dify_config from core.app.entities.app_invoke_entities import InvokeFrom from core.tools.tool_file_manager import ToolFileManager -from dify_graph.entities.graph_config import NodeConfigDict -from dify_graph.enums import NodeType -from dify_graph.file.models import FileTransferMethod -from dify_graph.nodes.trigger_webhook.entities import ( +from core.trigger.constants import TRIGGER_WEBHOOK_NODE_TYPE +from core.workflow.nodes.trigger_webhook.entities import ( ContentType, WebhookBodyParameter, WebhookData, WebhookParameter, ) +from dify_graph.entities.graph_config import NodeConfigDict +from dify_graph.file.models import FileTransferMethod from dify_graph.variables.types import ArrayValidation, SegmentType from enums.quota_type import QuotaType from extensions.ext_database import db @@ -862,7 +862,7 @@ class WebhookService: node_id: str webhook_id: str - nodes_id_in_graph = [node_id for node_id, _ in workflow.walk_nodes(NodeType.TRIGGER_WEBHOOK)] + nodes_id_in_graph = [node_id for node_id, _ in workflow.walk_nodes(TRIGGER_WEBHOOK_NODE_TYPE)] # Check webhook node limit if len(nodes_id_in_graph) > cls.MAX_WEBHOOK_NODES_PER_WORKFLOW: diff --git a/api/services/web_conversation_service.py b/api/services/web_conversation_service.py index 560aec2330..e028e3e5e3 100644 --- a/api/services/web_conversation_service.py +++ b/api/services/web_conversation_service.py @@ -7,6 +7,7 @@ from core.app.entities.app_invoke_entities import InvokeFrom from extensions.ext_database import db from libs.infinite_scroll_pagination import InfiniteScrollPagination from models import Account +from models.enums import CreatorUserRole from models.model import App, EndUser from models.web import PinnedConversation from services.conversation_service import ConversationService @@ -84,7 +85,7 @@ class WebConversationService: pinned_conversation = PinnedConversation( app_id=app_model.id, 
conversation_id=conversation.id, - created_by_role="account" if isinstance(user, Account) else "end_user", + created_by_role=CreatorUserRole.ACCOUNT if isinstance(user, Account) else CreatorUserRole.END_USER, created_by=user.id, ) diff --git a/api/services/workflow/workflow_converter.py b/api/services/workflow/workflow_converter.py index 0153046acc..006483fe97 100644 --- a/api/services/workflow/workflow_converter.py +++ b/api/services/workflow/workflow_converter.py @@ -18,13 +18,13 @@ from core.prompt.utils.prompt_template_parser import PromptTemplateParser from dify_graph.file.models import FileUploadConfig from dify_graph.model_runtime.entities.llm_entities import LLMMode from dify_graph.model_runtime.utils.encoders import jsonable_encoder -from dify_graph.nodes import NodeType +from dify_graph.nodes import BuiltinNodeTypes from dify_graph.variables.input_entities import VariableEntity from events.app_event import app_was_created from extensions.ext_database import db from models import Account from models.api_based_extension import APIBasedExtension, APIBasedExtensionPoint -from models.model import App, AppMode, AppModelConfig +from models.model import App, AppMode, AppModelConfig, IconType from models.workflow import Workflow, WorkflowType @@ -72,7 +72,7 @@ class WorkflowConverter: new_app.tenant_id = app_model.tenant_id new_app.name = name or app_model.name + "(workflow)" new_app.mode = AppMode.ADVANCED_CHAT if app_model.mode == AppMode.CHAT else AppMode.WORKFLOW - new_app.icon_type = icon_type or app_model.icon_type + new_app.icon_type = IconType(icon_type) if icon_type else app_model.icon_type new_app.icon = icon or app_model.icon new_app.icon_background = icon_background or app_model.icon_background new_app.enable_site = app_model.enable_site @@ -234,7 +234,7 @@ class WorkflowConverter: "position": None, "data": { "title": "START", - "type": NodeType.START, + "type": BuiltinNodeTypes.START, "variables": [jsonable_encoder(v) for v in variables], }, } @@ -296,7 +296,7 @@ class WorkflowConverter: "position": None, "data": { "title": f"HTTP REQUEST {api_based_extension.name}", - "type": NodeType.HTTP_REQUEST, + "type": BuiltinNodeTypes.HTTP_REQUEST, "method": "post", "url": api_based_extension.api_endpoint, "authorization": {"type": "api-key", "config": {"type": "bearer", "api_key": api_key}}, @@ -314,7 +314,7 @@ class WorkflowConverter: "position": None, "data": { "title": f"Parse {api_based_extension.name} Response", - "type": NodeType.CODE, + "type": BuiltinNodeTypes.CODE, "variables": [{"variable": "response_json", "value_selector": [http_request_node["id"], "body"]}], "code_language": "python3", "code": "import json\n\ndef main(response_json: str) -> str:\n response_body = json.loads(" @@ -354,7 +354,7 @@ class WorkflowConverter: "position": None, "data": { "title": "KNOWLEDGE RETRIEVAL", - "type": NodeType.KNOWLEDGE_RETRIEVAL, + "type": BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL, "query_variable_selector": query_variable_selector, "dataset_ids": dataset_config.dataset_ids, "retrieval_mode": retrieve_config.retrieve_strategy.value, @@ -402,9 +402,9 @@ class WorkflowConverter: :param external_data_variable_node_mapping: external data variable node mapping """ # fetch start and knowledge retrieval node - start_node = next(filter(lambda n: n["data"]["type"] == NodeType.START, graph["nodes"])) + start_node = next(filter(lambda n: n["data"]["type"] == BuiltinNodeTypes.START, graph["nodes"])) knowledge_retrieval_node = next( - filter(lambda n: n["data"]["type"] == 
NodeType.KNOWLEDGE_RETRIEVAL, graph["nodes"]), None + filter(lambda n: n["data"]["type"] == BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL, graph["nodes"]), None ) role_prefix = None @@ -523,7 +523,7 @@ class WorkflowConverter: "position": None, "data": { "title": "LLM", - "type": NodeType.LLM, + "type": BuiltinNodeTypes.LLM, "model": { "provider": model_config.provider, "name": model_config.model, @@ -578,7 +578,7 @@ class WorkflowConverter: "position": None, "data": { "title": "END", - "type": NodeType.END, + "type": BuiltinNodeTypes.END, "outputs": [{"variable": "result", "value_selector": ["llm", "text"]}], }, } @@ -592,7 +592,7 @@ class WorkflowConverter: return { "id": "answer", "position": None, - "data": {"title": "ANSWER", "type": NodeType.ANSWER, "answer": "{{#llm.text#}}"}, + "data": {"title": "ANSWER", "type": BuiltinNodeTypes.ANSWER, "answer": "{{#llm.text#}}"}, } def _create_edge(self, source: str, target: str): diff --git a/api/services/workflow_draft_variable_service.py b/api/services/workflow_draft_variable_service.py index b6f6fc5490..804bf28b66 100644 --- a/api/services/workflow_draft_variable_service.py +++ b/api/services/workflow_draft_variable_service.py @@ -14,10 +14,11 @@ from sqlalchemy.sql.expression import and_, or_ from configs import dify_config from core.app.entities.app_invoke_entities import InvokeFrom +from core.trigger.constants import is_trigger_node_type from dify_graph.constants import CONVERSATION_VARIABLE_NODE_ID, ENVIRONMENT_VARIABLE_NODE_ID, SYSTEM_VARIABLE_NODE_ID -from dify_graph.enums import SystemVariableKey +from dify_graph.enums import NodeType, SystemVariableKey from dify_graph.file.models import File -from dify_graph.nodes import NodeType +from dify_graph.nodes import BuiltinNodeTypes from dify_graph.nodes.variable_assigner.common.helpers import get_updated_variables from dify_graph.variable_loader import VariableLoader from dify_graph.variables import Segment, StringSegment, VariableBase @@ -386,7 +387,7 @@ class WorkflowDraftVariableService: # # This implementation must remain synchronized with the `_build_from_variable_assigner_mapping` # and `save` methods. - if node_type == NodeType.VARIABLE_ASSIGNER: + if node_type == BuiltinNodeTypes.VARIABLE_ASSIGNER: return variable output_value = outputs_dict.get(variable.name, absent) else: @@ -753,8 +754,8 @@ class DraftVariableSaver: # technical variables from being exposed in the draft environment, particularly those # that aren't meant to be directly edited or viewed by users. _EXCLUDE_VARIABLE_NAMES_MAPPING: dict[NodeType, frozenset[str]] = { - NodeType.LLM: frozenset(["finish_reason"]), - NodeType.LOOP: frozenset(["loop_round"]), + BuiltinNodeTypes.LLM: frozenset(["finish_reason"]), + BuiltinNodeTypes.LOOP: frozenset(["loop_round"]), } # Database session used for persisting draft variables. @@ -815,7 +816,7 @@ class DraftVariableSaver: ) def _should_save_output_variables_for_draft(self) -> bool: - if self._enclosing_node_id is not None and self._node_type != NodeType.VARIABLE_ASSIGNER: + if self._enclosing_node_id is not None and self._node_type != BuiltinNodeTypes.VARIABLE_ASSIGNER: # Currently we do not save output variables for nodes inside loop or iteration. 
return False return True @@ -1053,9 +1054,9 @@ class DraftVariableSaver: process_data = {} if not self._should_save_output_variables_for_draft(): return - if self._node_type == NodeType.VARIABLE_ASSIGNER: + if self._node_type == BuiltinNodeTypes.VARIABLE_ASSIGNER: draft_vars = self._build_from_variable_assigner_mapping(process_data=process_data) - elif self._node_type == NodeType.START or self._node_type.is_trigger_node: + elif self._node_type == BuiltinNodeTypes.START or is_trigger_node_type(self._node_type): draft_vars = self._build_variables_from_start_mapping(outputs) else: draft_vars = self._build_variables_from_mapping(outputs) @@ -1071,7 +1072,7 @@ class DraftVariableSaver: @staticmethod def _should_variable_be_visible(node_id: str, node_type: NodeType, name: str) -> bool: - if node_type in NodeType.IF_ELSE: + if node_type == BuiltinNodeTypes.IF_ELSE: return False if node_id == SYSTEM_VARIABLE_NODE_ID and not is_system_variable_editable(name): return False diff --git a/api/services/workflow_service.py b/api/services/workflow_service.py index 2549cdf180..319107d3fb 100644 --- a/api/services/workflow_service.py +++ b/api/services/workflow_service.py @@ -14,16 +14,23 @@ from core.app.apps.workflow.app_config_manager import WorkflowAppConfigManager from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom, build_dify_run_context from core.repositories import DifyCoreRepositoryFactory from core.repositories.human_input_repository import HumanInputFormRepositoryImpl +from core.trigger.constants import is_trigger_node_type +from core.workflow.node_factory import LATEST_VERSION, get_node_type_classes_mapping, is_start_node_type from core.workflow.workflow_entry import WorkflowEntry from dify_graph.entities import GraphInitParams, WorkflowNodeExecution from dify_graph.entities.graph_config import NodeConfigDict from dify_graph.entities.pause_reason import HumanInputRequired -from dify_graph.enums import ErrorStrategy, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus +from dify_graph.enums import ( + ErrorStrategy, + NodeType, + WorkflowNodeExecutionMetadataKey, + WorkflowNodeExecutionStatus, +) from dify_graph.errors import WorkflowNodeRunFailedError from dify_graph.file import File from dify_graph.graph_events import GraphNodeEventBase, NodeRunFailedEvent, NodeRunSucceededEvent from dify_graph.node_events import NodeRunResult -from dify_graph.nodes import NodeType +from dify_graph.nodes import BuiltinNodeTypes from dify_graph.nodes.base.node import Node from dify_graph.nodes.http_request import HTTP_REQUEST_CONFIG_FILTER_KEY, build_http_request_config from dify_graph.nodes.human_input.entities import ( @@ -34,7 +41,6 @@ from dify_graph.nodes.human_input.entities import ( ) from dify_graph.nodes.human_input.enums import HumanInputFormKind from dify_graph.nodes.human_input.human_input_node import HumanInputNode -from dify_graph.nodes.node_mapping import LATEST_VERSION, NODE_TYPE_CLASSES_MAPPING from dify_graph.nodes.start.entities import StartNodeData from dify_graph.repositories.human_input_form_repository import FormCreateParams from dify_graph.runtime import GraphRuntimeState, VariablePool @@ -310,7 +316,7 @@ class WorkflowService: for _, node_data in draft_workflow.walk_nodes() if (node_type_str := node_data.get("type")) and isinstance(node_type_str, str) - and NodeType(node_type_str).is_trigger_node + and is_trigger_node_type(node_type_str) ) if trigger_node_count > 2: raise TriggerNodeLimitExceededError(count=trigger_node_count, limit=2) @@ -619,10 +625,10 @@ 
class WorkflowService: """ # return default block config default_block_configs: list[Mapping[str, object]] = [] - for node_type, node_class_mapping in NODE_TYPE_CLASSES_MAPPING.items(): + for node_type, node_class_mapping in get_node_type_classes_mapping().items(): node_class = node_class_mapping[LATEST_VERSION] filters = None - if node_type is NodeType.HTTP_REQUEST: + if node_type == BuiltinNodeTypes.HTTP_REQUEST: filters = { HTTP_REQUEST_CONFIG_FILTER_KEY: build_http_request_config( max_connect_timeout=dify_config.HTTP_REQUEST_MAX_CONNECT_TIMEOUT, @@ -650,14 +656,15 @@ class WorkflowService: :return: """ node_type_enum = NodeType(node_type) + node_mapping = get_node_type_classes_mapping() # return default block config - if node_type_enum not in NODE_TYPE_CLASSES_MAPPING: + if node_type_enum not in node_mapping: return {} - node_class = NODE_TYPE_CLASSES_MAPPING[node_type_enum][LATEST_VERSION] + node_class = node_mapping[node_type_enum][LATEST_VERSION] resolved_filters = dict(filters) if filters else {} - if node_type_enum is NodeType.HTTP_REQUEST and HTTP_REQUEST_CONFIG_FILTER_KEY not in resolved_filters: + if node_type_enum == BuiltinNodeTypes.HTTP_REQUEST and HTTP_REQUEST_CONFIG_FILTER_KEY not in resolved_filters: resolved_filters[HTTP_REQUEST_CONFIG_FILTER_KEY] = build_http_request_config( max_connect_timeout=dify_config.HTTP_REQUEST_MAX_CONNECT_TIMEOUT, max_read_timeout=dify_config.HTTP_REQUEST_MAX_READ_TIMEOUT, @@ -695,7 +702,7 @@ class WorkflowService: node_config = draft_workflow.get_node_config_by_id(node_id) node_type = Workflow.get_node_type_from_node_config(node_config) node_data = node_config["data"] - if node_type.is_start_node: + if is_start_node_type(node_type): with Session(bind=db.engine) as session, session.begin(): draft_var_srv = WorkflowDraftVariableService(session) conversation_id = draft_var_srv.get_or_create_conversation( @@ -703,7 +710,7 @@ class WorkflowService: app=app_model, workflow=draft_workflow, ) - if node_type is NodeType.START: + if node_type == BuiltinNodeTypes.START: start_data = StartNodeData.model_validate(node_data, from_attributes=True) user_inputs = _rebuild_file_for_user_inputs_in_start_node( tenant_id=draft_workflow.tenant_id, start_node_data=start_data, user_inputs=user_inputs @@ -782,7 +789,7 @@ class WorkflowService: session=session, app_id=app_model.id, node_id=workflow_node_execution.node_id, - node_type=NodeType(workflow_node_execution.node_type), + node_type=workflow_node_execution.node_type, enclosing_node_id=enclosing_node_id, node_execution_id=node_execution.id, user=account, @@ -815,7 +822,7 @@ class WorkflowService: node_config = draft_workflow.get_node_config_by_id(node_id) node_type = Workflow.get_node_type_from_node_config(node_config) - if node_type is not NodeType.HUMAN_INPUT: + if node_type != BuiltinNodeTypes.HUMAN_INPUT: raise ValueError("Node type must be human-input.") # inputs: values used to fill missing upstream variables referenced in form_content. @@ -874,7 +881,7 @@ class WorkflowService: node_config = draft_workflow.get_node_config_by_id(node_id) node_type = Workflow.get_node_type_from_node_config(node_config) - if node_type is not NodeType.HUMAN_INPUT: + if node_type != BuiltinNodeTypes.HUMAN_INPUT: raise ValueError("Node type must be human-input.") # inputs: values used to fill missing upstream variables referenced in form_content. 
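The coexistence check above now works on raw type strings (BuiltinNodeTypes is evidently string-valued, given the == comparisons against JSON node data throughout this diff). A condensed sketch of the helper-based validation, with hypothetical constant values standing in for core.trigger.constants:

# Hypothetical constant values; the real ones live in core.trigger.constants.
START_NODE_TYPE = "start"
TRIGGER_SCHEDULE_NODE_TYPE = "trigger-schedule"
TRIGGER_WEBHOOK_NODE_TYPE = "trigger-webhook"
TRIGGER_PLUGIN_NODE_TYPE = "trigger-plugin"
_TRIGGER_NODE_TYPES = frozenset(
    {TRIGGER_SCHEDULE_NODE_TYPE, TRIGGER_WEBHOOK_NODE_TYPE, TRIGGER_PLUGIN_NODE_TYPE}
)


def is_trigger_node_type(node_type: str) -> bool:
    return node_type in _TRIGGER_NODE_TYPES


def validate_node_types(node_types: set[str]) -> None:
    if START_NODE_TYPE in node_types and any(is_trigger_node_type(nt) for nt in node_types):
        raise ValueError("Start node and trigger nodes cannot coexist in the same workflow")


validate_node_types({START_NODE_TYPE, "llm"})  # passes
try:
    validate_node_types({START_NODE_TYPE, TRIGGER_WEBHOOK_NODE_TYPE})
except ValueError as exc:
    print(exc)  # rejected: start and trigger in one workflow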
@@ -914,7 +921,7 @@ class WorkflowService: session=session, app_id=app_model.id, node_id=node_id, - node_type=NodeType.HUMAN_INPUT, + node_type=BuiltinNodeTypes.HUMAN_INPUT, node_execution_id=str(uuid.uuid4()), user=account, enclosing_node_id=enclosing_node_id, @@ -939,7 +946,7 @@ class WorkflowService: node_config = draft_workflow.get_node_config_by_id(node_id) node_type = Workflow.get_node_type_from_node_config(node_config) - if node_type is not NodeType.HUMAN_INPUT: + if node_type != BuiltinNodeTypes.HUMAN_INPUT: raise ValueError("Node type must be human-input.") node_data = HumanInputNodeData.model_validate(node_config["data"], from_attributes=True) @@ -1327,18 +1334,18 @@ class WorkflowService: for node in node_configs: node_type = node.get("data", {}).get("type") if node_type: - node_types.add(NodeType(node_type)) + node_types.add(node_type) # start node and trigger node cannot coexist - if NodeType.START in node_types: - if any(nt.is_trigger_node for nt in node_types): + if BuiltinNodeTypes.START in node_types: + if any(is_trigger_node_type(nt) for nt in node_types): raise ValueError("Start node and trigger nodes cannot coexist in the same workflow") for node in node_configs: node_data = node.get("data", {}) node_type = node_data.get("type") - if node_type == NodeType.HUMAN_INPUT: + if node_type == BuiltinNodeTypes.HUMAN_INPUT: self._validate_human_input_node_data(node_data) def validate_features_structure(self, app_model: App, features: dict): @@ -1460,7 +1467,7 @@ def _setup_variable_pool( conversation_variables: list[VariableBase], ): # Only inject system variables for START node type. - if node_type == NodeType.START or node_type.is_trigger_node: + if is_start_node_type(node_type): system_variable = SystemVariable( user_id=user_id, app_id=workflow.app_id, diff --git a/api/tasks/enterprise_telemetry_task.py b/api/tasks/enterprise_telemetry_task.py new file mode 100644 index 0000000000..7d5ea7c0a5 --- /dev/null +++ b/api/tasks/enterprise_telemetry_task.py @@ -0,0 +1,52 @@ +"""Celery worker for enterprise metric/log telemetry events. + +This module defines the Celery task that processes telemetry envelopes +from the enterprise_telemetry queue. It deserializes envelopes and +dispatches them to the EnterpriseMetricHandler. +""" + +import json +import logging + +from celery import shared_task + +from enterprise.telemetry.contracts import TelemetryEnvelope +from enterprise.telemetry.metric_handler import EnterpriseMetricHandler + +logger = logging.getLogger(__name__) + + +@shared_task(queue="enterprise_telemetry") +def process_enterprise_telemetry(envelope_json: str) -> None: + """Process enterprise metric/log telemetry envelope. + + This task is enqueued by the TelemetryGateway for metric/log-only + events. It deserializes the envelope and dispatches to the handler. + + Best-effort processing: logs errors but never raises, to avoid + failing user requests due to telemetry issues. + + Args: + envelope_json: JSON-serialized TelemetryEnvelope. 
+ """ + try: + # Deserialize envelope + envelope_dict = json.loads(envelope_json) + envelope = TelemetryEnvelope.model_validate(envelope_dict) + + # Process through handler + handler = EnterpriseMetricHandler() + handler.handle(envelope) + + logger.debug( + "Successfully processed telemetry envelope: tenant_id=%s, event_id=%s, case=%s", + envelope.tenant_id, + envelope.event_id, + envelope.case, + ) + except Exception: + # Best-effort: log and drop on error, never fail user request + logger.warning( + "Failed to process enterprise telemetry envelope, dropping event", + exc_info=True, + ) diff --git a/api/tasks/ops_trace_task.py b/api/tasks/ops_trace_task.py index 72e3b42ca7..c95b8db078 100644 --- a/api/tasks/ops_trace_task.py +++ b/api/tasks/ops_trace_task.py @@ -39,17 +39,36 @@ def process_trace_tasks(file_info): trace_info["documents"] = [Document.model_validate(doc) for doc in trace_info["documents"]] try: + trace_type = trace_info_info_map.get(trace_info_type) + if trace_type: + trace_info = trace_type(**trace_info) + + from extensions.ext_enterprise_telemetry import is_enabled as is_ee_telemetry_enabled + + if is_ee_telemetry_enabled(): + from enterprise.telemetry.enterprise_trace import EnterpriseOtelTrace + + try: + EnterpriseOtelTrace().trace(trace_info) + except Exception: + logger.exception("Enterprise trace failed for app_id: %s", app_id) + if trace_instance: with current_app.app_context(): - trace_type = trace_info_info_map.get(trace_info_type) - if trace_type: - trace_info = trace_type(**trace_info) trace_instance.trace(trace_info) + logger.info("Processing trace tasks success, app_id: %s", app_id) except Exception as e: - logger.info("error:\n\n\n%s\n\n\n\n", e) + logger.exception("Processing trace tasks failed, app_id: %s", app_id) failed_key = f"{OPS_TRACE_FAILED_KEY}_{app_id}" redis_client.incr(failed_key) - logger.info("Processing trace tasks failed, app_id: %s", app_id) finally: - storage.delete(file_path) + try: + storage.delete(file_path) + except Exception as e: + logger.warning( + "Failed to delete trace file %s for app_id %s: %s", + file_path, + app_id, + e, + ) diff --git a/api/tasks/trigger_processing_tasks.py b/api/tasks/trigger_processing_tasks.py index d06b8c980b..75ae1f6316 100644 --- a/api/tasks/trigger_processing_tasks.py +++ b/api/tasks/trigger_processing_tasks.py @@ -20,13 +20,14 @@ from core.db.session_factory import session_factory from core.plugin.entities.plugin_daemon import CredentialType from core.plugin.entities.request import TriggerInvokeEventResponse from core.plugin.impl.exc import PluginInvokeError +from core.trigger.constants import TRIGGER_PLUGIN_NODE_TYPE from core.trigger.debug.event_bus import TriggerDebugEventBus from core.trigger.debug.events import PluginTriggerDebugEvent, build_plugin_pool_key from core.trigger.entities.entities import TriggerProviderEntity from core.trigger.provider import PluginTriggerProviderController from core.trigger.trigger_manager import TriggerManager -from dify_graph.enums import NodeType, WorkflowExecutionStatus -from dify_graph.nodes.trigger_plugin.entities import TriggerEventNodeData +from core.workflow.nodes.trigger_plugin.entities import TriggerEventNodeData +from dify_graph.enums import WorkflowExecutionStatus from enums.quota_type import QuotaType, unlimited from models.enums import ( AppTriggerType, @@ -164,7 +165,7 @@ def _record_trigger_failure_log( elapsed_time=0.0, total_tokens=0, total_steps=0, - created_by_role=created_by_role.value, + created_by_role=created_by_role, created_by=created_by, 
created_at=now, finished_at=now, @@ -179,7 +180,7 @@ def _record_trigger_failure_log( workflow_id=workflow.id, workflow_run_id=workflow_run.id, created_from=WorkflowAppLogCreatedFrom.SERVICE_API.value, - created_by_role=created_by_role.value, + created_by_role=created_by_role, created_by=created_by, ) session.add(workflow_app_log) @@ -212,7 +213,7 @@ def _record_trigger_failure_log( error=error_message, queue_name=queue_name, retry_count=0, - created_by_role=created_by_role.value, + created_by_role=created_by_role, created_by=created_by, triggered_at=now, finished_at=now, @@ -278,7 +279,7 @@ def dispatch_triggered_workflow( # Find the trigger node in the workflow event_node = None - for node_id, node_config in workflow.walk_nodes(NodeType.TRIGGER_PLUGIN): + for node_id, node_config in workflow.walk_nodes(TRIGGER_PLUGIN_NODE_TYPE): if node_id == plugin_trigger.node_id: event_node = node_config break diff --git a/api/tasks/workflow_execution_tasks.py b/api/tasks/workflow_execution_tasks.py index db8721e90b..f41118e592 100644 --- a/api/tasks/workflow_execution_tasks.py +++ b/api/tasks/workflow_execution_tasks.py @@ -94,13 +94,15 @@ def _create_workflow_run_from_execution( workflow_run.tenant_id = tenant_id workflow_run.app_id = app_id workflow_run.workflow_id = execution.workflow_id - workflow_run.type = execution.workflow_type.value - workflow_run.triggered_from = triggered_from.value + from models.workflow import WorkflowType as ModelWorkflowType + + workflow_run.type = ModelWorkflowType(execution.workflow_type.value) + workflow_run.triggered_from = triggered_from workflow_run.version = execution.workflow_version json_converter = WorkflowRuntimeTypeConverter() workflow_run.graph = json.dumps(json_converter.to_json_encodable(execution.graph)) workflow_run.inputs = json.dumps(json_converter.to_json_encodable(execution.inputs)) - workflow_run.status = execution.status.value + workflow_run.status = execution.status workflow_run.outputs = ( json.dumps(json_converter.to_json_encodable(execution.outputs)) if execution.outputs else "{}" ) @@ -108,7 +110,7 @@ def _create_workflow_run_from_execution( workflow_run.elapsed_time = execution.elapsed_time workflow_run.total_tokens = execution.total_tokens workflow_run.total_steps = execution.total_steps - workflow_run.created_by_role = creator_user_role.value + workflow_run.created_by_role = creator_user_role workflow_run.created_by = creator_user_id workflow_run.created_at = execution.started_at workflow_run.finished_at = execution.finished_at @@ -121,7 +123,7 @@ def _update_workflow_run_from_execution(workflow_run: WorkflowRun, execution: Wo Update a WorkflowRun database model from a WorkflowExecution domain entity. 
""" json_converter = WorkflowRuntimeTypeConverter() - workflow_run.status = execution.status.value + workflow_run.status = execution.status workflow_run.outputs = ( json.dumps(json_converter.to_json_encodable(execution.outputs)) if execution.outputs else "{}" ) diff --git a/api/tasks/workflow_node_execution_tasks.py b/api/tasks/workflow_node_execution_tasks.py index 3f607dc55e..466ef6c858 100644 --- a/api/tasks/workflow_node_execution_tasks.py +++ b/api/tasks/workflow_node_execution_tasks.py @@ -98,12 +98,12 @@ def _create_node_execution_from_domain( node_execution.tenant_id = tenant_id node_execution.app_id = app_id node_execution.workflow_id = execution.workflow_id - node_execution.triggered_from = triggered_from.value + node_execution.triggered_from = triggered_from node_execution.workflow_run_id = execution.workflow_execution_id node_execution.index = execution.index node_execution.predecessor_node_id = execution.predecessor_node_id node_execution.node_id = execution.node_id - node_execution.node_type = execution.node_type.value + node_execution.node_type = execution.node_type node_execution.title = execution.title node_execution.node_execution_id = execution.node_execution_id @@ -128,7 +128,7 @@ def _create_node_execution_from_domain( node_execution.status = execution.status.value node_execution.error = execution.error node_execution.elapsed_time = execution.elapsed_time - node_execution.created_by_role = creator_user_role.value + node_execution.created_by_role = creator_user_role node_execution.created_by = creator_user_id node_execution.created_at = execution.created_at node_execution.finished_at = execution.finished_at diff --git a/api/tasks/workflow_schedule_tasks.py b/api/tasks/workflow_schedule_tasks.py index ced7ef973b..8c64d3ab27 100644 --- a/api/tasks/workflow_schedule_tasks.py +++ b/api/tasks/workflow_schedule_tasks.py @@ -3,7 +3,7 @@ import logging from celery import shared_task from core.db.session_factory import session_factory -from dify_graph.nodes.trigger_schedule.exc import ( +from core.workflow.nodes.trigger_schedule.exc import ( ScheduleExecutionError, ScheduleNotFoundError, TenantOwnerNotFoundError, diff --git a/api/tests/integration_tests/.env.example b/api/tests/integration_tests/.env.example index 37f8830482..f84d39aeb5 100644 --- a/api/tests/integration_tests/.env.example +++ b/api/tests/integration_tests/.env.example @@ -77,6 +77,19 @@ IRIS_MAX_CONNECTION=3 IRIS_TEXT_INDEX=true IRIS_TEXT_INDEX_LANGUAGE=en +# Hologres configuration +HOLOGRES_HOST=localhost +HOLOGRES_PORT=80 +HOLOGRES_DATABASE=test_db +HOLOGRES_ACCESS_KEY_ID=test_access_key_id +HOLOGRES_ACCESS_KEY_SECRET=test_access_key_secret +HOLOGRES_SCHEMA=public +HOLOGRES_TOKENIZER=jieba +HOLOGRES_DISTANCE_METHOD=Cosine +HOLOGRES_BASE_QUANTIZATION_TYPE=rabitq +HOLOGRES_MAX_DEGREE=64 +HOLOGRES_EF_CONSTRUCTION=400 + # Upload configuration UPLOAD_FILE_SIZE_LIMIT=15 diff --git a/api/tests/integration_tests/controllers/console/app/test_chat_message_permissions.py b/api/tests/integration_tests/controllers/console/app/test_chat_message_permissions.py index 498ac56d5d..afb6938baa 100644 --- a/api/tests/integration_tests/controllers/console/app/test_chat_message_permissions.py +++ b/api/tests/integration_tests/controllers/console/app/test_chat_message_permissions.py @@ -165,7 +165,7 @@ class TestChatMessageApiPermissions: agent_thoughts=[], message_files=[], message_metadata_dict={}, - status="success", + status="normal", error="", parent_message_id=None, ) diff --git 
a/api/tests/integration_tests/core/workflow/nodes/datasource/test_datasource_node_integration.py b/api/tests/integration_tests/core/workflow/nodes/datasource/test_datasource_node_integration.py index c043c7dc10..3e79792b5b 100644 --- a/api/tests/integration_tests/core/workflow/nodes/datasource/test_datasource_node_integration.py +++ b/api/tests/integration_tests/core/workflow/nodes/datasource/test_datasource_node_integration.py @@ -1,6 +1,7 @@ +from core.workflow.nodes.datasource.datasource_node import DatasourceNode +from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult, StreamCompletedEvent -from dify_graph.nodes.datasource.datasource_node import DatasourceNode class _Seg: @@ -28,13 +29,17 @@ class _GS: class _GP: - tenant_id = "t1" - app_id = "app-1" workflow_id = "wf-1" graph_config = {} - user_id = "u1" - user_from = "account" - invoke_from = "debugger" + run_context = { + DIFY_RUN_CONTEXT_KEY: { + "tenant_id": "t1", + "app_id": "app-1", + "user_id": "u1", + "user_from": "account", + "invoke_from": "debugger", + } + } call_depth = 0 @@ -61,6 +66,8 @@ def test_node_integration_minimal_stream(mocker): def get_upload_file_by_id(cls, **_): raise AssertionError + mocker.patch("core.workflow.nodes.datasource.datasource_node.DatasourceManager", new=_Mgr) + node = DatasourceNode( id="n", config={ @@ -77,7 +84,6 @@ def test_node_integration_minimal_stream(mocker): }, graph_init_params=_GP(), graph_runtime_state=_GS(vp), - datasource_manager=_Mgr, ) out = list(node._run()) diff --git a/api/tests/integration_tests/services/test_workflow_draft_variable_service.py b/api/tests/integration_tests/services/test_workflow_draft_variable_service.py index 7c4dcda2dc..b19b4ebdad 100644 --- a/api/tests/integration_tests/services/test_workflow_draft_variable_service.py +++ b/api/tests/integration_tests/services/test_workflow_draft_variable_service.py @@ -7,7 +7,7 @@ from sqlalchemy import delete from sqlalchemy.orm import Session from dify_graph.constants import CONVERSATION_VARIABLE_NODE_ID, SYSTEM_VARIABLE_NODE_ID -from dify_graph.nodes import NodeType +from dify_graph.nodes import BuiltinNodeTypes from dify_graph.variables.segments import StringSegment from dify_graph.variables.types import SegmentType from dify_graph.variables.variables import StringVariable @@ -286,7 +286,7 @@ class TestDraftVariableLoader(unittest.TestCase): session=session, app_id=self._test_app_id, node_id="test_offload_node", - node_type=NodeType.LLM, # Use a real node type + node_type=BuiltinNodeTypes.LLM, # Use a real node type node_execution_id=node_execution_id, user=setup_account, ) @@ -542,7 +542,7 @@ class TestWorkflowDraftVariableServiceResetVariable(unittest.TestCase): index=1, node_execution_id=str(uuid.uuid4()), node_id=self._node_id, - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, title="Test Node", inputs='{"input": "test input"}', process_data='{"test_var": "process_value", "other_var": "other_process"}', diff --git a/api/tests/integration_tests/vdb/__mock/hologres.py b/api/tests/integration_tests/vdb/__mock/hologres.py new file mode 100644 index 0000000000..b60cf358c0 --- /dev/null +++ b/api/tests/integration_tests/vdb/__mock/hologres.py @@ -0,0 +1,209 @@ +import json +import os +from typing import Any + +import holo_search_sdk as holo +import pytest +from _pytest.monkeypatch import MonkeyPatch +from psycopg import sql as psql + +# Shared in-memory storage: 
{table_name: {doc_id: {"id", "text", "meta", "embedding"}}} +_mock_tables: dict[str, dict[str, dict[str, Any]]] = {} + + +class MockSearchQuery: + """Mock query builder for search_vector and search_text results.""" + + def __init__(self, table_name: str, search_type: str): + self._table_name = table_name + self._search_type = search_type + self._limit_val = 10 + self._filter_sql = None + + def select(self, columns): + return self + + def limit(self, n): + self._limit_val = n + return self + + def where(self, filter_sql): + self._filter_sql = filter_sql + return self + + def _apply_filter(self, row: dict[str, Any]) -> bool: + """Apply the filter SQL to check if a row matches.""" + if self._filter_sql is None: + return True + + # Extract literals (the document IDs) from the filter SQL + # Filter format: meta->>'document_id' IN ('doc1', 'doc2') + literals = [v for t, v in _extract_identifiers_and_literals(self._filter_sql) if t == "literal"] + if not literals: + return True + + # Get the document_id from the row's meta field + meta = row.get("meta", "{}") + if isinstance(meta, str): + meta = json.loads(meta) + doc_id = meta.get("document_id") + + return doc_id in literals + + def fetchall(self): + data = _mock_tables.get(self._table_name, {}) + results = [] + for row in list(data.values())[: self._limit_val]: + # Apply filter if present + if not self._apply_filter(row): + continue + + if self._search_type == "vector": + # row format expected by _process_vector_results: (distance, id, text, meta) + results.append((0.1, row["id"], row["text"], row["meta"])) + else: + # row format expected by _process_full_text_results: (id, text, meta, embedding, score) + results.append((row["id"], row["text"], row["meta"], row.get("embedding", []), 0.9)) + return results + + +class MockTable: + """Mock table object returned by client.open_table().""" + + def __init__(self, table_name: str): + self._table_name = table_name + + def upsert_multi(self, index_column, values, column_names, update=True, update_columns=None): + if self._table_name not in _mock_tables: + _mock_tables[self._table_name] = {} + id_idx = column_names.index("id") + for row in values: + doc_id = row[id_idx] + _mock_tables[self._table_name][doc_id] = dict(zip(column_names, row)) + + def search_vector(self, vector, column, distance_method, output_name): + return MockSearchQuery(self._table_name, "vector") + + def search_text(self, column, expression, return_score=False, return_score_name="score", return_all_columns=False): + return MockSearchQuery(self._table_name, "text") + + def set_vector_index( + self, column, distance_method, base_quantization_type, max_degree, ef_construction, use_reorder + ): + pass + + def create_text_index(self, index_name, column, tokenizer): + pass + + +def _extract_sql_template(query) -> str: + """Extract the SQL template string from a psycopg Composed object.""" + if isinstance(query, psql.Composed): + for part in query: + if isinstance(part, psql.SQL): + return part._obj + if isinstance(query, psql.SQL): + return query._obj + return "" + + +def _extract_identifiers_and_literals(query) -> list[Any]: + """Extract Identifier and Literal values from a psycopg Composed object.""" + values: list[Any] = [] + if isinstance(query, psql.Composed): + for part in query: + if isinstance(part, psql.Identifier): + values.append(("ident", part._obj[0] if part._obj else "")) + elif isinstance(part, psql.Literal): + values.append(("literal", part._obj)) + elif isinstance(part, psql.Composed): + # Handles SQL(...).join(...) 
for IN clauses + for sub in part: + if isinstance(sub, psql.Literal): + values.append(("literal", sub._obj)) + return values + + +class MockHologresClient: + """Mock holo_search_sdk client that stores data in memory.""" + + def connect(self): + pass + + def check_table_exist(self, table_name): + return table_name in _mock_tables + + def open_table(self, table_name): + return MockTable(table_name) + + def execute(self, query, fetch_result=False): + template = _extract_sql_template(query) + params = _extract_identifiers_and_literals(query) + + if "CREATE TABLE" in template.upper(): + # Extract table name from first identifier + table_name = next((v for t, v in params if t == "ident"), "unknown") + if table_name not in _mock_tables: + _mock_tables[table_name] = {} + return None + + if "SELECT 1" in template: + # text_exists: SELECT 1 FROM {table} WHERE id = {id} LIMIT 1 + table_name = next((v for t, v in params if t == "ident"), "") + doc_id = next((v for t, v in params if t == "literal"), "") + data = _mock_tables.get(table_name, {}) + return [(1,)] if doc_id in data else [] + + if "SELECT id" in template: + # get_ids_by_metadata_field: SELECT id FROM {table} WHERE meta->>{key} = {value} + table_name = next((v for t, v in params if t == "ident"), "") + literals = [v for t, v in params if t == "literal"] + key = literals[0] if len(literals) > 0 else "" + value = literals[1] if len(literals) > 1 else "" + data = _mock_tables.get(table_name, {}) + return [(doc_id,) for doc_id, row in data.items() if json.loads(row.get("meta", "{}")).get(key) == value] + + if "DELETE" in template.upper(): + table_name = next((v for t, v in params if t == "ident"), "") + if "id IN" in template: + # delete_by_ids + ids_to_delete = [v for t, v in params if t == "literal"] + for did in ids_to_delete: + _mock_tables.get(table_name, {}).pop(did, None) + elif "meta->>" in template: + # delete_by_metadata_field + literals = [v for t, v in params if t == "literal"] + key = literals[0] if len(literals) > 0 else "" + value = literals[1] if len(literals) > 1 else "" + data = _mock_tables.get(table_name, {}) + to_remove = [ + doc_id for doc_id, row in data.items() if json.loads(row.get("meta", "{}")).get(key) == value + ] + for did in to_remove: + data.pop(did, None) + return None + + return [] if fetch_result else None + + def drop_table(self, table_name): + _mock_tables.pop(table_name, None) + + +def mock_connect(**kwargs): + """Replacement for holo_search_sdk.connect() that returns a mock client.""" + return MockHologresClient() + + +MOCK = os.getenv("MOCK_SWITCH", "false").lower() == "true" + + +@pytest.fixture +def setup_hologres_mock(monkeypatch: MonkeyPatch): + if MOCK: + monkeypatch.setattr(holo, "connect", mock_connect) + + yield + + if MOCK: + _mock_tables.clear() + monkeypatch.undo() diff --git a/api/tests/integration_tests/vdb/hologres/__init__.py b/api/tests/integration_tests/vdb/hologres/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/integration_tests/vdb/hologres/test_hologres.py b/api/tests/integration_tests/vdb/hologres/test_hologres.py new file mode 100644 index 0000000000..ff2be88ef1 --- /dev/null +++ b/api/tests/integration_tests/vdb/hologres/test_hologres.py @@ -0,0 +1,149 @@ +import os +import uuid +from typing import cast + +from holo_search_sdk.types import BaseQuantizationType, DistanceType, TokenizerType + +from core.rag.datasource.vdb.hologres.hologres_vector import HologresVector, HologresVectorConfig +from core.rag.models.document import Document +from 
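The mock's _extract_identifiers_and_literals relies on walking the parts of psycopg's Composed objects. A standalone illustration of what that introspection sees; it touches the private _obj attribute only because the mock above does the same, and the exact split into SQL fragments is approximate:

from psycopg import sql as psql

query = psql.SQL("SELECT id FROM {} WHERE meta->>{} = {}").format(
    psql.Identifier("embedding_table"),
    psql.Literal("document_id"),
    psql.Literal("doc-123"),
)

# format() yields a Composed whose parts alternate between SQL fragments and
# the formatted arguments, so Identifier/Literal values can be walked out.
idents = [p for p in query if isinstance(p, psql.Identifier)]
lits = [p for p in query if isinstance(p, psql.Literal)]
assert idents[0]._obj[0] == "embedding_table"
assert [lit._obj for lit in lits] == ["document_id", "doc-123"]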
+from tests.integration_tests.vdb.test_vector_store import AbstractVectorTest, get_example_text, setup_mock_redis
+
+MOCK = os.getenv("MOCK_SWITCH", "false").lower() == "true"
+
+
+class HologresVectorTest(AbstractVectorTest):
+    def __init__(self):
+        super().__init__()
+        # Hologres requires collection names to be lowercase
+        self.collection_name = self.collection_name.lower()
+        self.vector = HologresVector(
+            collection_name=self.collection_name,
+            config=HologresVectorConfig(
+                host=os.environ.get("HOLOGRES_HOST", "localhost"),
+                port=int(os.environ.get("HOLOGRES_PORT", "80")),
+                database=os.environ.get("HOLOGRES_DATABASE", "test_db"),
+                access_key_id=os.environ.get("HOLOGRES_ACCESS_KEY_ID", "test_key"),
+                access_key_secret=os.environ.get("HOLOGRES_ACCESS_KEY_SECRET", "test_secret"),
+                schema_name=os.environ.get("HOLOGRES_SCHEMA", "public"),
+                tokenizer=cast(TokenizerType, os.environ.get("HOLOGRES_TOKENIZER", "jieba")),
+                distance_method=cast(DistanceType, os.environ.get("HOLOGRES_DISTANCE_METHOD", "Cosine")),
+                base_quantization_type=cast(
+                    BaseQuantizationType, os.environ.get("HOLOGRES_BASE_QUANTIZATION_TYPE", "rabitq")
+                ),
+                max_degree=int(os.environ.get("HOLOGRES_MAX_DEGREE", "64")),
+                ef_construction=int(os.environ.get("HOLOGRES_EF_CONSTRUCTION", "400")),
+            ),
+        )
+
+    def search_by_full_text(self):
+        """Override: full-text index may not be immediately ready in real mode."""
+        hits_by_full_text = self.vector.search_by_full_text(query=get_example_text())
+        if MOCK:
+            # In mock mode, full-text search should return the document we inserted
+            assert len(hits_by_full_text) == 1
+            assert hits_by_full_text[0].metadata["doc_id"] == self.example_doc_id
+        else:
+            # In real mode the index may lag behind writes, so only smoke-check the shape
+            assert isinstance(hits_by_full_text, list)
+
+    def search_by_vector_with_filter(self):
+        """Test vector search with document_ids_filter."""
+        # Create another document with different document_id
+        other_doc_id = str(uuid.uuid4())
+        other_doc = Document(
+            page_content="other_text",
+            metadata={
+                "doc_id": other_doc_id,
+                "doc_hash": other_doc_id,
+                "document_id": other_doc_id,
+                "dataset_id": self.dataset_id,
+            },
+        )
+        self.vector.add_texts(documents=[other_doc], embeddings=[self.example_embedding])
+
+        # Search with filter - should only return the original document
+        hits = self.vector.search_by_vector(
+            query_vector=self.example_embedding,
+            document_ids_filter=[self.example_doc_id],
+        )
+        assert len(hits) == 1
+        assert hits[0].metadata["doc_id"] == self.example_doc_id
+
+        # Search without filter - should return both
+        all_hits = self.vector.search_by_vector(query_vector=self.example_embedding, top_k=10)
+        assert len(all_hits) >= 2
+
+    def search_by_full_text_with_filter(self):
+        """Test full-text search with document_ids_filter."""
+        # Create another document with different document_id
+        other_doc_id = str(uuid.uuid4())
+        other_doc = Document(
+            page_content="unique_other_text",
+            metadata={
+                "doc_id": other_doc_id,
+                "doc_hash": other_doc_id,
+                "document_id": other_doc_id,
+                "dataset_id": self.dataset_id,
+            },
+        )
+        self.vector.add_texts(documents=[other_doc], embeddings=[self.example_embedding])
+
+        # Search with filter - should only return the original document
+        hits = self.vector.search_by_full_text(
+            query=get_example_text(),
+            document_ids_filter=[self.example_doc_id],
+        )
+        if MOCK:
+            assert len(hits) == 1
+            assert hits[0].metadata["doc_id"] == self.example_doc_id
+
+    def get_ids_by_metadata_field(self):
+        """Override: Hologres implements this method via JSONB query."""
+        ids = self.vector.get_ids_by_metadata_field(key="document_id", value=self.example_doc_id)
+        assert ids is not None
+        assert len(ids) == 1
+
+    def run_all_tests(self):
+        # Clean up before running tests
+        self.vector.delete()
+        # Run base tests (create, search, text_exists, get_ids, add_texts, delete_by_ids, delete)
+        super().run_all_tests()
+
+        # Additional filter tests require fresh data (table was deleted by base tests)
+        if MOCK:
+            # Recreate collection for filter tests
+            self.vector.create(
+                texts=[
+                    Document(
+                        page_content=get_example_text(),
+                        metadata={
+                            "doc_id": self.example_doc_id,
+                            "doc_hash": self.example_doc_id,
+                            "document_id": self.example_doc_id,
+                            "dataset_id": self.dataset_id,
+                        },
+                    )
+                ],
+                embeddings=[self.example_embedding],
+            )
+            self.search_by_vector_with_filter()
+            self.search_by_full_text_with_filter()
+            # Clean up
+            self.vector.delete()
+
+
+def test_hologres_vector(setup_mock_redis, setup_hologres_mock):
+    """
+    Test Hologres vector database implementation.
+
+    This test covers:
+    - Creating collection with vector index
+    - Adding texts with embeddings
+    - Vector similarity search
+    - Full-text search
+    - Text existence check
+    - Batch deletion by IDs
+    - Collection deletion
+    """
+    HologresVectorTest().run_all_tests()
diff --git a/api/tests/integration_tests/workflow/nodes/test_code.py b/api/tests/integration_tests/workflow/nodes/test_code.py
index f8b7f95493..e3a2b6b866 100644
--- a/api/tests/integration_tests/workflow/nodes/test_code.py
+++ b/api/tests/integration_tests/workflow/nodes/test_code.py
@@ -60,7 +60,7 @@ def init_code_node(code_config: dict):
         graph_runtime_state=graph_runtime_state,
     )
 
-    graph =
Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") node = HttpRequestNode( id=str(uuid.uuid4()), diff --git a/api/tests/integration_tests/workflow/nodes/test_llm.py b/api/tests/integration_tests/workflow/nodes/test_llm.py index 2aca9f5157..d628348f1e 100644 --- a/api/tests/integration_tests/workflow/nodes/test_llm.py +++ b/api/tests/integration_tests/workflow/nodes/test_llm.py @@ -10,7 +10,7 @@ from core.model_manager import ModelInstance from dify_graph.enums import WorkflowNodeExecutionStatus from dify_graph.node_events import StreamCompletedEvent from dify_graph.nodes.llm.node import LLMNode -from dify_graph.nodes.llm.protocols import CredentialsProvider, ModelFactory +from dify_graph.nodes.llm.protocols import CredentialsProvider, ModelFactory, TemplateRenderer from dify_graph.nodes.protocols import HttpClientProtocol from dify_graph.runtime import GraphRuntimeState, VariablePool from dify_graph.system_variable import SystemVariable @@ -75,6 +75,7 @@ def init_llm_node(config: dict) -> LLMNode: credentials_provider=MagicMock(spec=CredentialsProvider), model_factory=MagicMock(spec=ModelFactory), model_instance=MagicMock(spec=ModelInstance), + template_renderer=MagicMock(spec=TemplateRenderer), http_client=MagicMock(spec=HttpClientProtocol), ) @@ -158,7 +159,7 @@ def test_execute_llm(): return mock_model_instance # Mock fetch_prompt_messages to avoid database calls - def mock_fetch_prompt_messages_1(**_kwargs): + def mock_fetch_prompt_messages_1(*_args, **_kwargs): from dify_graph.model_runtime.entities.message_entities import SystemPromptMessage, UserPromptMessage return [ diff --git a/api/tests/integration_tests/workflow/nodes/test_template_transform.py b/api/tests/integration_tests/workflow/nodes/test_template_transform.py index 970e2cae00..7bb4f905c3 100644 --- a/api/tests/integration_tests/workflow/nodes/test_template_transform.py +++ b/api/tests/integration_tests/workflow/nodes/test_template_transform.py @@ -82,7 +82,7 @@ def test_execute_template_transform(): graph_runtime_state=graph_runtime_state, ) - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") assert graph is not None node = TemplateTransformNode( diff --git a/api/tests/integration_tests/workflow/nodes/test_tool.py b/api/tests/integration_tests/workflow/nodes/test_tool.py index 23cb56d2a5..a6717ada31 100644 --- a/api/tests/integration_tests/workflow/nodes/test_tool.py +++ b/api/tests/integration_tests/workflow/nodes/test_tool.py @@ -1,6 +1,6 @@ import time import uuid -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom from core.tools.utils.configuration import ToolParameterConfigurationManager @@ -54,7 +54,7 @@ def init_tool_node(config: dict): graph_runtime_state=graph_runtime_state, ) - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") tool_file_manager_factory = MagicMock(spec=ToolFileManagerProtocol) @@ -87,17 +87,20 @@ def test_tool_variable_invoke(): } ) - ToolParameterConfigurationManager.decrypt_tool_parameters = MagicMock(return_value={"format": "%Y-%m-%d %H:%M:%S"}) + with patch.object( + ToolParameterConfigurationManager, + "decrypt_tool_parameters", + 
return_value={"format": "%Y-%m-%d %H:%M:%S"}, + ): + node.graph_runtime_state.variable_pool.add(["1", "args1"], "1+1") - node.graph_runtime_state.variable_pool.add(["1", "args1"], "1+1") - - # execute node - result = node._run() - for item in result: - if isinstance(item, StreamCompletedEvent): - assert item.node_run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED - assert item.node_run_result.outputs is not None - assert item.node_run_result.outputs.get("text") is not None + # execute node + result = node._run() + for item in result: + if isinstance(item, StreamCompletedEvent): + assert item.node_run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED + assert item.node_run_result.outputs is not None + assert item.node_run_result.outputs.get("text") is not None def test_tool_mixed_invoke(): @@ -121,12 +124,15 @@ def test_tool_mixed_invoke(): } ) - ToolParameterConfigurationManager.decrypt_tool_parameters = MagicMock(return_value={"format": "%Y-%m-%d %H:%M:%S"}) - - # execute node - result = node._run() - for item in result: - if isinstance(item, StreamCompletedEvent): - assert item.node_run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED - assert item.node_run_result.outputs is not None - assert item.node_run_result.outputs.get("text") is not None + with patch.object( + ToolParameterConfigurationManager, + "decrypt_tool_parameters", + return_value={"format": "%Y-%m-%d %H:%M:%S"}, + ): + # execute node + result = node._run() + for item in result: + if isinstance(item, StreamCompletedEvent): + assert item.node_run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED + assert item.node_run_result.outputs is not None + assert item.node_run_result.outputs.get("text") is not None diff --git a/api/tests/test_containers_integration_tests/core/rag/pipeline/test_queue_integration.py b/api/tests/test_containers_integration_tests/core/rag/pipeline/test_queue_integration.py index cdf390b327..a60159c66a 100644 --- a/api/tests/test_containers_integration_tests/core/rag/pipeline/test_queue_integration.py +++ b/api/tests/test_containers_integration_tests/core/rag/pipeline/test_queue_integration.py @@ -18,7 +18,7 @@ from faker import Faker from core.rag.pipeline.queue import TaskWrapper, TenantIsolatedTaskQueue from extensions.ext_redis import redis_client -from models import Account, Tenant, TenantAccountJoin, TenantAccountRole +from models import Account, AccountStatus, Tenant, TenantAccountJoin, TenantAccountRole, TenantStatus @dataclass @@ -47,7 +47,7 @@ class TestTenantIsolatedTaskQueueIntegration: email=fake.email(), name=fake.name(), interface_language="en-US", - status="active", + status=AccountStatus.ACTIVE, ) db_session_with_containers.add(account) db_session_with_containers.commit() @@ -55,7 +55,7 @@ class TestTenantIsolatedTaskQueueIntegration: # Create tenant tenant = Tenant( name=fake.company(), - status="normal", + status=TenantStatus.NORMAL, ) db_session_with_containers.add(tenant) db_session_with_containers.commit() @@ -101,7 +101,7 @@ class TestTenantIsolatedTaskQueueIntegration: # Create second tenant tenant2 = Tenant( name=fake.company(), - status="normal", + status=TenantStatus.NORMAL, ) db_session_with_containers.add(tenant2) db_session_with_containers.commit() @@ -410,7 +410,7 @@ class TestTenantIsolatedTaskQueueCompatibility: email=fake.email(), name=fake.name(), interface_language="en-US", - status="active", + status=AccountStatus.ACTIVE, ) db_session_with_containers.add(account) db_session_with_containers.commit() @@ -418,7 +418,7 @@ class 
TestTenantIsolatedTaskQueueCompatibility: # Create tenant tenant = Tenant( name=fake.company(), - status="normal", + status=TenantStatus.NORMAL, ) db_session_with_containers.add(tenant) db_session_with_containers.commit() diff --git a/api/tests/test_containers_integration_tests/core/rag/retrieval/test_dataset_retrieval_integration.py b/api/tests/test_containers_integration_tests/core/rag/retrieval/test_dataset_retrieval_integration.py index d783a08233..75471afef8 100644 --- a/api/tests/test_containers_integration_tests/core/rag/retrieval/test_dataset_retrieval_integration.py +++ b/api/tests/test_containers_integration_tests/core/rag/retrieval/test_dataset_retrieval_integration.py @@ -5,7 +5,7 @@ import pytest from faker import Faker from core.rag.retrieval.dataset_retrieval import DatasetRetrieval -from dify_graph.repositories.rag_retrieval_protocol import KnowledgeRetrievalRequest +from core.workflow.nodes.knowledge_retrieval.retrieval import KnowledgeRetrievalRequest from models.dataset import Dataset, Document from services.account_service import AccountService, TenantService from tests.test_containers_integration_tests.helpers import generate_valid_password diff --git a/api/tests/test_containers_integration_tests/services/test_account_service.py b/api/tests/test_containers_integration_tests/services/test_account_service.py index 9354a3ac35..cc9596d15f 100644 --- a/api/tests/test_containers_integration_tests/services/test_account_service.py +++ b/api/tests/test_containers_integration_tests/services/test_account_service.py @@ -3331,7 +3331,7 @@ class TestRegisterService: TenantService.create_tenant_member(tenant, account, role="normal") # Change tenant status to non-normal - tenant.status = "suspended" + tenant.status = "archive" db_session_with_containers.commit() diff --git a/api/tests/test_containers_integration_tests/services/test_app_generate_service.py b/api/tests/test_containers_integration_tests/services/test_app_generate_service.py index 5155d50b0e..5b1a4790f5 100644 --- a/api/tests/test_containers_integration_tests/services/test_app_generate_service.py +++ b/api/tests/test_containers_integration_tests/services/test_app_generate_service.py @@ -2,6 +2,7 @@ import uuid from unittest.mock import ANY, MagicMock, patch import pytest +import sqlalchemy as sa from faker import Faker from sqlalchemy.orm import Session @@ -492,20 +493,20 @@ class TestAppGenerateService: ) # Manually set invalid mode after creation + # With EnumText, invalid values are rejected at the DB level during flush, + # raising StatementError wrapping ValueError app.mode = "invalid_mode" # Setup test arguments args = {"inputs": {"query": fake.text(max_nb_chars=50)}, "response_mode": "streaming"} - # Execute the method under test and expect ValueError - with pytest.raises(ValueError) as exc_info: + # Execute the method under test and expect either ValueError (direct) or + # StatementError (from EnumText validation during autoflush) + with pytest.raises((ValueError, sa.exc.StatementError)): AppGenerateService.generate( app_model=app, user=account, args=args, invoke_from=InvokeFrom.SERVICE_API, streaming=True ) - # Verify error message - assert "Invalid app mode" in str(exc_info.value) - def test_generate_with_workflow_id_format_error( self, db_session_with_containers: Session, mock_external_service_dependencies ): diff --git a/api/tests/test_containers_integration_tests/services/test_file_service.py b/api/tests/test_containers_integration_tests/services/test_file_service.py index 6712fe8454..50f5b7a8c0 100644 --- 
a/api/tests/test_containers_integration_tests/services/test_file_service.py +++ b/api/tests/test_containers_integration_tests/services/test_file_service.py @@ -263,6 +263,27 @@ class TestFileService: user=account, ) + def test_upload_file_allows_regular_punctuation_in_filename( + self, db_session_with_containers: Session, engine, mock_external_service_dependencies + ): + """ + Test file upload allows punctuation that is safe when stored as metadata. + """ + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + filename = 'candidate?resume for "dify"|v2:.txt' + content = b"test content" + mimetype = "text/plain" + + upload_file = FileService(engine).upload_file( + filename=filename, + content=content, + mimetype=mimetype, + user=account, + ) + + assert upload_file.name == filename + def test_upload_file_filename_too_long( self, db_session_with_containers: Session, engine, mock_external_service_dependencies ): diff --git a/api/tests/test_containers_integration_tests/services/test_human_input_delivery_test.py b/api/tests/test_containers_integration_tests/services/test_human_input_delivery_test.py index 08f99cf55a..70d05792ce 100644 --- a/api/tests/test_containers_integration_tests/services/test_human_input_delivery_test.py +++ b/api/tests/test_containers_integration_tests/services/test_human_input_delivery_test.py @@ -4,7 +4,7 @@ from unittest.mock import MagicMock import pytest -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from dify_graph.nodes.human_input.entities import ( EmailDeliveryConfig, EmailDeliveryMethod, @@ -68,7 +68,7 @@ def _create_app_with_draft_workflow(session, *, delivery_method_id: uuid.UUID) - inputs=[], user_actions=[], ).model_dump(mode="json") - node_data["type"] = NodeType.HUMAN_INPUT.value + node_data["type"] = BuiltinNodeTypes.HUMAN_INPUT graph = json.dumps({"nodes": [{"id": "human-node", "data": node_data}], "edges": []}) workflow = Workflow.new( diff --git a/api/tests/test_containers_integration_tests/services/test_saved_message_service.py b/api/tests/test_containers_integration_tests/services/test_saved_message_service.py index cc403ef5a2..dd743d46c2 100644 --- a/api/tests/test_containers_integration_tests/services/test_saved_message_service.py +++ b/api/tests/test_containers_integration_tests/services/test_saved_message_service.py @@ -163,7 +163,7 @@ class TestSavedMessageService: answer_unit_price=0.002, total_price=0.003, currency="USD", - status="success", + status="normal", ) db_session_with_containers.add(message) diff --git a/api/tests/test_containers_integration_tests/services/test_workflow_service.py b/api/tests/test_containers_integration_tests/services/test_workflow_service.py index bfb23bac68..056db41750 100644 --- a/api/tests/test_containers_integration_tests/services/test_workflow_service.py +++ b/api/tests/test_containers_integration_tests/services/test_workflow_service.py @@ -62,7 +62,7 @@ class TestWorkflowService: tenant = Tenant( name=f"Test Tenant {fake.company()}", plan="basic", - status="active", + status="normal", ) tenant.id = account.current_tenant_id tenant.created_at = fake.date_time_this_year() @@ -860,8 +860,8 @@ class TestWorkflowService: # Act try: result = workflow_service.get_default_block_config(node_type=invalid_node_type) - # If we get here, the service should return None for invalid types - assert result is None + # If we get here, the service should return an empty config for invalid types. 
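Several of the updated service tests rely on the same mechanism the nearby comments describe: with EnumText, assigning an invalid value to a model attribute does not raise immediately; the value is only validated when parameters are bound during flush, so SQLAlchemy surfaces the failure as a StatementError wrapping the underlying ValueError. A minimal, self-contained sketch of that pattern follows, with hypothetical AppMode/EnumText names; Dify's real EnumText lives in the models package and may differ in detail.

import enum

import sqlalchemy as sa
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column


class AppMode(enum.StrEnum):  # illustrative subset of modes
    CHAT = "chat"
    WORKFLOW = "workflow"


class EnumText(sa.types.TypeDecorator):
    """Stores enum values as text and validates them at bind time."""

    impl = sa.String
    cache_ok = True

    def __init__(self, enum_cls: type[enum.StrEnum]):
        super().__init__(length=255)
        self._enum_cls = enum_cls

    def process_bind_param(self, value, dialect):
        if value is None:
            return None
        if value not in list(self._enum_cls):
            # Raised during flush; SQLAlchemy wraps it in StatementError
            raise ValueError(f"invalid {self._enum_cls.__name__}: {value!r}")
        return str(value)


class Base(DeclarativeBase):
    pass


class App(Base):
    __tablename__ = "apps"
    id: Mapped[int] = mapped_column(primary_key=True)
    mode: Mapped[str] = mapped_column(EnumText(AppMode))


engine = sa.create_engine("sqlite://")
Base.metadata.create_all(engine)
with Session(engine) as session:
    session.add(App(id=1, mode="invalid_mode"))  # assignment alone does not raise
    try:
        session.commit()  # flush binds parameters -> StatementError
    except sa.exc.StatementError as exc:
        assert isinstance(exc.orig, ValueError)

This is why the updated tests accept either ValueError or sa.exc.StatementError: the point at which validation fires depends on when autoflush runs.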
+ assert result == {} except ValueError: # It's also acceptable for the service to raise a ValueError for invalid types pass @@ -1090,20 +1090,19 @@ class TestWorkflowService: This test ensures that the service correctly handles feature validation for unsupported app modes, preventing invalid operations. + With EnumText, invalid values are rejected at the DB level during flush, + raising StatementError wrapping ValueError. """ # Arrange fake = Faker() app = self._create_test_app(db_session_with_containers, fake) app.mode = "invalid_mode" # Invalid mode - db_session_with_containers.commit() + # Act & Assert - EnumText validation rejects invalid values at DB flush + import sqlalchemy as sa - workflow_service = WorkflowService() - features = {"test": "value"} - - # Act & Assert - with pytest.raises(ValueError, match="Invalid app mode: invalid_mode"): - workflow_service.validate_features_structure(app_model=app, features=features) + with pytest.raises((ValueError, sa.exc.StatementError)): + db_session_with_containers.commit() def test_update_workflow_success(self, db_session_with_containers: Session): """ @@ -1429,14 +1428,14 @@ class TestWorkflowService: import uuid from datetime import datetime - from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus + from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.graph_events import NodeRunSucceededEvent from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node # Create mock node mock_node = MagicMock(spec=Node) - mock_node.node_type = NodeType.START + mock_node.node_type = BuiltinNodeTypes.START mock_node.title = "Test Node" mock_node.error_strategy = None @@ -1453,7 +1452,7 @@ class TestWorkflowService: mock_event = NodeRunSucceededEvent( id=str(uuid.uuid4()), node_id=node_id, - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, node_run_result=mock_result, start_at=datetime.now(), ) @@ -1474,9 +1473,9 @@ class TestWorkflowService: # Assert assert result is not None assert result.node_id == node_id - from dify_graph.enums import NodeType + from dify_graph.enums import BuiltinNodeTypes - assert result.node_type == NodeType.START # Should match the mock node type + assert result.node_type == BuiltinNodeTypes.START # Should match the mock node type assert result.title == "Test Node" # Import the enum for comparison from dify_graph.enums import WorkflowNodeExecutionStatus @@ -1504,14 +1503,14 @@ class TestWorkflowService: import uuid from datetime import datetime - from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus + from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.graph_events import NodeRunFailedEvent from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node # Create mock node mock_node = MagicMock(spec=Node) - mock_node.node_type = NodeType.LLM + mock_node.node_type = BuiltinNodeTypes.LLM mock_node.title = "Test Node" mock_node.error_strategy = None @@ -1526,7 +1525,7 @@ class TestWorkflowService: mock_event = NodeRunFailedEvent( id=str(uuid.uuid4()), node_id=node_id, - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_run_result=mock_result, error="Test error message", start_at=datetime.now(), @@ -1573,14 +1572,14 @@ class TestWorkflowService: import uuid from datetime import datetime - from dify_graph.enums import ErrorStrategy, NodeType, WorkflowNodeExecutionStatus + from dify_graph.enums import BuiltinNodeTypes, ErrorStrategy, 
WorkflowNodeExecutionStatus from dify_graph.graph_events import NodeRunFailedEvent from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node # Create mock node with continue_on_error mock_node = MagicMock(spec=Node) - mock_node.node_type = NodeType.TOOL + mock_node.node_type = BuiltinNodeTypes.TOOL mock_node.title = "Test Node" mock_node.error_strategy = ErrorStrategy.DEFAULT_VALUE mock_node.default_value_dict = {"default_output": "default_value"} @@ -1596,7 +1595,7 @@ class TestWorkflowService: mock_event = NodeRunFailedEvent( id=str(uuid.uuid4()), node_id=node_id, - node_type=NodeType.TOOL, + node_type=BuiltinNodeTypes.TOOL, node_run_result=mock_result, error="Test error message", start_at=datetime.now(), diff --git a/api/tests/test_containers_integration_tests/tasks/test_clean_dataset_task.py b/api/tests/test_containers_integration_tests/tasks/test_clean_dataset_task.py index 8eb881258a..41d9fc8a29 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_clean_dataset_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_clean_dataset_task.py @@ -110,7 +110,7 @@ class TestCleanDatasetTask: tenant = Tenant( name=fake.company(), plan="basic", - status="active", + status="normal", ) db_session_with_containers.add(tenant) diff --git a/api/tests/test_containers_integration_tests/tasks/test_delete_segment_from_index_task.py b/api/tests/test_containers_integration_tests/tasks/test_delete_segment_from_index_task.py index bc0ed3bd2b..69ed5b632d 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_delete_segment_from_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_delete_segment_from_index_task.py @@ -48,7 +48,7 @@ class TestDeleteSegmentFromIndexTask: Tenant: Created test tenant instance """ fake = fake or Faker() - tenant = Tenant(name=f"Test Tenant {fake.company()}", plan="basic", status="active") + tenant = Tenant(name=f"Test Tenant {fake.company()}", plan="basic", status="normal") tenant.id = fake.uuid4() tenant.created_at = fake.date_time_this_year() tenant.updated_at = tenant.created_at diff --git a/api/tests/test_containers_integration_tests/tasks/test_disable_segments_from_index_task.py b/api/tests/test_containers_integration_tests/tasks/test_disable_segments_from_index_task.py index 8f47b48ae2..6f7d2c28b5 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_disable_segments_from_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_disable_segments_from_index_task.py @@ -65,7 +65,7 @@ class TestDisableSegmentsFromIndexTask: tenant = Tenant( name=f"Test Tenant {fake.company()}", plan="basic", - status="active", + status="normal", ) tenant.id = account.tenant_id tenant.created_at = fake.date_time_this_year() diff --git a/api/tests/test_containers_integration_tests/tasks/test_mail_email_code_login_task.py b/api/tests/test_containers_integration_tests/tasks/test_mail_email_code_login_task.py index 3cdec70df7..c0ddc27286 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_mail_email_code_login_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_mail_email_code_login_task.py @@ -118,7 +118,7 @@ class TestSendEmailCodeLoginMailTask: tenant = Tenant( name=fake.company(), plan="basic", - status="active", + status="normal", ) db_session_with_containers.add(tenant) diff --git a/api/tests/test_containers_integration_tests/trigger/test_trigger_e2e.py b/api/tests/test_containers_integration_tests/trigger/test_trigger_e2e.py index 
7bfc6c9e13..4ea8d8c1c7 100644 --- a/api/tests/test_containers_integration_tests/trigger/test_trigger_e2e.py +++ b/api/tests/test_containers_integration_tests/trigger/test_trigger_e2e.py @@ -14,11 +14,16 @@ from sqlalchemy.orm import Session from configs import dify_config from core.plugin.entities.request import TriggerInvokeEventResponse +from core.trigger.constants import ( + TRIGGER_PLUGIN_NODE_TYPE, + TRIGGER_SCHEDULE_NODE_TYPE, + TRIGGER_WEBHOOK_NODE_TYPE, +) from core.trigger.debug import event_selectors from core.trigger.debug.event_bus import TriggerDebugEventBus from core.trigger.debug.event_selectors import PluginTriggerDebugEventPoller, WebhookTriggerDebugEventPoller from core.trigger.debug.events import PluginTriggerDebugEvent, build_plugin_pool_key -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from libs.datetime_utils import naive_utc_now from models.account import Account, Tenant from models.enums import AppTriggerStatus, AppTriggerType, CreatorUserRole, WorkflowTriggerStatus @@ -48,10 +53,10 @@ WEBHOOK_ID_DEBUG = "whdebug1234567890123456" TEST_TRIGGER_URL = "https://trigger.example.com/base" -def _build_workflow_graph(root_node_id: str, trigger_type: NodeType) -> str: +def _build_workflow_graph(root_node_id: str, trigger_type: str) -> str: """Build a minimal workflow graph JSON for testing.""" - node_data: dict[str, Any] = {"type": trigger_type.value, "title": "trigger"} - if trigger_type == NodeType.TRIGGER_WEBHOOK: + node_data: dict[str, Any] = {"type": trigger_type, "title": "trigger"} + if trigger_type == TRIGGER_WEBHOOK_NODE_TYPE: node_data.update( { "method": "POST", @@ -64,7 +69,7 @@ def _build_workflow_graph(root_node_id: str, trigger_type: NodeType) -> str: graph = { "nodes": [ {"id": root_node_id, "data": node_data}, - {"id": "answer-1", "data": {"type": NodeType.ANSWER.value, "title": "answer"}}, + {"id": "answer-1", "data": {"type": BuiltinNodeTypes.ANSWER, "title": "answer"}}, ], "edges": [{"source": root_node_id, "target": "answer-1", "sourceHandle": "success"}], } @@ -82,8 +87,8 @@ def test_publish_blocks_start_and_trigger_coexistence( graph = { "nodes": [ - {"id": "start", "data": {"type": NodeType.START.value}}, - {"id": "trig", "data": {"type": NodeType.TRIGGER_WEBHOOK.value}}, + {"id": "start", "data": {"type": BuiltinNodeTypes.START}}, + {"id": "trig", "data": {"type": TRIGGER_WEBHOOK_NODE_TYPE}}, ], "edges": [], } @@ -152,7 +157,7 @@ def test_webhook_trigger_creates_trigger_log( tenant, account = tenant_and_account webhook_node_id = "webhook-node" - graph_json = _build_workflow_graph(webhook_node_id, NodeType.TRIGGER_WEBHOOK) + graph_json = _build_workflow_graph(webhook_node_id, TRIGGER_WEBHOOK_NODE_TYPE) published_workflow = Workflow.new( tenant_id=tenant.id, app_id=app_model.id, @@ -282,7 +287,7 @@ def test_schedule_visual_debug_poll_generates_event(monkeypatch: pytest.MonkeyPa node_config = { "id": "schedule-visual", "data": { - "type": NodeType.TRIGGER_SCHEDULE.value, + "type": TRIGGER_SCHEDULE_NODE_TYPE, "mode": "visual", "frequency": "daily", "visual_config": {"time": "3:00 PM"}, @@ -372,7 +377,7 @@ def test_webhook_debug_dispatches_event( """Webhook single-step debug should dispatch debug event and be pollable.""" tenant, account = tenant_and_account webhook_node_id = "webhook-debug-node" - graph_json = _build_workflow_graph(webhook_node_id, NodeType.TRIGGER_WEBHOOK) + graph_json = _build_workflow_graph(webhook_node_id, TRIGGER_WEBHOOK_NODE_TYPE) draft_workflow = Workflow.new( tenant_id=tenant.id, 
app_id=app_model.id, @@ -443,7 +448,7 @@ def test_plugin_single_step_debug_flow( node_config = { "id": node_id, "data": { - "type": NodeType.TRIGGER_PLUGIN.value, + "type": TRIGGER_PLUGIN_NODE_TYPE, "title": "plugin", "plugin_id": "plugin-1", "plugin_unique_identifier": "plugin-1", @@ -519,14 +524,14 @@ def test_schedule_trigger_creates_trigger_log( { "id": schedule_node_id, "data": { - "type": NodeType.TRIGGER_SCHEDULE.value, + "type": TRIGGER_SCHEDULE_NODE_TYPE, "title": "schedule", "mode": "cron", "cron_expression": "0 9 * * *", "timezone": "UTC", }, }, - {"id": "answer-1", "data": {"type": NodeType.ANSWER.value, "title": "answer"}}, + {"id": "answer-1", "data": {"type": BuiltinNodeTypes.ANSWER, "title": "answer"}}, ], "edges": [{"source": schedule_node_id, "target": "answer-1", "sourceHandle": "success"}], } @@ -639,7 +644,7 @@ def test_schedule_visual_cron_conversion( node_config: dict[str, Any] = { "id": "schedule-node", "data": { - "type": NodeType.TRIGGER_SCHEDULE.value, + "type": TRIGGER_SCHEDULE_NODE_TYPE, "mode": mode, "timezone": "UTC", }, @@ -680,7 +685,7 @@ def test_plugin_trigger_full_chain_with_db_verification( { "id": plugin_node_id, "data": { - "type": NodeType.TRIGGER_PLUGIN.value, + "type": TRIGGER_PLUGIN_NODE_TYPE, "title": "plugin", "plugin_id": "test-plugin", "plugin_unique_identifier": "test-plugin", @@ -690,7 +695,7 @@ def test_plugin_trigger_full_chain_with_db_verification( "parameters": {}, }, }, - {"id": "answer-1", "data": {"type": NodeType.ANSWER.value, "title": "answer"}}, + {"id": "answer-1", "data": {"type": BuiltinNodeTypes.ANSWER, "title": "answer"}}, ], "edges": [{"source": plugin_node_id, "target": "answer-1", "sourceHandle": "success"}], } @@ -826,7 +831,7 @@ def test_plugin_debug_via_http_endpoint( node_config = { "id": node_id, "data": { - "type": NodeType.TRIGGER_PLUGIN.value, + "type": TRIGGER_PLUGIN_NODE_TYPE, "title": "plugin-debug", "plugin_id": "debug-plugin", "plugin_unique_identifier": "debug-plugin", diff --git a/api/tests/unit_tests/controllers/console/explore/test_message.py b/api/tests/unit_tests/controllers/console/explore/test_message.py index c3a6522e6d..6b5c304884 100644 --- a/api/tests/unit_tests/controllers/console/explore/test_message.py +++ b/api/tests/unit_tests/controllers/console/explore/test_message.py @@ -48,7 +48,7 @@ def make_message(): msg.query = "hello" msg.re_sign_file_url_answer = "" msg.user_feedback = MagicMock(rating=None) - msg.status = "success" + msg.status = "normal" msg.error = None return msg diff --git a/api/tests/unit_tests/controllers/console/workspace/test_plugin.py b/api/tests/unit_tests/controllers/console/workspace/test_plugin.py index f6db55db5b..eb19243225 100644 --- a/api/tests/unit_tests/controllers/console/workspace/test_plugin.py +++ b/api/tests/unit_tests/controllers/console/workspace/test_plugin.py @@ -200,10 +200,13 @@ class TestPluginUploadFromPkgApi: app.test_request_context("/", data=data, content_type="multipart/form-data"), patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), patch("controllers.console.workspace.plugin.dify_config.PLUGIN_MAX_PACKAGE_SIZE", 0), + patch("controllers.console.workspace.plugin.PluginService.upload_pkg") as upload_pkg_mock, ): with pytest.raises(ValueError): method(api) + upload_pkg_mock.assert_not_called() + class TestPluginInstallFromPkgApi: def test_install_from_pkg(self, app): @@ -444,10 +447,13 @@ class TestPluginUploadFromBundleApi: ), patch("controllers.console.workspace.plugin.current_account_with_tenant", 
return_value=(None, "t1")), patch("controllers.console.workspace.plugin.dify_config.PLUGIN_MAX_BUNDLE_SIZE", 0), + patch("controllers.console.workspace.plugin.PluginService.upload_bundle") as upload_bundle_mock, ): with pytest.raises(ValueError): method(api) + upload_bundle_mock.assert_not_called() + class TestPluginInstallFromGithubApi: def test_success(self, app): diff --git a/api/tests/unit_tests/controllers/web/test_message_list.py b/api/tests/unit_tests/controllers/web/test_message_list.py index 1c096bfbcf..2bb425cdba 100644 --- a/api/tests/unit_tests/controllers/web/test_message_list.py +++ b/api/tests/unit_tests/controllers/web/test_message_list.py @@ -137,7 +137,7 @@ def test_message_list_mapping(app: Flask) -> None: {"id": "file-dict", "filename": "a.txt", "type": "file", "transfer_method": "local"}, message_file_obj, ], - status="success", + status="normal", error=None, message_metadata_dict={"meta": "value"}, extra_contents=[ diff --git a/api/tests/unit_tests/core/agent/test_cot_agent_runner.py b/api/tests/unit_tests/core/agent/test_cot_agent_runner.py index 9518c61202..f6d1edbaf0 100644 --- a/api/tests/unit_tests/core/agent/test_cot_agent_runner.py +++ b/api/tests/unit_tests/core/agent/test_cot_agent_runner.py @@ -5,8 +5,8 @@ import pytest from core.agent.cot_agent_runner import CotAgentRunner from core.agent.entities import AgentScratchpadUnit +from core.agent.errors import AgentMaxIterationError from dify_graph.model_runtime.entities.llm_entities import LLMUsage -from dify_graph.nodes.agent.exc import AgentMaxIterationError class DummyRunner(CotAgentRunner): diff --git a/api/tests/unit_tests/core/agent/test_fc_agent_runner.py b/api/tests/unit_tests/core/agent/test_fc_agent_runner.py index 8843a8d505..299c9b31d2 100644 --- a/api/tests/unit_tests/core/agent/test_fc_agent_runner.py +++ b/api/tests/unit_tests/core/agent/test_fc_agent_runner.py @@ -4,6 +4,7 @@ from unittest.mock import MagicMock import pytest +from core.agent.errors import AgentMaxIterationError from core.agent.fc_agent_runner import FunctionCallAgentRunner from core.app.apps.base_app_queue_manager import PublishFrom from core.app.entities.queue_entities import QueueMessageFileEvent @@ -14,7 +15,6 @@ from dify_graph.model_runtime.entities.message_entities import ( TextPromptMessageContent, UserPromptMessage, ) -from dify_graph.nodes.agent.exc import AgentMaxIterationError # ============================== # Dummy Helper Classes diff --git a/api/tests/unit_tests/core/app/apps/advanced_chat/test_generate_task_pipeline_core.py b/api/tests/unit_tests/core/app/apps/advanced_chat/test_generate_task_pipeline_core.py index 67f87710a1..0a244b3fea 100644 --- a/api/tests/unit_tests/core/app/apps/advanced_chat/test_generate_task_pipeline_core.py +++ b/api/tests/unit_tests/core/app/apps/advanced_chat/test_generate_task_pipeline_core.py @@ -42,7 +42,7 @@ from core.app.entities.task_entities import ( PingStreamResponse, ) from core.base.tts.app_generator_tts_publisher import AudioTrunk -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from dify_graph.runtime import GraphRuntimeState, VariablePool from dify_graph.system_variable import SystemVariable from models.enums import MessageStatus @@ -226,7 +226,7 @@ class TestAdvancedChatGenerateTaskPipeline: pipeline._save_output_for_event = lambda event, node_execution_id: None event = SimpleNamespace( - node_type=NodeType.ANSWER, + node_type=BuiltinNodeTypes.ANSWER, outputs={"k": "v"}, node_execution_id="exec", node_id="node", @@ -254,7 +254,7 @@ 
class TestAdvancedChatGenerateTaskPipeline: iter_start = QueueIterationStartEvent( node_execution_id="exec", node_id="node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_title="LLM", start_at=datetime.utcnow(), node_run_index=1, @@ -263,14 +263,14 @@ class TestAdvancedChatGenerateTaskPipeline: index=1, node_execution_id="exec", node_id="node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_title="LLM", node_run_index=1, ) iter_done = QueueIterationCompletedEvent( node_execution_id="exec", node_id="node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_title="LLM", start_at=datetime.utcnow(), node_run_index=1, @@ -278,7 +278,7 @@ class TestAdvancedChatGenerateTaskPipeline: loop_start = QueueLoopStartEvent( node_execution_id="exec", node_id="node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_title="LLM", start_at=datetime.utcnow(), node_run_index=1, @@ -287,14 +287,14 @@ class TestAdvancedChatGenerateTaskPipeline: index=1, node_execution_id="exec", node_id="node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_title="LLM", node_run_index=1, ) loop_done = QueueLoopCompletedEvent( node_execution_id="exec", node_id="node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_title="LLM", start_at=datetime.utcnow(), node_run_index=1, @@ -358,7 +358,7 @@ class TestAdvancedChatGenerateTaskPipeline: failed_event = QueueNodeFailedEvent( node_execution_id="exec", node_id="node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, start_at=datetime.utcnow(), inputs={}, outputs={}, @@ -368,7 +368,7 @@ class TestAdvancedChatGenerateTaskPipeline: exc_event = QueueNodeExceptionEvent( node_execution_id="exec", node_id="node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, start_at=datetime.utcnow(), inputs={}, outputs={}, @@ -462,7 +462,7 @@ class TestAdvancedChatGenerateTaskPipeline: filled_event = QueueHumanInputFormFilledEvent( node_execution_id="exec", node_id="node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_title="title", rendered_content="content", action_id="action", @@ -470,7 +470,7 @@ class TestAdvancedChatGenerateTaskPipeline: ) timeout_event = QueueHumanInputFormTimeoutEvent( node_id="node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_title="title", expiration_time=datetime.utcnow(), ) @@ -589,7 +589,7 @@ class TestAdvancedChatGenerateTaskPipeline: event = QueueNodeExceptionEvent( node_execution_id="exec", node_id="node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, start_at=datetime.utcnow(), inputs={}, outputs={}, diff --git a/api/tests/unit_tests/core/app/apps/common/test_workflow_response_converter_truncation.py b/api/tests/unit_tests/core/app/apps/common/test_workflow_response_converter_truncation.py index 69d476bd13..aba7dfff8c 100644 --- a/api/tests/unit_tests/core/app/apps/common/test_workflow_response_converter_truncation.py +++ b/api/tests/unit_tests/core/app/apps/common/test_workflow_response_converter_truncation.py @@ -24,7 +24,7 @@ from core.app.entities.queue_entities import ( QueueNodeSucceededEvent, ) from dify_graph.entities.workflow_start_reason import WorkflowStartReason -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from dify_graph.system_variable import SystemVariable from libs.datetime_utils import naive_utc_now from models import Account @@ -66,7 +66,7 @@ class TestWorkflowResponseConverter: node_execution_id=node_execution_id or str(uuid.uuid4()), 
node_id="test-node-id", node_title="Test Node", - node_type=NodeType.CODE, + node_type=BuiltinNodeTypes.CODE, start_at=naive_utc_now(), in_iteration_id=None, in_loop_id=None, @@ -83,7 +83,7 @@ class TestWorkflowResponseConverter: """Create a QueueNodeSucceededEvent for testing.""" return QueueNodeSucceededEvent( node_id="test-node-id", - node_type=NodeType.CODE, + node_type=BuiltinNodeTypes.CODE, node_execution_id=node_execution_id, start_at=naive_utc_now(), in_iteration_id=None, @@ -108,7 +108,7 @@ class TestWorkflowResponseConverter: error="oops", retry_index=1, node_id="test-node-id", - node_type=NodeType.CODE, + node_type=BuiltinNodeTypes.CODE, node_title="test code", provider_type="built-in", provider_id="code", @@ -319,7 +319,7 @@ class TestWorkflowResponseConverter: iteration_event = QueueNodeSucceededEvent( node_id="iteration-node", - node_type=NodeType.ITERATION, + node_type=BuiltinNodeTypes.ITERATION, node_execution_id=str(uuid.uuid4()), start_at=naive_utc_now(), in_iteration_id=None, @@ -336,7 +336,7 @@ class TestWorkflowResponseConverter: ) assert response is None - loop_event = iteration_event.model_copy(update={"node_type": NodeType.LOOP}) + loop_event = iteration_event.model_copy(update={"node_type": BuiltinNodeTypes.LOOP}) response = converter.workflow_node_finish_to_stream_response( event=loop_event, task_id="test-task-id", @@ -478,7 +478,7 @@ class TestWorkflowResponseConverterServiceApiTruncation: event = QueueNodeSucceededEvent( node_execution_id="test_node_exec_id", node_id="test_node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, start_at=naive_utc_now(), inputs=large_value, process_data=large_value, @@ -523,7 +523,7 @@ class TestWorkflowResponseConverterServiceApiTruncation: event = QueueNodeSucceededEvent( node_execution_id="test_node_exec_id", node_id="test_node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, start_at=naive_utc_now(), inputs=large_value, process_data=large_value, @@ -562,7 +562,7 @@ class TestWorkflowResponseConverterServiceApiTruncation: event = QueueNodeSucceededEvent( node_execution_id="test_node_exec_id", node_id="test_node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, start_at=naive_utc_now(), inputs=large_value, process_data=large_value, @@ -600,7 +600,7 @@ class TestWorkflowResponseConverterServiceApiTruncation: return QueueNodeSucceededEvent( node_execution_id="test_node_exec_id", node_id="test_node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, start_at=naive_utc_now(), inputs=inputs, process_data=process_data, @@ -614,7 +614,7 @@ class TestWorkflowResponseConverterServiceApiTruncation: return QueueNodeFailedEvent( node_execution_id="test_node_exec_id", node_id="test_node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, start_at=naive_utc_now(), inputs=inputs, process_data=process_data, @@ -628,7 +628,7 @@ class TestWorkflowResponseConverterServiceApiTruncation: return QueueNodeExceptionEvent( node_execution_id="test_node_exec_id", node_id="test_node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, start_at=naive_utc_now(), inputs=inputs, process_data=process_data, @@ -690,7 +690,7 @@ class TestWorkflowResponseConverterServiceApiTruncation: start_event = QueueNodeStartedEvent( node_execution_id="test_node_exec_id", node_id="test_node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_title="Test Node", node_run_index=1, start_at=naive_utc_now(), @@ -706,7 +706,7 @@ class TestWorkflowResponseConverterServiceApiTruncation: event = 
QueueNodeRetryEvent( node_execution_id="test_node_exec_id", node_id="test_node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_title="Test Node", node_run_index=1, start_at=naive_utc_now(), @@ -748,7 +748,7 @@ class TestWorkflowResponseConverterServiceApiTruncation: start_event = QueueIterationStartEvent( node_execution_id="test_iter_exec_id", node_id="test_iteration", - node_type=NodeType.ITERATION, + node_type=BuiltinNodeTypes.ITERATION, node_title="Test Iteration", node_run_index=0, start_at=naive_utc_now(), @@ -776,7 +776,7 @@ class TestWorkflowResponseConverterServiceApiTruncation: start_event = QueueLoopStartEvent( node_execution_id="test_loop_exec_id", node_id="test_loop", - node_type=NodeType.LOOP, + node_type=BuiltinNodeTypes.LOOP, node_title="Test Loop", start_at=naive_utc_now(), inputs=large_inputs, @@ -806,7 +806,7 @@ class TestWorkflowResponseConverterServiceApiTruncation: event = QueueNodeSucceededEvent( node_execution_id="test_node_exec_id", node_id="test_node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, start_at=naive_utc_now(), inputs=large_inputs, process_data=large_process_data, diff --git a/api/tests/unit_tests/core/app/apps/test_base_app_generator.py b/api/tests/unit_tests/core/app/apps/test_base_app_generator.py index 8f1baaa1e4..a3ced02394 100644 --- a/api/tests/unit_tests/core/app/apps/test_base_app_generator.py +++ b/api/tests/unit_tests/core/app/apps/test_base_app_generator.py @@ -479,7 +479,7 @@ class TestBaseAppGeneratorExtras: def test_get_draft_var_saver_factory_debugger(self): from core.app.entities.app_invoke_entities import InvokeFrom - from dify_graph.enums import NodeType + from dify_graph.enums import BuiltinNodeTypes from models import Account base_app_generator = BaseAppGenerator() @@ -492,7 +492,7 @@ class TestBaseAppGeneratorExtras: session=MagicMock(), app_id="app-id", node_id="node-id", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, node_execution_id="node-exec-id", ) diff --git a/api/tests/unit_tests/core/app/apps/test_pause_resume.py b/api/tests/unit_tests/core/app/apps/test_pause_resume.py index 4f67d9cb56..2f73a8cda8 100644 --- a/api/tests/unit_tests/core/app/apps/test_pause_resume.py +++ b/api/tests/unit_tests/core/app/apps/test_pause_resume.py @@ -12,7 +12,7 @@ from dify_graph.entities.base_node_data import BaseNodeData, RetryConfig from dify_graph.entities.graph_config import NodeConfigDict, NodeConfigDictAdapter from dify_graph.entities.pause_reason import SchedulingPause from dify_graph.entities.workflow_start_reason import WorkflowStartReason -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, NodeType, WorkflowNodeExecutionStatus from dify_graph.graph import Graph from dify_graph.graph_engine import GraphEngine from dify_graph.graph_engine.command_channels.in_memory_channel import InMemoryChannel @@ -44,12 +44,12 @@ if "core.ops.ops_trace_manager" not in sys.modules: class _StubToolNodeData(BaseNodeData): - type: NodeType = NodeType.TOOL + type: NodeType = BuiltinNodeTypes.TOOL pause_on: bool = False class _StubToolNode(Node[_StubToolNodeData]): - node_type = NodeType.TOOL + node_type = BuiltinNodeTypes.TOOL @classmethod def version(cls) -> str: @@ -94,7 +94,7 @@ def _patch_tool_node(mocker): def _patched_create_node(self, node_config: dict[str, object] | NodeConfigDict) -> Node: typed_node_config = NodeConfigDictAdapter.validate_python(node_config) node_data = typed_node_config["data"] - if node_data.type == 
NodeType.TOOL: + if node_data.type == BuiltinNodeTypes.TOOL: return _StubToolNode( id=str(typed_node_config["id"]), config=typed_node_config, @@ -108,7 +108,7 @@ def _patch_tool_node(mocker): def _node_data(node_type: NodeType, data: BaseNodeData) -> dict[str, object]: node_data = data.model_dump() - node_data["type"] = node_type.value + node_data["type"] = str(node_type) return node_data @@ -124,11 +124,11 @@ def _build_graph_config(*, pause_on: str | None) -> dict[str, object]: ) nodes = [ - {"id": "start", "data": _node_data(NodeType.START, start_data)}, - {"id": "tool_a", "data": _node_data(NodeType.TOOL, tool_data_a)}, - {"id": "tool_b", "data": _node_data(NodeType.TOOL, tool_data_b)}, - {"id": "tool_c", "data": _node_data(NodeType.TOOL, tool_data_c)}, - {"id": "end", "data": _node_data(NodeType.END, end_data)}, + {"id": "start", "data": _node_data(BuiltinNodeTypes.START, start_data)}, + {"id": "tool_a", "data": _node_data(BuiltinNodeTypes.TOOL, tool_data_a)}, + {"id": "tool_b", "data": _node_data(BuiltinNodeTypes.TOOL, tool_data_b)}, + {"id": "tool_c", "data": _node_data(BuiltinNodeTypes.TOOL, tool_data_c)}, + {"id": "end", "data": _node_data(BuiltinNodeTypes.END, end_data)}, ] edges = [ {"source": "start", "target": "tool_a"}, @@ -157,7 +157,7 @@ def _build_graph(runtime_state: GraphRuntimeState, *, pause_on: str | None) -> G graph_runtime_state=runtime_state, ) - return Graph.init(graph_config=graph_config, node_factory=node_factory) + return Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") def _build_runtime_state(run_id: str) -> GraphRuntimeState: diff --git a/api/tests/unit_tests/core/app/apps/test_workflow_app_runner_core.py b/api/tests/unit_tests/core/app/apps/test_workflow_app_runner_core.py index d8afd3b10a..3f1dd14569 100644 --- a/api/tests/unit_tests/core/app/apps/test_workflow_app_runner_core.py +++ b/api/tests/unit_tests/core/app/apps/test_workflow_app_runner_core.py @@ -17,7 +17,7 @@ from core.app.entities.queue_entities import ( QueueWorkflowSucceededEvent, ) from dify_graph.entities.pause_reason import HumanInputRequired -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from dify_graph.graph_events import ( GraphRunPausedEvent, GraphRunStartedEvent, @@ -105,10 +105,10 @@ class TestWorkflowBasedAppRunner: from core.app.apps import workflow_app_runner - monkeypatch.setitem( - workflow_app_runner.NODE_TYPE_CLASSES_MAPPING, - NodeType.START, - {"1": _NodeCls}, + monkeypatch.setattr( + workflow_app_runner, + "resolve_workflow_node_class", + lambda **_kwargs: _NodeCls, ) monkeypatch.setattr( "core.app.apps.workflow_app_runner.load_into_variable_pool", @@ -193,7 +193,7 @@ class TestWorkflowBasedAppRunner: NodeRunStartedEvent( id="exec", node_id="node", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, node_title="Start", start_at=datetime.utcnow(), ), @@ -203,7 +203,7 @@ class TestWorkflowBasedAppRunner: NodeRunStreamChunkEvent( id="exec", node_id="node", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, selector=["node", "text"], chunk="hi", is_final=False, @@ -214,7 +214,7 @@ class TestWorkflowBasedAppRunner: NodeRunAgentLogEvent( id="exec", node_id="node", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, message_id="msg", label="label", node_execution_id="exec", @@ -230,7 +230,7 @@ class TestWorkflowBasedAppRunner: NodeRunIterationSucceededEvent( id="exec", node_id="node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_title="Iter", 
start_at=datetime.utcnow(), inputs={}, @@ -244,7 +244,7 @@ class TestWorkflowBasedAppRunner: NodeRunLoopFailedEvent( id="exec", node_id="node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_title="Loop", start_at=datetime.utcnow(), inputs={}, diff --git a/api/tests/unit_tests/core/app/apps/workflow/test_generate_task_pipeline_core.py b/api/tests/unit_tests/core/app/apps/workflow/test_generate_task_pipeline_core.py index b37f7a8120..f35710d207 100644 --- a/api/tests/unit_tests/core/app/apps/workflow/test_generate_task_pipeline_core.py +++ b/api/tests/unit_tests/core/app/apps/workflow/test_generate_task_pipeline_core.py @@ -44,7 +44,7 @@ from core.app.entities.task_entities import ( WorkflowStartStreamResponse, ) from core.base.tts.app_generator_tts_publisher import AudioTrunk -from dify_graph.enums import NodeType, WorkflowExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, WorkflowExecutionStatus from dify_graph.runtime import GraphRuntimeState, VariablePool from dify_graph.system_variable import SystemVariable from models.enums import CreatorUserRole @@ -190,7 +190,7 @@ class TestWorkflowGenerateTaskPipeline: event = QueueNodeSucceededEvent( node_execution_id="exec", node_id="node", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, start_at=datetime.utcnow(), inputs={}, outputs={}, @@ -243,7 +243,7 @@ class TestWorkflowGenerateTaskPipeline: event = QueueNodeFailedEvent( node_execution_id="exec", node_id="node", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, start_at=datetime.utcnow(), inputs={}, outputs={}, @@ -300,7 +300,7 @@ class TestWorkflowGenerateTaskPipeline: iter_start = QueueIterationStartEvent( node_execution_id="exec", node_id="node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_title="LLM", start_at=datetime.utcnow(), node_run_index=1, @@ -309,14 +309,14 @@ class TestWorkflowGenerateTaskPipeline: index=1, node_execution_id="exec", node_id="node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_title="LLM", node_run_index=1, ) iter_done = QueueIterationCompletedEvent( node_execution_id="exec", node_id="node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_title="LLM", start_at=datetime.utcnow(), node_run_index=1, @@ -324,7 +324,7 @@ class TestWorkflowGenerateTaskPipeline: loop_start = QueueLoopStartEvent( node_execution_id="exec", node_id="node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_title="LLM", start_at=datetime.utcnow(), node_run_index=1, @@ -333,14 +333,14 @@ class TestWorkflowGenerateTaskPipeline: index=1, node_execution_id="exec", node_id="node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_title="LLM", node_run_index=1, ) loop_done = QueueLoopCompletedEvent( node_execution_id="exec", node_id="node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_title="LLM", start_at=datetime.utcnow(), node_run_index=1, @@ -348,7 +348,7 @@ class TestWorkflowGenerateTaskPipeline: filled_event = QueueHumanInputFormFilledEvent( node_execution_id="exec", node_id="node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_title="title", rendered_content="content", action_id="action", @@ -356,7 +356,7 @@ class TestWorkflowGenerateTaskPipeline: ) timeout_event = QueueHumanInputFormTimeoutEvent( node_id="node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_title="title", expiration_time=datetime.utcnow(), ) @@ -645,7 +645,7 @@ class TestWorkflowGenerateTaskPipeline: 
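The mechanical NodeType to BuiltinNodeTypes rename in these hunks also drops the explicit .value accesses (BuiltinNodeTypes.ANSWER where NodeType.ANSWER.value was used, and str(node_type) when building node configs), which only works if the new enum is string-valued. A small sketch of that property, using an illustrative subset of members rather than the real definition in dify_graph.enums:

import json
from enum import StrEnum


class BuiltinNodeTypes(StrEnum):
    # Illustrative subset; the real enum defines the full node-type catalog
    START = "start"
    ANSWER = "answer"
    LLM = "llm"


# StrEnum members are str instances, so they compare equal to plain strings,
# serialize to their value, and need no .value when building graph configs.
node_data = {"type": BuiltinNodeTypes.ANSWER, "title": "answer"}
assert node_data["type"] == "answer"
assert str(BuiltinNodeTypes.START) == "start"
assert json.dumps({"type": BuiltinNodeTypes.LLM}) == '{"type": "llm"}'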
node_execution_id="exec", node_id="node", node_title="title", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_run_index=1, start_at=datetime.utcnow(), provider_type="provider", @@ -657,7 +657,7 @@ class TestWorkflowGenerateTaskPipeline: node_execution_id="exec", node_id="node", node_title="title", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_run_index=1, start_at=datetime.utcnow(), provider_type="provider", @@ -683,7 +683,7 @@ class TestWorkflowGenerateTaskPipeline: event = QueueNodeExceptionEvent( node_execution_id="exec-id", node_id="node", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, start_at=datetime.utcnow(), inputs={}, outputs={}, @@ -855,7 +855,7 @@ class TestWorkflowGenerateTaskPipeline: event = QueueNodeSucceededEvent( node_execution_id="exec-id", node_id="node-id", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, in_loop_id="loop-id", start_at=datetime.utcnow(), process_data={"k": "v"}, diff --git a/api/tests/unit_tests/core/app/layers/test_conversation_variable_persist_layer.py b/api/tests/unit_tests/core/app/layers/test_conversation_variable_persist_layer.py index 7d0e1d25f6..bdc889d941 100644 --- a/api/tests/unit_tests/core/app/layers/test_conversation_variable_persist_layer.py +++ b/api/tests/unit_tests/core/app/layers/test_conversation_variable_persist_layer.py @@ -4,7 +4,7 @@ from unittest.mock import Mock from core.app.layers.conversation_variable_persist_layer import ConversationVariablePersistenceLayer from dify_graph.constants import CONVERSATION_VARIABLE_NODE_ID -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, NodeType, WorkflowNodeExecutionStatus from dify_graph.graph_engine.protocols.command_channel import CommandChannel from dify_graph.graph_events.node import NodeRunSucceededEvent from dify_graph.node_events import NodeRunResult @@ -78,7 +78,7 @@ def test_persists_conversation_variables_from_assigner_output(): layer = ConversationVariablePersistenceLayer(updater) layer.initialize(_build_graph_runtime_state(variable_pool, conversation_id), Mock(spec=CommandChannel)) - event = _build_node_run_succeeded_event(node_type=NodeType.VARIABLE_ASSIGNER, process_data=process_data) + event = _build_node_run_succeeded_event(node_type=BuiltinNodeTypes.VARIABLE_ASSIGNER, process_data=process_data) layer.on_event(event) updater.update.assert_called_once_with(conversation_id=conversation_id, variable=variable) @@ -100,7 +100,7 @@ def test_skips_when_outputs_missing(): layer = ConversationVariablePersistenceLayer(updater) layer.initialize(_build_graph_runtime_state(variable_pool, conversation_id), Mock(spec=CommandChannel)) - event = _build_node_run_succeeded_event(node_type=NodeType.VARIABLE_ASSIGNER) + event = _build_node_run_succeeded_event(node_type=BuiltinNodeTypes.VARIABLE_ASSIGNER) layer.on_event(event) updater.update.assert_not_called() @@ -112,7 +112,7 @@ def test_skips_non_assigner_nodes(): layer = ConversationVariablePersistenceLayer(updater) layer.initialize(_build_graph_runtime_state(MockReadOnlyVariablePool()), Mock(spec=CommandChannel)) - event = _build_node_run_succeeded_event(node_type=NodeType.LLM) + event = _build_node_run_succeeded_event(node_type=BuiltinNodeTypes.LLM) layer.on_event(event) updater.update.assert_not_called() @@ -137,7 +137,7 @@ def test_skips_non_conversation_variables(): layer = ConversationVariablePersistenceLayer(updater) layer.initialize(_build_graph_runtime_state(variable_pool, conversation_id), 
Mock(spec=CommandChannel)) - event = _build_node_run_succeeded_event(node_type=NodeType.VARIABLE_ASSIGNER, process_data=process_data) + event = _build_node_run_succeeded_event(node_type=BuiltinNodeTypes.VARIABLE_ASSIGNER, process_data=process_data) layer.on_event(event) updater.update.assert_not_called() diff --git a/api/tests/unit_tests/core/datasource/test_file_upload.py b/api/tests/unit_tests/core/datasource/test_file_upload.py index ad86190e00..63b86e64fc 100644 --- a/api/tests/unit_tests/core/datasource/test_file_upload.py +++ b/api/tests/unit_tests/core/datasource/test_file_upload.py @@ -35,7 +35,7 @@ TEST COVERAGE OVERVIEW: - Tests hash consistency and determinism 6. Invalid Filename Handling (TestInvalidFilenameHandling) - - Validates rejection of filenames with invalid characters (/, \\, :, *, ?, ", <, >, |) + - Validates rejection of filenames with path separators (/, \\) - Tests filename length truncation (max 200 characters) - Prevents path traversal attacks - Handles edge cases like empty filenames @@ -535,30 +535,23 @@ class TestInvalidFilenameHandling: @pytest.mark.parametrize( "invalid_char", - ["/", "\\", ":", "*", "?", '"', "<", ">", "|"], + ["/", "\\"], ) def test_filename_contains_invalid_characters(self, invalid_char): """Test detection of invalid characters in filename. - Security-critical test that validates rejection of dangerous filename characters. + Security-critical test that validates rejection of path separators. These characters are blocked because they: - / and \\ : Directory separators, could enable path traversal - - : : Drive letter separator on Windows, reserved character - - * and ? : Wildcards, could cause issues in file operations - - " : Quote character, could break command-line operations - - < and > : Redirection operators, command injection risk - - | : Pipe operator, command injection risk Blocking these characters prevents: - Path traversal attacks (../../etc/passwd) - - Command injection - - File system corruption - - Cross-platform compatibility issues + - ZIP entry traversal issues + - Ambiguous path handling """ # Arrange - Create filename with invalid character filename = f"test{invalid_char}file.txt" - # Define complete list of invalid characters - invalid_chars = ["/", "\\", ":", "*", "?", '"', "<", ">", "|"] + invalid_chars = ["/", "\\"] # Act - Check if filename contains any invalid character has_invalid_char = any(c in filename for c in invalid_chars) @@ -570,7 +563,7 @@ class TestInvalidFilenameHandling: """Test that valid filenames pass validation.""" # Arrange filename = "valid_file-name_123.txt" - invalid_chars = ["/", "\\", ":", "*", "?", '"', "<", ">", "|"] + invalid_chars = ["/", "\\"] # Act has_invalid_char = any(c in filename for c in invalid_chars) @@ -578,6 +571,16 @@ class TestInvalidFilenameHandling: # Assert assert has_invalid_char is False + @pytest.mark.parametrize("safe_char", [":", "*", "?", '"', "<", ">", "|"]) + def test_filename_allows_safe_metadata_characters(self, safe_char): + """Test that non-separator punctuation remains allowed in filenames.""" + filename = f"candidate{safe_char}resume.txt" + invalid_chars = ["/", "\\"] + + has_invalid_char = any(c in filename for c in invalid_chars) + + assert has_invalid_char is False + def test_extremely_long_filename_truncation(self): """Test handling of extremely long filenames.""" # Arrange @@ -904,7 +907,7 @@ class TestFilenameValidation: """Test that filenames with spaces are handled correctly.""" # Arrange filename = "my document with spaces.pdf" - invalid_chars = 
["/", "\\", ":", "*", "?", '"', "<", ">", "|"] + invalid_chars = ["/", "\\"] # Act - Check for invalid characters has_invalid = any(c in filename for c in invalid_chars) @@ -921,7 +924,7 @@ class TestFilenameValidation: "مستند.txt", # Arabic "ファイル.jpg", # Japanese ] - invalid_chars = ["/", "\\", ":", "*", "?", '"', "<", ">", "|"] + invalid_chars = ["/", "\\"] # Act & Assert - Unicode should be allowed for filename in unicode_filenames: diff --git a/api/tests/unit_tests/core/ops/aliyun_trace/test_aliyun_trace.py b/api/tests/unit_tests/core/ops/aliyun_trace/test_aliyun_trace.py index fac0597f5a..dfd61acfa7 100644 --- a/api/tests/unit_tests/core/ops/aliyun_trace/test_aliyun_trace.py +++ b/api/tests/unit_tests/core/ops/aliyun_trace/test_aliyun_trace.py @@ -35,7 +35,7 @@ from core.ops.entities.trace_entity import ( WorkflowTraceInfo, ) from dify_graph.entities import WorkflowNodeExecution -from dify_graph.enums import NodeType, WorkflowNodeExecutionMetadataKey +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionMetadataKey class RecordingTraceClient: @@ -413,7 +413,7 @@ def test_build_workflow_node_span_routes_llm_type(trace_instance: AliyunDataTrac monkeypatch.setattr(trace_instance, "build_workflow_llm_span", MagicMock(return_value="llm")) - node_execution.node_type = NodeType.LLM + node_execution.node_type = BuiltinNodeTypes.LLM assert trace_instance.build_workflow_node_span(node_execution, trace_info, trace_metadata) == "llm" @@ -426,7 +426,7 @@ def test_build_workflow_node_span_routes_knowledge_retrieval_type( monkeypatch.setattr(trace_instance, "build_workflow_retrieval_span", MagicMock(return_value="retrieval")) - node_execution.node_type = NodeType.KNOWLEDGE_RETRIEVAL + node_execution.node_type = BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL assert trace_instance.build_workflow_node_span(node_execution, trace_info, trace_metadata) == "retrieval" @@ -437,7 +437,7 @@ def test_build_workflow_node_span_routes_tool_type(trace_instance: AliyunDataTra monkeypatch.setattr(trace_instance, "build_workflow_tool_span", MagicMock(return_value="tool")) - node_execution.node_type = NodeType.TOOL + node_execution.node_type = BuiltinNodeTypes.TOOL assert trace_instance.build_workflow_node_span(node_execution, trace_info, trace_metadata) == "tool" @@ -448,7 +448,7 @@ def test_build_workflow_node_span_routes_code_type(trace_instance: AliyunDataTra monkeypatch.setattr(trace_instance, "build_workflow_task_span", MagicMock(return_value="task")) - node_execution.node_type = NodeType.CODE + node_execution.node_type = BuiltinNodeTypes.CODE assert trace_instance.build_workflow_node_span(node_execution, trace_info, trace_metadata) == "task" @@ -460,7 +460,7 @@ def test_build_workflow_node_span_handles_errors( trace_metadata = MagicMock() monkeypatch.setattr(trace_instance, "build_workflow_task_span", MagicMock(side_effect=RuntimeError("boom"))) - node_execution.node_type = NodeType.CODE + node_execution.node_type = BuiltinNodeTypes.CODE assert trace_instance.build_workflow_node_span(node_execution, trace_info, trace_metadata) is None assert "Error occurred in build_workflow_node_span" in caplog.text diff --git a/api/tests/unit_tests/core/ops/langfuse_trace/test_langfuse_trace.py b/api/tests/unit_tests/core/ops/langfuse_trace/test_langfuse_trace.py index 8e036e4b52..0ff135562c 100644 --- a/api/tests/unit_tests/core/ops/langfuse_trace/test_langfuse_trace.py +++ b/api/tests/unit_tests/core/ops/langfuse_trace/test_langfuse_trace.py @@ -25,7 +25,7 @@ from core.ops.langfuse_trace.entities.langfuse_trace_entity 
import ( UnitEnum, ) from core.ops.langfuse_trace.langfuse_trace import LangFuseDataTrace -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from models import EndUser from models.enums import MessageStatus @@ -147,7 +147,7 @@ def test_workflow_trace_with_message_id(trace_instance, monkeypatch): node_llm = MagicMock() node_llm.id = "node-llm" node_llm.title = "LLM Node" - node_llm.node_type = NodeType.LLM + node_llm.node_type = BuiltinNodeTypes.LLM node_llm.status = "succeeded" node_llm.process_data = { "model_mode": "chat", @@ -164,7 +164,7 @@ def test_workflow_trace_with_message_id(trace_instance, monkeypatch): node_other = MagicMock() node_other.id = "node-other" node_other.title = "Other Node" - node_other.node_type = NodeType.CODE + node_other.node_type = BuiltinNodeTypes.CODE node_other.status = "failed" node_other.process_data = None node_other.inputs = {"code": "print"} @@ -664,7 +664,7 @@ def test_workflow_trace_handles_usage_extraction_error(trace_instance, monkeypat node = MagicMock() node.id = "n1" node.title = "LLM Node" - node.node_type = NodeType.LLM + node.node_type = BuiltinNodeTypes.LLM node.status = "succeeded" class BadDict(collections.UserDict): diff --git a/api/tests/unit_tests/core/ops/langsmith_trace/test_langsmith_trace.py b/api/tests/unit_tests/core/ops/langsmith_trace/test_langsmith_trace.py index 98f9dd00cf..f656f7435f 100644 --- a/api/tests/unit_tests/core/ops/langsmith_trace/test_langsmith_trace.py +++ b/api/tests/unit_tests/core/ops/langsmith_trace/test_langsmith_trace.py @@ -21,7 +21,7 @@ from core.ops.langsmith_trace.entities.langsmith_trace_entity import ( LangSmithRunUpdateModel, ) from core.ops.langsmith_trace.langsmith_trace import LangSmithDataTrace -from dify_graph.enums import NodeType, WorkflowNodeExecutionMetadataKey +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionMetadataKey from models import EndUser @@ -145,7 +145,7 @@ def test_workflow_trace(trace_instance, monkeypatch): node_llm = MagicMock() node_llm.id = "node-llm" node_llm.title = "LLM Node" - node_llm.node_type = NodeType.LLM + node_llm.node_type = BuiltinNodeTypes.LLM node_llm.status = "succeeded" node_llm.process_data = { "model_mode": "chat", @@ -162,7 +162,7 @@ def test_workflow_trace(trace_instance, monkeypatch): node_other = MagicMock() node_other.id = "node-other" node_other.title = "Tool Node" - node_other.node_type = NodeType.TOOL + node_other.node_type = BuiltinNodeTypes.TOOL node_other.status = "succeeded" node_other.process_data = None node_other.inputs = {"tool_input": "val"} @@ -174,7 +174,7 @@ def test_workflow_trace(trace_instance, monkeypatch): node_retrieval = MagicMock() node_retrieval.id = "node-retrieval" node_retrieval.title = "Retrieval Node" - node_retrieval.node_type = NodeType.KNOWLEDGE_RETRIEVAL + node_retrieval.node_type = BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL node_retrieval.status = "succeeded" node_retrieval.process_data = None node_retrieval.inputs = {"query": "val"} @@ -555,7 +555,7 @@ def test_workflow_trace_usage_extraction_error(trace_instance, monkeypatch, capl node_llm = MagicMock() node_llm.id = "node-llm" node_llm.title = "LLM Node" - node_llm.node_type = NodeType.LLM + node_llm.node_type = BuiltinNodeTypes.LLM node_llm.status = "succeeded" node_llm.process_data = BadDict({"model_mode": "chat", "model_name": "gpt-4", "usage": True, "prompts": ["p"]}) node_llm.inputs = {} diff --git a/api/tests/unit_tests/core/ops/mlflow_trace/test_mlflow_trace.py 
b/api/tests/unit_tests/core/ops/mlflow_trace/test_mlflow_trace.py index 0657acc1d9..cccedaa08c 100644 --- a/api/tests/unit_tests/core/ops/mlflow_trace/test_mlflow_trace.py +++ b/api/tests/unit_tests/core/ops/mlflow_trace/test_mlflow_trace.py @@ -21,7 +21,7 @@ from core.ops.entities.trace_entity import ( WorkflowTraceInfo, ) from core.ops.mlflow_trace.mlflow_trace import MLflowDataTrace, datetime_to_nanoseconds -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes # ── Helpers ────────────────────────────────────────────────────────────────── @@ -161,7 +161,7 @@ def _make_node(**overrides): "tenant_id": "t1", "app_id": "app-1", "title": "Node Title", - "node_type": NodeType.CODE, + "node_type": BuiltinNodeTypes.CODE, "status": "succeeded", "inputs": '{"key": "value"}', "outputs": '{"result": "ok"}', @@ -362,7 +362,7 @@ class TestWorkflowTrace: def test_workflow_with_llm_node(self, trace_instance, mock_tracing, mock_db): llm_node = _make_node( - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, process_data=json.dumps( { "prompts": [{"role": "user", "text": "hi"}], @@ -388,7 +388,7 @@ class TestWorkflowTrace: def test_workflow_with_question_classifier_node(self, trace_instance, mock_tracing, mock_db): qc_node = _make_node( - node_type=NodeType.QUESTION_CLASSIFIER, + node_type=BuiltinNodeTypes.QUESTION_CLASSIFIER, process_data=json.dumps( { "prompts": "classify this", @@ -408,7 +408,7 @@ class TestWorkflowTrace: def test_workflow_with_http_request_node(self, trace_instance, mock_tracing, mock_db): http_node = _make_node( - node_type=NodeType.HTTP_REQUEST, + node_type=BuiltinNodeTypes.HTTP_REQUEST, process_data='{"url": "https://api.com"}', ) mock_db.session.query.return_value.filter.return_value.order_by.return_value.all.return_value = [http_node] @@ -424,7 +424,7 @@ class TestWorkflowTrace: def test_workflow_with_knowledge_retrieval_node(self, trace_instance, mock_tracing, mock_db): kr_node = _make_node( - node_type=NodeType.KNOWLEDGE_RETRIEVAL, + node_type=BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL, outputs=json.dumps( { "result": [ @@ -846,13 +846,13 @@ class TestGetNodeSpanType: @pytest.mark.parametrize( ("node_type", "expected_contains"), [ - (NodeType.LLM, "LLM"), - (NodeType.QUESTION_CLASSIFIER, "LLM"), - (NodeType.KNOWLEDGE_RETRIEVAL, "RETRIEVER"), - (NodeType.TOOL, "TOOL"), - (NodeType.CODE, "TOOL"), - (NodeType.HTTP_REQUEST, "TOOL"), - (NodeType.AGENT, "AGENT"), + (BuiltinNodeTypes.LLM, "LLM"), + (BuiltinNodeTypes.QUESTION_CLASSIFIER, "LLM"), + (BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL, "RETRIEVER"), + (BuiltinNodeTypes.TOOL, "TOOL"), + (BuiltinNodeTypes.CODE, "TOOL"), + (BuiltinNodeTypes.HTTP_REQUEST, "TOOL"), + (BuiltinNodeTypes.AGENT, "AGENT"), ], ) def test_mapped_types(self, trace_instance, node_type, expected_contains): diff --git a/api/tests/unit_tests/core/ops/opik_trace/test_opik_trace.py b/api/tests/unit_tests/core/ops/opik_trace/test_opik_trace.py index 80a0331c4b..b2cb7d5109 100644 --- a/api/tests/unit_tests/core/ops/opik_trace/test_opik_trace.py +++ b/api/tests/unit_tests/core/ops/opik_trace/test_opik_trace.py @@ -18,7 +18,7 @@ from core.ops.entities.trace_entity import ( WorkflowTraceInfo, ) from core.ops.opik_trace.opik_trace import OpikDataTrace, prepare_opik_uuid, wrap_dict, wrap_metadata -from dify_graph.enums import NodeType, WorkflowNodeExecutionMetadataKey +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionMetadataKey from models import EndUser from models.enums import MessageStatus @@ -172,7 +172,7 @@ def 
test_workflow_trace_with_message_id(trace_instance, monkeypatch): node_llm = MagicMock() node_llm.id = LLM_NODE_ID node_llm.title = "LLM Node" - node_llm.node_type = NodeType.LLM + node_llm.node_type = BuiltinNodeTypes.LLM node_llm.status = "succeeded" node_llm.process_data = { "model_mode": "chat", @@ -189,7 +189,7 @@ def test_workflow_trace_with_message_id(trace_instance, monkeypatch): node_other = MagicMock() node_other.id = CODE_NODE_ID node_other.title = "Other Node" - node_other.node_type = NodeType.CODE + node_other.node_type = BuiltinNodeTypes.CODE node_other.status = "failed" node_other.process_data = None node_other.inputs = {"code": "print"} @@ -641,7 +641,7 @@ def test_workflow_trace_usage_extraction_error_fixed(trace_instance, monkeypatch node = MagicMock() node.id = "88e8e918-472e-4b69-8051-12502c34fc07" node.title = "LLM Node" - node.node_type = NodeType.LLM + node.node_type = BuiltinNodeTypes.LLM node.status = "succeeded" class BadDict(collections.UserDict): diff --git a/api/tests/unit_tests/core/ops/tencent_trace/test_tencent_trace.py b/api/tests/unit_tests/core/ops/tencent_trace/test_tencent_trace.py index 077a92d866..f259e4639f 100644 --- a/api/tests/unit_tests/core/ops/tencent_trace/test_tencent_trace.py +++ b/api/tests/unit_tests/core/ops/tencent_trace/test_tencent_trace.py @@ -15,7 +15,7 @@ from core.ops.entities.trace_entity import ( ) from core.ops.tencent_trace.tencent_trace import TencentDataTrace from dify_graph.entities import WorkflowNodeExecution -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from models import Account, App, TenantAccountJoin logger = logging.getLogger(__name__) @@ -320,10 +320,10 @@ class TestTencentDataTrace: node1 = MagicMock(spec=WorkflowNodeExecution) node1.id = "n1" - node1.node_type = NodeType.LLM + node1.node_type = BuiltinNodeTypes.LLM node2 = MagicMock(spec=WorkflowNodeExecution) node2.id = "n2" - node2.node_type = NodeType.TOOL + node2.node_type = BuiltinNodeTypes.TOOL with patch.object(tencent_data_trace, "_get_workflow_node_executions", return_value=[node1, node2]): with patch.object(tencent_data_trace, "_build_workflow_node_span", side_effect=["span1", "span2"]): @@ -359,10 +359,10 @@ class TestTencentDataTrace: trace_info = MagicMock(spec=WorkflowTraceInfo) nodes = [ - (NodeType.LLM, mock_span_builder.build_workflow_llm_span), - (NodeType.KNOWLEDGE_RETRIEVAL, mock_span_builder.build_workflow_retrieval_span), - (NodeType.TOOL, mock_span_builder.build_workflow_tool_span), - (NodeType.CODE, mock_span_builder.build_workflow_task_span), + (BuiltinNodeTypes.LLM, mock_span_builder.build_workflow_llm_span), + (BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL, mock_span_builder.build_workflow_retrieval_span), + (BuiltinNodeTypes.TOOL, mock_span_builder.build_workflow_tool_span), + (BuiltinNodeTypes.CODE, mock_span_builder.build_workflow_task_span), ] for node_type, builder_method in nodes: @@ -377,7 +377,7 @@ class TestTencentDataTrace: def test_build_workflow_node_span_exception(self, tencent_data_trace, mock_span_builder): node = MagicMock(spec=WorkflowNodeExecution) - node.node_type = NodeType.LLM + node.node_type = BuiltinNodeTypes.LLM node.id = "n1" mock_span_builder.build_workflow_llm_span.side_effect = Exception("error") diff --git a/api/tests/unit_tests/core/ops/test_arize_phoenix_trace.py b/api/tests/unit_tests/core/ops/test_arize_phoenix_trace.py index 32389b4d64..49d6b698ef 100644 --- a/api/tests/unit_tests/core/ops/test_arize_phoenix_trace.py +++ 
b/api/tests/unit_tests/core/ops/test_arize_phoenix_trace.py @@ -1,29 +1,29 @@ from openinference.semconv.trace import OpenInferenceSpanKindValues from core.ops.arize_phoenix_trace.arize_phoenix_trace import _NODE_TYPE_TO_SPAN_KIND, _get_node_span_kind -from dify_graph.enums import NodeType +from dify_graph.enums import BUILT_IN_NODE_TYPES, BuiltinNodeTypes class TestGetNodeSpanKind: """Tests for _get_node_span_kind helper.""" def test_all_node_types_are_mapped_correctly(self): - """Ensure every NodeType enum member is mapped to the correct span kind.""" + """Ensure every built-in node type is mapped to the correct span kind.""" # Mappings for node types that have a specialised span kind. special_mappings = { - NodeType.LLM: OpenInferenceSpanKindValues.LLM, - NodeType.KNOWLEDGE_RETRIEVAL: OpenInferenceSpanKindValues.RETRIEVER, - NodeType.TOOL: OpenInferenceSpanKindValues.TOOL, - NodeType.AGENT: OpenInferenceSpanKindValues.AGENT, + BuiltinNodeTypes.LLM: OpenInferenceSpanKindValues.LLM, + BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: OpenInferenceSpanKindValues.RETRIEVER, + BuiltinNodeTypes.TOOL: OpenInferenceSpanKindValues.TOOL, + BuiltinNodeTypes.AGENT: OpenInferenceSpanKindValues.AGENT, } - # Test that every NodeType enum member is mapped to the correct span kind. + # Test that every built-in node type is mapped to the correct span kind. # Node types not in `special_mappings` should default to CHAIN. - for node_type in NodeType: + for node_type in BUILT_IN_NODE_TYPES: expected_span_kind = special_mappings.get(node_type, OpenInferenceSpanKindValues.CHAIN) actual_span_kind = _get_node_span_kind(node_type) assert actual_span_kind == expected_span_kind, ( - f"NodeType.{node_type.name} was mapped to {actual_span_kind}, but {expected_span_kind} was expected." + f"Node type {node_type!r} was mapped to {actual_span_kind}, but {expected_span_kind} was expected." 
) def test_unknown_string_defaults_to_chain(self): diff --git a/api/tests/unit_tests/core/ops/test_ops_trace_manager.py b/api/tests/unit_tests/core/ops/test_ops_trace_manager.py index 2d325ccb0e..8a4ba46bde 100644 --- a/api/tests/unit_tests/core/ops/test_ops_trace_manager.py +++ b/api/tests/unit_tests/core/ops/test_ops_trace_manager.py @@ -107,6 +107,7 @@ def make_message_data(**overrides): "agent_thoughts": [], "query": "sample-query", "inputs": "sample-input", + "app_id": "app-id", } base.update(overrides) @@ -171,10 +172,10 @@ def configure_db_query(session, *, message_file=None, workflow_app_log=None): class DummySessionContext: scalar_values = [] + _shared_index = 0 def __init__(self, engine): - self._values = list(self.scalar_values) - self._index = 0 + self._values = self.scalar_values def __enter__(self): return self @@ -183,12 +184,28 @@ class DummySessionContext: return False def scalar(self, *args, **kwargs): - if self._index >= len(self._values): + if DummySessionContext._shared_index >= len(self._values): return None - value = self._values[self._index] - self._index += 1 + value = self._values[DummySessionContext._shared_index] + DummySessionContext._shared_index += 1 return value + def scalars(self, *args, **kwargs): + class ScalarsResult: + def __init__(self, context): + self._context = context + + def all(self): + if DummySessionContext._shared_index >= len(self._context._values): + return [] + value = self._context._values[DummySessionContext._shared_index] + DummySessionContext._shared_index += 1 + if isinstance(value, list): + return value + return [value] if value is not None else [] + + return ScalarsResult(self) + @pytest.fixture(autouse=True) def patch_provider_map(monkeypatch): @@ -216,6 +233,8 @@ def patch_timer_and_current_app(monkeypatch): @pytest.fixture(autouse=True) def patch_sqlalchemy_session(monkeypatch): + DummySessionContext.scalar_values = [] + DummySessionContext._shared_index = 0 monkeypatch.setattr("core.ops.ops_trace_manager.Session", DummySessionContext) @@ -453,7 +472,7 @@ def test_trace_task_message_trace(trace_task_message, mock_db): def test_trace_task_workflow_trace(workflow_repo_fixture, mock_db): - DummySessionContext.scalar_values = ["wf-app-log", "message-ref"] + DummySessionContext.scalar_values = [[], "wf-app-log", "message-ref"] execution = SimpleNamespace(id_="run-id") task = TraceTask( trace_type=TraceTaskName.WORKFLOW_TRACE, workflow_execution=execution, conversation_id="conv", user_id="user" diff --git a/api/tests/unit_tests/core/ops/test_trace_queue_manager.py b/api/tests/unit_tests/core/ops/test_trace_queue_manager.py new file mode 100644 index 0000000000..44a58ab902 --- /dev/null +++ b/api/tests/unit_tests/core/ops/test_trace_queue_manager.py @@ -0,0 +1,200 @@ +"""Unit tests for TraceQueueManager telemetry guard. + +This test suite verifies that TraceQueueManager correctly drops trace tasks +when telemetry is disabled, proving Bug 1 from code review is a false positive. + +The guard logic moved from persistence.py to TraceQueueManager.add_trace_task() +at line 1282 of ops_trace_manager.py: + if self._enterprise_telemetry_enabled or self.trace_instance: + trace_task.app_id = self.app_id + trace_manager_queue.put(trace_task) + +Tasks are only enqueued if EITHER: +- Enterprise telemetry is enabled (_enterprise_telemetry_enabled=True), OR +- A third-party trace instance (Langfuse, etc.) is configured + +When BOTH are false, tasks are silently dropped (correct behavior). 
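+
+As an illustrative sketch only (derived from the OR-condition quoted above,
+not copied from ops_trace_manager.py), the guard's truth table is:
+
+    _enterprise_telemetry_enabled | trace_instance | enqueued?
+    ------------------------------+----------------+--------------
+    False                         | None           | no (dropped)
+    True                          | None           | yes
+    False                         | <configured>   | yes
+    True                          | <configured>   | yes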
+""" + +import queue +import sys +import types +from unittest.mock import MagicMock, patch + +import pytest + + +@pytest.fixture +def trace_queue_manager_and_task(monkeypatch): + """Fixture to provide TraceQueueManager and TraceTask with delayed imports.""" + module_name = "core.ops.ops_trace_manager" + if module_name not in sys.modules: + ops_stub = types.ModuleType(module_name) + + class StubTraceTask: + def __init__(self, trace_type): + self.trace_type = trace_type + self.app_id = None + + class StubTraceQueueManager: + def __init__(self, app_id=None): + self.app_id = app_id + from core.telemetry.gateway import is_enterprise_telemetry_enabled + + self._enterprise_telemetry_enabled = is_enterprise_telemetry_enabled() + self.trace_instance = StubOpsTraceManager.get_ops_trace_instance(app_id) + + def add_trace_task(self, trace_task): + if self._enterprise_telemetry_enabled or self.trace_instance: + trace_task.app_id = self.app_id + from core.ops.ops_trace_manager import trace_manager_queue + + trace_manager_queue.put(trace_task) + + class StubOpsTraceManager: + @staticmethod + def get_ops_trace_instance(app_id): + return None + + ops_stub.TraceQueueManager = StubTraceQueueManager + ops_stub.TraceTask = StubTraceTask + ops_stub.OpsTraceManager = StubOpsTraceManager + ops_stub.trace_manager_queue = MagicMock(spec=queue.Queue) + monkeypatch.setitem(sys.modules, module_name, ops_stub) + + from core.ops.entities.trace_entity import TraceTaskName + + ops_module = __import__(module_name, fromlist=["TraceQueueManager", "TraceTask"]) + TraceQueueManager = ops_module.TraceQueueManager + TraceTask = ops_module.TraceTask + + return TraceQueueManager, TraceTask, TraceTaskName + + +class TestTraceQueueManagerTelemetryGuard: + """Test TraceQueueManager's telemetry guard in add_trace_task().""" + + def test_task_not_enqueued_when_telemetry_disabled_and_no_trace_instance(self, trace_queue_manager_and_task): + """Verify task is NOT enqueued when telemetry disabled and no trace instance. + + This is the core guard: when _enterprise_telemetry_enabled=False AND + trace_instance=None, the task should be silently dropped. + """ + TraceQueueManager, TraceTask, TraceTaskName = trace_queue_manager_and_task + + mock_queue = MagicMock(spec=queue.Queue) + + trace_task = TraceTask(trace_type=TraceTaskName.WORKFLOW_TRACE) + + with ( + patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=False), + patch("core.ops.ops_trace_manager.OpsTraceManager.get_ops_trace_instance", return_value=None), + patch("core.ops.ops_trace_manager.trace_manager_queue", mock_queue), + ): + manager = TraceQueueManager(app_id="test-app-id") + manager.add_trace_task(trace_task) + + mock_queue.put.assert_not_called() + + def test_task_enqueued_when_telemetry_enabled(self, trace_queue_manager_and_task): + """Verify task IS enqueued when enterprise telemetry is enabled. + + When _enterprise_telemetry_enabled=True, the task should be enqueued + regardless of trace_instance state. 
+ """ + TraceQueueManager, TraceTask, TraceTaskName = trace_queue_manager_and_task + + mock_queue = MagicMock(spec=queue.Queue) + + trace_task = TraceTask(trace_type=TraceTaskName.WORKFLOW_TRACE) + + with ( + patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=True), + patch("core.ops.ops_trace_manager.OpsTraceManager.get_ops_trace_instance", return_value=None), + patch("core.ops.ops_trace_manager.trace_manager_queue", mock_queue), + ): + manager = TraceQueueManager(app_id="test-app-id") + manager.add_trace_task(trace_task) + + mock_queue.put.assert_called_once() + called_task = mock_queue.put.call_args[0][0] + assert called_task.app_id == "test-app-id" + + def test_task_enqueued_when_trace_instance_configured(self, trace_queue_manager_and_task): + """Verify task IS enqueued when third-party trace instance is configured. + + When trace_instance is not None (e.g., Langfuse configured), the task + should be enqueued even if enterprise telemetry is disabled. + """ + TraceQueueManager, TraceTask, TraceTaskName = trace_queue_manager_and_task + + mock_queue = MagicMock(spec=queue.Queue) + + mock_trace_instance = MagicMock() + + trace_task = TraceTask(trace_type=TraceTaskName.WORKFLOW_TRACE) + + with ( + patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=False), + patch( + "core.ops.ops_trace_manager.OpsTraceManager.get_ops_trace_instance", return_value=mock_trace_instance + ), + patch("core.ops.ops_trace_manager.trace_manager_queue", mock_queue), + ): + manager = TraceQueueManager(app_id="test-app-id") + manager.add_trace_task(trace_task) + + mock_queue.put.assert_called_once() + called_task = mock_queue.put.call_args[0][0] + assert called_task.app_id == "test-app-id" + + def test_task_enqueued_when_both_telemetry_and_trace_instance_enabled(self, trace_queue_manager_and_task): + """Verify task IS enqueued when both telemetry and trace instance are enabled. + + When both _enterprise_telemetry_enabled=True AND trace_instance is set, + the task should definitely be enqueued. + """ + TraceQueueManager, TraceTask, TraceTaskName = trace_queue_manager_and_task + + mock_queue = MagicMock(spec=queue.Queue) + + mock_trace_instance = MagicMock() + + trace_task = TraceTask(trace_type=TraceTaskName.WORKFLOW_TRACE) + + with ( + patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=True), + patch( + "core.ops.ops_trace_manager.OpsTraceManager.get_ops_trace_instance", return_value=mock_trace_instance + ), + patch("core.ops.ops_trace_manager.trace_manager_queue", mock_queue), + ): + manager = TraceQueueManager(app_id="test-app-id") + manager.add_trace_task(trace_task) + + mock_queue.put.assert_called_once() + called_task = mock_queue.put.call_args[0][0] + assert called_task.app_id == "test-app-id" + + def test_app_id_set_before_enqueue(self, trace_queue_manager_and_task): + """Verify app_id is set on the task before enqueuing. + + The guard logic sets trace_task.app_id = self.app_id before calling + trace_manager_queue.put(trace_task). This test verifies that behavior. 
+ """ + TraceQueueManager, TraceTask, TraceTaskName = trace_queue_manager_and_task + + mock_queue = MagicMock(spec=queue.Queue) + + trace_task = TraceTask(trace_type=TraceTaskName.WORKFLOW_TRACE) + + with ( + patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=True), + patch("core.ops.ops_trace_manager.OpsTraceManager.get_ops_trace_instance", return_value=None), + patch("core.ops.ops_trace_manager.trace_manager_queue", mock_queue), + ): + manager = TraceQueueManager(app_id="expected-app-id") + manager.add_trace_task(trace_task) + + called_task = mock_queue.put.call_args[0][0] + assert called_task.app_id == "expected-app-id" diff --git a/api/tests/unit_tests/core/ops/weave_trace/test_weave_trace.py b/api/tests/unit_tests/core/ops/weave_trace/test_weave_trace.py index cdd97d5369..8057bbbad5 100644 --- a/api/tests/unit_tests/core/ops/weave_trace/test_weave_trace.py +++ b/api/tests/unit_tests/core/ops/weave_trace/test_weave_trace.py @@ -22,7 +22,7 @@ from core.ops.entities.trace_entity import ( ) from core.ops.weave_trace.entities.weave_trace_entity import WeaveTraceModel from core.ops.weave_trace.weave_trace import WeaveDataTrace -from dify_graph.enums import NodeType, WorkflowNodeExecutionMetadataKey +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionMetadataKey # ── Helpers ────────────────────────────────────────────────────────────────── @@ -173,7 +173,7 @@ def _make_node(**overrides): defaults = { "id": "node-1", "title": "Node Title", - "node_type": NodeType.CODE, + "node_type": BuiltinNodeTypes.CODE, "status": "succeeded", "inputs": {"key": "value"}, "outputs": {"result": "ok"}, @@ -633,7 +633,7 @@ class TestWorkflowTrace: """Workflow trace iterates node executions and creates node runs.""" node = _make_node( id="node-1", - node_type=NodeType.CODE, + node_type=BuiltinNodeTypes.CODE, inputs={"k": "v"}, outputs={"r": "ok"}, elapsed_time=0.5, @@ -655,7 +655,7 @@ class TestWorkflowTrace: def test_workflow_trace_with_llm_node(self, trace_instance, monkeypatch): """LLM node uses process_data prompts as inputs.""" node = _make_node( - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, process_data={ "prompts": [{"role": "user", "content": "hi"}], "model_mode": "chat", @@ -683,7 +683,7 @@ class TestWorkflowTrace: def test_workflow_trace_with_non_llm_node_uses_inputs(self, trace_instance, monkeypatch): """Non-LLM node uses node_execution.inputs directly.""" node = _make_node( - node_type=NodeType.TOOL, + node_type=BuiltinNodeTypes.TOOL, inputs={"tool_input": "val"}, process_data=None, ) @@ -743,7 +743,7 @@ class TestWorkflowTrace: def test_workflow_trace_chat_mode_llm_node_adds_provider(self, trace_instance, monkeypatch): """Chat mode LLM node adds ls_provider and ls_model_name to attributes.""" node = _make_node( - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, process_data={"model_mode": "chat", "model_provider": "openai", "model_name": "gpt-4", "prompts": []}, ) self._setup_repo(monkeypatch, nodes=[node]) diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/weaviate/__init__.py b/api/tests/unit_tests/core/rag/datasource/vdb/weaviate/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/weaviate/test_weaviate_vector.py b/api/tests/unit_tests/core/rag/datasource/vdb/weaviate/test_weaviate_vector.py new file mode 100644 index 0000000000..a32f27fc91 --- /dev/null +++ b/api/tests/unit_tests/core/rag/datasource/vdb/weaviate/test_weaviate_vector.py @@ -0,0 +1,330 @@ 
+"""Unit tests for Weaviate vector database implementation. + +Focuses on verifying that doc_type is properly handled in: +- Collection schema creation (_create_collection) +- Property migration (_ensure_properties) +- Vector search result metadata (search_by_vector) +- Full-text search result metadata (search_by_full_text) +""" + +import unittest +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +from core.rag.datasource.vdb.weaviate.weaviate_vector import WeaviateConfig, WeaviateVector +from core.rag.models.document import Document + + +class TestWeaviateVector(unittest.TestCase): + """Tests for WeaviateVector class with focus on doc_type metadata handling.""" + + def setUp(self): + self.config = WeaviateConfig( + endpoint="http://localhost:8080", + api_key="test-key", + batch_size=100, + ) + self.collection_name = "Test_Collection_Node" + self.attributes = ["doc_id", "dataset_id", "document_id", "doc_hash", "doc_type"] + + @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate") + def _create_weaviate_vector(self, mock_weaviate_module): + """Helper to create a WeaviateVector instance with mocked client.""" + mock_client = MagicMock() + mock_client.is_ready.return_value = True + mock_weaviate_module.connect_to_custom.return_value = mock_client + + wv = WeaviateVector( + collection_name=self.collection_name, + config=self.config, + attributes=self.attributes, + ) + return wv, mock_client + + @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate") + def test_init(self, mock_weaviate_module): + """Test WeaviateVector initialization stores attributes including doc_type.""" + mock_client = MagicMock() + mock_client.is_ready.return_value = True + mock_weaviate_module.connect_to_custom.return_value = mock_client + + wv = WeaviateVector( + collection_name=self.collection_name, + config=self.config, + attributes=self.attributes, + ) + + assert wv._collection_name == self.collection_name + assert "doc_type" in wv._attributes + + @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.redis_client") + @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.dify_config") + @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate") + def test_create_collection_includes_doc_type_property(self, mock_weaviate_module, mock_dify_config, mock_redis): + """Test that _create_collection defines doc_type in the schema properties.""" + # Mock Redis + mock_lock = MagicMock() + mock_lock.__enter__ = MagicMock() + mock_lock.__exit__ = MagicMock() + mock_redis.lock.return_value = mock_lock + mock_redis.get.return_value = None + mock_redis.set.return_value = None + + # Mock dify_config + mock_dify_config.WEAVIATE_TOKENIZATION = None + + # Mock client + mock_client = MagicMock() + mock_client.is_ready.return_value = True + mock_weaviate_module.connect_to_custom.return_value = mock_client + mock_client.collections.exists.return_value = False + + # Mock _ensure_properties to avoid side effects + mock_col = MagicMock() + mock_client.collections.use.return_value = mock_col + mock_cfg = MagicMock() + mock_cfg.properties = [] + mock_col.config.get.return_value = mock_cfg + + wv = WeaviateVector( + collection_name=self.collection_name, + config=self.config, + attributes=self.attributes, + ) + wv._create_collection() + + # Verify collections.create was called + mock_client.collections.create.assert_called_once() + + # Extract properties from the create call + call_kwargs = mock_client.collections.create.call_args + properties = 
call_kwargs.kwargs.get("properties") + + # Verify doc_type is among the defined properties + property_names = [p.name for p in properties] + assert "doc_type" in property_names, ( + f"doc_type should be in collection schema properties, got: {property_names}" + ) + + @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate") + def test_ensure_properties_adds_missing_doc_type(self, mock_weaviate_module): + """Test that _ensure_properties adds doc_type when it's missing from existing schema.""" + mock_client = MagicMock() + mock_client.is_ready.return_value = True + mock_weaviate_module.connect_to_custom.return_value = mock_client + + # Collection exists but doc_type property is missing + mock_client.collections.exists.return_value = True + mock_col = MagicMock() + mock_client.collections.use.return_value = mock_col + + # Simulate existing properties WITHOUT doc_type + existing_props = [ + SimpleNamespace(name="text"), + SimpleNamespace(name="document_id"), + SimpleNamespace(name="doc_id"), + SimpleNamespace(name="chunk_index"), + ] + mock_cfg = MagicMock() + mock_cfg.properties = existing_props + mock_col.config.get.return_value = mock_cfg + + wv = WeaviateVector( + collection_name=self.collection_name, + config=self.config, + attributes=self.attributes, + ) + wv._ensure_properties() + + # Verify add_property was called and includes doc_type + add_calls = mock_col.config.add_property.call_args_list + added_names = [call.args[0].name for call in add_calls] + assert "doc_type" in added_names, f"doc_type should be added to existing collection, added: {added_names}" + + @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate") + def test_ensure_properties_skips_existing_doc_type(self, mock_weaviate_module): + """Test that _ensure_properties does not add doc_type when it already exists.""" + mock_client = MagicMock() + mock_client.is_ready.return_value = True + mock_weaviate_module.connect_to_custom.return_value = mock_client + + mock_client.collections.exists.return_value = True + mock_col = MagicMock() + mock_client.collections.use.return_value = mock_col + + # Simulate existing properties WITH doc_type already present + existing_props = [ + SimpleNamespace(name="text"), + SimpleNamespace(name="document_id"), + SimpleNamespace(name="doc_id"), + SimpleNamespace(name="doc_type"), + SimpleNamespace(name="chunk_index"), + ] + mock_cfg = MagicMock() + mock_cfg.properties = existing_props + mock_col.config.get.return_value = mock_cfg + + wv = WeaviateVector( + collection_name=self.collection_name, + config=self.config, + attributes=self.attributes, + ) + wv._ensure_properties() + + # No properties should be added + mock_col.config.add_property.assert_not_called() + + @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate") + def test_search_by_vector_returns_doc_type_in_metadata(self, mock_weaviate_module): + """Test that search_by_vector returns doc_type in document metadata. + + This is the core bug fix verification: when doc_type is in _attributes, + it should appear in return_properties and thus be included in results. 
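+
+        Expected data flow, sketched with the names used in this test (the
+        implementation internals are mocked here, so this is illustrative):
+
+            wv._attributes (includes "doc_type")
+              -> return_properties for collection.query.near_vector(...)
+              -> obj.properties["doc_type"]
+              -> Document.metadata["doc_type"]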
+ """ + mock_client = MagicMock() + mock_client.is_ready.return_value = True + mock_weaviate_module.connect_to_custom.return_value = mock_client + + mock_client.collections.exists.return_value = True + mock_col = MagicMock() + mock_client.collections.use.return_value = mock_col + + # Simulate search result with doc_type in properties + mock_obj = MagicMock() + mock_obj.properties = { + "text": "image content description", + "doc_id": "upload_file_id_123", + "dataset_id": "dataset_1", + "document_id": "doc_1", + "doc_hash": "hash_abc", + "doc_type": "image", + } + mock_obj.metadata.distance = 0.1 + + mock_result = MagicMock() + mock_result.objects = [mock_obj] + mock_col.query.near_vector.return_value = mock_result + + wv = WeaviateVector( + collection_name=self.collection_name, + config=self.config, + attributes=self.attributes, + ) + docs = wv.search_by_vector(query_vector=[0.1] * 128, top_k=1) + + # Verify doc_type is in return_properties + call_kwargs = mock_col.query.near_vector.call_args + return_props = call_kwargs.kwargs.get("return_properties") + assert "doc_type" in return_props, f"doc_type should be in return_properties, got: {return_props}" + + # Verify doc_type is in result metadata + assert len(docs) == 1 + assert docs[0].metadata.get("doc_type") == "image" + + @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate") + def test_search_by_full_text_returns_doc_type_in_metadata(self, mock_weaviate_module): + """Test that search_by_full_text also returns doc_type in document metadata.""" + mock_client = MagicMock() + mock_client.is_ready.return_value = True + mock_weaviate_module.connect_to_custom.return_value = mock_client + + mock_client.collections.exists.return_value = True + mock_col = MagicMock() + mock_client.collections.use.return_value = mock_col + + # Simulate BM25 search result with doc_type + mock_obj = MagicMock() + mock_obj.properties = { + "text": "image content description", + "doc_id": "upload_file_id_456", + "doc_type": "image", + } + mock_obj.vector = {"default": [0.1] * 128} + + mock_result = MagicMock() + mock_result.objects = [mock_obj] + mock_col.query.bm25.return_value = mock_result + + wv = WeaviateVector( + collection_name=self.collection_name, + config=self.config, + attributes=self.attributes, + ) + docs = wv.search_by_full_text(query="image", top_k=1) + + # Verify doc_type is in return_properties + call_kwargs = mock_col.query.bm25.call_args + return_props = call_kwargs.kwargs.get("return_properties") + assert "doc_type" in return_props, ( + f"doc_type should be in return_properties for BM25 search, got: {return_props}" + ) + + # Verify doc_type is in result metadata + assert len(docs) == 1 + assert docs[0].metadata.get("doc_type") == "image" + + @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate") + def test_add_texts_stores_doc_type_in_properties(self, mock_weaviate_module): + """Test that add_texts includes doc_type from document metadata in stored properties.""" + mock_client = MagicMock() + mock_client.is_ready.return_value = True + mock_weaviate_module.connect_to_custom.return_value = mock_client + + mock_col = MagicMock() + mock_client.collections.use.return_value = mock_col + + # Create a document with doc_type metadata (as produced by multimodal indexing) + doc = Document( + page_content="an image of a cat", + metadata={ + "doc_id": "upload_file_123", + "doc_type": "image", + "dataset_id": "ds_1", + "document_id": "doc_1", + "doc_hash": "hash_xyz", + }, + ) + + wv = WeaviateVector( + 
collection_name=self.collection_name, + config=self.config, + attributes=self.attributes, + ) + + # Mock batch context manager + mock_batch = MagicMock() + mock_batch.__enter__ = MagicMock(return_value=mock_batch) + mock_batch.__exit__ = MagicMock(return_value=False) + mock_col.batch.dynamic.return_value = mock_batch + + wv.add_texts(documents=[doc], embeddings=[[0.1] * 128]) + + # Verify batch.add_object was called with doc_type in properties + mock_batch.add_object.assert_called_once() + call_kwargs = mock_batch.add_object.call_args + stored_props = call_kwargs.kwargs.get("properties") + assert stored_props.get("doc_type") == "image", f"doc_type should be stored in properties, got: {stored_props}" + + +class TestVectorDefaultAttributes(unittest.TestCase): + """Tests for Vector class default attributes list.""" + + @patch("core.rag.datasource.vdb.vector_factory.Vector._get_embeddings") + @patch("core.rag.datasource.vdb.vector_factory.Vector._init_vector") + def test_default_attributes_include_doc_type(self, mock_init_vector, mock_get_embeddings): + """Test that Vector class default attributes include doc_type.""" + from core.rag.datasource.vdb.vector_factory import Vector + + mock_get_embeddings.return_value = MagicMock() + mock_init_vector.return_value = MagicMock() + + mock_dataset = MagicMock() + mock_dataset.index_struct_dict = None + + vector = Vector(dataset=mock_dataset) + + assert "doc_type" in vector._attributes, f"doc_type should be in default attributes, got: {vector._attributes}" + + +if __name__ == "__main__": + unittest.main() diff --git a/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py b/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py index b90c4935af..d61f01c616 100644 --- a/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py +++ b/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py @@ -32,10 +32,10 @@ from core.rag.models.document import Document from core.rag.rerank.rerank_type import RerankMode from core.rag.retrieval.dataset_retrieval import DatasetRetrieval from core.rag.retrieval.retrieval_methods import RetrievalMethod +from core.workflow.nodes.knowledge_retrieval import exc +from core.workflow.nodes.knowledge_retrieval.retrieval import KnowledgeRetrievalRequest from dify_graph.model_runtime.entities.llm_entities import LLMUsage from dify_graph.model_runtime.entities.model_entities import ModelFeature -from dify_graph.nodes.knowledge_retrieval import exc -from dify_graph.repositories.rag_retrieval_protocol import KnowledgeRetrievalRequest from models.dataset import Dataset # ==================== Helper Functions ==================== @@ -3730,7 +3730,7 @@ class TestDatasetRetrievalAdditionalHelpers: attachment_ids=None, dataset_ids=["d1"], app_id="a1", - user_from="web", + user_from="account", user_id="u1", ) mock_session.add_all.assert_not_called() @@ -3740,7 +3740,7 @@ class TestDatasetRetrievalAdditionalHelpers: attachment_ids=["f1"], dataset_ids=["d1", "d2"], app_id="a1", - user_from="web", + user_from="account", user_id="u1", ) mock_session.add_all.assert_called() diff --git a/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval_methods.py b/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval_methods.py index 682a451117..48782515d0 100644 --- a/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval_methods.py +++ b/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval_methods.py @@ -5,8 +5,8 @@ import pytest from core.rag.models.document import Document from 
core.rag.retrieval.dataset_retrieval import DatasetRetrieval -from dify_graph.nodes.knowledge_retrieval import exc -from dify_graph.repositories.rag_retrieval_protocol import KnowledgeRetrievalRequest +from core.workflow.nodes.knowledge_retrieval import exc +from core.workflow.nodes.knowledge_retrieval.retrieval import KnowledgeRetrievalRequest from models.dataset import Dataset # ==================== Helper Functions ==================== diff --git a/api/tests/unit_tests/core/repositories/test_celery_workflow_node_execution_repository.py b/api/tests/unit_tests/core/repositories/test_celery_workflow_node_execution_repository.py index b613573927..2a83a4e802 100644 --- a/api/tests/unit_tests/core/repositories/test_celery_workflow_node_execution_repository.py +++ b/api/tests/unit_tests/core/repositories/test_celery_workflow_node_execution_repository.py @@ -15,7 +15,7 @@ from dify_graph.entities.workflow_node_execution import ( WorkflowNodeExecution, WorkflowNodeExecutionStatus, ) -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from dify_graph.repositories.workflow_node_execution_repository import OrderConfig from libs.datetime_utils import naive_utc_now from models import Account, EndUser @@ -61,7 +61,7 @@ def sample_workflow_node_execution(): workflow_execution_id=str(uuid4()), index=1, node_id="test_node", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, title="Test Node", inputs={"input1": "value1"}, status=WorkflowNodeExecutionStatus.RUNNING, @@ -259,7 +259,7 @@ class TestCeleryWorkflowNodeExecutionRepository: workflow_execution_id=workflow_run_id, index=1, node_id="node1", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, title="Node 1", inputs={"input1": "value1"}, status=WorkflowNodeExecutionStatus.RUNNING, @@ -272,7 +272,7 @@ class TestCeleryWorkflowNodeExecutionRepository: workflow_execution_id=workflow_run_id, index=2, node_id="node2", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, title="Node 2", inputs={"input2": "value2"}, status=WorkflowNodeExecutionStatus.RUNNING, @@ -310,7 +310,7 @@ class TestCeleryWorkflowNodeExecutionRepository: workflow_execution_id=workflow_run_id, index=2, node_id="node2", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, title="Node 2", inputs={}, status=WorkflowNodeExecutionStatus.RUNNING, @@ -323,7 +323,7 @@ class TestCeleryWorkflowNodeExecutionRepository: workflow_execution_id=workflow_run_id, index=1, node_id="node1", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, title="Node 1", inputs={}, status=WorkflowNodeExecutionStatus.RUNNING, diff --git a/api/tests/unit_tests/core/repositories/test_workflow_node_execution_conflict_handling.py b/api/tests/unit_tests/core/repositories/test_workflow_node_execution_conflict_handling.py index bae5bae06d..456c3dde12 100644 --- a/api/tests/unit_tests/core/repositories/test_workflow_node_execution_conflict_handling.py +++ b/api/tests/unit_tests/core/repositories/test_workflow_node_execution_conflict_handling.py @@ -14,7 +14,7 @@ from dify_graph.entities.workflow_node_execution import ( WorkflowNodeExecution, WorkflowNodeExecutionStatus, ) -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from libs.datetime_utils import naive_utc_now from models import Account, WorkflowNodeExecutionTriggeredFrom @@ -70,7 +70,7 @@ class TestWorkflowNodeExecutionConflictHandling: workflow_execution_id="test-workflow-execution-id", node_execution_id="test-node-execution-id", 
             node_id="test-node-id",
-            node_type=NodeType.START,
+            node_type=BuiltinNodeTypes.START,
             title="Test Node",
             index=1,
             status=WorkflowNodeExecutionStatus.RUNNING,
@@ -108,7 +108,7 @@ class TestWorkflowNodeExecutionConflictHandling:
             workflow_execution_id="test-workflow-execution-id",
             node_execution_id="test-node-execution-id",
             node_id="test-node-id",
-            node_type=NodeType.START,
+            node_type=BuiltinNodeTypes.START,
             title="Test Node",
             index=1,
             status=WorkflowNodeExecutionStatus.SUCCEEDED,
@@ -153,7 +153,7 @@ class TestWorkflowNodeExecutionConflictHandling:
             workflow_execution_id="test-workflow-execution-id",
             node_execution_id="test-node-execution-id",
             node_id="test-node-id",
-            node_type=NodeType.START,
+            node_type=BuiltinNodeTypes.START,
             title="Test Node",
             index=1,
             status=WorkflowNodeExecutionStatus.RUNNING,
@@ -195,7 +195,7 @@ class TestWorkflowNodeExecutionConflictHandling:
             workflow_execution_id="test-workflow-execution-id",
             node_execution_id="test-node-execution-id",
             node_id="test-node-id",
-            node_type=NodeType.START,
+            node_type=BuiltinNodeTypes.START,
             title="Test Node",
             index=1,
             status=WorkflowNodeExecutionStatus.RUNNING,
diff --git a/api/tests/unit_tests/core/repositories/test_workflow_node_execution_truncation.py b/api/tests/unit_tests/core/repositories/test_workflow_node_execution_truncation.py
index c880b8d41b..eeab81a178 100644
--- a/api/tests/unit_tests/core/repositories/test_workflow_node_execution_truncation.py
+++ b/api/tests/unit_tests/core/repositories/test_workflow_node_execution_truncation.py
@@ -13,6 +13,7 @@ from unittest.mock import MagicMock
 
 from sqlalchemy import Engine
 
+from configs import dify_config
 from core.repositories.sqlalchemy_workflow_node_execution_repository import (
     SQLAlchemyWorkflowNodeExecutionRepository,
 )
@@ -20,7 +21,7 @@ from dify_graph.entities.workflow_node_execution import (
     WorkflowNodeExecution,
     WorkflowNodeExecutionStatus,
 )
-from dify_graph.enums import NodeType
+from dify_graph.enums import BuiltinNodeTypes
 from models import Account, WorkflowNodeExecutionTriggeredFrom
 from models.enums import ExecutionOffLoadType
 from models.workflow import WorkflowNodeExecutionModel, WorkflowNodeExecutionOffload
@@ -41,7 +42,7 @@ class TruncationTestCase:
 def create_test_cases() -> list[TruncationTestCase]:
     """Create test cases for different truncation scenarios."""
     # Create large data that will definitely exceed the threshold (10KB)
-    large_data = {"data": "x" * (TRUNCATION_SIZE_THRESHOLD + 1000)}
+    large_data = {"data": "x" * (dify_config.WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE + 1000)}
     small_data = {"data": "small"}
 
     return [
@@ -101,7 +102,7 @@ def create_workflow_node_execution(
         workflow_execution_id="test-workflow-execution-id",
         index=1,
         node_id="test-node-id",
-        node_type=NodeType.LLM,
+        node_type=BuiltinNodeTypes.LLM,
         title="Test Node",
         inputs=inputs,
         outputs=outputs,
@@ -145,7 +146,7 @@ class TestSQLAlchemyWorkflowNodeExecutionRepositoryTruncation:
         db_model.index = 1
         db_model.predecessor_node_id = None
         db_model.node_id = "node-id"
-        db_model.node_type = NodeType.LLM
+        db_model.node_type = BuiltinNodeTypes.LLM
         db_model.title = "Test Node"
         db_model.inputs = json.dumps({"value": "inputs"})
         db_model.process_data = json.dumps({"value": "process_data"})
diff --git a/api/tests/unit_tests/core/telemetry/test_facade.py b/api/tests/unit_tests/core/telemetry/test_facade.py
new file mode 100644
index 0000000000..36e8e1bbb1
--- /dev/null
+++ b/api/tests/unit_tests/core/telemetry/test_facade.py
@@ -0,0 +1,181 @@
+"""Unit tests for core.telemetry.emit() routing and enterprise-only filtering."""
+
+from __future__ import annotations
+
+import queue
+import sys
+import types
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from core.ops.entities.trace_entity import TraceTaskName
+from core.telemetry.events import TelemetryContext, TelemetryEvent
+
+
+@pytest.fixture
+def telemetry_test_setup(monkeypatch):
+    module_name = "core.ops.ops_trace_manager"
+    ops_stub = types.ModuleType(module_name)
+
+    class StubTraceTask:
+        def __init__(self, trace_type, **kwargs):
+            self.trace_type = trace_type
+            self.app_id = None
+            self.kwargs = kwargs
+
+    class StubTraceQueueManager:
+        def __init__(self, app_id=None, user_id=None):
+            self.app_id = app_id
+            self.user_id = user_id
+            self.trace_instance = StubOpsTraceManager.get_ops_trace_instance(app_id)
+
+        def add_trace_task(self, trace_task):
+            trace_task.app_id = self.app_id
+            from core.ops.ops_trace_manager import trace_manager_queue
+
+            trace_manager_queue.put(trace_task)
+
+    class StubOpsTraceManager:
+        @staticmethod
+        def get_ops_trace_instance(app_id):
+            return None
+
+    ops_stub.TraceQueueManager = StubTraceQueueManager
+    ops_stub.TraceTask = StubTraceTask
+    ops_stub.OpsTraceManager = StubOpsTraceManager
+    ops_stub.trace_manager_queue = MagicMock(spec=queue.Queue)
+    monkeypatch.setitem(sys.modules, module_name, ops_stub)
+
+    from core.telemetry import emit
+
+    return emit, ops_stub.trace_manager_queue
+
+
+class TestTelemetryEmit:
+    @patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=True)
+    def test_emit_enterprise_trace_creates_trace_task(self, mock_ee, telemetry_test_setup):
+        emit_fn, mock_queue = telemetry_test_setup
+
+        event = TelemetryEvent(
+            name=TraceTaskName.DRAFT_NODE_EXECUTION_TRACE,
+            context=TelemetryContext(
+                tenant_id="test-tenant",
+                user_id="test-user",
+                app_id="test-app",
+            ),
+            payload={"key": "value"},
+        )
+
+        emit_fn(event)
+
+        mock_queue.put.assert_called_once()
+        called_task = mock_queue.put.call_args[0][0]
+        assert called_task.trace_type == TraceTaskName.DRAFT_NODE_EXECUTION_TRACE
+
+    def test_emit_community_trace_enqueued(self, telemetry_test_setup):
+        emit_fn, mock_queue = telemetry_test_setup
+
+        event = TelemetryEvent(
+            name=TraceTaskName.WORKFLOW_TRACE,
+            context=TelemetryContext(
+                tenant_id="test-tenant",
+                user_id="test-user",
+                app_id="test-app",
+            ),
+            payload={},
+        )
+
+        emit_fn(event)
+
+        mock_queue.put.assert_called_once()
+
+    def test_emit_enterprise_only_trace_dropped_when_ee_disabled(self, telemetry_test_setup):
+        emit_fn, mock_queue = telemetry_test_setup
+
+        event = TelemetryEvent(
+            name=TraceTaskName.DRAFT_NODE_EXECUTION_TRACE,
+            context=TelemetryContext(
+                tenant_id="test-tenant",
+                user_id="test-user",
+                app_id="test-app",
+            ),
+            payload={},
+        )
+
+        emit_fn(event)
+
+        mock_queue.put.assert_not_called()
+
+    @patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=True)
+    def test_emit_all_enterprise_only_traces_allowed_when_ee_enabled(self, mock_ee, telemetry_test_setup):
+        emit_fn, mock_queue = telemetry_test_setup
+
+        enterprise_only_traces = [
+            TraceTaskName.DRAFT_NODE_EXECUTION_TRACE,
+            TraceTaskName.NODE_EXECUTION_TRACE,
+            TraceTaskName.PROMPT_GENERATION_TRACE,
+        ]
+
+        for trace_name in enterprise_only_traces:
+            mock_queue.reset_mock()
+
+            event = TelemetryEvent(
+                name=trace_name,
+                context=TelemetryContext(
+                    tenant_id="test-tenant",
+                    user_id="test-user",
+                    app_id="test-app",
+                ),
+                payload={},
+            )
+
+            emit_fn(event)
+
+            mock_queue.put.assert_called_once()
+            called_task = mock_queue.put.call_args[0][0]
+            assert called_task.trace_type == trace_name
+
+    @patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=True)
+    def test_emit_passes_name_directly_to_trace_task(self, mock_ee, telemetry_test_setup):
+        emit_fn, mock_queue = telemetry_test_setup
+
+        event = TelemetryEvent(
+            name=TraceTaskName.DRAFT_NODE_EXECUTION_TRACE,
+            context=TelemetryContext(
+                tenant_id="test-tenant",
+                user_id="test-user",
+                app_id="test-app",
+            ),
+            payload={"extra": "data"},
+        )
+
+        emit_fn(event)
+
+        mock_queue.put.assert_called_once()
+        called_task = mock_queue.put.call_args[0][0]
+        assert called_task.trace_type == TraceTaskName.DRAFT_NODE_EXECUTION_TRACE
+        assert isinstance(called_task.trace_type, TraceTaskName)
+
+    @patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=True)
+    def test_emit_with_provided_trace_manager(self, mock_ee, telemetry_test_setup):
+        emit_fn, mock_queue = telemetry_test_setup
+
+        mock_trace_manager = MagicMock()
+        mock_trace_manager.add_trace_task = MagicMock()
+
+        event = TelemetryEvent(
+            name=TraceTaskName.NODE_EXECUTION_TRACE,
+            context=TelemetryContext(
+                tenant_id="test-tenant",
+                user_id="test-user",
+                app_id="test-app",
+            ),
+            payload={},
+        )
+
+        emit_fn(event, trace_manager=mock_trace_manager)
+
+        mock_trace_manager.add_trace_task.assert_called_once()
+        called_task = mock_trace_manager.add_trace_task.call_args[0][0]
+        assert called_task.trace_type == TraceTaskName.NODE_EXECUTION_TRACE
diff --git a/api/tests/unit_tests/core/telemetry/test_gateway_integration.py b/api/tests/unit_tests/core/telemetry/test_gateway_integration.py
new file mode 100644
index 0000000000..a68fce5e7f
--- /dev/null
+++ b/api/tests/unit_tests/core/telemetry/test_gateway_integration.py
@@ -0,0 +1,225 @@
+from __future__ import annotations
+
+import sys
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from core.telemetry.gateway import emit, is_enterprise_telemetry_enabled
+from enterprise.telemetry.contracts import TelemetryCase
+
+
+class TestTelemetryCoreExports:
+    def test_is_enterprise_telemetry_enabled_exported(self) -> None:
+        from core.telemetry.gateway import is_enterprise_telemetry_enabled as exported_func
+
+        assert callable(exported_func)
+
+
+@pytest.fixture
+def mock_ops_trace_manager():
+    mock_module = MagicMock()
+    mock_trace_task_class = MagicMock()
+    mock_trace_task_class.return_value = MagicMock()
+    mock_module.TraceTask = mock_trace_task_class
+    mock_module.TraceQueueManager = MagicMock()
+
+    mock_trace_entity = MagicMock()
+    mock_trace_task_name = MagicMock()
+    mock_trace_task_name.return_value = "workflow"
+    mock_trace_entity.TraceTaskName = mock_trace_task_name
+
+    with (
+        patch.dict(sys.modules, {"core.ops.ops_trace_manager": mock_module}),
+        patch.dict(sys.modules, {"core.ops.entities.trace_entity": mock_trace_entity}),
+    ):
+        yield mock_module, mock_trace_entity
+
+
+class TestGatewayIntegrationTraceRouting:
+    @pytest.fixture
+    def mock_trace_manager(self) -> MagicMock:
+        return MagicMock()
+
+    @pytest.mark.usefixtures("mock_ops_trace_manager")
+    def test_ce_eligible_trace_routed_to_trace_manager(
+        self,
+        mock_trace_manager: MagicMock,
+    ) -> None:
+        with patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=True):
+            context = {"app_id": "app-123", "user_id": "user-456", "tenant_id": "tenant-789"}
+            payload = {"workflow_run_id": "run-abc"}
+
+            emit(TelemetryCase.WORKFLOW_RUN, context, payload, mock_trace_manager)
+
+            mock_trace_manager.add_trace_task.assert_called_once()
+
+    @pytest.mark.usefixtures("mock_ops_trace_manager")
+    def test_ce_eligible_trace_routed_when_ee_disabled(
+        self,
+        mock_trace_manager: MagicMock,
+    ) -> None:
+        with patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=False):
+            context = {"app_id": "app-123", "user_id": "user-456"}
+            payload = {"workflow_run_id": "run-abc"}
+
+            emit(TelemetryCase.WORKFLOW_RUN, context, payload, mock_trace_manager)
+
+            mock_trace_manager.add_trace_task.assert_called_once()
+
+    @pytest.mark.usefixtures("mock_ops_trace_manager")
+    def test_enterprise_only_trace_dropped_when_ee_disabled(
+        self,
+        mock_trace_manager: MagicMock,
+    ) -> None:
+        with patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=False):
+            context = {"app_id": "app-123", "user_id": "user-456"}
+            payload = {"node_id": "node-abc"}
+
+            emit(TelemetryCase.NODE_EXECUTION, context, payload, mock_trace_manager)
+
+            mock_trace_manager.add_trace_task.assert_not_called()
+
+    @pytest.mark.usefixtures("mock_ops_trace_manager")
+    def test_enterprise_only_trace_routed_when_ee_enabled(
+        self,
+        mock_trace_manager: MagicMock,
+    ) -> None:
+        with patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=True):
+            context = {"app_id": "app-123", "user_id": "user-456"}
+            payload = {"node_id": "node-abc"}
+
+            emit(TelemetryCase.NODE_EXECUTION, context, payload, mock_trace_manager)
+
+            mock_trace_manager.add_trace_task.assert_called_once()
+
+
+class TestGatewayIntegrationMetricRouting:
+    @patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=True)
+    def test_metric_case_routes_to_celery_task(
+        self,
+        mock_ee_enabled: MagicMock,
+    ) -> None:
+        from enterprise.telemetry.contracts import TelemetryEnvelope
+
+        with patch("tasks.enterprise_telemetry_task.process_enterprise_telemetry.delay") as mock_delay:
+            context = {"tenant_id": "tenant-123"}
+            payload = {"app_id": "app-abc", "name": "My App"}
+
+            emit(TelemetryCase.APP_CREATED, context, payload)
+
+            mock_delay.assert_called_once()
+            envelope_json = mock_delay.call_args[0][0]
+            envelope = TelemetryEnvelope.model_validate_json(envelope_json)
+            assert envelope.case == TelemetryCase.APP_CREATED
+            assert envelope.tenant_id == "tenant-123"
+            assert envelope.payload["app_id"] == "app-abc"
+
+    @pytest.mark.usefixtures("mock_ops_trace_manager")
+    @patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=True)
+    def test_tool_execution_trace_routed(
+        self,
+        mock_ee_enabled: MagicMock,
+    ) -> None:
+        mock_trace_manager = MagicMock()
+        context = {"tenant_id": "tenant-123", "app_id": "app-123"}
+        payload = {"tool_name": "test_tool", "tool_inputs": {}, "tool_outputs": "result"}
+
+        emit(TelemetryCase.TOOL_EXECUTION, context, payload, mock_trace_manager)
+
+        mock_trace_manager.add_trace_task.assert_called_once()
+
+    @pytest.mark.usefixtures("mock_ops_trace_manager")
+    @patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=True)
+    def test_moderation_check_trace_routed(
+        self,
+        mock_ee_enabled: MagicMock,
+    ) -> None:
+        mock_trace_manager = MagicMock()
+        context = {"tenant_id": "tenant-123", "app_id": "app-123"}
+        payload = {"message_id": "msg-123", "moderation_result": {"flagged": False}}
+
+        emit(TelemetryCase.MODERATION_CHECK, context, payload, mock_trace_manager)
+
+        mock_trace_manager.add_trace_task.assert_called_once()
+
+
+class TestGatewayIntegrationCEEligibility:
+    @pytest.fixture
+    def mock_trace_manager(self) -> MagicMock:
+        return MagicMock()
+
+    @pytest.mark.usefixtures("mock_ops_trace_manager")
+    def test_workflow_run_is_ce_eligible(
+        self,
+        mock_trace_manager: MagicMock,
+    ) -> None:
+        with patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=False):
+            context = {"app_id": "app-123", "user_id": "user-456"}
+            payload = {"workflow_run_id": "run-abc"}
+
+            emit(TelemetryCase.WORKFLOW_RUN, context, payload, mock_trace_manager)
+
+            mock_trace_manager.add_trace_task.assert_called_once()
+
+    @pytest.mark.usefixtures("mock_ops_trace_manager")
+    def test_message_run_is_ce_eligible(
+        self,
+        mock_trace_manager: MagicMock,
+    ) -> None:
+        with patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=False):
+            context = {"app_id": "app-123", "user_id": "user-456"}
+            payload = {"message_id": "msg-abc", "conversation_id": "conv-123"}
+
+            emit(TelemetryCase.MESSAGE_RUN, context, payload, mock_trace_manager)
+
+            mock_trace_manager.add_trace_task.assert_called_once()
+
+    @pytest.mark.usefixtures("mock_ops_trace_manager")
+    def test_node_execution_not_ce_eligible(
+        self,
+        mock_trace_manager: MagicMock,
+    ) -> None:
+        with patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=False):
+            context = {"app_id": "app-123", "user_id": "user-456"}
+            payload = {"node_id": "node-abc"}
+
+            emit(TelemetryCase.NODE_EXECUTION, context, payload, mock_trace_manager)
+
+            mock_trace_manager.add_trace_task.assert_not_called()
+
+    @pytest.mark.usefixtures("mock_ops_trace_manager")
+    def test_draft_node_execution_not_ce_eligible(
+        self,
+        mock_trace_manager: MagicMock,
+    ) -> None:
+        with patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=False):
+            context = {"app_id": "app-123", "user_id": "user-456"}
+            payload = {"node_execution_data": {}}
+
+            emit(TelemetryCase.DRAFT_NODE_EXECUTION, context, payload, mock_trace_manager)
+
+            mock_trace_manager.add_trace_task.assert_not_called()
+
+    @pytest.mark.usefixtures("mock_ops_trace_manager")
+    def test_prompt_generation_not_ce_eligible(
+        self,
+        mock_trace_manager: MagicMock,
+    ) -> None:
+        with patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=False):
+            context = {"app_id": "app-123", "user_id": "user-456", "tenant_id": "tenant-789"}
+            payload = {"operation_type": "generate", "instruction": "test"}
+
+            emit(TelemetryCase.PROMPT_GENERATION, context, payload, mock_trace_manager)
+
+            mock_trace_manager.add_trace_task.assert_not_called()
+
+
+class TestIsEnterpriseTelemetryEnabled:
+    def test_returns_false_when_exporter_import_fails(self) -> None:
+        with patch.dict(sys.modules, {"enterprise.telemetry.exporter": None}):
+            result = is_enterprise_telemetry_enabled()
+            assert result is False
+
+    def test_function_is_callable(self) -> None:
+        assert callable(is_enterprise_telemetry_enabled)
diff --git a/api/tests/unit_tests/core/test_provider_manager.py b/api/tests/unit_tests/core/test_provider_manager.py
index 3abfb8c9f8..69567c54eb 100644
--- a/api/tests/unit_tests/core/test_provider_manager.py
+++ b/api/tests/unit_tests/core/test_provider_manager.py
@@ -1,32 +1,34 @@
+from unittest.mock import Mock, PropertyMock, patch
+
 import pytest
-from pytest_mock import MockerFixture
 
 from core.entities.provider_entities import ModelSettings
 from core.provider_manager import ProviderManager
+from dify_graph.model_runtime.entities.common_entities import I18nObject
 from dify_graph.model_runtime.entities.model_entities import ModelType
 from models.provider import LoadBalancingModelConfig, ProviderModelSetting
 
 
 @pytest.fixture
-def mock_provider_entity(mocker: MockerFixture):
-    mock_entity = mocker.Mock()
+def mock_provider_entity():
+    mock_entity = Mock()
     mock_entity.provider = "openai"
     mock_entity.configurate_methods = ["predefined-model"]
     mock_entity.supported_model_types = [ModelType.LLM]
 
     # Use PropertyMock to ensure credential_form_schemas is iterable
-    provider_credential_schema = mocker.Mock()
-    type(provider_credential_schema).credential_form_schemas = mocker.PropertyMock(return_value=[])
+    provider_credential_schema = Mock()
+    type(provider_credential_schema).credential_form_schemas = PropertyMock(return_value=[])
     mock_entity.provider_credential_schema = provider_credential_schema
 
-    model_credential_schema = mocker.Mock()
-    type(model_credential_schema).credential_form_schemas = mocker.PropertyMock(return_value=[])
+    model_credential_schema = Mock()
+    type(model_credential_schema).credential_form_schemas = PropertyMock(return_value=[])
    mock_entity.model_credential_schema = model_credential_schema
 
     return mock_entity
 
 
-def test__to_model_settings(mocker: MockerFixture, mock_provider_entity):
+def test__to_model_settings(mock_provider_entity):
     # Mocking the inputs
     ps = ProviderModelSetting(
         tenant_id="tenant_id",
@@ -63,18 +65,18 @@ def test__to_model_settings(mocker: MockerFixture, mock_provider_entity):
     load_balancing_model_configs[0].id = "id1"
     load_balancing_model_configs[1].id = "id2"
 
-    mocker.patch(
-        "core.helper.model_provider_cache.ProviderCredentialsCache.get", return_value={"openai_api_key": "fake_key"}
-    )
+    with patch(
+        "core.helper.model_provider_cache.ProviderCredentialsCache.get",
+        return_value={"openai_api_key": "fake_key"},
+    ):
+        provider_manager = ProviderManager()
 
-    provider_manager = ProviderManager()
-
-    # Running the method
-    result = provider_manager._to_model_settings(
-        provider_entity=mock_provider_entity,
-        provider_model_settings=provider_model_settings,
-        load_balancing_model_configs=load_balancing_model_configs,
-    )
+        # Running the method
+        result = provider_manager._to_model_settings(
+            provider_entity=mock_provider_entity,
+            provider_model_settings=provider_model_settings,
+            load_balancing_model_configs=load_balancing_model_configs,
+        )
 
     # Asserting that the result is as expected
     assert len(result) == 1
@@ -87,7 +89,7 @@ def test__to_model_settings(mocker: MockerFixture, mock_provider_entity):
     assert result[0].load_balancing_configs[1].name == "first"
 
 
-def test__to_model_settings_only_one_lb(mocker: MockerFixture, mock_provider_entity):
+def test__to_model_settings_only_one_lb(mock_provider_entity):
     # Mocking the inputs
     ps = ProviderModelSetting(
@@ -113,18 +115,18 @@ def test__to_model_settings_only_one_lb(mocker: MockerFixture, mock_provider_ent
     ]
     load_balancing_model_configs[0].id = "id1"
 
-    mocker.patch(
-        "core.helper.model_provider_cache.ProviderCredentialsCache.get", return_value={"openai_api_key": "fake_key"}
-    )
+    with patch(
+        "core.helper.model_provider_cache.ProviderCredentialsCache.get",
+        return_value={"openai_api_key": "fake_key"},
+    ):
+        provider_manager = ProviderManager()
 
-    provider_manager = ProviderManager()
-
-    # Running the method
-    result = provider_manager._to_model_settings(
-        provider_entity=mock_provider_entity,
-        provider_model_settings=provider_model_settings,
-        load_balancing_model_configs=load_balancing_model_configs,
-    )
+        # Running the method
+        result = provider_manager._to_model_settings(
+            provider_entity=mock_provider_entity,
+            provider_model_settings=provider_model_settings,
+            load_balancing_model_configs=load_balancing_model_configs,
+        )
 
     # Asserting that the result is as expected
     assert len(result) == 1
@@ -135,7 +137,7 @@ def test__to_model_settings_only_one_lb(mocker: MockerFixture, mock_provider_ent
     assert len(result[0].load_balancing_configs) == 0
 
 
-def test__to_model_settings_lb_disabled(mocker: MockerFixture, mock_provider_entity):
+def test__to_model_settings_lb_disabled(mock_provider_entity):
     # Mocking the inputs
     ps = ProviderModelSetting(
         tenant_id="tenant_id",
@@ -170,18 +172,18 @@ def test__to_model_settings_lb_disabled(mocker: MockerFixture, mock_provider_ent
     load_balancing_model_configs[0].id = "id1"
     load_balancing_model_configs[1].id = "id2"
 
-    mocker.patch(
-        "core.helper.model_provider_cache.ProviderCredentialsCache.get", return_value={"openai_api_key": "fake_key"}
-    )
+    with patch(
+        "core.helper.model_provider_cache.ProviderCredentialsCache.get",
+        return_value={"openai_api_key": "fake_key"},
+    ):
+        provider_manager = ProviderManager()
 
-    provider_manager = ProviderManager()
-
-    # Running the method
-    result = provider_manager._to_model_settings(
-        provider_entity=mock_provider_entity,
-        provider_model_settings=provider_model_settings,
-        load_balancing_model_configs=load_balancing_model_configs,
-    )
+        # Running the method
+        result = provider_manager._to_model_settings(
+            provider_entity=mock_provider_entity,
+            provider_model_settings=provider_model_settings,
+            load_balancing_model_configs=load_balancing_model_configs,
+        )
 
     # Asserting that the result is as expected
     assert len(result) == 1
@@ -190,3 +192,39 @@
     assert result[0].model_type == ModelType.LLM
     assert result[0].enabled is True
     assert len(result[0].load_balancing_configs) == 0
+
+
+def test_get_default_model_uses_first_available_active_model():
+    mock_session = Mock()
+    mock_session.scalar.return_value = None
+
+    provider_configurations = Mock()
+    provider_configurations.get_models.return_value = [
+        Mock(model="gpt-3.5-turbo", provider=Mock(provider="openai")),
+        Mock(model="gpt-4", provider=Mock(provider="openai")),
+    ]
+
+    manager = ProviderManager()
+    with (
+        patch("core.provider_manager.db.session", mock_session),
+        patch.object(manager, "get_configurations", return_value=provider_configurations),
+        patch("core.provider_manager.ModelProviderFactory") as mock_factory_cls,
+    ):
+        mock_factory_cls.return_value.get_provider_schema.return_value = Mock(
+            provider="openai",
+            label=I18nObject(en_US="OpenAI", zh_Hans="OpenAI"),
+            icon_small=I18nObject(en_US="icon_small.png", zh_Hans="icon_small.png"),
+            supported_model_types=[ModelType.LLM],
+        )
+
+        result = manager.get_default_model("tenant-id", ModelType.LLM)
+
+    assert result is not None
+    assert result.model == "gpt-3.5-turbo"
+    assert result.provider.provider == "openai"
+    provider_configurations.get_models.assert_called_once_with(model_type=ModelType.LLM, only_active=True)
+    mock_session.add.assert_called_once()
+    saved_default_model = mock_session.add.call_args.args[0]
+    assert saved_default_model.model_name == "gpt-3.5-turbo"
+    assert saved_default_model.provider_name == "openai"
+    mock_session.commit.assert_called_once()
diff --git a/api/tests/unit_tests/core/test_trigger_debug_event_selectors.py b/api/tests/unit_tests/core/test_trigger_debug_event_selectors.py
index 14b42adbbe..2b508ca654 100644
--- a/api/tests/unit_tests/core/test_trigger_debug_event_selectors.py
+++ b/api/tests/unit_tests/core/test_trigger_debug_event_selectors.py
@@ -6,7 +6,7 @@
 import pytest
 import pytz
 
 from core.trigger.debug import event_selectors
-from dify_graph.nodes.trigger_schedule.entities import ScheduleConfig
+from core.workflow.nodes.trigger_schedule.entities import ScheduleConfig
 
 
 class _DummyRedis:
diff --git a/api/tests/unit_tests/core/tools/utils/test_configuration.py b/api/tests/unit_tests/core/tools/utils/test_configuration.py
index 4d59affb99..5ceaa08893 100644
--- a/api/tests/unit_tests/core/tools/utils/test_configuration.py
+++ b/api/tests/unit_tests/core/tools/utils/test_configuration.py
@@ -5,6 +5,7 @@ from typing import Any
 from unittest.mock import patch
 
 from core.app.entities.app_invoke_entities import InvokeFrom
+from core.helper.tool_parameter_cache import ToolParameterCache
 from core.tools.__base.tool import Tool
 from core.tools.__base.tool_runtime import ToolRuntime
 from core.tools.entities.common_entities import I18nObject
@@ -112,37 +113,38 @@ def test_encrypt_tool_parameters():
 
 def test_decrypt_tool_parameters_cache_hit_and_miss():
     manager = _build_manager()
 
-    with patch("core.tools.utils.configuration.ToolParameterCache") as cache_cls:
-        cache = cache_cls.return_value
-        cache.get.return_value = {"secret": "cached"}
+    with (
+        patch.object(ToolParameterCache, "get", return_value={"secret": "cached"}),
+        patch.object(ToolParameterCache, "set") as mock_set,
+    ):
         assert manager.decrypt_tool_parameters({"secret": "enc"}) == {"secret": "cached"}
-        cache.set.assert_not_called()
+        mock_set.assert_not_called()
 
-    with patch("core.tools.utils.configuration.ToolParameterCache") as cache_cls:
-        cache = cache_cls.return_value
-        cache.get.return_value = None
-        with patch("core.tools.utils.configuration.encrypter.decrypt_token", return_value="dec"):
-            decrypted = manager.decrypt_tool_parameters({"secret": "enc", "plain": "x"})
-
-    assert decrypted["secret"] == "dec"
-    cache.set.assert_called_once()
+    with (
+        patch.object(ToolParameterCache, "get", return_value=None),
+        patch.object(ToolParameterCache, "set") as mock_set,
+        patch("core.tools.utils.configuration.encrypter.decrypt_token", return_value="dec"),
+    ):
+        decrypted = manager.decrypt_tool_parameters({"secret": "enc", "plain": "x"})
+        assert decrypted["secret"] == "dec"
+        mock_set.assert_called_once()
 
 
 def test_delete_tool_parameters_cache():
     manager = _build_manager()
 
-    with patch("core.tools.utils.configuration.ToolParameterCache") as cache_cls:
+    with patch.object(ToolParameterCache, "delete") as mock_delete:
         manager.delete_tool_parameters_cache()
-        cache_cls.return_value.delete.assert_called_once()
+        mock_delete.assert_called_once()
 
 
 def test_configuration_manager_decrypt_suppresses_errors():
     manager = _build_manager()
 
-    with patch("core.tools.utils.configuration.ToolParameterCache") as cache_cls:
-        cache = cache_cls.return_value
-        cache.get.return_value = None
-        with patch("core.tools.utils.configuration.encrypter.decrypt_token", side_effect=RuntimeError("boom")):
-            decrypted = manager.decrypt_tool_parameters({"secret": "enc"})
+    with (
+        patch.object(ToolParameterCache, "get", return_value=None),
+        patch("core.tools.utils.configuration.encrypter.decrypt_token", side_effect=RuntimeError("boom")),
+    ):
+        decrypted = manager.decrypt_tool_parameters({"secret": "enc"})
 
     # decryption failure is suppressed, original value is retained.
     assert decrypted["secret"] == "enc"
diff --git a/api/tests/unit_tests/core/trigger/debug/test_debug_event_selectors.py b/api/tests/unit_tests/core/trigger/debug/test_debug_event_selectors.py
index 331bcd6c25..bcb1d745e3 100644
--- a/api/tests/unit_tests/core/trigger/debug/test_debug_event_selectors.py
+++ b/api/tests/unit_tests/core/trigger/debug/test_debug_event_selectors.py
@@ -13,6 +13,11 @@ from unittest.mock import MagicMock, patch
 
 import pytest
 
 from core.plugin.entities.request import TriggerInvokeEventResponse
+from core.trigger.constants import (
+    TRIGGER_PLUGIN_NODE_TYPE,
+    TRIGGER_SCHEDULE_NODE_TYPE,
+    TRIGGER_WEBHOOK_NODE_TYPE,
+)
 from core.trigger.debug.event_selectors import (
     PluginTriggerDebugEventPoller,
     ScheduleTriggerDebugEventPoller,
@@ -21,7 +26,7 @@ from core.trigger.debug.event_selectors import (
     select_trigger_debug_events,
 )
 from core.trigger.debug.events import PluginTriggerDebugEvent, WebhookDebugEvent
-from dify_graph.enums import NodeType
+from dify_graph.enums import BuiltinNodeTypes, NodeType
 
 from tests.unit_tests.core.trigger.conftest import VALID_PROVIDER_ID
 
@@ -215,24 +220,24 @@ class TestCreateEventPoller:
         return wf
 
     def test_creates_plugin_poller(self):
-        wf = self._workflow_with_node(NodeType.TRIGGER_PLUGIN)
+        wf = self._workflow_with_node(TRIGGER_PLUGIN_NODE_TYPE)
         poller = create_event_poller(wf, "t1", "u1", "a1", "n1")
         assert isinstance(poller, PluginTriggerDebugEventPoller)
 
     def test_creates_webhook_poller(self):
-        wf = self._workflow_with_node(NodeType.TRIGGER_WEBHOOK)
+        wf = self._workflow_with_node(TRIGGER_WEBHOOK_NODE_TYPE)
         poller = create_event_poller(wf, "t1", "u1", "a1", "n1")
         assert isinstance(poller, WebhookTriggerDebugEventPoller)
 
     def test_creates_schedule_poller(self):
-        wf = self._workflow_with_node(NodeType.TRIGGER_SCHEDULE)
+        wf = self._workflow_with_node(TRIGGER_SCHEDULE_NODE_TYPE)
         poller = create_event_poller(wf, "t1", "u1", "a1", "n1")
         assert isinstance(poller, ScheduleTriggerDebugEventPoller)
 
     def test_raises_for_unknown_type(self):
         wf = MagicMock()
         wf.get_node_config_by_id.return_value = {"data": {}}
-        wf.get_node_type_from_node_config.return_value = NodeType.START
+        wf.get_node_type_from_node_config.return_value = BuiltinNodeTypes.START
         with pytest.raises(ValueError):
             create_event_poller(wf, "t1", "u1", "a1", "n1")
 
@@ -249,7 +254,7 @@ class TestSelectTriggerDebugEvents:
     def test_returns_first_non_none_event(self):
         wf = MagicMock()
         wf.get_node_config_by_id.return_value = {"data": {}}
-        wf.get_node_type_from_node_config.return_value = NodeType.TRIGGER_WEBHOOK
+        wf.get_node_type_from_node_config.return_value = TRIGGER_WEBHOOK_NODE_TYPE
         app_model = MagicMock()
         app_model.tenant_id = "t1"
         app_model.id = "a1"
@@ -265,7 +270,7 @@ class TestSelectTriggerDebugEvents:
     def test_returns_none_when_no_events(self):
         wf = MagicMock()
         wf.get_node_config_by_id.return_value = {"data": {}}
-        wf.get_node_type_from_node_config.return_value = NodeType.TRIGGER_WEBHOOK
+        wf.get_node_type_from_node_config.return_value = TRIGGER_WEBHOOK_NODE_TYPE
         app_model = MagicMock()
         app_model.tenant_id = "t1"
         app_model.id = "a1"
diff --git a/api/tests/unit_tests/core/workflow/entities/test_workflow_node_execution.py b/api/tests/unit_tests/core/workflow/entities/test_workflow_node_execution.py
index 4035c1a871..216e64db8d 100644
--- a/api/tests/unit_tests/core/workflow/entities/test_workflow_node_execution.py
+++ b/api/tests/unit_tests/core/workflow/entities/test_workflow_node_execution.py
@@ -9,7 +9,7 @@ from typing import Any
 
 import pytest
 
 from dify_graph.entities.workflow_node_execution import WorkflowNodeExecution
-from dify_graph.enums import NodeType
+from dify_graph.enums import BuiltinNodeTypes
 
 
 class TestWorkflowNodeExecutionProcessDataTruncation:
@@ -25,7 +25,7 @@ class TestWorkflowNodeExecutionProcessDataTruncation:
             workflow_id="test-workflow-id",
             index=1,
             node_id="test-node-id",
-            node_type=NodeType.LLM,
+            node_type=BuiltinNodeTypes.LLM,
             title="Test Node",
             process_data=process_data,
             created_at=datetime.now(),
@@ -212,7 +212,7 @@ class TestWorkflowNodeExecutionProcessDataScenarios:
             workflow_id="test-workflow-id",
             index=1,
             node_id="test-node-id",
-            node_type=NodeType.LLM,
+            node_type=BuiltinNodeTypes.LLM,
             title="Test Node",
             process_data=scenario.original_data,
             created_at=datetime.now(),
diff --git a/api/tests/unit_tests/core/workflow/graph/test_graph.py b/api/tests/unit_tests/core/workflow/graph/test_graph.py
index c46b9e51fd..24bd9ccbed 100644
--- a/api/tests/unit_tests/core/workflow/graph/test_graph.py
+++ b/api/tests/unit_tests/core/workflow/graph/test_graph.py
@@ -2,7 +2,7 @@
 
 from unittest.mock import Mock
 
-from dify_graph.enums import NodeExecutionType, NodeState, NodeType
+from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, NodeState
 from dify_graph.graph.edge import Edge
 from dify_graph.graph.graph import Graph
 from dify_graph.nodes.base.node import Node
@@ -14,7 +14,7 @@ def create_mock_node(node_id: str, execution_type: NodeExecutionType, state: Nod
     node.id = node_id
     node.execution_type = execution_type
     node.state = state
-    node.node_type = NodeType.START
+    node.node_type = BuiltinNodeTypes.START
     return node
diff --git a/api/tests/unit_tests/core/workflow/graph/test_graph_builder.py b/api/tests/unit_tests/core/workflow/graph/test_graph_builder.py
index bd4a0f32e2..64c2eee776 100644
--- a/api/tests/unit_tests/core/workflow/graph/test_graph_builder.py
+++ b/api/tests/unit_tests/core/workflow/graph/test_graph_builder.py
@@ -2,12 +2,12 @@
 from unittest.mock import MagicMock
 
 import pytest
 
-from dify_graph.enums import NodeType
+from dify_graph.enums import BuiltinNodeTypes, NodeType
 from dify_graph.graph import Graph
 from dify_graph.nodes.base.node import Node
 
 
-def _make_node(node_id: str, node_type: NodeType = NodeType.START) -> Node:
+def _make_node(node_id: str, node_type: NodeType = BuiltinNodeTypes.START) -> Node:
     node = MagicMock(spec=Node)
     node.id = node_id
     node.node_type = node_type
@@ -17,9 +17,9 @@ def test_graph_builder_creates_linear_graph():
     builder = Graph.new()
 
-    root = _make_node("root", NodeType.START)
-    mid = _make_node("mid", NodeType.LLM)
-    end = _make_node("end", NodeType.END)
+    root = _make_node("root", BuiltinNodeTypes.START)
+    mid = _make_node("mid", BuiltinNodeTypes.LLM)
+    end = _make_node("end", BuiltinNodeTypes.END)
 
     graph = builder.add_root(root).add_node(mid).add_node(end).build()
diff --git a/api/tests/unit_tests/core/workflow/graph/test_graph_skip_validation.py b/api/tests/unit_tests/core/workflow/graph/test_graph_skip_validation.py
index b93f18c5bd..75de07bd8b 100644
--- a/api/tests/unit_tests/core/workflow/graph/test_graph_skip_validation.py
+++ b/api/tests/unit_tests/core/workflow/graph/test_graph_skip_validation.py
@@ -7,7 +7,7 @@ import pytest
 from core.workflow.node_factory import DifyNodeFactory
 from dify_graph.graph import Graph
 from dify_graph.graph.validation import GraphValidationError
-from dify_graph.nodes import NodeType
+from dify_graph.nodes import BuiltinNodeTypes
 from dify_graph.runtime import GraphRuntimeState, VariablePool
 from dify_graph.system_variable import SystemVariable
 from tests.workflow_test_utils import build_test_graph_init_params
@@ -92,7 +92,7 @@
     )
 
     assert graph.root_node.id == node_id
-    assert graph.root_node.node_type == NodeType.ITERATION
+    assert graph.root_node.node_type == BuiltinNodeTypes.ITERATION
 
 
 def test_loop_root_requires_skip_validation():
@@ -115,4 +115,4 @@ def test_loop_root_requires_skip_validation():
     )
 
     assert graph.root_node.id == node_id
-    assert graph.root_node.node_type == NodeType.LOOP
+    assert graph.root_node.node_type == BuiltinNodeTypes.LOOP
diff --git a/api/tests/unit_tests/core/workflow/graph/test_graph_validation.py b/api/tests/unit_tests/core/workflow/graph/test_graph_validation.py
index 9e9fc2e9ec..e94ad74eb0 100644
--- a/api/tests/unit_tests/core/workflow/graph/test_graph_validation.py
+++ b/api/tests/unit_tests/core/workflow/graph/test_graph_validation.py
@@ -8,7 +8,7 @@ import pytest
 
 from dify_graph.entities import GraphInitParams
 from dify_graph.entities.base_node_data import BaseNodeData
-from dify_graph.enums import ErrorStrategy, NodeExecutionType, NodeType
+from dify_graph.enums import BuiltinNodeTypes, ErrorStrategy, NodeExecutionType, NodeType
 from dify_graph.graph import Graph
 from dify_graph.graph.validation import GraphValidationError
 from dify_graph.nodes.base.node import Node
@@ -18,12 +18,12 @@ from tests.workflow_test_utils import build_test_graph_init_params
 
 
 class _TestNodeData(BaseNodeData):
-    type: NodeType | str | None = None
+    type: NodeType | None = None
     execution_type: NodeExecutionType | str | None = None
 
 
 class _TestNode(Node[_TestNodeData]):
-    node_type = NodeType.ANSWER
+    node_type = BuiltinNodeTypes.ANSWER
     execution_type = NodeExecutionType.EXECUTABLE
 
     @classmethod
@@ -46,13 +46,8 @@ class _TestNode(Node[_TestNodeData]):
         )
 
         node_type_value = self.data.get("type")
-        if isinstance(node_type_value, NodeType):
+        if isinstance(node_type_value, str):
             self.node_type = node_type_value
-        elif isinstance(node_type_value, str):
-            try:
-                self.node_type = NodeType(node_type_value)
-            except ValueError:
-                pass
 
     def _run(self):
         raise NotImplementedError
@@ -112,14 +107,17 @@ def test_graph_initialization_runs_default_validators(
 ):
     node_factory, graph_config = graph_init_dependencies
     graph_config["nodes"] = [
-        {"id": "start", "data": {"type": NodeType.START, "title": "Start", "execution_type": NodeExecutionType.ROOT}},
-        {"id": "answer", "data": {"type": NodeType.ANSWER, "title": "Answer"}},
+        {
+            "id": "start",
+            "data": {"type": BuiltinNodeTypes.START, "title": "Start", "execution_type": NodeExecutionType.ROOT},
+        },
+        {"id": "answer", "data": {"type": BuiltinNodeTypes.ANSWER, "title": "Answer"}},
     ]
     graph_config["edges"] = [
         {"source": "start", "target": "answer", "sourceHandle": "success"},
     ]
 
-    graph = Graph.init(graph_config=graph_config, node_factory=node_factory)
+    graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start")
 
     assert graph.root_node.id == "start"
     assert "answer" in graph.nodes
@@ -130,14 +128,17 @@ def test_graph_validation_fails_for_unknown_edge_targets(
 ) -> None:
     node_factory, graph_config = graph_init_dependencies
     graph_config["nodes"] = [
-        {"id": "start", "data": {"type": NodeType.START, "title": "Start", "execution_type": NodeExecutionType.ROOT}},
+        {
+            "id": "start",
+            "data": {"type": BuiltinNodeTypes.START, "title": "Start", "execution_type": NodeExecutionType.ROOT},
+        },
     ]
     graph_config["edges"] = [
         {"source": "start", "target": "missing", "sourceHandle": "success"},
     ]
 
     with pytest.raises(GraphValidationError) as exc:
-        Graph.init(graph_config=graph_config, node_factory=node_factory)
+        Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start")
 
     assert any(issue.code == "MISSING_NODE" for issue in exc.value.issues)
 
@@ -147,11 +148,14 @@ def test_graph_promotes_fail_branch_nodes_to_branch_execution_type(
 ) -> None:
     node_factory, graph_config = graph_init_dependencies
     graph_config["nodes"] = [
-        {"id": "start", "data": {"type": NodeType.START, "title": "Start", "execution_type": NodeExecutionType.ROOT}},
+        {
+            "id": "start",
+            "data": {"type": BuiltinNodeTypes.START, "title": "Start", "execution_type": NodeExecutionType.ROOT},
+        },
         {
             "id": "branch",
             "data": {
-                "type": NodeType.IF_ELSE,
+                "type": BuiltinNodeTypes.IF_ELSE,
                 "title": "Branch",
                 "error_strategy": ErrorStrategy.FAIL_BRANCH,
             },
@@ -161,30 +165,11 @@ def test_graph_promotes_fail_branch_nodes_to_branch_execution_type(
         {"source": "start", "target": "branch", "sourceHandle": "success"},
     ]
 
-    graph = Graph.init(graph_config=graph_config, node_factory=node_factory)
+    graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start")
 
     assert graph.nodes["branch"].execution_type == NodeExecutionType.BRANCH
 
 
-def test_graph_validation_blocks_start_and_trigger_coexistence(
-    graph_init_dependencies: tuple[_SimpleNodeFactory, dict[str, object]],
-) -> None:
-    node_factory, graph_config = graph_init_dependencies
-    graph_config["nodes"] = [
-        {"id": "start", "data": {"type": NodeType.START, "title": "Start", "execution_type": NodeExecutionType.ROOT}},
-        {
-            "id": "trigger",
-            "data": {"type": NodeType.TRIGGER_WEBHOOK, "title": "Webhook", "execution_type": NodeExecutionType.ROOT},
-        },
-    ]
-    graph_config["edges"] = []
-
-    with pytest.raises(GraphValidationError) as exc_info:
-        Graph.init(graph_config=graph_config, node_factory=node_factory)
-
-    assert any(issue.code == "TRIGGER_START_NODE_CONFLICT" for issue in exc_info.value.issues)
-
-
 def test_graph_init_ignores_custom_note_nodes_before_node_data_validation(
     graph_init_dependencies: tuple[_SimpleNodeFactory, dict[str, object]],
 ) -> None:
@@ -192,9 +177,9 @@ def test_graph_init_ignores_custom_note_nodes_before_node_data_validation(
     graph_config["nodes"] = [
         {
             "id": "start",
-            "data": {"type": NodeType.START, "title": "Start", "execution_type": NodeExecutionType.ROOT},
+            "data": {"type": BuiltinNodeTypes.START, "title": "Start", "execution_type": NodeExecutionType.ROOT},
         },
-        {"id": "answer", "data": {"type": NodeType.ANSWER, "title": "Answer"}},
+        {"id": "answer", "data": {"type": BuiltinNodeTypes.ANSWER, "title": "Answer"}},
         {
             "id": "note",
             "type": "custom-note",
@@ -211,8 +196,24 @@ def test_graph_init_ignores_custom_note_nodes_before_node_data_validation(
         {"source": "start", "target": "answer", "sourceHandle": "success"},
     ]
 
-    graph = Graph.init(graph_config=graph_config, node_factory=node_factory)
+    graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start")
 
     assert graph.root_node.id == "start"
     assert "answer" in graph.nodes
     assert "note" not in graph.nodes
+
+
+def test_graph_init_fails_for_unknown_root_node_id(
+    graph_init_dependencies: tuple[_SimpleNodeFactory, dict[str, object]],
+) -> None:
+    node_factory, graph_config = graph_init_dependencies
+    graph_config["nodes"] = [
+        {
+            "id": "start",
+            "data": {"type": BuiltinNodeTypes.START, "title": "Start", "execution_type": NodeExecutionType.ROOT},
+        },
+    ]
+    graph_config["edges"] = []
+
+    with pytest.raises(ValueError, match="Root node id missing not found in the graph"):
+        Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="missing")
diff --git a/api/tests/unit_tests/core/workflow/graph_engine/event_management/test_event_handlers.py b/api/tests/unit_tests/core/workflow/graph_engine/event_management/test_event_handlers.py
index 2b926d754c..6f821ba799 100644
--- a/api/tests/unit_tests/core/workflow/graph_engine/event_management/test_event_handlers.py
+++ b/api/tests/unit_tests/core/workflow/graph_engine/event_management/test_event_handlers.py
@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 from dify_graph.entities.base_node_data import RetryConfig
-from dify_graph.enums import NodeExecutionType, NodeState, NodeType, WorkflowNodeExecutionStatus
+from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, NodeState, WorkflowNodeExecutionStatus
 from dify_graph.graph import Graph
 from dify_graph.graph_engine.domain.graph_execution import GraphExecution
 from dify_graph.graph_engine.event_management.event_handlers import EventHandler
@@ -73,7 +73,7 @@ def test_retry_does_not_emit_additional_start_event() -> None:
     handler, event_manager, graph_execution = _build_event_handler(node_id)
 
     execution_id = "exec-1"
-    node_type = NodeType.CODE
+    node_type = BuiltinNodeTypes.CODE
     start_time = naive_utc_now()
 
     start_event = NodeRunStartedEvent(
diff --git a/api/tests/unit_tests/core/workflow/graph_engine/layers/conftest.py b/api/tests/unit_tests/core/workflow/graph_engine/layers/conftest.py
index 3d8de0a00d..9e7b3654b7 100644
--- a/api/tests/unit_tests/core/workflow/graph_engine/layers/conftest.py
+++ b/api/tests/unit_tests/core/workflow/graph_engine/layers/conftest.py
@@ -10,7 +10,7 @@
 from opentelemetry.sdk.trace.export import SimpleSpanProcessor
 from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
 from opentelemetry.trace import set_tracer_provider
 
-from dify_graph.enums import NodeType
+from dify_graph.enums import BuiltinNodeTypes
 
 
 @pytest.fixture
@@ -44,7 +44,7 @@ def mock_start_node():
     node.id = "test-start-node-id"
     node.title = "Start Node"
     node.execution_id = "test-start-execution-id"
-    node.node_type = NodeType.START
+    node.node_type = BuiltinNodeTypes.START
     return node
 
 
@@ -55,7 +55,7 @@ def mock_llm_node():
     node.id = "test-llm-node-id"
     node.title = "LLM Node"
     node.execution_id = "test-llm-execution-id"
-    node.node_type = NodeType.LLM
+    node.node_type = BuiltinNodeTypes.LLM
     return node
 
 
@@ -69,7 +69,7 @@
     node.id = "test-tool-node-id"
     node.title = "Test Tool Node"
     node.execution_id = "test-tool-execution-id"
-    node.node_type = NodeType.TOOL
+    node.node_type = BuiltinNodeTypes.TOOL
 
     tool_data = ToolNodeData(
         title="Test Tool Node",
@@ -108,7 +108,7 @@ def mock_retrieval_node():
     node.id = "test-retrieval-node-id"
     node.title = "Retrieval Node"
     node.execution_id = "test-retrieval-execution-id"
-    node.node_type = NodeType.KNOWLEDGE_RETRIEVAL
+    node.node_type = BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL
     return node
 
 
@@ -130,7 +130,7 @@ def mock_result_event():
     return NodeRunSucceededEvent(
         id="test-execution-id",
         node_id="test-node-id",
-        node_type=NodeType.LLM,
+        node_type=BuiltinNodeTypes.LLM,
         start_at=datetime.now(),
         node_run_result=node_run_result,
     )
diff --git a/api/tests/unit_tests/core/workflow/graph_engine/layers/test_llm_quota.py b/api/tests/unit_tests/core/workflow/graph_engine/layers/test_llm_quota.py
index 819fd67f9d..2a36f712fd 100644
--- a/api/tests/unit_tests/core/workflow/graph_engine/layers/test_llm_quota.py
+++ b/api/tests/unit_tests/core/workflow/graph_engine/layers/test_llm_quota.py
@@ -4,7 +4,7 @@ from unittest.mock import MagicMock, patch
 
 from core.app.workflow.layers.llm_quota import LLMQuotaLayer
 from core.errors.error import QuotaExceededError
-from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus
+from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus
 from dify_graph.graph_engine.entities.commands import CommandType
 from dify_graph.graph_events.node import NodeRunSucceededEvent
 from dify_graph.model_runtime.entities.llm_entities import LLMUsage
@@ -15,7 +15,7 @@ def _build_succeeded_event() -> NodeRunSucceededEvent:
     return NodeRunSucceededEvent(
         id="execution-id",
         node_id="llm-node-id",
-        node_type=NodeType.LLM,
+        node_type=BuiltinNodeTypes.LLM,
         start_at=datetime.now(),
         node_run_result=NodeRunResult(
             status=WorkflowNodeExecutionStatus.SUCCEEDED,
@@ -30,7 +30,7 @@ def test_deduct_quota_called_for_successful_llm_node() -> None:
     node = MagicMock()
     node.id = "llm-node-id"
     node.execution_id = "execution-id"
-    node.node_type = NodeType.LLM
+    node.node_type = BuiltinNodeTypes.LLM
     node.tenant_id = "tenant-id"
     node.require_dify_context.return_value.tenant_id = "tenant-id"
     node.model_instance = object()
@@ -51,7 +51,7 @@ def test_deduct_quota_called_for_question_classifier_node() -> None:
     node = MagicMock()
     node.id = "question-classifier-node-id"
     node.execution_id = "execution-id"
-    node.node_type = NodeType.QUESTION_CLASSIFIER
+    node.node_type = BuiltinNodeTypes.QUESTION_CLASSIFIER
     node.tenant_id = "tenant-id"
     node.require_dify_context.return_value.tenant_id = "tenant-id"
     node.model_instance = object()
@@ -72,7 +72,7 @@ def test_non_llm_node_is_ignored() -> None:
     node = MagicMock()
     node.id = "start-node-id"
     node.execution_id = "execution-id"
-    node.node_type = NodeType.START
+    node.node_type = BuiltinNodeTypes.START
     node.tenant_id = "tenant-id"
     node.require_dify_context.return_value.tenant_id = "tenant-id"
     node._model_instance = object()
@@ -89,7 +89,7 @@ def test_quota_error_is_handled_in_layer() -> None:
     node = MagicMock()
     node.id = "llm-node-id"
     node.execution_id = "execution-id"
-    node.node_type = NodeType.LLM
+    node.node_type = BuiltinNodeTypes.LLM
     node.tenant_id = "tenant-id"
     node.require_dify_context.return_value.tenant_id = "tenant-id"
     node.model_instance = object()
@@ -111,7 +111,7 @@ def test_quota_deduction_exceeded_aborts_workflow_immediately() -> None:
     node = MagicMock()
     node.id = "llm-node-id"
     node.execution_id = "execution-id"
-    node.node_type = NodeType.LLM
+    node.node_type = BuiltinNodeTypes.LLM
     node.tenant_id = "tenant-id"
     node.require_dify_context.return_value.tenant_id = "tenant-id"
     node.model_instance = object()
@@ -140,7 +140,7 @@ def test_quota_precheck_failure_aborts_workflow_immediately() -> None:
     node = MagicMock()
     node.id = "llm-node-id"
-    node.node_type = NodeType.LLM
+    node.node_type = BuiltinNodeTypes.LLM
     node.model_instance = object()
     node.graph_runtime_state = MagicMock()
     node.graph_runtime_state.stop_event = stop_event
@@ -166,7 +166,7 @@ def test_quota_precheck_passes_without_abort() -> None:
     node = MagicMock()
     node.id = "llm-node-id"
-    node.node_type = NodeType.LLM
+    node.node_type = BuiltinNodeTypes.LLM
     node.model_instance = object()
     node.graph_runtime_state = MagicMock()
     node.graph_runtime_state.stop_event = stop_event
diff --git a/api/tests/unit_tests/core/workflow/graph_engine/layers/test_observability.py b/api/tests/unit_tests/core/workflow/graph_engine/layers/test_observability.py
index b4a7cec494..478a2b592e 100644
--- a/api/tests/unit_tests/core/workflow/graph_engine/layers/test_observability.py
+++ b/api/tests/unit_tests/core/workflow/graph_engine/layers/test_observability.py
@@ -16,7 +16,7 @@ import pytest
 from opentelemetry.trace import StatusCode
 
 from core.app.workflow.layers.observability import ObservabilityLayer
-from dify_graph.enums import NodeType
+from dify_graph.enums import BuiltinNodeTypes
 
 
 class TestObservabilityLayerInitialization:
@@ -29,7 +29,7 @@ class TestObservabilityLayerInitialization:
         layer = ObservabilityLayer()
         assert not layer._is_disabled
         assert layer._tracer is not None
-        assert NodeType.TOOL in layer._parsers
+        assert BuiltinNodeTypes.TOOL in layer._parsers
         assert layer._default_parser is not None
 
     @patch("core.app.workflow.layers.observability.dify_config.ENABLE_OTEL", False)
@@ -39,7 +39,7 @@ class TestObservabilityLayerInitialization:
         layer = ObservabilityLayer()
         assert not layer._is_disabled
         assert layer._tracer is not None
-        assert NodeType.TOOL in layer._parsers
+        assert BuiltinNodeTypes.TOOL in layer._parsers
         assert layer._default_parser is not None
 
@@ -117,7 +117,7 @@ class TestObservabilityLayerParserIntegration:
         attrs = spans[0].attributes
         assert attrs["node.id"] == mock_start_node.id
         assert attrs["node.execution_id"] == mock_start_node.execution_id
-        assert attrs["node.type"] == mock_start_node.node_type.value
+        assert attrs["node.type"] == mock_start_node.node_type
 
     @patch("core.app.workflow.layers.observability.dify_config.ENABLE_OTEL", True)
     @pytest.mark.usefixtures("mock_is_instrument_flag_enabled_false")
diff --git a/api/tests/unit_tests/core/workflow/graph_engine/orchestration/test_dispatcher.py b/api/tests/unit_tests/core/workflow/graph_engine/orchestration/test_dispatcher.py
index 50d14ff48f..548c10ce8d 100644
--- a/api/tests/unit_tests/core/workflow/graph_engine/orchestration/test_dispatcher.py
+++ b/api/tests/unit_tests/core/workflow/graph_engine/orchestration/test_dispatcher.py
@@ -6,7 +6,7 @@ import queue
 from unittest import mock
 
 from dify_graph.entities.pause_reason import SchedulingPause
-from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus
+from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus
 from dify_graph.graph_engine.event_management.event_handlers import EventHandler
 from dify_graph.graph_engine.orchestration.dispatcher import Dispatcher
 from dify_graph.graph_engine.orchestration.execution_coordinator import ExecutionCoordinator
@@ -26,7 +26,7 @@ def test_dispatcher_should_consume_remains_events_after_pause():
         GraphNodeEventBase(
             id="test",
             node_id="test",
-            node_type=NodeType.START,
+            node_type=BuiltinNodeTypes.START,
         )
     )
     event_handler = mock.Mock(spec=EventHandler)
@@ -107,7 +107,7 @@ def _make_started_event() -> NodeRunStartedEvent:
     return NodeRunStartedEvent(
         id="start-event",
         node_id="node-1",
-        node_type=NodeType.CODE,
+        node_type=BuiltinNodeTypes.CODE,
         node_title="Test Node",
         start_at=naive_utc_now(),
     )
@@ -117,7 +117,7 @@ def _make_succeeded_event() -> NodeRunSucceededEvent:
     return NodeRunSucceededEvent(
         id="success-event",
         node_id="node-1",
-        node_type=NodeType.CODE,
+        node_type=BuiltinNodeTypes.CODE,
         node_title="Test Node",
         start_at=naive_utc_now(),
         node_run_result=NodeRunResult(status=WorkflowNodeExecutionStatus.SUCCEEDED),
@@ -151,20 +151,20 @@
         NodeRunStartedEvent(
             id="start-event",
             node_id="node-1",
-            node_type=NodeType.CODE,
+            node_type=BuiltinNodeTypes.CODE,
             node_title="Code",
             start_at=naive_utc_now(),
         ),
         NodeRunPauseRequestedEvent(
             id="pause-event",
             node_id="node-1",
-            node_type=NodeType.CODE,
+            node_type=BuiltinNodeTypes.CODE,
             reason=SchedulingPause(message="test pause"),
         ),
         NodeRunSucceededEvent(
             id="success-event",
             node_id="node-1",
-            node_type=NodeType.CODE,
+            node_type=BuiltinNodeTypes.CODE,
             start_at=naive_utc_now(),
             node_run_result=NodeRunResult(status=WorkflowNodeExecutionStatus.SUCCEEDED),
         ),
diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_auto_mock_system.py b/api/tests/unit_tests/core/workflow/graph_engine/test_auto_mock_system.py
index f886ae1c2b..fc0d22f739 100644
--- a/api/tests/unit_tests/core/workflow/graph_engine/test_auto_mock_system.py
+++ b/api/tests/unit_tests/core/workflow/graph_engine/test_auto_mock_system.py
@@ -7,7 +7,7 @@ for workflows containing nodes that require third-party services.
 
 import pytest
 
-from dify_graph.enums import NodeType
+from dify_graph.enums import BuiltinNodeTypes
 from tests.workflow_test_utils import build_test_graph_init_params
 
 from .test_mock_config import MockConfig, MockConfigBuilder, NodeMockConfig
@@ -227,23 +227,23 @@ def test_mock_factory_node_type_detection():
     )
 
     # Test that third-party service nodes are identified for mocking
-    assert factory.should_mock_node(NodeType.LLM)
-    assert factory.should_mock_node(NodeType.AGENT)
-    assert factory.should_mock_node(NodeType.TOOL)
-    assert factory.should_mock_node(NodeType.KNOWLEDGE_RETRIEVAL)
-    assert factory.should_mock_node(NodeType.HTTP_REQUEST)
-    assert factory.should_mock_node(NodeType.PARAMETER_EXTRACTOR)
-    assert factory.should_mock_node(NodeType.DOCUMENT_EXTRACTOR)
+    assert factory.should_mock_node(BuiltinNodeTypes.LLM)
+    assert factory.should_mock_node(BuiltinNodeTypes.AGENT)
+    assert factory.should_mock_node(BuiltinNodeTypes.TOOL)
+    assert factory.should_mock_node(BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL)
+    assert factory.should_mock_node(BuiltinNodeTypes.HTTP_REQUEST)
+    assert factory.should_mock_node(BuiltinNodeTypes.PARAMETER_EXTRACTOR)
+    assert factory.should_mock_node(BuiltinNodeTypes.DOCUMENT_EXTRACTOR)
 
     # Test that CODE and TEMPLATE_TRANSFORM are mocked (they require SSRF proxy)
-    assert factory.should_mock_node(NodeType.CODE)
-    assert factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM)
+    assert factory.should_mock_node(BuiltinNodeTypes.CODE)
+    assert factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM)
 
     # Test that non-service nodes are not mocked
-    assert not factory.should_mock_node(NodeType.START)
-    assert not factory.should_mock_node(NodeType.END)
-    assert not factory.should_mock_node(NodeType.IF_ELSE)
-    assert not factory.should_mock_node(NodeType.VARIABLE_AGGREGATOR)
+    assert not factory.should_mock_node(BuiltinNodeTypes.START)
+    assert not factory.should_mock_node(BuiltinNodeTypes.END)
+    assert not factory.should_mock_node(BuiltinNodeTypes.IF_ELSE)
+    assert not factory.should_mock_node(BuiltinNodeTypes.VARIABLE_AGGREGATOR)
 
 
 def test_custom_mock_handler():
@@ -341,15 +341,15 @@ def test_register_custom_mock_node():
     )
 
     # TEMPLATE_TRANSFORM is mocked by default (requires SSRF proxy)
-    assert factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM)
+    assert factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM)
 
     # Unregister mock
-    factory.unregister_mock_node_type(NodeType.TEMPLATE_TRANSFORM)
-    assert not factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM)
+    factory.unregister_mock_node_type(BuiltinNodeTypes.TEMPLATE_TRANSFORM)
+    assert not factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM)
 
     # Re-register custom mock
-    factory.register_mock_node_type(NodeType.TEMPLATE_TRANSFORM, MockTemplateTransformNode)
-    assert factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM)
+    factory.register_mock_node_type(BuiltinNodeTypes.TEMPLATE_TRANSFORM, MockTemplateTransformNode)
+    assert factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM)
 
 
 def test_default_config_by_node_type():
@@ -358,7 +358,7 @@
 
     # Set default config for all LLM nodes
     mock_config.set_default_config(
-        NodeType.LLM,
+        BuiltinNodeTypes.LLM,
         {
             "default_response": "Default LLM response for all nodes",
             "temperature": 0.7,
@@ -367,23 +367,23 @@
 
     # Set default config for all HTTP nodes
     mock_config.set_default_config(
-        NodeType.HTTP_REQUEST,
+        BuiltinNodeTypes.HTTP_REQUEST,
         {
             "default_status": 200,
             "default_timeout": 30,
         },
     )
 
-    llm_config = mock_config.get_default_config(NodeType.LLM)
+    llm_config = mock_config.get_default_config(BuiltinNodeTypes.LLM)
     assert llm_config["default_response"] == "Default LLM response for all nodes"
     assert llm_config["temperature"] == 0.7
 
-    http_config = mock_config.get_default_config(NodeType.HTTP_REQUEST)
+    http_config = mock_config.get_default_config(BuiltinNodeTypes.HTTP_REQUEST)
     assert http_config["default_status"] == 200
     assert http_config["default_timeout"] == 30
 
     # Non-configured node type should return empty dict
-    tool_config = mock_config.get_default_config(NodeType.TOOL)
+    tool_config = mock_config.get_default_config(BuiltinNodeTypes.TOOL)
     assert tool_config == {}
diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_conditional_streaming_vs_template_workflow.py b/api/tests/unit_tests/core/workflow/graph_engine/test_conditional_streaming_vs_template_workflow.py
index cde99196c8..76bf179f33 100644
--- a/api/tests/unit_tests/core/workflow/graph_engine/test_conditional_streaming_vs_template_workflow.py
+++ b/api/tests/unit_tests/core/workflow/graph_engine/test_conditional_streaming_vs_template_workflow.py
@@ -6,7 +6,7 @@ This test validates that:
 - When blocking != 1: NodeRunStreamChunkEvent present (direct LLM to End output)
 """
 
-from dify_graph.enums import NodeType
+from dify_graph.enums import BuiltinNodeTypes
 from dify_graph.graph_engine import GraphEngine, GraphEngineConfig
 from dify_graph.graph_engine.command_channels import InMemoryChannel
 from dify_graph.graph_events import (
@@ -74,7 +74,11 @@ def test_streaming_output_with_blocking_equals_one():
 
     # Find indices of first LLM success event and first stream chunk event
     llm2_start_index = next(
-        (i for i, e in enumerate(events) if isinstance(e, NodeRunSucceededEvent) and e.node_type == NodeType.LLM),
+        (
+            i
+            for i, e in enumerate(events)
+            if isinstance(e, NodeRunSucceededEvent) and e.node_type == BuiltinNodeTypes.LLM
+        ),
         -1,
     )
     first_chunk_index = next(
@@ -96,16 +100,16 @@ def test_streaming_output_with_blocking_equals_one():
 
     # Check all Template's NodeRunStreamChunkEvent should has same id with Template's NodeRunStartedEvent
     start_events = [
-        e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == NodeType.TEMPLATE_TRANSFORM
+        e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == BuiltinNodeTypes.TEMPLATE_TRANSFORM
     ]
-    template_chunk_events = [e for e in stream_chunk_events if e.node_type == NodeType.TEMPLATE_TRANSFORM]
+    template_chunk_events = [e for e in stream_chunk_events if e.node_type == BuiltinNodeTypes.TEMPLATE_TRANSFORM]
     assert len(template_chunk_events) == 1, f"Expected 1 template chunk event, but got {len(template_chunk_events)}"
     assert all(e.id in [se.id for se in start_events] for e in template_chunk_events), (
         "Expected all Template chunk events to have same id with Template's NodeRunStartedEvent"
     )
 
     # Check that NodeRunStreamChunkEvent contains '\n' is from the End node
-    end_events = [e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == NodeType.END]
+    end_events = [e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == BuiltinNodeTypes.END]
     assert len(end_events) == 1, f"Expected 1 end event, but got {len(end_events)}"
     newline_chunk_events = [e for e in stream_chunk_events if e.chunk == "\n"]
     assert len(newline_chunk_events) == 1, f"Expected 1 newline chunk event, but got {len(newline_chunk_events)}"
@@ -168,7 +172,11 @@ def test_streaming_output_with_blocking_not_equals_one():
 
     # Find indices of first LLM success event and first stream chunk event
     llm2_start_index = next(
-        (i for i, e in enumerate(events) if isinstance(e, NodeRunSucceededEvent) and e.node_type == NodeType.LLM),
+        (
+            i
+            for i, e in enumerate(events)
+            if isinstance(e, NodeRunSucceededEvent) and e.node_type == BuiltinNodeTypes.LLM
+        ),
         -1,
     )
     first_chunk_index = next(
@@ -194,15 +202,15 @@ def test_streaming_output_with_blocking_not_equals_one():
     assert all(e.id == start_event.id for e in query_chunk_events), "Expected all query chunk events to have same id"
 
     # Check all LLM's NodeRunStreamChunkEvent should be from LLM nodes
-    start_events = [e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == NodeType.LLM]
-    llm_chunk_events = [e for e in stream_chunk_events if e.node_type == NodeType.LLM]
+    start_events = [e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == BuiltinNodeTypes.LLM]
+    llm_chunk_events = [e for e in stream_chunk_events if e.node_type == BuiltinNodeTypes.LLM]
     llm_node_ids = {se.node_id for se in start_events}
     assert all(e.node_id in llm_node_ids for e in llm_chunk_events), (
         "Expected all LLM chunk events to be from LLM nodes"
     )
 
     # Check that NodeRunStreamChunkEvent contains '\n' is from the End node
-    end_events = [e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == NodeType.END]
+    end_events = [e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == BuiltinNodeTypes.END]
     assert len(end_events) == 1, f"Expected 1 end event, but got {len(end_events)}"
     newline_chunk_events = [e for e in stream_chunk_events if e.chunk == "\n"]
     assert len(newline_chunk_events) == 1, f"Expected 1 newline chunk event, but got {len(newline_chunk_events)}"
diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_dispatcher_pause_drain.py b/api/tests/unit_tests/core/workflow/graph_engine/test_dispatcher_pause_drain.py
index b88c15ea2a..778dad5952 100644
--- a/api/tests/unit_tests/core/workflow/graph_engine/test_dispatcher_pause_drain.py
+++ b/api/tests/unit_tests/core/workflow/graph_engine/test_dispatcher_pause_drain.py
@@ -1,7 +1,7 @@
 import queue
 from datetime import datetime
 
-from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus
+from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus
 from dify_graph.graph_engine.orchestration.dispatcher import Dispatcher
 from dify_graph.graph_events import NodeRunSucceededEvent
 from dify_graph.node_events import NodeRunResult
@@ -51,7 +51,7 @@ def test_dispatcher_drains_events_when_paused() -> None:
     event = NodeRunSucceededEvent(
id="exec-1", node_id="node-1", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, start_at=datetime.utcnow(), node_run_result=NodeRunResult(status=WorkflowNodeExecutionStatus.SUCCEEDED), ) diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_graph_execution_serialization.py b/api/tests/unit_tests/core/workflow/graph_engine/test_graph_execution_serialization.py index 805e7dbbce..255784b77d 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_graph_execution_serialization.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_graph_execution_serialization.py @@ -6,7 +6,7 @@ import json from collections import deque from unittest.mock import MagicMock -from dify_graph.enums import NodeExecutionType, NodeState, NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, NodeState from dify_graph.graph_engine.domain import GraphExecution from dify_graph.graph_engine.response_coordinator import ResponseStreamCoordinator from dify_graph.graph_engine.response_coordinator.path import Path @@ -101,7 +101,9 @@ def test_response_stream_coordinator_serialization_round_trip(monkeypatch) -> No class DummyNode: def __init__(self, node_id: str, template: Template, execution_type: NodeExecutionType) -> None: self.id = node_id - self.node_type = NodeType.ANSWER if execution_type == NodeExecutionType.RESPONSE else NodeType.LLM + self.node_type = ( + BuiltinNodeTypes.ANSWER if execution_type == NodeExecutionType.RESPONSE else BuiltinNodeTypes.LLM + ) self.execution_type = execution_type self.state = NodeState.UNKNOWN self.title = node_id @@ -160,7 +162,7 @@ def test_response_stream_coordinator_serialization_round_trip(monkeypatch) -> No event = NodeRunStreamChunkEvent( id="exec-1", node_id="response-1", - node_type=NodeType.ANSWER, + node_type=BuiltinNodeTypes.ANSWER, selector=["node-source", "text"], chunk="chunk-1", is_final=False, diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_config.py b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_config.py index 6041c6ff30..8a4649693d 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_config.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_config.py @@ -11,8 +11,6 @@ from collections.abc import Callable from dataclasses import dataclass, field from typing import Any -from dify_graph.enums import NodeType - @dataclass class NodeMockConfig: diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_factory.py b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_factory.py index 338db9076e..93010eea54 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_factory.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_factory.py @@ -9,7 +9,7 @@ from typing import TYPE_CHECKING, Any from core.workflow.node_factory import DifyNodeFactory from dify_graph.entities.graph_config import NodeConfigDict, NodeConfigDictAdapter -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.nodes.base.node import Node from .test_mock_nodes import ( @@ -61,18 +61,18 @@ class MockNodeFactory(DifyNodeFactory): # Map of node types that should be mocked self._mock_node_types = { - NodeType.LLM: MockLLMNode, - NodeType.AGENT: MockAgentNode, - NodeType.TOOL: MockToolNode, - NodeType.KNOWLEDGE_RETRIEVAL: MockKnowledgeRetrievalNode, - NodeType.HTTP_REQUEST: MockHttpRequestNode, - NodeType.QUESTION_CLASSIFIER: MockQuestionClassifierNode, - 
NodeType.PARAMETER_EXTRACTOR: MockParameterExtractorNode, - NodeType.DOCUMENT_EXTRACTOR: MockDocumentExtractorNode, - NodeType.ITERATION: MockIterationNode, - NodeType.LOOP: MockLoopNode, - NodeType.TEMPLATE_TRANSFORM: MockTemplateTransformNode, - NodeType.CODE: MockCodeNode, + BuiltinNodeTypes.LLM: MockLLMNode, + BuiltinNodeTypes.AGENT: MockAgentNode, + BuiltinNodeTypes.TOOL: MockToolNode, + BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: MockKnowledgeRetrievalNode, + BuiltinNodeTypes.HTTP_REQUEST: MockHttpRequestNode, + BuiltinNodeTypes.QUESTION_CLASSIFIER: MockQuestionClassifierNode, + BuiltinNodeTypes.PARAMETER_EXTRACTOR: MockParameterExtractorNode, + BuiltinNodeTypes.DOCUMENT_EXTRACTOR: MockDocumentExtractorNode, + BuiltinNodeTypes.ITERATION: MockIterationNode, + BuiltinNodeTypes.LOOP: MockLoopNode, + BuiltinNodeTypes.TEMPLATE_TRANSFORM: MockTemplateTransformNode, + BuiltinNodeTypes.CODE: MockCodeNode, } def create_node(self, node_config: dict[str, Any] | NodeConfigDict) -> Node: @@ -92,7 +92,7 @@ class MockNodeFactory(DifyNodeFactory): # Create mock node instance mock_class = self._mock_node_types[node_type] - if node_type == NodeType.CODE: + if node_type == BuiltinNodeTypes.CODE: mock_instance = mock_class( id=node_id, config=typed_node_config, @@ -102,7 +102,7 @@ class MockNodeFactory(DifyNodeFactory): code_executor=self._code_executor, code_limits=self._code_limits, ) - elif node_type == NodeType.HTTP_REQUEST: + elif node_type == BuiltinNodeTypes.HTTP_REQUEST: mock_instance = mock_class( id=node_id, config=typed_node_config, @@ -114,7 +114,11 @@ class MockNodeFactory(DifyNodeFactory): tool_file_manager_factory=self._http_request_tool_file_manager_factory, file_manager=self._http_request_file_manager, ) - elif node_type in {NodeType.LLM, NodeType.QUESTION_CLASSIFIER, NodeType.PARAMETER_EXTRACTOR}: + elif node_type in { + BuiltinNodeTypes.LLM, + BuiltinNodeTypes.QUESTION_CLASSIFIER, + BuiltinNodeTypes.PARAMETER_EXTRACTOR, + }: mock_instance = mock_class( id=node_id, config=typed_node_config, diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_iteration_simple.py b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_iteration_simple.py index 8c8e5977c8..3e4247f33f 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_iteration_simple.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_iteration_simple.py @@ -3,7 +3,7 @@ Simple test to verify MockNodeFactory works with iteration nodes. 
""" from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from tests.unit_tests.core.workflow.graph_engine.test_mock_config import MockConfigBuilder from tests.unit_tests.core.workflow.graph_engine.test_mock_factory import MockNodeFactory @@ -42,20 +42,20 @@ def test_mock_factory_registers_iteration_node(): ) # Check that iteration node is registered - assert NodeType.ITERATION in factory._mock_node_types + assert BuiltinNodeTypes.ITERATION in factory._mock_node_types print("✓ Iteration node is registered in MockNodeFactory") # Check that loop node is registered - assert NodeType.LOOP in factory._mock_node_types + assert BuiltinNodeTypes.LOOP in factory._mock_node_types print("✓ Loop node is registered in MockNodeFactory") # Check the class types from tests.unit_tests.core.workflow.graph_engine.test_mock_nodes import MockIterationNode, MockLoopNode - assert factory._mock_node_types[NodeType.ITERATION] == MockIterationNode + assert factory._mock_node_types[BuiltinNodeTypes.ITERATION] == MockIterationNode print("✓ Iteration node maps to MockIterationNode class") - assert factory._mock_node_types[NodeType.LOOP] == MockLoopNode + assert factory._mock_node_types[BuiltinNodeTypes.LOOP] == MockLoopNode print("✓ Loop node maps to MockLoopNode class") diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes.py b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes.py index 34e714a227..454263bef9 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes.py @@ -11,16 +11,16 @@ from typing import TYPE_CHECKING, Any, Optional from unittest.mock import MagicMock from core.model_manager import ModelInstance +from core.workflow.nodes.agent import AgentNode +from core.workflow.nodes.knowledge_retrieval.knowledge_retrieval_node import KnowledgeRetrievalNode from dify_graph.enums import WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus from dify_graph.model_runtime.entities.llm_entities import LLMUsage from dify_graph.node_events import NodeRunResult, StreamChunkEvent, StreamCompletedEvent -from dify_graph.nodes.agent import AgentNode from dify_graph.nodes.code import CodeNode from dify_graph.nodes.document_extractor import DocumentExtractorNode from dify_graph.nodes.http_request import HttpRequestNode -from dify_graph.nodes.knowledge_retrieval import KnowledgeRetrievalNode from dify_graph.nodes.llm import LLMNode -from dify_graph.nodes.llm.protocols import CredentialsProvider, ModelFactory +from dify_graph.nodes.llm.protocols import CredentialsProvider, ModelFactory, TemplateRenderer from dify_graph.nodes.parameter_extractor import ParameterExtractorNode from dify_graph.nodes.protocols import HttpClientProtocol, ToolFileManagerProtocol from dify_graph.nodes.question_classifier import QuestionClassifierNode @@ -68,6 +68,8 @@ class MockNodeMixin: kwargs.setdefault("model_instance", MagicMock(spec=ModelInstance)) # LLM-like nodes now require an http_client; provide a mock by default for tests. 
kwargs.setdefault("http_client", MagicMock(spec=HttpClientProtocol)) + if isinstance(self, (LLMNode, QuestionClassifierNode)): + kwargs.setdefault("template_renderer", MagicMock(spec=TemplateRenderer)) # Ensure TemplateTransformNode receives a renderer now required by constructor if isinstance(self, TemplateTransformNode): @@ -79,6 +81,14 @@ class MockNodeMixin: if isinstance(self, _ToolNode): kwargs.setdefault("tool_file_manager_factory", MagicMock(spec=ToolFileManagerProtocol)) + if isinstance(self, AgentNode): + presentation_provider = MagicMock() + presentation_provider.get_icon.return_value = None + kwargs.setdefault("strategy_resolver", MagicMock()) + kwargs.setdefault("presentation_provider", presentation_provider) + kwargs.setdefault("runtime_support", MagicMock()) + kwargs.setdefault("message_transformer", MagicMock()) + super().__init__( id=id, config=config, diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes_template_code.py b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes_template_code.py index 1550dca402..a8398e8f79 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes_template_code.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes_template_code.py @@ -7,7 +7,7 @@ to ensure they work correctly with the TableTestRunner. from configs import dify_config from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.nodes.code.limits import CodeNodeLimits from tests.unit_tests.core.workflow.graph_engine.test_mock_config import MockConfig, MockConfigBuilder, NodeMockConfig from tests.unit_tests.core.workflow.graph_engine.test_mock_factory import MockNodeFactory @@ -550,12 +550,12 @@ class TestMockNodeFactory: ) # Verify that CODE and TEMPLATE_TRANSFORM ARE mocked by default (they require SSRF proxy) - assert factory.should_mock_node(NodeType.CODE) - assert factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + assert factory.should_mock_node(BuiltinNodeTypes.CODE) + assert factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) # Verify that other third-party service nodes ARE also mocked by default - assert factory.should_mock_node(NodeType.LLM) - assert factory.should_mock_node(NodeType.AGENT) + assert factory.should_mock_node(BuiltinNodeTypes.LLM) + assert factory.should_mock_node(BuiltinNodeTypes.AGENT) def test_factory_creates_mock_template_transform_node(self): """Test that MockNodeFactory creates MockTemplateTransformNode for template-transform type.""" @@ -610,7 +610,7 @@ class TestMockNodeFactory: # Verify the correct mock type was created assert isinstance(node, MockTemplateTransformNode) - assert factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + assert factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) def test_factory_creates_mock_code_node(self): """Test that MockNodeFactory creates MockCodeNode for code type.""" @@ -667,4 +667,4 @@ class TestMockNodeFactory: # Verify the correct mock type was created assert isinstance(node, MockCodeNode) - assert factory.should_mock_node(NodeType.CODE) + assert factory.should_mock_node(BuiltinNodeTypes.CODE) diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_simple.py b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_simple.py index 693cdf9276..5b35b3310a 100644 --- 
a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_simple.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_simple.py @@ -5,7 +5,7 @@ Simple test to validate the auto-mock system without external dependencies. import sys from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from tests.unit_tests.core.workflow.graph_engine.test_mock_config import MockConfig, MockConfigBuilder, NodeMockConfig from tests.unit_tests.core.workflow.graph_engine.test_mock_factory import MockNodeFactory @@ -64,8 +64,8 @@ def test_mock_config_operations(): assert error_config.error == "Test error" # Test default configs by node type - config.set_default_config(NodeType.LLM, {"temperature": 0.7}) - llm_config = config.get_default_config(NodeType.LLM) + config.set_default_config(BuiltinNodeTypes.LLM, {"temperature": 0.7}) + llm_config = config.get_default_config(BuiltinNodeTypes.LLM) assert llm_config == {"temperature": 0.7} print("✓ MockConfig operations test passed") @@ -130,23 +130,23 @@ def test_mock_factory_detection(): ) # Test that third-party service nodes are identified for mocking - assert factory.should_mock_node(NodeType.LLM) - assert factory.should_mock_node(NodeType.AGENT) - assert factory.should_mock_node(NodeType.TOOL) - assert factory.should_mock_node(NodeType.KNOWLEDGE_RETRIEVAL) - assert factory.should_mock_node(NodeType.HTTP_REQUEST) - assert factory.should_mock_node(NodeType.PARAMETER_EXTRACTOR) - assert factory.should_mock_node(NodeType.DOCUMENT_EXTRACTOR) + assert factory.should_mock_node(BuiltinNodeTypes.LLM) + assert factory.should_mock_node(BuiltinNodeTypes.AGENT) + assert factory.should_mock_node(BuiltinNodeTypes.TOOL) + assert factory.should_mock_node(BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL) + assert factory.should_mock_node(BuiltinNodeTypes.HTTP_REQUEST) + assert factory.should_mock_node(BuiltinNodeTypes.PARAMETER_EXTRACTOR) + assert factory.should_mock_node(BuiltinNodeTypes.DOCUMENT_EXTRACTOR) # Test that CODE and TEMPLATE_TRANSFORM are mocked (they require SSRF proxy) - assert factory.should_mock_node(NodeType.CODE) - assert factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + assert factory.should_mock_node(BuiltinNodeTypes.CODE) + assert factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) # Test that non-service nodes are not mocked - assert not factory.should_mock_node(NodeType.START) - assert not factory.should_mock_node(NodeType.END) - assert not factory.should_mock_node(NodeType.IF_ELSE) - assert not factory.should_mock_node(NodeType.VARIABLE_AGGREGATOR) + assert not factory.should_mock_node(BuiltinNodeTypes.START) + assert not factory.should_mock_node(BuiltinNodeTypes.END) + assert not factory.should_mock_node(BuiltinNodeTypes.IF_ELSE) + assert not factory.should_mock_node(BuiltinNodeTypes.VARIABLE_AGGREGATOR) print("✓ MockNodeFactory detection test passed") @@ -186,18 +186,18 @@ def test_mock_factory_registration(): ) # TEMPLATE_TRANSFORM is mocked by default (requires SSRF proxy) - assert factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + assert factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) # Unregister mock - factory.unregister_mock_node_type(NodeType.TEMPLATE_TRANSFORM) - assert not factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + factory.unregister_mock_node_type(BuiltinNodeTypes.TEMPLATE_TRANSFORM) + assert not factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) # Register custom mock (using a dummy 
class for testing) class DummyMockNode: pass - factory.register_mock_node_type(NodeType.TEMPLATE_TRANSFORM, DummyMockNode) - assert factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + factory.register_mock_node_type(BuiltinNodeTypes.TEMPLATE_TRANSFORM, DummyMockNode) + assert factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) print("✓ MockNodeFactory registration test passed") diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_parallel_streaming_workflow.py b/api/tests/unit_tests/core/workflow/graph_engine/test_parallel_streaming_workflow.py index 0ac9d6618d..b954a4faac 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_parallel_streaming_workflow.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_parallel_streaming_workflow.py @@ -14,8 +14,8 @@ from uuid import uuid4 from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom from core.model_manager import ModelInstance -from core.workflow.node_factory import DifyNodeFactory -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from core.workflow.node_factory import DifyNodeFactory, get_default_root_node_id +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.graph import Graph from dify_graph.graph_engine import GraphEngine, GraphEngineConfig from dify_graph.graph_engine.command_channels import InMemoryChannel @@ -118,7 +118,11 @@ def test_parallel_streaming_workflow(): with patch.object( DifyNodeFactory, "_build_model_instance_for_llm_node", return_value=MagicMock(spec=ModelInstance), autospec=True ): - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init( + graph_config=graph_config, + node_factory=node_factory, + root_node_id=get_default_root_node_id(graph_config), + ) # Create the graph engine engine = GraphEngine( @@ -164,7 +168,9 @@ def test_parallel_streaming_workflow(): stream_chunk_events = [e for e in events if isinstance(e, NodeRunStreamChunkEvent)] # Get Answer node start event - answer_start_events = [e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == NodeType.ANSWER] + answer_start_events = [ + e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == BuiltinNodeTypes.ANSWER + ] assert len(answer_start_events) == 1, f"Expected 1 Answer node start event, got {len(answer_start_events)}" answer_start_event = answer_start_events[0] @@ -211,7 +217,9 @@ def test_parallel_streaming_workflow(): # Get LLM completion events llm_completed_events = [ - (i, e) for i, e in enumerate(events) if isinstance(e, NodeRunSucceededEvent) and e.node_type == NodeType.LLM + (i, e) + for i, e in enumerate(events) + if isinstance(e, NodeRunSucceededEvent) and e.node_type == BuiltinNodeTypes.LLM ] # Check LLM completion order - in the current implementation, LLMs run sequentially @@ -263,7 +271,7 @@ def test_parallel_streaming_workflow(): # According to Answer node configuration: '{{#1754339725656.text#}}{{#1754339718571.text#}}' # This means LLM 2 output should come first, then LLM 1 output answer_complete_events = [ - e for e in events if isinstance(e, NodeRunSucceededEvent) and e.node_type == NodeType.ANSWER + e for e in events if isinstance(e, NodeRunSucceededEvent) and e.node_type == BuiltinNodeTypes.ANSWER ] assert len(answer_complete_events) == 1, f"Expected 1 Answer completion event, got {len(answer_complete_events)}" diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_response_session.py 
b/api/tests/unit_tests/core/workflow/graph_engine/test_response_session.py new file mode 100644 index 0000000000..198e133454 --- /dev/null +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_response_session.py @@ -0,0 +1,71 @@ +"""Unit tests for response session creation.""" + +from __future__ import annotations + +import pytest + +import dify_graph.graph_engine.response_coordinator.session as response_session_module +from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, NodeState, NodeType +from dify_graph.graph_engine.response_coordinator import RESPONSE_SESSION_NODE_TYPES +from dify_graph.graph_engine.response_coordinator.session import ResponseSession +from dify_graph.nodes.base.template import Template, TextSegment + + +class DummyResponseNode: + """Minimal response-capable node for session tests.""" + + def __init__(self, *, node_id: str, node_type: NodeType, template: Template) -> None: + self.id = node_id + self.node_type = node_type + self.execution_type = NodeExecutionType.RESPONSE + self.state = NodeState.UNKNOWN + self._template = template + + def get_streaming_template(self) -> Template: + return self._template + + +class DummyNodeWithoutStreamingTemplate: + """Minimal node that violates the response-session contract.""" + + def __init__(self, *, node_id: str, node_type: NodeType) -> None: + self.id = node_id + self.node_type = node_type + self.execution_type = NodeExecutionType.RESPONSE + self.state = NodeState.UNKNOWN + + +def test_response_session_from_node_rejects_node_types_outside_allowlist() -> None: + """Unsupported node types are rejected even if they expose a template.""" + node = DummyResponseNode( + node_id="llm-node", + node_type=BuiltinNodeTypes.LLM, + template=Template(segments=[TextSegment(text="hello")]), + ) + + with pytest.raises(TypeError, match="RESPONSE_SESSION_NODE_TYPES"): + ResponseSession.from_node(node) + + +def test_response_session_from_node_supports_downstream_allowlist_extension(monkeypatch) -> None: + """Downstream applications can extend the supported node-type list.""" + node = DummyResponseNode( + node_id="llm-node", + node_type=BuiltinNodeTypes.LLM, + template=Template(segments=[TextSegment(text="hello")]), + ) + extended_node_types = [*RESPONSE_SESSION_NODE_TYPES, BuiltinNodeTypes.LLM] + monkeypatch.setattr(response_session_module, "RESPONSE_SESSION_NODE_TYPES", extended_node_types) + + session = ResponseSession.from_node(node) + + assert session.node_id == "llm-node" + assert session.template.segments == [TextSegment(text="hello")] + + +def test_response_session_from_node_requires_streaming_template_method() -> None: + """Allowed node types still need to implement the streaming-template contract.""" + node = DummyNodeWithoutStreamingTemplate(node_id="answer-node", node_type=BuiltinNodeTypes.ANSWER) + + with pytest.raises(TypeError, match="get_streaming_template"): + ResponseSession.from_node(node) diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_table_runner.py b/api/tests/unit_tests/core/workflow/graph_engine/test_table_runner.py index 767a8f60ce..ab8fb346b8 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_table_runner.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_table_runner.py @@ -21,7 +21,7 @@ from typing import Any, cast from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom from core.tools.utils.yaml_utils import _load_yaml_file -from core.workflow.node_factory import DifyNodeFactory +from core.workflow.node_factory import DifyNodeFactory, 
get_default_root_node_id from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY, GraphInitParams from dify_graph.graph import Graph from dify_graph.graph_engine import GraphEngine, GraphEngineConfig @@ -257,7 +257,11 @@ class WorkflowRunner: else: node_factory = DifyNodeFactory(graph_init_params=graph_init_params, graph_runtime_state=graph_runtime_state) - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init( + graph_config=graph_config, + node_factory=node_factory, + root_node_id=get_default_root_node_id(graph_config), + ) return graph, graph_runtime_state diff --git a/api/tests/unit_tests/core/workflow/nodes/answer/test_answer.py b/api/tests/unit_tests/core/workflow/nodes/answer/test_answer.py index f0d80af1ed..fd563d1be2 100644 --- a/api/tests/unit_tests/core/workflow/nodes/answer/test_answer.py +++ b/api/tests/unit_tests/core/workflow/nodes/answer/test_answer.py @@ -64,7 +64,7 @@ def test_execute_answer(): graph_runtime_state=graph_runtime_state, ) - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") node_config = { "id": "answer", diff --git a/api/tests/unit_tests/core/workflow/nodes/base/test_base_node.py b/api/tests/unit_tests/core/workflow/nodes/base/test_base_node.py index 3fb775f934..81d3f5be9c 100644 --- a/api/tests/unit_tests/core/workflow/nodes/base/test_base_node.py +++ b/api/tests/unit_tests/core/workflow/nodes/base/test_base_node.py @@ -1,14 +1,12 @@ import pytest +from core.workflow.node_factory import get_node_type_classes_mapping from dify_graph.entities.base_node_data import BaseNodeData -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.nodes.base.node import Node -# Ensures that all node classes are imported. -from dify_graph.nodes.node_mapping import NODE_TYPE_CLASSES_MAPPING - -# Ensure `NODE_TYPE_CLASSES_MAPPING` is used and not automatically removed. -_ = NODE_TYPE_CLASSES_MAPPING +# Ensures that all production node classes are imported and registered. 
+_ = get_node_type_classes_mapping()
 
 
 class _TestNodeData(BaseNodeData):
@@ -43,7 +41,7 @@ def test_ensure_subclasses_of_base_node_has_node_type_and_version_method_defined
         node_type = cls.node_type
         node_version = cls.version()
 
-        assert isinstance(cls.node_type, NodeType)
+        assert isinstance(cls.node_type, str)
         assert isinstance(node_version, str)
         node_type_and_version = (node_type, node_version)
         assert node_type_and_version not in type_version_set, (
@@ -56,7 +54,7 @@ def test_extract_node_data_type_from_generic_extracts_type():
     """When a class inherits from Node[T], it should extract T."""
 
     class _ConcreteNode(Node[_TestNodeData]):
-        node_type = NodeType.CODE
+        node_type = BuiltinNodeTypes.CODE
 
         @staticmethod
         def version() -> str:
@@ -108,7 +106,7 @@ def test_init_subclass_rejects_explicit_node_data_type_without_generic():
 
     class _ExplicitNode(Node):
         _node_data_type = _TestNodeData
-        node_type = NodeType.CODE
+        node_type = BuiltinNodeTypes.CODE
 
         @staticmethod
         def version() -> str:
@@ -119,7 +117,7 @@ def test_init_subclass_sets_node_data_type_from_generic():
     """Verify that __init_subclass__ sets _node_data_type from the generic parameter."""
 
     class _AutoNode(Node[_TestNodeData]):
-        node_type = NodeType.CODE
+        node_type = BuiltinNodeTypes.CODE
 
         @staticmethod
         def version() -> str:
@@ -132,13 +130,13 @@ def test_validate_node_data_uses_declared_node_data_type():
     """Public validation should hydrate the subclass-declared node data model."""
 
     class _AutoNode(Node[_TestNodeData]):
-        node_type = NodeType.CODE
+        node_type = BuiltinNodeTypes.CODE
 
         @staticmethod
         def version() -> str:
             return "1"
 
-    base_node_data = BaseNodeData.model_validate({"type": NodeType.CODE, "title": "Test"})
+    base_node_data = BaseNodeData.model_validate({"type": BuiltinNodeTypes.CODE, "title": "Test"})
 
     validated = _AutoNode.validate_node_data(base_node_data)
diff --git a/api/tests/unit_tests/core/workflow/nodes/base/test_get_node_type_classes_mapping.py b/api/tests/unit_tests/core/workflow/nodes/base/test_get_node_type_classes_mapping.py
index 86d326aead..972a945ca0 100644
--- a/api/tests/unit_tests/core/workflow/nodes/base/test_get_node_type_classes_mapping.py
+++ b/api/tests/unit_tests/core/workflow/nodes/base/test_get_node_type_classes_mapping.py
@@ -1,8 +1,9 @@
 import types
 from collections.abc import Mapping
 
+from core.workflow.node_factory import get_node_type_classes_mapping
 from dify_graph.entities.base_node_data import BaseNodeData
-from dify_graph.enums import NodeType
+from dify_graph.enums import BuiltinNodeTypes, NodeType
 from dify_graph.nodes.base.node import Node
 
 # Import concrete nodes we will assert on (numeric version path)
@@ -16,11 +17,11 @@ from dify_graph.nodes.variable_assigner.v2.node import (
 
 def test_variable_assigner_latest_prefers_highest_numeric_version():
     # Act
-    mapping: Mapping[NodeType, Mapping[str, type[Node]]] = Node.get_node_type_classes_mapping()
+    mapping: Mapping[NodeType, Mapping[str, type[Node]]] = get_node_type_classes_mapping()
 
     # Assert basic presence
-    assert NodeType.VARIABLE_ASSIGNER in mapping
-    va_versions = mapping[NodeType.VARIABLE_ASSIGNER]
+    assert BuiltinNodeTypes.VARIABLE_ASSIGNER in mapping
+    va_versions = mapping[BuiltinNodeTypes.VARIABLE_ASSIGNER]
 
     # Both concrete versions must be present
     assert va_versions.get("1") is VariableAssignerV1
@@ -34,7 +35,7 @@ def test_latest_prefers_highest_numeric_version():
     # Arrange: define two ephemeral subclasses with numeric versions under a NodeType
     # that has no concrete implementations in production to avoid interference.
     class _Version1(Node[BaseNodeData]):  # type: ignore[misc]
-        node_type = NodeType.LEGACY_VARIABLE_AGGREGATOR
+        node_type = BuiltinNodeTypes.LEGACY_VARIABLE_AGGREGATOR
 
         def init_node_data(self, data):
             pass
@@ -73,11 +74,11 @@ def test_latest_prefers_highest_numeric_version():
             return "version2"
 
     # Act: build a fresh mapping (it should now see our ephemeral subclasses)
-    mapping: Mapping[NodeType, Mapping[str, type[Node]]] = Node.get_node_type_classes_mapping()
+    mapping: Mapping[NodeType, Mapping[str, type[Node]]] = get_node_type_classes_mapping()
 
     # Assert: both numeric versions exist for this NodeType; 'latest' points to the higher numeric version
-    assert NodeType.LEGACY_VARIABLE_AGGREGATOR in mapping
-    legacy_versions = mapping[NodeType.LEGACY_VARIABLE_AGGREGATOR]
+    assert BuiltinNodeTypes.LEGACY_VARIABLE_AGGREGATOR in mapping
+    legacy_versions = mapping[BuiltinNodeTypes.LEGACY_VARIABLE_AGGREGATOR]
 
     assert legacy_versions.get("1") is _Version1
     assert legacy_versions.get("2") is _Version2
diff --git a/api/tests/unit_tests/core/workflow/nodes/datasource/test_datasource_node.py b/api/tests/unit_tests/core/workflow/nodes/datasource/test_datasource_node.py
index db096b1aed..859115ceb3 100644
--- a/api/tests/unit_tests/core/workflow/nodes/datasource/test_datasource_node.py
+++ b/api/tests/unit_tests/core/workflow/nodes/datasource/test_datasource_node.py
@@ -1,7 +1,7 @@
+from core.workflow.nodes.datasource.datasource_node import DatasourceNode
 from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY
 from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus
 from dify_graph.node_events import NodeRunResult, StreamChunkEvent, StreamCompletedEvent
-from dify_graph.nodes.datasource.datasource_node import DatasourceNode
 
 
 class _VarSeg:
@@ -74,6 +74,8 @@ def test_datasource_node_delegates_to_manager_stream(mocker):
         def get_upload_file_by_id(cls, **_):
             raise AssertionError("not called")
 
+    mocker.patch("core.workflow.nodes.datasource.datasource_node.DatasourceManager", new=_Mgr)
+
     node = DatasourceNode(
         id="n",
         config={
@@ -90,7 +92,6 @@ def test_datasource_node_delegates_to_manager_stream(mocker):
         },
         graph_init_params=gp,
         graph_runtime_state=gs,
-        datasource_manager=_Mgr,
     )
 
     evts = list(node._run())
diff --git a/api/tests/unit_tests/core/workflow/nodes/human_input/test_human_input_form_filled_event.py b/api/tests/unit_tests/core/workflow/nodes/human_input/test_human_input_form_filled_event.py
index 1fea19e795..b0ed47158d 100644
--- a/api/tests/unit_tests/core/workflow/nodes/human_input/test_human_input_form_filled_event.py
+++ b/api/tests/unit_tests/core/workflow/nodes/human_input/test_human_input_form_filled_event.py
@@ -3,7 +3,7 @@ from types import SimpleNamespace
 
 from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom
 from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY, GraphInitParams
-from dify_graph.enums import NodeType
+from dify_graph.enums import BuiltinNodeTypes
 from dify_graph.graph_events import (
     NodeRunHumanInputFormFilledEvent,
     NodeRunHumanInputFormTimeoutEvent,
@@ -47,7 +47,7 @@ def _build_node(form_content: str = "Please enter your name:\n\n{{#$output.name#
 
     config = {
         "id": "node-1",
-        "type": NodeType.HUMAN_INPUT.value,
+        "type": BuiltinNodeTypes.HUMAN_INPUT,
         "data": {
             "title": "Human Input",
             "form_content": form_content,
@@ -111,7 +111,7 @@ def _build_timeout_node() -> HumanInputNode:
 
     config = {
         "id": "node-1",
-        "type": NodeType.HUMAN_INPUT.value,
+        "type": BuiltinNodeTypes.HUMAN_INPUT,
         "data": {
            "title": "Human Input",
             "form_content": "Please enter your name:\n\n{{#$output.name#}}",
diff --git a/api/tests/unit_tests/core/workflow/nodes/iteration/iteration_node_spec.py b/api/tests/unit_tests/core/workflow/nodes/iteration/iteration_node_spec.py
index 490df52533..fdf5f4d1f8 100644
--- a/api/tests/unit_tests/core/workflow/nodes/iteration/iteration_node_spec.py
+++ b/api/tests/unit_tests/core/workflow/nodes/iteration/iteration_node_spec.py
@@ -1,5 +1,5 @@
 from dify_graph.entities.graph_config import NodeConfigDictAdapter
-from dify_graph.enums import NodeType
+from dify_graph.enums import BuiltinNodeTypes
 from dify_graph.nodes.iteration.entities import ErrorHandleMode, IterationNodeData
 from dify_graph.nodes.iteration.exc import (
     InvalidIteratorValueError,
@@ -91,7 +91,7 @@ class TestIterationNodeClassAttributes:
 
     def test_node_type(self):
         """Test IterationNode node_type attribute."""
-        assert IterationNode.node_type == NodeType.ITERATION
+        assert IterationNode.node_type == BuiltinNodeTypes.ITERATION
 
     def test_version(self):
         """Test IterationNode version method."""
diff --git a/api/tests/unit_tests/core/workflow/nodes/knowledge_index/test_knowledge_index_node.py b/api/tests/unit_tests/core/workflow/nodes/knowledge_index/test_knowledge_index_node.py
index 8116fc8b3c..33f7ace5ab 100644
--- a/api/tests/unit_tests/core/workflow/nodes/knowledge_index/test_knowledge_index_node.py
+++ b/api/tests/unit_tests/core/workflow/nodes/knowledge_index/test_knowledge_index_node.py
@@ -5,12 +5,16 @@ from unittest.mock import Mock
 import pytest
 
 from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom
+from core.workflow.nodes.knowledge_index.entities import KnowledgeIndexNodeData
+from core.workflow.nodes.knowledge_index.exc import KnowledgeIndexNodeError
+from core.workflow.nodes.knowledge_index.knowledge_index_node import KnowledgeIndexNode
+from core.workflow.nodes.knowledge_index.protocols import (
+    IndexProcessorProtocol,
+    Preview,
+    PreviewItem,
+    SummaryIndexServiceProtocol,
+)
 from dify_graph.enums import SystemVariableKey, WorkflowNodeExecutionStatus
-from dify_graph.nodes.knowledge_index.entities import KnowledgeIndexNodeData
-from dify_graph.nodes.knowledge_index.exc import KnowledgeIndexNodeError
-from dify_graph.nodes.knowledge_index.knowledge_index_node import KnowledgeIndexNode
-from dify_graph.repositories.index_processor_protocol import IndexProcessorProtocol, Preview, PreviewItem
-from dify_graph.repositories.summary_index_service_protocol import SummaryIndexServiceProtocol
 from dify_graph.runtime import GraphRuntimeState, VariablePool
 from dify_graph.system_variable import SystemVariable
 from dify_graph.variables.segments import StringSegment
@@ -45,16 +49,24 @@ def mock_graph_runtime_state():
 
 
 @pytest.fixture
-def mock_index_processor():
+def mock_index_processor(mocker):
    """Create mock IndexProcessorProtocol."""
     mock_processor = Mock(spec=IndexProcessorProtocol)
+    mocker.patch(
+        "core.workflow.nodes.knowledge_index.knowledge_index_node.IndexProcessor",
+        return_value=mock_processor,
+    )
     return mock_processor
 
 
@pytest.fixture
-def mock_summary_index_service():
+def mock_summary_index_service(mocker):
     """Create mock SummaryIndexServiceProtocol."""
     mock_service = Mock(spec=SummaryIndexServiceProtocol)
+    mocker.patch(
+        "core.workflow.nodes.knowledge_index.knowledge_index_node.SummaryIndex",
+        return_value=mock_service,
+    )
     return mock_service
 
 
@@ -107,8 +119,6 @@ class TestKnowledgeIndexNode:
             config=config,
             graph_init_params=mock_graph_init_params,
             graph_runtime_state=mock_graph_runtime_state,
-            index_processor=mock_index_processor,
-            summary_index_service=mock_summary_index_service,
         )
 
         # Assert
@@ -137,8 +147,6 @@ class TestKnowledgeIndexNode:
             config=config,
             graph_init_params=mock_graph_init_params,
             graph_runtime_state=mock_graph_runtime_state,
-            index_processor=mock_index_processor,
-            summary_index_service=mock_summary_index_service,
         )
 
         # Act & Assert
@@ -172,8 +180,6 @@ class TestKnowledgeIndexNode:
             config=config,
             graph_init_params=mock_graph_init_params,
             graph_runtime_state=mock_graph_runtime_state,
-            index_processor=mock_index_processor,
-            summary_index_service=mock_summary_index_service,
         )
 
         # Act & Assert
@@ -210,8 +216,6 @@ class TestKnowledgeIndexNode:
             config=config,
             graph_init_params=mock_graph_init_params,
             graph_runtime_state=mock_graph_runtime_state,
-            index_processor=mock_index_processor,
-            summary_index_service=mock_summary_index_service,
         )
 
         # Act
@@ -269,8 +273,6 @@ class TestKnowledgeIndexNode:
             config=config,
             graph_init_params=mock_graph_init_params,
             graph_runtime_state=mock_graph_runtime_state,
-            index_processor=mock_index_processor,
-            summary_index_service=mock_summary_index_service,
        )
 
         # Act
@@ -334,8 +336,6 @@ class TestKnowledgeIndexNode:
             config=config,
             graph_init_params=mock_graph_init_params,
             graph_runtime_state=mock_graph_runtime_state,
-            index_processor=mock_index_processor,
-            summary_index_service=mock_summary_index_service,
         )
 
         # Act
@@ -387,8 +387,6 @@ class TestKnowledgeIndexNode:
             config=config,
             graph_init_params=mock_graph_init_params,
             graph_runtime_state=mock_graph_runtime_state,
-            index_processor=mock_index_processor,
-            summary_index_service=mock_summary_index_service,
         )
 
         # Act
@@ -446,8 +444,6 @@ class TestKnowledgeIndexNode:
             config=config,
             graph_init_params=mock_graph_init_params,
             graph_runtime_state=mock_graph_runtime_state,
-            index_processor=mock_index_processor,
-            summary_index_service=mock_summary_index_service,
         )
 
         # Act
@@ -506,8 +502,6 @@ class TestKnowledgeIndexNode:
             config=config,
             graph_init_params=mock_graph_init_params,
             graph_runtime_state=mock_graph_runtime_state,
-            index_processor=mock_index_processor,
-            summary_index_service=mock_summary_index_service,
         )
 
         # Act
@@ -546,8 +540,6 @@ class TestKnowledgeIndexNode:
             config=config,
             graph_init_params=mock_graph_init_params,
             graph_runtime_state=mock_graph_runtime_state,
-            index_processor=mock_index_processor,
-            summary_index_service=mock_summary_index_service,
         )
 
         # Act
@@ -595,8 +587,6 @@ class TestKnowledgeIndexNode:
             config=config,
             graph_init_params=mock_graph_init_params,
             graph_runtime_state=mock_graph_runtime_state,
-            index_processor=mock_index_processor,
-            summary_index_service=mock_summary_index_service,
         )
 
         # Act
@@ -637,8 +627,6 @@ class TestInvokeKnowledgeIndex:
             config=config,
             graph_init_params=mock_graph_init_params,
             graph_runtime_state=mock_graph_runtime_state,
-            index_processor=mock_index_processor,
-            summary_index_service=mock_summary_index_service,
         )
 
         # Act
diff --git a/api/tests/unit_tests/core/workflow/nodes/knowledge_retrieval/test_knowledge_retrieval_node.py b/api/tests/unit_tests/core/workflow/nodes/knowledge_retrieval/test_knowledge_retrieval_node.py
index b7a7a9c938..99997db6b2 100644
--- a/api/tests/unit_tests/core/workflow/nodes/knowledge_retrieval/test_knowledge_retrieval_node.py
+++ b/api/tests/unit_tests/core/workflow/nodes/knowledge_retrieval/test_knowledge_retrieval_node.py
@@ -5,9 +5,7 @@ from unittest.mock import Mock
 import pytest
 
 from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom
-from dify_graph.enums import WorkflowNodeExecutionStatus
-from dify_graph.model_runtime.entities.llm_entities import LLMUsage
-from dify_graph.nodes.knowledge_retrieval.entities import (
+from core.workflow.nodes.knowledge_retrieval.entities import (
     Condition,
     KnowledgeRetrievalNodeData,
     MetadataFilteringCondition,
@@ -15,9 +13,11 @@ from dify_graph.nodes.knowledge_retrieval.entities import (
     RerankingModelConfig,
     SingleRetrievalConfig,
 )
-from dify_graph.nodes.knowledge_retrieval.exc import RateLimitExceededError
-from dify_graph.nodes.knowledge_retrieval.knowledge_retrieval_node import KnowledgeRetrievalNode
-from dify_graph.repositories.rag_retrieval_protocol import RAGRetrievalProtocol, Source
+from core.workflow.nodes.knowledge_retrieval.exc import RateLimitExceededError
+from core.workflow.nodes.knowledge_retrieval.knowledge_retrieval_node import KnowledgeRetrievalNode
+from core.workflow.nodes.knowledge_retrieval.retrieval import RAGRetrievalProtocol, Source
+from dify_graph.enums import WorkflowNodeExecutionStatus
+from dify_graph.model_runtime.entities.llm_entities import LLMUsage
 from dify_graph.runtime import GraphRuntimeState, VariablePool
 from dify_graph.system_variable import SystemVariable
 from dify_graph.variables import StringSegment
@@ -52,11 +52,15 @@ def mock_graph_runtime_state():
 
 
 @pytest.fixture
-def mock_rag_retrieval():
+def mock_rag_retrieval(mocker):
     """Create mock RAGRetrievalProtocol."""
     mock_retrieval = Mock(spec=RAGRetrievalProtocol)
     mock_retrieval.knowledge_retrieval.return_value = []
     mock_retrieval.llm_usage = LLMUsage.empty_usage()
+    mocker.patch(
+        "core.workflow.nodes.knowledge_retrieval.knowledge_retrieval_node.DatasetRetrieval",
+        return_value=mock_retrieval,
+    )
     return mock_retrieval
 
 
@@ -106,7 +110,6 @@ class TestKnowledgeRetrievalNode:
             config=config,
             graph_init_params=mock_graph_init_params,
             graph_runtime_state=mock_graph_runtime_state,
-            rag_retrieval=mock_rag_retrieval,
         )
 
         # Assert
@@ -136,7 +139,6 @@ class TestKnowledgeRetrievalNode:
             config=config,
             graph_init_params=mock_graph_init_params,
             graph_runtime_state=mock_graph_runtime_state,
-            rag_retrieval=mock_rag_retrieval,
         )
 
         # Act
@@ -196,7 +198,6 @@ class TestKnowledgeRetrievalNode:
             config=config,
             graph_init_params=mock_graph_init_params,
             graph_runtime_state=mock_graph_runtime_state,
-            rag_retrieval=mock_rag_retrieval,
         )
 
         # Act
@@ -241,7 +242,6 @@ class TestKnowledgeRetrievalNode:
             config=config,
             graph_init_params=mock_graph_init_params,
             graph_runtime_state=mock_graph_runtime_state,
-            rag_retrieval=mock_rag_retrieval,
         )
 
         # Act
@@ -278,7 +278,6 @@ class TestKnowledgeRetrievalNode:
             config=config,
             graph_init_params=mock_graph_init_params,
             graph_runtime_state=mock_graph_runtime_state,
-            rag_retrieval=mock_rag_retrieval,
         )
 
         # Act
@@ -314,7 +313,6 @@ class TestKnowledgeRetrievalNode:
             config=config,
             graph_init_params=mock_graph_init_params,
             graph_runtime_state=mock_graph_runtime_state,
-            rag_retrieval=mock_rag_retrieval,
         )
 
         # Act
@@ -356,7 +354,6 @@ class TestKnowledgeRetrievalNode:
             config=config,
             graph_init_params=mock_graph_init_params,
             graph_runtime_state=mock_graph_runtime_state,
-            rag_retrieval=mock_rag_retrieval,
         )
 
         # Act
@@ -396,7 +393,6 @@ class TestKnowledgeRetrievalNode:
             config=config,
             graph_init_params=mock_graph_init_params,
             graph_runtime_state=mock_graph_runtime_state,
-            rag_retrieval=mock_rag_retrieval,
         )
 
         # Act
@@ -478,7 +474,6 @@ class TestFetchDatasetRetriever:
             config=config,
             graph_init_params=mock_graph_init_params,
             graph_runtime_state=mock_graph_runtime_state,
-            rag_retrieval=mock_rag_retrieval,
         )
 
         # Act
@@ -516,7 +511,6 @@ class TestFetchDatasetRetriever:
             config=config,
             graph_init_params=mock_graph_init_params,
             graph_runtime_state=mock_graph_runtime_state,
-            rag_retrieval=mock_rag_retrieval,
         )
 
         # Act
@@ -572,7 +566,6 @@ class TestFetchDatasetRetriever:
             config=config,
             graph_init_params=mock_graph_init_params,
             graph_runtime_state=mock_graph_runtime_state,
-            rag_retrieval=mock_rag_retrieval,
         )
 
         # Act
@@ -621,7 +614,6 @@ class TestFetchDatasetRetriever:
             config=config,
             graph_init_params=mock_graph_init_params,
             graph_runtime_state=mock_graph_runtime_state,
-            rag_retrieval=mock_rag_retrieval,
         )
 
         conditions = MetadataFilteringCondition(
@@ -683,7 +675,6 @@ class TestFetchDatasetRetriever:
             config=config,
             graph_init_params=mock_graph_init_params,
             graph_runtime_state=mock_graph_runtime_state,
-            rag_retrieval=mock_rag_retrieval,
         )
 
         mock_rag_retrieval.knowledge_retrieval.return_value = []
diff --git a/api/tests/unit_tests/core/workflow/nodes/list_operator/node_spec.py b/api/tests/unit_tests/core/workflow/nodes/list_operator/node_spec.py
index 25760ba352..d71e0921c1 100644
--- a/api/tests/unit_tests/core/workflow/nodes/list_operator/node_spec.py
+++ b/api/tests/unit_tests/core/workflow/nodes/list_operator/node_spec.py
@@ -4,7 +4,7 @@ import pytest
 
 from dify_graph.entities import GraphInitParams
 from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY
-from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus
+from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus
 from dify_graph.nodes.list_operator.node import ListOperatorNode
 from dify_graph.runtime import GraphRuntimeState
 from dify_graph.variables import ArrayNumberSegment, ArrayStringSegment
@@ -71,7 +71,7 @@ class TestListOperatorNode:
             graph_runtime_state=mock_graph_runtime_state,
         )
 
-        assert node.node_type == NodeType.LIST_OPERATOR
+        assert node.node_type == BuiltinNodeTypes.LIST_OPERATOR
         assert node._node_data.title == "List Operator"
 
     def test_version(self):
diff --git a/api/tests/unit_tests/core/workflow/nodes/llm/test_node.py b/api/tests/unit_tests/core/workflow/nodes/llm/test_node.py
index d56035b6bc..fc96088af1 100644
--- a/api/tests/unit_tests/core/workflow/nodes/llm/test_node.py
+++ b/api/tests/unit_tests/core/workflow/nodes/llm/test_node.py
@@ -34,8 +34,8 @@ from dify_graph.nodes.llm.entities import (
     VisionConfigOptions,
 )
 from dify_graph.nodes.llm.file_saver import LLMFileSaver
-from dify_graph.nodes.llm.node import LLMNode, _handle_memory_completion_mode
-from dify_graph.nodes.llm.protocols import CredentialsProvider, ModelFactory
+from dify_graph.nodes.llm.node import LLMNode
+from dify_graph.nodes.llm.protocols import CredentialsProvider, ModelFactory, TemplateRenderer
 from dify_graph.runtime import GraphRuntimeState, VariablePool
 from dify_graph.system_variable import SystemVariable
 from dify_graph.variables import ArrayAnySegment, ArrayFileSegment, NoneSegment
@@ -107,6 +107,7 @@ def llm_node(
     mock_file_saver = mock.MagicMock(spec=LLMFileSaver)
     mock_credentials_provider = mock.MagicMock(spec=CredentialsProvider)
     mock_model_factory = mock.MagicMock(spec=ModelFactory)
+    mock_template_renderer = mock.MagicMock(spec=TemplateRenderer)
     node_config = {
         "id": "1",
         "data": llm_node_data.model_dump(),
@@ -121,6 +122,7 @@ def llm_node(
         model_factory=mock_model_factory,
         model_instance=mock.MagicMock(spec=ModelInstance),
         llm_file_saver=mock_file_saver,
+        template_renderer=mock_template_renderer,
         http_client=http_client,
     )
     return node
@@ -590,6 +592,33 @@ def test_handle_list_messages_basic(llm_node):
     assert result[0].content == [TextPromptMessageContent(data="Hello, world")]
 
 
+def test_handle_list_messages_jinja2_uses_template_renderer(llm_node):
+    llm_node._template_renderer.render_jinja2.return_value = "Hello, world"
+    messages = [
+        LLMNodeChatModelMessage(
+            text="",
+            jinja2_text="Hello, {{ name }}",
+            role=PromptMessageRole.USER,
+            edition_type="jinja2",
+        )
+    ]
+
+    result = llm_node.handle_list_messages(
+        messages=messages,
+        context=None,
+        jinja2_variables=[],
+        variable_pool=llm_node.graph_runtime_state.variable_pool,
+        vision_detail_config=ImagePromptMessageContent.DETAIL.HIGH,
+        template_renderer=llm_node._template_renderer,
+    )
+
+    assert result == [UserPromptMessage(content=[TextPromptMessageContent(data="Hello, world")])]
+    llm_node._template_renderer.render_jinja2.assert_called_once_with(
+        template="Hello, {{ name }}",
+        inputs={},
+    )
+
+
 def test_handle_memory_completion_mode_uses_prompt_message_interface():
     memory = mock.MagicMock(spec=MockTokenBufferMemory)
     memory.get_history_prompt_messages.return_value = [
@@ -613,8 +642,8 @@ def test_handle_memory_completion_mode_uses_prompt_message_interface():
         window=MemoryConfig.WindowConfig(enabled=True, size=3),
     )
 
-    with mock.patch("dify_graph.nodes.llm.node._calculate_rest_token", return_value=2000) as mock_rest_token:
-        memory_text = _handle_memory_completion_mode(
+    with mock.patch("dify_graph.nodes.llm.llm_utils.calculate_rest_token", return_value=2000) as mock_rest_token:
+        memory_text = llm_utils.handle_memory_completion_mode(
             memory=memory,
             memory_config=memory_config,
             model_instance=model_instance,
@@ -630,6 +659,7 @@ def llm_node_for_multimodal(llm_node_data, graph_init_params, graph_runtime_stat
     mock_file_saver: LLMFileSaver = mock.MagicMock(spec=LLMFileSaver)
     mock_credentials_provider = mock.MagicMock(spec=CredentialsProvider)
     mock_model_factory = mock.MagicMock(spec=ModelFactory)
+    mock_template_renderer = mock.MagicMock(spec=TemplateRenderer)
     node_config = {
         "id": "1",
         "data": llm_node_data.model_dump(),
@@ -644,6 +674,7 @@ def llm_node_for_multimodal(llm_node_data, graph_init_params, graph_runtime_stat
         model_factory=mock_model_factory,
         model_instance=mock.MagicMock(spec=ModelInstance),
         llm_file_saver=mock_file_saver,
+        template_renderer=mock_template_renderer,
         http_client=http_client,
     )
     return node, mock_file_saver
diff --git a/api/tests/unit_tests/core/workflow/nodes/template_transform/template_transform_node_spec.py b/api/tests/unit_tests/core/workflow/nodes/template_transform/template_transform_node_spec.py
index 6831626f58..332a8761f9 100644
--- a/api/tests/unit_tests/core/workflow/nodes/template_transform/template_transform_node_spec.py
+++ b/api/tests/unit_tests/core/workflow/nodes/template_transform/template_transform_node_spec.py
@@ -3,7 +3,7 @@ from unittest.mock import MagicMock
 import pytest
 
 from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom
-from dify_graph.enums import ErrorStrategy, NodeType, WorkflowNodeExecutionStatus
+from dify_graph.enums import BuiltinNodeTypes, ErrorStrategy, WorkflowNodeExecutionStatus
 from dify_graph.graph import Graph
 from dify_graph.nodes.template_transform.template_renderer import TemplateRenderError
 from dify_graph.nodes.template_transform.template_transform_node import TemplateTransformNode
@@ -65,7 +65,7 @@ class TestTemplateTransformNode:
             template_renderer=mock_renderer,
         )
 
-        assert node.node_type == NodeType.TEMPLATE_TRANSFORM
+        assert node.node_type == BuiltinNodeTypes.TEMPLATE_TRANSFORM
         assert node._node_data.title == "Template Transform"
         assert len(node._node_data.variables) == 2
         assert node._node_data.template == "Hello {{ name }}, you are {{ age }} years old!"
diff --git a/api/tests/unit_tests/core/workflow/nodes/test_base_node.py b/api/tests/unit_tests/core/workflow/nodes/test_base_node.py
index 0d81e7762b..2b0205fb7b 100644
--- a/api/tests/unit_tests/core/workflow/nodes/test_base_node.py
+++ b/api/tests/unit_tests/core/workflow/nodes/test_base_node.py
@@ -6,7 +6,7 @@ from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom
 from dify_graph.entities import GraphInitParams
 from dify_graph.entities.base_node_data import BaseNodeData
 from dify_graph.entities.graph_config import NodeConfigDict, NodeConfigDictAdapter
-from dify_graph.enums import NodeType
+from dify_graph.enums import BuiltinNodeTypes
 from dify_graph.nodes.base.node import Node
 from dify_graph.runtime import GraphRuntimeState, VariablePool
 from dify_graph.system_variable import SystemVariable
@@ -18,7 +18,7 @@ class _SampleNodeData(BaseNodeData):
 
 
 class _SampleNode(Node[_SampleNodeData]):
-    node_type = NodeType.ANSWER
+    node_type = BuiltinNodeTypes.ANSWER
 
     @classmethod
     def version(cls) -> str:
@@ -46,7 +46,7 @@ def _build_node_config() -> NodeConfigDict:
         {
             "id": "node-1",
             "data": {
-                "type": NodeType.ANSWER.value,
+                "type": BuiltinNodeTypes.ANSWER,
                 "title": "Sample",
                 "foo": "bar",
             },
@@ -105,7 +105,7 @@ def test_missing_generic_argument_raises_type_error():
     with pytest.raises(TypeError):
 
         class _InvalidNode(Node):  # type: ignore[type-abstract]
-            node_type = NodeType.ANSWER
+            node_type = BuiltinNodeTypes.ANSWER
 
             @classmethod
             def version(cls) -> str:
@@ -118,7 +118,7 @@ def test_missing_generic_argument_raises_type_error():
 def test_base_node_data_keeps_dict_style_access_compatibility():
     node_data = _SampleNodeData.model_validate(
         {
-            "type": NodeType.ANSWER.value,
+            "type": BuiltinNodeTypes.ANSWER,
             "title": "Sample",
             "foo": "bar",
         }
diff --git a/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py b/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py
index 13275d4be6..40754974c1 100644
--- a/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py
+++ b/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py
@@ -7,7 +7,7 @@ from docx.oxml.text.paragraph import CT_P
 
 from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom
 from dify_graph.entities import GraphInitParams
-from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus
+from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus
 from dify_graph.file import File, FileTransferMethod
 from dify_graph.node_events import NodeRunResult
 from dify_graph.nodes.document_extractor import DocumentExtractorNode, DocumentExtractorNodeData
@@ -250,7 +250,7 @@ def test_extract_text_from_docx(mock_document):
 
 
 def test_node_type(document_extractor_node):
-    assert document_extractor_node.node_type == NodeType.DOCUMENT_EXTRACTOR
+    assert document_extractor_node.node_type == BuiltinNodeTypes.DOCUMENT_EXTRACTOR
 
 
 @patch("pandas.ExcelFile")
diff --git a/api/tests/unit_tests/core/workflow/nodes/test_if_else.py b/api/tests/unit_tests/core/workflow/nodes/test_if_else.py
index 041bd66d03..c746a945fe 100644
--- a/api/tests/unit_tests/core/workflow/nodes/test_if_else.py
+++ b/api/tests/unit_tests/core/workflow/nodes/test_if_else.py
@@ -60,7 +60,7 @@ def test_execute_if_else_result_true():
         graph_init_params=init_params,
         graph_runtime_state=graph_runtime_state,
     )
-    graph = Graph.init(graph_config=graph_config, node_factory=node_factory)
+    graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start")
 
     node_config = {
         "id": "if-else",
@@ -154,7 +154,7 @@ def test_execute_if_else_result_false():
         graph_init_params=init_params,
         graph_runtime_state=graph_runtime_state,
     )
-    graph = Graph.init(graph_config=graph_config, node_factory=node_factory)
+    graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start")
 
     node_config = {
         "id": "if-else",
@@ -328,7 +328,7 @@ def test_execute_if_else_boolean_conditions(condition: Condition):
         graph_init_params=init_params,
         graph_runtime_state=graph_runtime_state,
     )
-    graph = Graph.init(graph_config=graph_config, node_factory=node_factory)
+    graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start")
 
     node_data = {
         "title": "Boolean Test",
@@ -382,7 +382,7 @@ def test_execute_if_else_boolean_false_conditions():
         graph_init_params=init_params,
         graph_runtime_state=graph_runtime_state,
     )
-    graph = Graph.init(graph_config=graph_config, node_factory=node_factory)
+    graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start")
 
     node_data = {
         "title": "Boolean False Test",
@@ -450,7 +450,7 @@ def test_execute_if_else_boolean_cases_structure():
         graph_init_params=init_params,
         graph_runtime_state=graph_runtime_state,
     )
-    graph = Graph.init(graph_config=graph_config, node_factory=node_factory)
+    graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start")
 
     node_data = {
         "title": "Boolean Cases Test",
diff --git a/api/tests/unit_tests/core/workflow/nodes/test_question_classifier_node.py b/api/tests/unit_tests/core/workflow/nodes/test_question_classifier_node.py
index 4dfec5ef60..c5a02e87e4 100644
--- a/api/tests/unit_tests/core/workflow/nodes/test_question_classifier_node.py
+++ b/api/tests/unit_tests/core/workflow/nodes/test_question_classifier_node.py
@@ -1,5 +1,14 @@
+from types import SimpleNamespace
+from unittest.mock import MagicMock
+
 from dify_graph.model_runtime.entities import ImagePromptMessageContent
-from dify_graph.nodes.question_classifier import QuestionClassifierNodeData
+from dify_graph.nodes.llm.protocols import CredentialsProvider, ModelFactory, TemplateRenderer
+from dify_graph.nodes.protocols import HttpClientProtocol
+from dify_graph.nodes.question_classifier import (
+    QuestionClassifierNode,
+    QuestionClassifierNodeData,
+)
+from tests.workflow_test_utils import build_test_graph_init_params
 
 
 def test_init_question_classifier_node_data():
@@ -65,3 +74,52 @@ def test_init_question_classifier_node_data_without_vision_config():
     assert node_data.vision.enabled == False
     assert node_data.vision.configs.variable_selector == ["sys", "files"]
     assert node_data.vision.configs.detail == ImagePromptMessageContent.DETAIL.HIGH
+
+
+def test_question_classifier_calculate_rest_token_uses_shared_prompt_builder(monkeypatch):
+    node_data = QuestionClassifierNodeData.model_validate(
+        {
+            "title": "test classifier node",
+            "query_variable_selector": ["id", "name"],
+            "model": {"provider": "openai", "name": "gpt-3.5-turbo", "mode": "completion", "completion_params": {}},
+            "classes": [{"id": "1", "name": "class 1"}],
+            "instruction": "This is a test instruction",
+        }
+    )
+    template_renderer = MagicMock(spec=TemplateRenderer)
+    node = QuestionClassifierNode(
+        id="node-id",
+        config={"id": "node-id", "data": node_data.model_dump(mode="json")},
+        graph_init_params=build_test_graph_init_params(
+            workflow_id="workflow-id",
+            graph_config={},
+            tenant_id="tenant-id",
+            app_id="app-id",
+            user_id="user-id",
+        ),
+        graph_runtime_state=SimpleNamespace(variable_pool=MagicMock()),
+        credentials_provider=MagicMock(spec=CredentialsProvider),
+        model_factory=MagicMock(spec=ModelFactory),
+        model_instance=MagicMock(),
+        http_client=MagicMock(spec=HttpClientProtocol),
+        llm_file_saver=MagicMock(),
+        template_renderer=template_renderer,
+    )
+    fetch_prompt_messages = MagicMock(return_value=([], None))
+    monkeypatch.setattr(
+        "dify_graph.nodes.question_classifier.question_classifier_node.llm_utils.fetch_prompt_messages",
+        fetch_prompt_messages,
+    )
+    monkeypatch.setattr(
+        "dify_graph.nodes.question_classifier.question_classifier_node.llm_utils.fetch_model_schema",
+        MagicMock(return_value=SimpleNamespace(model_properties={}, parameter_rules=[])),
+    )
+
+    node._calculate_rest_token(
+        node_data=node_data,
+        query="hello",
+        model_instance=MagicMock(stop=(), parameters={}),
+        context="",
+    )
+
+    assert fetch_prompt_messages.call_args.kwargs["template_renderer"] is template_renderer
diff --git a/api/tests/unit_tests/core/workflow/nodes/variable_assigner/v1/test_variable_assigner_v1.py b/api/tests/unit_tests/core/workflow/nodes/variable_assigner/v1/test_variable_assigner_v1.py
index 2cd3a38fa6..e69c05dc0b 100644
--- a/api/tests/unit_tests/core/workflow/nodes/variable_assigner/v1/test_variable_assigner_v1.py
+++ b/api/tests/unit_tests/core/workflow/nodes/variable_assigner/v1/test_variable_assigner_v1.py
@@ -88,7 +88,7 @@ def test_overwrite_string_variable():
         graph_init_params=init_params,
         graph_runtime_state=graph_runtime_state,
     )
-    graph = Graph.init(graph_config=graph_config, node_factory=node_factory)
+    graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start")
 
     node_config = {
         "id": "node_id",
@@ -188,7 +188,7 @@ def test_append_variable_to_array():
         graph_init_params=init_params,
         graph_runtime_state=graph_runtime_state,
     )
-    graph = Graph.init(graph_config=graph_config, node_factory=node_factory)
+    graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start")
 
     node_config = {
         "id": "node_id",
@@ -277,7 +277,7 @@ def test_clear_array():
         graph_init_params=init_params,
         graph_runtime_state=graph_runtime_state,
     )
-    graph = Graph.init(graph_config=graph_config, node_factory=node_factory)
+    graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start")
 
     node_config = {
         "id": "node_id",
diff --git a/api/tests/unit_tests/core/workflow/nodes/variable_assigner/v2/test_variable_assigner_v2.py b/api/tests/unit_tests/core/workflow/nodes/variable_assigner/v2/test_variable_assigner_v2.py
index 5b285c2681..6874f3fef1 100644
--- a/api/tests/unit_tests/core/workflow/nodes/variable_assigner/v2/test_variable_assigner_v2.py
+++ b/api/tests/unit_tests/core/workflow/nodes/variable_assigner/v2/test_variable_assigner_v2.py
@@ -118,7 +118,7 @@ def test_remove_first_from_array():
         graph_init_params=init_params,
         graph_runtime_state=graph_runtime_state,
     )
-    graph = Graph.init(graph_config=graph_config, node_factory=node_factory)
+    graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start")
 
     node_config = {
         "id": "node_id",
@@ -206,7 +206,7 @@ def test_remove_last_from_array():
         graph_init_params=init_params,
         graph_runtime_state=graph_runtime_state,
     )
-    graph = Graph.init(graph_config=graph_config, node_factory=node_factory)
+    graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start")

     node_config = {
         "id": "node_id",
@@ -291,7 +291,7 @@ def test_remove_first_from_empty_array():
         graph_init_params=init_params,
         graph_runtime_state=graph_runtime_state,
     )
-    graph = Graph.init(graph_config=graph_config, node_factory=node_factory)
+    graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start")

     node_config = {
         "id": "node_id",
@@ -376,7 +376,7 @@ def test_remove_last_from_empty_array():
         graph_init_params=init_params,
         graph_runtime_state=graph_runtime_state,
     )
-    graph = Graph.init(graph_config=graph_config, node_factory=node_factory)
+    graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start")

     node_config = {
         "id": "node_id",
diff --git a/api/tests/unit_tests/core/workflow/nodes/webhook/test_entities.py b/api/tests/unit_tests/core/workflow/nodes/webhook/test_entities.py
index 61b18566b0..6be5bb23e8 100644
--- a/api/tests/unit_tests/core/workflow/nodes/webhook/test_entities.py
+++ b/api/tests/unit_tests/core/workflow/nodes/webhook/test_entities.py
@@ -1,7 +1,7 @@
 import pytest
 from pydantic import ValidationError

-from dify_graph.nodes.trigger_webhook.entities import (
+from core.workflow.nodes.trigger_webhook.entities import (
     ContentType,
     Method,
     WebhookBodyParameter,
diff --git a/api/tests/unit_tests/core/workflow/nodes/webhook/test_exceptions.py b/api/tests/unit_tests/core/workflow/nodes/webhook/test_exceptions.py
index a821e361c5..ddf1af5a59 100644
--- a/api/tests/unit_tests/core/workflow/nodes/webhook/test_exceptions.py
+++ b/api/tests/unit_tests/core/workflow/nodes/webhook/test_exceptions.py
@@ -1,12 +1,12 @@
 import pytest

-from dify_graph.entities.exc import BaseNodeError
-from dify_graph.nodes.trigger_webhook.exc import (
+from core.workflow.nodes.trigger_webhook.exc import (
     WebhookConfigError,
     WebhookNodeError,
     WebhookNotFoundError,
     WebhookTimeoutError,
 )
+from dify_graph.entities.exc import BaseNodeError


 def test_webhook_node_error_inheritance():
@@ -149,7 +149,7 @@ def test_webhook_error_attributes():
     assert WebhookConfigError.__name__ == "WebhookConfigError"

     # Test that all error classes have proper __module__
-    expected_module = "dify_graph.nodes.trigger_webhook.exc"
+    expected_module = "core.workflow.nodes.trigger_webhook.exc"
     assert WebhookNodeError.__module__ == expected_module
     assert WebhookTimeoutError.__module__ == expected_module
     assert WebhookNotFoundError.__module__ == expected_module
diff --git a/api/tests/unit_tests/core/workflow/nodes/webhook/test_webhook_file_conversion.py b/api/tests/unit_tests/core/workflow/nodes/webhook/test_webhook_file_conversion.py
index c750e74182..78dd7ce0f3 100644
--- a/api/tests/unit_tests/core/workflow/nodes/webhook/test_webhook_file_conversion.py
+++ b/api/tests/unit_tests/core/workflow/nodes/webhook/test_webhook_file_conversion.py
@@ -9,15 +9,15 @@ when passing files to downstream LLM nodes.
 from unittest.mock import Mock, patch

 from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom
-from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY, GraphInitParams
-from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus
-from dify_graph.nodes.trigger_webhook.entities import (
+from core.workflow.nodes.trigger_webhook.entities import (
     ContentType,
     Method,
     WebhookBodyParameter,
     WebhookData,
 )
-from dify_graph.nodes.trigger_webhook.node import TriggerWebhookNode
+from core.workflow.nodes.trigger_webhook.node import TriggerWebhookNode
+from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY, GraphInitParams
+from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus
 from dify_graph.runtime.graph_runtime_state import GraphRuntimeState
 from dify_graph.runtime.variable_pool import VariablePool
 from dify_graph.system_variable import SystemVariable
@@ -130,8 +130,8 @@ def test_webhook_node_file_conversion_to_file_variable():
     # Mock the file factory and variable factory
     with (
         patch("factories.file_factory.build_from_mapping") as mock_file_factory,
-        patch("dify_graph.nodes.trigger_webhook.node.build_segment_with_type") as mock_segment_factory,
-        patch("dify_graph.nodes.trigger_webhook.node.FileVariable") as mock_file_variable,
+        patch("core.workflow.nodes.trigger_webhook.node.build_segment_with_type") as mock_segment_factory,
+        patch("core.workflow.nodes.trigger_webhook.node.FileVariable") as mock_file_variable,
     ):
         # Setup mocks
         mock_file_obj = Mock()
@@ -322,8 +322,8 @@ def test_webhook_node_file_conversion_mixed_parameters():

     with (
         patch("factories.file_factory.build_from_mapping") as mock_file_factory,
-        patch("dify_graph.nodes.trigger_webhook.node.build_segment_with_type") as mock_segment_factory,
-        patch("dify_graph.nodes.trigger_webhook.node.FileVariable") as mock_file_variable,
+        patch("core.workflow.nodes.trigger_webhook.node.build_segment_with_type") as mock_segment_factory,
+        patch("core.workflow.nodes.trigger_webhook.node.FileVariable") as mock_file_variable,
     ):
         # Setup mocks for file
         mock_file_obj = Mock()
@@ -390,8 +390,8 @@ def test_webhook_node_different_file_types():

     with (
         patch("factories.file_factory.build_from_mapping") as mock_file_factory,
-        patch("dify_graph.nodes.trigger_webhook.node.build_segment_with_type") as mock_segment_factory,
-        patch("dify_graph.nodes.trigger_webhook.node.FileVariable") as mock_file_variable,
+        patch("core.workflow.nodes.trigger_webhook.node.build_segment_with_type") as mock_segment_factory,
+        patch("core.workflow.nodes.trigger_webhook.node.FileVariable") as mock_file_variable,
     ):
         # Setup mocks for all files
        mock_file_objs = [Mock() for _ in range(3)]
diff --git a/api/tests/unit_tests/core/workflow/nodes/webhook/test_webhook_node.py b/api/tests/unit_tests/core/workflow/nodes/webhook/test_webhook_node.py
index df13bbb92f..139f65d6c3 100644
--- a/api/tests/unit_tests/core/workflow/nodes/webhook/test_webhook_node.py
+++ b/api/tests/unit_tests/core/workflow/nodes/webhook/test_webhook_node.py
@@ -3,17 +3,18 @@ from unittest.mock import patch
 import pytest

 from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom
-from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY, GraphInitParams
-from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus
-from dify_graph.file import File, FileTransferMethod, FileType
-from dify_graph.nodes.trigger_webhook.entities import (
+from core.trigger.constants import TRIGGER_WEBHOOK_NODE_TYPE
+from core.workflow.nodes.trigger_webhook.entities import (
     ContentType,
     Method,
     WebhookBodyParameter,
     WebhookData,
     WebhookParameter,
 )
-from dify_graph.nodes.trigger_webhook.node import TriggerWebhookNode
+from core.workflow.nodes.trigger_webhook.node import TriggerWebhookNode
+from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY, GraphInitParams
+from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus
+from dify_graph.file import File, FileTransferMethod, FileType
 from dify_graph.runtime.graph_runtime_state import GraphRuntimeState
 from dify_graph.runtime.variable_pool import VariablePool
 from dify_graph.system_variable import SystemVariable
@@ -82,7 +83,7 @@ def test_webhook_node_basic_initialization():

     node = create_webhook_node(data, variable_pool)

-    assert node.node_type.value == "trigger-webhook"
+    assert node.node_type == TRIGGER_WEBHOOK_NODE_TYPE
     assert node.version() == "1"
     assert node._get_title() == "Test Webhook"
     assert node._node_data.method == Method.POST
diff --git a/api/tests/unit_tests/core/workflow/test_node_factory.py b/api/tests/unit_tests/core/workflow/test_node_factory.py
index 4a5f561c22..367e3958ad 100644
--- a/api/tests/unit_tests/core/workflow/test_node_factory.py
+++ b/api/tests/unit_tests/core/workflow/test_node_factory.py
@@ -5,9 +5,10 @@
 import pytest

 from core.app.entities.app_invoke_entities import DifyRunContext, InvokeFrom, UserFrom
 from core.workflow import node_factory
+from core.workflow.nodes.knowledge_index import KNOWLEDGE_INDEX_NODE_TYPE
 from dify_graph.entities.base_node_data import BaseNodeData
 from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY
-from dify_graph.enums import NodeType, SystemVariableKey
+from dify_graph.enums import BuiltinNodeTypes, NodeType, SystemVariableKey
 from dify_graph.nodes.code.entities import CodeLanguage
 from dify_graph.variables.segments import StringSegment

@@ -139,17 +140,40 @@ class TestDefaultWorkflowCodeExecutor:
         assert executor.is_execution_error(RuntimeError("boom")) is False


+class TestDefaultLLMTemplateRenderer:
+    def test_render_jinja2_delegates_to_code_executor(self, monkeypatch):
+        renderer = node_factory.DefaultLLMTemplateRenderer()
+        execute_workflow_code_template = MagicMock(return_value={"result": "hello world"})
+        monkeypatch.setattr(
+            node_factory.CodeExecutor,
+            "execute_workflow_code_template",
+            execute_workflow_code_template,
+        )
+
+        result = renderer.render_jinja2(
+            template="Hello {{ name }}",
+            inputs={"name": "world"},
+        )
+
+        assert result == "hello world"
+        execute_workflow_code_template.assert_called_once_with(
+            language=CodeLanguage.JINJA2,
+            code="Hello {{ name }}",
+            inputs={"name": "world"},
+        )
+
+
 class TestDifyNodeFactoryInit:
     def test_init_builds_default_dependencies(self):
         graph_init_params = SimpleNamespace(run_context={"context": "value"})
         graph_runtime_state = sentinel.graph_runtime_state
         dify_context = SimpleNamespace(tenant_id="tenant-id")
         template_renderer = sentinel.template_renderer
-        rag_retrieval = sentinel.rag_retrieval
         unstructured_api_config = sentinel.unstructured_api_config
         http_request_config = sentinel.http_request_config
         credentials_provider = sentinel.credentials_provider
         model_factory = sentinel.model_factory
+        llm_template_renderer = sentinel.llm_template_renderer

         with (
             patch.object(
@@ -162,7 +186,6 @@
                 "CodeExecutorJinja2TemplateRenderer",
                 return_value=template_renderer,
             ) as renderer_factory,
-            patch.object(node_factory, "DatasetRetrieval", return_value=rag_retrieval),
             patch.object(
                 node_factory,
                 "UnstructuredApiConfig",
@@ -173,6 +196,11 @@
                 "build_http_request_config",
                 return_value=http_request_config,
             ),
+            patch.object(
+                node_factory,
+                "DefaultLLMTemplateRenderer",
+                return_value=llm_template_renderer,
+            ) as llm_renderer_factory,
             patch.object(
                 node_factory,
                 "build_dify_model_access",
@@ -187,12 +215,14 @@
         resolve_dify_context.assert_called_once_with(graph_init_params.run_context)
         build_dify_model_access.assert_called_once_with("tenant-id")
         renderer_factory.assert_called_once()
+        llm_renderer_factory.assert_called_once()
         assert renderer_factory.call_args.kwargs["code_executor"] is factory._code_executor
         assert factory.graph_init_params is graph_init_params
         assert factory.graph_runtime_state is graph_runtime_state
         assert factory._dify_context is dify_context
         assert factory._template_renderer is template_renderer
-        assert factory._rag_retrieval is rag_retrieval
+
+        assert factory._llm_template_renderer is llm_template_renderer
         assert factory._document_extractor_unstructured_api_config is unstructured_api_config
         assert factory._http_request_config is http_request_config
         assert factory._llm_credentials_provider is credentials_provider
@@ -244,11 +274,11 @@ class TestDifyNodeFactoryCreateNode:
         factory._code_executor = sentinel.code_executor
         factory._code_limits = sentinel.code_limits
         factory._template_renderer = sentinel.template_renderer
+        factory._llm_template_renderer = sentinel.llm_template_renderer
         factory._template_transform_max_output_length = 2048
         factory._http_request_http_client = sentinel.http_client
         factory._http_request_tool_file_manager_factory = sentinel.tool_file_manager_factory
         factory._http_request_file_manager = sentinel.file_manager
-        factory._rag_retrieval = sentinel.rag_retrieval
         factory._document_extractor_unstructured_api_config = sentinel.unstructured_api_config
         factory._http_request_config = sentinel.http_request_config
         factory._llm_credentials_provider = sentinel.credentials_provider
@@ -256,47 +286,46 @@ class TestDifyNodeFactoryCreateNode:
         return factory

     def test_rejects_unknown_node_type(self, factory):
-        with pytest.raises(ValueError, match="Input should be"):
+        with pytest.raises(ValueError, match="No class mapping found for node type: missing"):
             factory.create_node({"id": "node-id", "data": {"type": "missing"}})

     def test_rejects_missing_class_mapping(self, monkeypatch, factory):
-        monkeypatch.setattr(node_factory, "NODE_TYPE_CLASSES_MAPPING", {})
+        monkeypatch.setattr(
+            factory,
+            "_resolve_node_class",
+            MagicMock(side_effect=ValueError("No class mapping found for node type: start")),
+        )

         with pytest.raises(ValueError, match="No class mapping found for node type: start"):
-            factory.create_node({"id": "node-id", "data": {"type": NodeType.START.value}})
+            factory.create_node({"id": "node-id", "data": {"type": BuiltinNodeTypes.START}})

     def test_rejects_missing_latest_class(self, monkeypatch, factory):
         monkeypatch.setattr(
-            node_factory,
-            "NODE_TYPE_CLASSES_MAPPING",
-            {NodeType.START: {node_factory.LATEST_VERSION: None}},
+            factory,
+            "_resolve_node_class",
+            MagicMock(side_effect=ValueError("No latest version class found for node type: start")),
         )

         with pytest.raises(ValueError, match="No latest version class found for node type: start"):
-            factory.create_node({"id": "node-id", "data": {"type": NodeType.START.value}})
+            factory.create_node({"id": "node-id", "data": {"type": BuiltinNodeTypes.START}})

     def test_uses_version_specific_class_when_available(self, monkeypatch, factory):
         matched_node = sentinel.matched_node
         latest_node_class = MagicMock(return_value=sentinel.latest_node)
         matched_node_class = MagicMock(return_value=matched_node)
         monkeypatch.setattr(
-            node_factory,
-            "NODE_TYPE_CLASSES_MAPPING",
-            {
-                NodeType.START: {
-                    node_factory.LATEST_VERSION: latest_node_class,
-                    "9": matched_node_class,
-                }
-            },
+            factory,
+            "_resolve_node_class",
+            MagicMock(return_value=matched_node_class),
         )

-        result = factory.create_node({"id": "node-id", "data": {"type": NodeType.START.value, "version": "9"}})
+        result = factory.create_node({"id": "node-id", "data": {"type": BuiltinNodeTypes.START, "version": "9"}})

         assert result is matched_node
         matched_node_class.assert_called_once()
         kwargs = matched_node_class.call_args.kwargs
         assert kwargs["id"] == "node-id"
-        _assert_typed_node_config(kwargs["config"], node_id="node-id", node_type=NodeType.START, version="9")
+        _assert_typed_node_config(kwargs["config"], node_id="node-id", node_type=BuiltinNodeTypes.START, version="9")
         assert kwargs["graph_init_params"] is sentinel.graph_init_params
         assert kwargs["graph_runtime_state"] is sentinel.graph_runtime_state
         latest_node_class.assert_not_called()
@@ -305,41 +334,41 @@ class TestDifyNodeFactoryCreateNode:
         latest_node = sentinel.latest_node
         latest_node_class = MagicMock(return_value=latest_node)
         monkeypatch.setattr(
-            node_factory,
-            "NODE_TYPE_CLASSES_MAPPING",
-            {NodeType.START: {node_factory.LATEST_VERSION: latest_node_class}},
+            factory,
+            "_resolve_node_class",
+            MagicMock(return_value=latest_node_class),
         )

-        result = factory.create_node({"id": "node-id", "data": {"type": NodeType.START.value, "version": "9"}})
+        result = factory.create_node({"id": "node-id", "data": {"type": BuiltinNodeTypes.START, "version": "9"}})

         assert result is latest_node
         latest_node_class.assert_called_once()
         kwargs = latest_node_class.call_args.kwargs
         assert kwargs["id"] == "node-id"
-        _assert_typed_node_config(kwargs["config"], node_id="node-id", node_type=NodeType.START, version="9")
+        _assert_typed_node_config(kwargs["config"], node_id="node-id", node_type=BuiltinNodeTypes.START, version="9")
         assert kwargs["graph_init_params"] is sentinel.graph_init_params
         assert kwargs["graph_runtime_state"] is sentinel.graph_runtime_state

     @pytest.mark.parametrize(
         ("node_type", "constructor_name"),
         [
-            (NodeType.CODE, "CodeNode"),
-            (NodeType.TEMPLATE_TRANSFORM, "TemplateTransformNode"),
-            (NodeType.HTTP_REQUEST, "HttpRequestNode"),
-            (NodeType.HUMAN_INPUT, "HumanInputNode"),
-            (NodeType.KNOWLEDGE_INDEX, "KnowledgeIndexNode"),
-            (NodeType.DATASOURCE, "DatasourceNode"),
-            (NodeType.KNOWLEDGE_RETRIEVAL, "KnowledgeRetrievalNode"),
-            (NodeType.DOCUMENT_EXTRACTOR, "DocumentExtractorNode"),
+            (BuiltinNodeTypes.CODE, "CodeNode"),
+            (BuiltinNodeTypes.TEMPLATE_TRANSFORM, "TemplateTransformNode"),
+            (BuiltinNodeTypes.HTTP_REQUEST, "HttpRequestNode"),
+            (BuiltinNodeTypes.HUMAN_INPUT, "HumanInputNode"),
+            (KNOWLEDGE_INDEX_NODE_TYPE, "KnowledgeIndexNode"),
+            (BuiltinNodeTypes.DATASOURCE, "DatasourceNode"),
+            (BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL, "KnowledgeRetrievalNode"),
+            (BuiltinNodeTypes.DOCUMENT_EXTRACTOR, "DocumentExtractorNode"),
         ],
     )
     def test_creates_specialized_nodes(self, monkeypatch, factory, node_type, constructor_name):
         created_node = object()
         constructor = MagicMock(name=constructor_name, return_value=created_node)
         monkeypatch.setattr(
-            node_factory,
-            "NODE_TYPE_CLASSES_MAPPING",
-            {node_type: {node_factory.LATEST_VERSION: constructor}},
+            factory,
+            "_resolve_node_class",
+            MagicMock(return_value=constructor),
         )

         if constructor_name == "HumanInputNode":
@@ -350,13 +379,8 @@ class TestDifyNodeFactoryCreateNode:
             form_repository = sentinel.form_repository
             form_repository_impl = MagicMock(return_value=form_repository)
             monkeypatch.setattr(
                 node_factory,
                 "HumanInputFormRepositoryImpl",
                 form_repository_impl,
             )
-        elif constructor_name == "KnowledgeIndexNode":
-            index_processor = sentinel.index_processor
-            summary_index = sentinel.summary_index
-            monkeypatch.setattr(node_factory, "IndexProcessor", MagicMock(return_value=index_processor))
-            monkeypatch.setattr(node_factory, "SummaryIndex", MagicMock(return_value=summary_index))

-        node_config = {"id": "node-id", "data": {"type": node_type.value}}
+        node_config = {"id": "node-id", "data": {"type": node_type}}

         result = factory.create_node(node_config)
         assert result is created_node
@@ -380,13 +404,6 @@ class TestDifyNodeFactoryCreateNode:
         elif constructor_name == "HumanInputNode":
             assert kwargs["form_repository"] is form_repository
             form_repository_impl.assert_called_once_with(tenant_id="tenant-id")
-        elif constructor_name == "KnowledgeIndexNode":
-            assert kwargs["index_processor"] is index_processor
-            assert kwargs["summary_index_service"] is summary_index
-        elif constructor_name == "DatasourceNode":
-            assert kwargs["datasource_manager"] is node_factory.DatasourceManager
-        elif constructor_name == "KnowledgeRetrievalNode":
-            assert kwargs["rag_retrieval"] is sentinel.rag_retrieval
         elif constructor_name == "DocumentExtractorNode":
             assert kwargs["unstructured_api_config"] is sentinel.unstructured_api_config
             assert kwargs["http_client"] is sentinel.http_client

     @pytest.mark.parametrize(
         ("node_type", "constructor_name", "expected_extra_kwargs"),
         [
-            (NodeType.LLM, "LLMNode", {"http_client": sentinel.http_client}),
-            (NodeType.QUESTION_CLASSIFIER, "QuestionClassifierNode", {"http_client": sentinel.http_client}),
-            (NodeType.PARAMETER_EXTRACTOR, "ParameterExtractorNode", {}),
+            (
+                BuiltinNodeTypes.LLM,
+                "LLMNode",
+                {
+                    "http_client": sentinel.http_client,
+                    "template_renderer": sentinel.llm_template_renderer,
+                },
+            ),
+            (
+                BuiltinNodeTypes.QUESTION_CLASSIFIER,
+                "QuestionClassifierNode",
+                {
+                    "http_client": sentinel.http_client,
+                    "template_renderer": sentinel.llm_template_renderer,
+                },
+            ),
+            (BuiltinNodeTypes.PARAMETER_EXTRACTOR, "ParameterExtractorNode", {}),
         ],
     )
     def test_creates_model_backed_nodes(
@@ -410,9 +441,9 @@ class TestDifyNodeFactoryCreateNode:
         created_node = object()
         constructor = MagicMock(name=constructor_name, return_value=created_node)
         monkeypatch.setattr(
-            node_factory,
-            "NODE_TYPE_CLASSES_MAPPING",
-            {node_type: {node_factory.LATEST_VERSION: constructor}},
+            factory,
+            "_resolve_node_class",
+            MagicMock(return_value=constructor),
         )
         llm_init_kwargs = {
             "credentials_provider": sentinel.credentials_provider,
@@ -424,7 +455,7 @@ class TestDifyNodeFactoryCreateNode:
         build_llm_init_kwargs = MagicMock(return_value=llm_init_kwargs)
         factory._build_llm_compatible_node_init_kwargs = build_llm_init_kwargs

-        node_config = {"id": "node-id", "data": {"type": node_type.value}}
+        node_config = {"id": "node-id", "data": {"type": node_type}}
         result = factory.create_node(node_config)

         assert result is created_node
@@ -433,7 +464,7 @@ class TestDifyNodeFactoryCreateNode:
         assert helper_kwargs["node_class"] is constructor
         assert isinstance(helper_kwargs["node_data"], BaseNodeData)
         assert helper_kwargs["node_data"].type == node_type
-        assert helper_kwargs["include_http_client"] is (node_type != NodeType.PARAMETER_EXTRACTOR)
+        assert helper_kwargs["include_http_client"] is (node_type != BuiltinNodeTypes.PARAMETER_EXTRACTOR)

         constructor_kwargs = constructor.call_args.kwargs
         assert constructor_kwargs["id"] == "node-id"
diff --git a/api/tests/unit_tests/core/workflow/test_node_mapping_bootstrap.py b/api/tests/unit_tests/core/workflow/test_node_mapping_bootstrap.py
new file mode 100644
index 0000000000..8de45257ec
--- /dev/null
+++ b/api/tests/unit_tests/core/workflow/test_node_mapping_bootstrap.py
@@ -0,0 +1,43 @@
+import os
+import subprocess
+import sys
+import textwrap
+from pathlib import Path
+
+
+def test_moved_core_nodes_resolve_after_importing_production_entrypoints():
+    api_root = Path(__file__).resolve().parents[4]
+    script = textwrap.dedent(
+        """
+        from core.app.apps import workflow_app_runner
+        from core.workflow import workflow_entry
+        from core.workflow.nodes.knowledge_index import KNOWLEDGE_INDEX_NODE_TYPE
+        from core.workflow.node_factory import DifyNodeFactory, NODE_TYPE_CLASSES_MAPPING
+        from dify_graph.enums import BuiltinNodeTypes
+        from services import workflow_service
+        from services.rag_pipeline import rag_pipeline
+
+        _ = workflow_entry, workflow_app_runner, workflow_service, rag_pipeline
+
+        expected = (
+            BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL,
+            KNOWLEDGE_INDEX_NODE_TYPE,
+            BuiltinNodeTypes.DATASOURCE,
+        )
+
+        for node_type in expected:
+            assert node_type in NODE_TYPE_CLASSES_MAPPING, node_type
+            resolved = DifyNodeFactory._resolve_node_class(node_type=node_type, node_version="1")
+            assert resolved.__module__.startswith("core.workflow.nodes."), resolved.__module__
+        """
+    )
+    completed = subprocess.run(
+        [sys.executable, "-c", script],
+        cwd=api_root,
+        env=os.environ.copy(),
+        capture_output=True,
+        text=True,
+        check=False,
+    )
+
+    assert completed.returncode == 0, completed.stderr or completed.stdout
diff --git a/api/tests/unit_tests/core/workflow/test_workflow_entry_helpers.py b/api/tests/unit_tests/core/workflow/test_workflow_entry_helpers.py
index fe211fb76a..dc4c7a00c5 100644
--- a/api/tests/unit_tests/core/workflow/test_workflow_entry_helpers.py
+++ b/api/tests/unit_tests/core/workflow/test_workflow_entry_helpers.py
@@ -8,11 +8,12 @@
 from core.app.apps.exc import GenerateTaskStoppedError
 from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom
 from core.workflow import workflow_entry
 from dify_graph.entities.graph_config import NodeConfigDictAdapter
+from dify_graph.enums import NodeType
 from dify_graph.errors import WorkflowNodeRunFailedError
 from dify_graph.file.enums import FileTransferMethod, FileType
 from dify_graph.file.models import File
 from dify_graph.graph_events import GraphRunFailedEvent
-from dify_graph.nodes import NodeType
+from dify_graph.nodes import BuiltinNodeTypes
 from dify_graph.runtime import ChildGraphNotFoundError


@@ -240,7 +241,7 @@ class TestWorkflowEntrySingleStepRun:
             app_id="app-id",
             id="workflow-id",
             graph_dict={"nodes": [], "edges": []},
-            get_node_config_by_id=lambda _node_id: _build_typed_node_config(NodeType.START),
+            get_node_config_by_id=lambda _node_id: _build_typed_node_config(BuiltinNodeTypes.START),
         )

         node, generator = workflow_entry.WorkflowEntry.single_step_run(
@@ -302,7 +303,7 @@ class TestWorkflowEntrySingleStepRun:
             app_id="app-id",
             id="workflow-id",
             graph_dict={"nodes": [], "edges": []},
-            get_node_config_by_id=lambda _node_id: _build_typed_node_config(NodeType.DATASOURCE),
+            get_node_config_by_id=lambda _node_id: _build_typed_node_config(BuiltinNodeTypes.DATASOURCE),
         )

         node, generator = workflow_entry.WorkflowEntry.single_step_run(
@@ -352,7 +353,7 @@ class TestWorkflowEntrySingleStepRun:
             app_id="app-id",
             id="workflow-id",
             graph_dict={"nodes": [], "edges": []},
-            get_node_config_by_id=lambda _node_id: _build_typed_node_config(NodeType.START),
+            get_node_config_by_id=lambda _node_id: _build_typed_node_config(BuiltinNodeTypes.START),
         )

         with pytest.raises(WorkflowNodeRunFailedError):
@@ -369,7 +370,7 @@ class TestWorkflowEntryHelpers:
     def test_create_single_node_graph_builds_start_edge(self):
         graph = workflow_entry.WorkflowEntry._create_single_node_graph(
             node_id="target-node",
-            node_data={"type": NodeType.PARAMETER_EXTRACTOR},
+            node_data={"type": BuiltinNodeTypes.PARAMETER_EXTRACTOR},
             node_width=320,
             node_height=180,
         )
@@ -390,7 +391,7 @@ class TestWorkflowEntryHelpers:
     def test_run_free_node_rejects_unsupported_types(self):
         with pytest.raises(ValueError, match="Node type start not supported"):
             workflow_entry.WorkflowEntry.run_free_node(
-                node_data={"type": NodeType.START.value},
+                node_data={"type": BuiltinNodeTypes.START},
                 node_id="node-id",
                 tenant_id="tenant-id",
                 user_id="user-id",
@@ -400,13 +401,13 @@ class TestWorkflowEntryHelpers:
     def test_run_free_node_rejects_missing_node_class(self, monkeypatch):
         monkeypatch.setattr(
             workflow_entry,
-            "NODE_TYPE_CLASSES_MAPPING",
-            {NodeType.PARAMETER_EXTRACTOR: {"1": None}},
+            "resolve_workflow_node_class",
+            MagicMock(return_value=None),
         )

         with pytest.raises(ValueError, match="Node class not found for node type parameter-extractor"):
             workflow_entry.WorkflowEntry.run_free_node(
-                node_data={"type": NodeType.PARAMETER_EXTRACTOR.value},
+                node_data={"type": BuiltinNodeTypes.PARAMETER_EXTRACTOR},
                 node_id="node-id",
                 tenant_id="tenant-id",
                 user_id="user-id",
@@ -432,8 +433,8 @@ class TestWorkflowEntryHelpers:
         dify_node_factory.create_node.return_value = FakeNode()
         monkeypatch.setattr(
             workflow_entry,
-            "NODE_TYPE_CLASSES_MAPPING",
-            {NodeType.PARAMETER_EXTRACTOR: {"1": FakeNodeClass}},
+            "resolve_workflow_node_class",
+            MagicMock(return_value=FakeNodeClass),
         )

         with (
@@ -459,7 +460,7 @@ class TestWorkflowEntryHelpers:
             ),
         ):
             node, generator = workflow_entry.WorkflowEntry.run_free_node(
-                node_data={"type": NodeType.PARAMETER_EXTRACTOR.value, "title": "Node"},
+                node_data={"type": BuiltinNodeTypes.PARAMETER_EXTRACTOR, "title": "Node"},
                 node_id="node-id",
                 tenant_id="tenant-id",
                 user_id="user-id",
@@ -483,7 +484,7 @@ class TestWorkflowEntryHelpers:
         graph_init_params.assert_called_once_with(
             workflow_id="",
             graph_config=workflow_entry.WorkflowEntry._create_single_node_graph(
-                "node-id", {"type": NodeType.PARAMETER_EXTRACTOR.value, "title": "Node"}
+                "node-id", {"type": BuiltinNodeTypes.PARAMETER_EXTRACTOR, "title": "Node"}
             ),
             run_context={"_dify": "context"},
             call_depth=0,
@@ -518,8 +519,8 @@ class TestWorkflowEntryHelpers:
         dify_node_factory.create_node.return_value = FakeNode()
         monkeypatch.setattr(
             workflow_entry,
-            "NODE_TYPE_CLASSES_MAPPING",
-            {NodeType.PARAMETER_EXTRACTOR: {"1": FakeNodeClass}},
+            "resolve_workflow_node_class",
+            MagicMock(return_value=FakeNodeClass),
        )

         with (
@@ -538,7 +539,7 @@ class TestWorkflowEntryHelpers:
         ):
             with pytest.raises(WorkflowNodeRunFailedError, match="Node Title run failed: boom"):
                 workflow_entry.WorkflowEntry.run_free_node(
-                    node_data={"type": NodeType.PARAMETER_EXTRACTOR.value, "title": "Node"},
+                    node_data={"type": BuiltinNodeTypes.PARAMETER_EXTRACTOR, "title": "Node"},
                     node_id="node-id",
                     tenant_id="tenant-id",
                     user_id="user-id",
diff --git a/api/tests/unit_tests/enterprise/telemetry/__init__.py b/api/tests/unit_tests/enterprise/telemetry/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/tests/unit_tests/enterprise/telemetry/test_contracts.py b/api/tests/unit_tests/enterprise/telemetry/test_contracts.py
new file mode 100644
index 0000000000..7453525bfc
--- /dev/null
+++ b/api/tests/unit_tests/enterprise/telemetry/test_contracts.py
@@ -0,0 +1,230 @@
+"""Unit tests for telemetry gateway contracts."""
+
+from __future__ import annotations
+
+import pytest
+from pydantic import ValidationError
+
+from core.telemetry.gateway import CASE_ROUTING
+from enterprise.telemetry.contracts import CaseRoute, SignalType, TelemetryCase, TelemetryEnvelope
+
+
+class TestTelemetryCase:
+    """Tests for TelemetryCase enum."""
+
+    def test_all_cases_defined(self) -> None:
+        """Verify all 14 telemetry cases are defined."""
+        expected_cases = {
+            "WORKFLOW_RUN",
+            "NODE_EXECUTION",
+            "DRAFT_NODE_EXECUTION",
+            "MESSAGE_RUN",
+            "TOOL_EXECUTION",
+            "MODERATION_CHECK",
+            "SUGGESTED_QUESTION",
+            "DATASET_RETRIEVAL",
+            "GENERATE_NAME",
+            "PROMPT_GENERATION",
+            "APP_CREATED",
+            "APP_UPDATED",
+            "APP_DELETED",
+            "FEEDBACK_CREATED",
+        }
+        actual_cases = {case.name for case in TelemetryCase}
+        assert actual_cases == expected_cases
+
+    def test_case_values(self) -> None:
+        """Verify case enum values are correct."""
+        assert TelemetryCase.WORKFLOW_RUN.value == "workflow_run"
+        assert TelemetryCase.NODE_EXECUTION.value == "node_execution"
+        assert TelemetryCase.DRAFT_NODE_EXECUTION.value == "draft_node_execution"
+        assert TelemetryCase.MESSAGE_RUN.value == "message_run"
+        assert TelemetryCase.TOOL_EXECUTION.value == "tool_execution"
+        assert TelemetryCase.MODERATION_CHECK.value == "moderation_check"
+        assert TelemetryCase.SUGGESTED_QUESTION.value == "suggested_question"
+        assert TelemetryCase.DATASET_RETRIEVAL.value == "dataset_retrieval"
+        assert TelemetryCase.GENERATE_NAME.value == "generate_name"
+        assert TelemetryCase.PROMPT_GENERATION.value == "prompt_generation"
+        assert TelemetryCase.APP_CREATED.value == "app_created"
+        assert TelemetryCase.APP_UPDATED.value == "app_updated"
+        assert TelemetryCase.APP_DELETED.value == "app_deleted"
+        assert TelemetryCase.FEEDBACK_CREATED.value == "feedback_created"
+
+
+class TestCaseRoute:
+    """Tests for CaseRoute model."""
+
+    def test_valid_trace_route(self) -> None:
+        """Verify valid trace route creation."""
+        route = CaseRoute(signal_type=SignalType.TRACE, ce_eligible=True)
+        assert route.signal_type == SignalType.TRACE
+        assert route.ce_eligible is True
+
+    def test_valid_metric_log_route(self) -> None:
+        """Verify valid metric_log route creation."""
+        route = CaseRoute(signal_type=SignalType.METRIC_LOG, ce_eligible=False)
+        assert route.signal_type == SignalType.METRIC_LOG
+        assert route.ce_eligible is False
+
+    def test_invalid_signal_type(self) -> None:
+        """Verify invalid signal_type is rejected."""
+        with pytest.raises(ValidationError):
+            CaseRoute(signal_type="invalid", ce_eligible=True)
+
+
+class TestTelemetryEnvelope:
+    """Tests for TelemetryEnvelope model."""
+
+    def test_valid_envelope_minimal(self) -> None:
+        """Verify valid minimal envelope creation."""
+        envelope = TelemetryEnvelope(
+            case=TelemetryCase.WORKFLOW_RUN,
+            tenant_id="tenant-123",
+            event_id="event-456",
+            payload={"key": "value"},
+        )
+        assert envelope.case == TelemetryCase.WORKFLOW_RUN
+        assert envelope.tenant_id == "tenant-123"
+        assert envelope.event_id == "event-456"
+        assert envelope.payload == {"key": "value"}
+        assert envelope.metadata is None
+
+    def test_valid_envelope_full(self) -> None:
+        """Verify valid envelope with all fields."""
+        metadata = {"payload_ref": "telemetry/tenant-789/event-012.json"}
+        envelope = TelemetryEnvelope(
+            case=TelemetryCase.MESSAGE_RUN,
+            tenant_id="tenant-789",
+            event_id="event-012",
+            payload={"message": "hello"},
+            metadata=metadata,
+        )
+        assert envelope.case == TelemetryCase.MESSAGE_RUN
+        assert envelope.tenant_id == "tenant-789"
+        assert envelope.event_id == "event-012"
+        assert envelope.payload == {"message": "hello"}
+        assert envelope.metadata == metadata
+
+    def test_missing_required_case(self) -> None:
+        """Verify missing case field is rejected."""
+        with pytest.raises(ValidationError):
+            TelemetryEnvelope(
+                tenant_id="tenant-123",
+                event_id="event-456",
+                payload={"key": "value"},
+            )
+
+    def test_missing_required_tenant_id(self) -> None:
+        """Verify missing tenant_id field is rejected."""
+        with pytest.raises(ValidationError):
+            TelemetryEnvelope(
+                case=TelemetryCase.WORKFLOW_RUN,
+                event_id="event-456",
+                payload={"key": "value"},
+            )
+
+    def test_missing_required_event_id(self) -> None:
+        """Verify missing event_id field is rejected."""
+        with pytest.raises(ValidationError):
+            TelemetryEnvelope(
+                case=TelemetryCase.WORKFLOW_RUN,
+                tenant_id="tenant-123",
+                payload={"key": "value"},
+            )
+
+    def test_missing_required_payload(self) -> None:
+        """Verify missing payload field is rejected."""
+        with pytest.raises(ValidationError):
+            TelemetryEnvelope(
+                case=TelemetryCase.WORKFLOW_RUN,
+                tenant_id="tenant-123",
+                event_id="event-456",
+            )
+
+    def test_metadata_none(self) -> None:
+        """Verify metadata can be None."""
+        envelope = TelemetryEnvelope(
+            case=TelemetryCase.WORKFLOW_RUN,
+            tenant_id="tenant-123",
+            event_id="event-456",
+            payload={"key": "value"},
+            metadata=None,
+        )
+        assert envelope.metadata is None
+
+
+class TestCaseRouting:
+    """Tests for CASE_ROUTING table."""
+
+    def test_all_cases_routed(self) -> None:
+        """Verify all 14 cases have routing entries."""
+        assert len(CASE_ROUTING) == 14
+        for case in TelemetryCase:
+            assert case in CASE_ROUTING
+
+    def test_trace_ce_eligible_cases(self) -> None:
+        """Verify trace cases with CE eligibility."""
+        ce_eligible_trace_cases = {
+            TelemetryCase.WORKFLOW_RUN,
+            TelemetryCase.MESSAGE_RUN,
+        }
+        for case in ce_eligible_trace_cases:
+            route = CASE_ROUTING[case]
+            assert route.signal_type == SignalType.TRACE
+            assert route.ce_eligible is True
+
+    def test_trace_enterprise_only_cases(self) -> None:
+        """Verify trace cases that are enterprise-only."""
+        enterprise_only_trace_cases = {
+            TelemetryCase.NODE_EXECUTION,
+            TelemetryCase.DRAFT_NODE_EXECUTION,
+            TelemetryCase.PROMPT_GENERATION,
+        }
+        for case in enterprise_only_trace_cases:
+            route = CASE_ROUTING[case]
+            assert route.signal_type == SignalType.TRACE
+            assert route.ce_eligible is False
+
+    def test_metric_log_cases(self) -> None:
+        """Verify metric/log-only cases."""
+        metric_log_cases = {
+            TelemetryCase.APP_CREATED,
+            TelemetryCase.APP_UPDATED,
+            TelemetryCase.APP_DELETED,
+            TelemetryCase.FEEDBACK_CREATED,
+        }
+        for case in metric_log_cases:
+            route = CASE_ROUTING[case]
+            assert route.signal_type == SignalType.METRIC_LOG
+            assert route.ce_eligible is False
+
+    def test_routing_table_completeness(self) -> None:
+        """Verify routing table covers all cases with correct types."""
+        trace_cases = {
+            TelemetryCase.WORKFLOW_RUN,
+            TelemetryCase.MESSAGE_RUN,
+            TelemetryCase.NODE_EXECUTION,
+            TelemetryCase.DRAFT_NODE_EXECUTION,
+            TelemetryCase.PROMPT_GENERATION,
+            TelemetryCase.TOOL_EXECUTION,
+            TelemetryCase.MODERATION_CHECK,
+            TelemetryCase.SUGGESTED_QUESTION,
+            TelemetryCase.DATASET_RETRIEVAL,
+            TelemetryCase.GENERATE_NAME,
+        }
+        metric_log_cases = {
+            TelemetryCase.APP_CREATED,
+            TelemetryCase.APP_UPDATED,
+            TelemetryCase.APP_DELETED,
+            TelemetryCase.FEEDBACK_CREATED,
+        }
+
+        all_cases = trace_cases | metric_log_cases
+        assert len(all_cases) == 14
+        assert all_cases == set(TelemetryCase)
+
+        for case in trace_cases:
+            assert CASE_ROUTING[case].signal_type == SignalType.TRACE
+
+        for case in metric_log_cases:
+            assert CASE_ROUTING[case].signal_type == SignalType.METRIC_LOG
diff --git a/api/tests/unit_tests/enterprise/telemetry/test_event_handlers.py b/api/tests/unit_tests/enterprise/telemetry/test_event_handlers.py
new file mode 100644
index 0000000000..ad15c9f096
--- /dev/null
+++ b/api/tests/unit_tests/enterprise/telemetry/test_event_handlers.py
@@ -0,0 +1,121 @@
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from enterprise.telemetry import event_handlers
+from enterprise.telemetry.contracts import TelemetryCase
+
+
+@pytest.fixture
+def mock_gateway_emit():
+    with patch("core.telemetry.gateway.emit") as mock:
+        yield mock
+
+
+def test_handle_app_created_calls_task(mock_gateway_emit):
+    sender = MagicMock()
+    sender.id = "app-123"
+    sender.tenant_id = "tenant-456"
+    sender.mode = "chat"
+
+    event_handlers._handle_app_created(sender)
+
+    mock_gateway_emit.assert_called_once_with(
+        case=TelemetryCase.APP_CREATED,
+        context={"tenant_id": "tenant-456"},
+        payload={"app_id": "app-123", "mode": "chat"},
+    )
+
+
+def test_handle_app_created_no_exporter(mock_gateway_emit):
+    """Gateway handles exporter availability internally; handler always calls gateway."""
+    sender = MagicMock()
+    sender.id = "app-123"
+    sender.tenant_id = "tenant-456"
+
+    event_handlers._handle_app_created(sender)
+
+    mock_gateway_emit.assert_called_once()
+
+
+def test_handle_app_updated_calls_task(mock_gateway_emit):
+    sender = MagicMock()
+    sender.id = "app-123"
+    sender.tenant_id = "tenant-456"
+
+    event_handlers._handle_app_updated(sender)
+
+    mock_gateway_emit.assert_called_once_with(
+        case=TelemetryCase.APP_UPDATED,
+        context={"tenant_id": "tenant-456"},
+        payload={"app_id": "app-123"},
+    )
+
+
+def test_handle_app_deleted_calls_task(mock_gateway_emit):
+    sender = MagicMock()
+    sender.id = "app-123"
+    sender.tenant_id = "tenant-456"
+
+    event_handlers._handle_app_deleted(sender)
+
+    mock_gateway_emit.assert_called_once_with(
+        case=TelemetryCase.APP_DELETED,
+        context={"tenant_id": "tenant-456"},
+        payload={"app_id": "app-123"},
+    )
+
+
+def test_handle_feedback_created_calls_task(mock_gateway_emit):
+    sender = MagicMock()
+    sender.message_id = "msg-123"
+    sender.app_id = "app-456"
+    sender.conversation_id = "conv-789"
+    sender.from_end_user_id = "user-001"
+    sender.from_account_id = None
+    sender.rating = "like"
+    sender.from_source = "api"
+    sender.content = "Great response!"
+
+    event_handlers._handle_feedback_created(sender, tenant_id="tenant-456")
+
+    mock_gateway_emit.assert_called_once_with(
+        case=TelemetryCase.FEEDBACK_CREATED,
+        context={"tenant_id": "tenant-456"},
+        payload={
+            "message_id": "msg-123",
+            "app_id": "app-456",
+            "conversation_id": "conv-789",
+            "from_end_user_id": "user-001",
+            "from_account_id": None,
+            "rating": "like",
+            "from_source": "api",
+            "content": "Great response!",
+        },
+    )
+
+
+def test_handle_feedback_created_no_exporter(mock_gateway_emit):
+    """Gateway handles exporter availability internally; handler always calls gateway."""
+    sender = MagicMock()
+    sender.message_id = "msg-123"
+
+    event_handlers._handle_feedback_created(sender, tenant_id="tenant-456")
+
+    mock_gateway_emit.assert_called_once()
+
+
+def test_handlers_create_valid_envelopes(mock_gateway_emit):
+    """Verify handlers pass correct TelemetryCase and payload structure."""
+    sender = MagicMock()
+    sender.id = "app-123"
+    sender.tenant_id = "tenant-456"
+    sender.mode = "chat"
+
+    event_handlers._handle_app_created(sender)
+
+    call_kwargs = mock_gateway_emit.call_args[1]
+    assert call_kwargs["case"] == TelemetryCase.APP_CREATED
+    assert call_kwargs["context"]["tenant_id"] == "tenant-456"
+    assert call_kwargs["payload"]["app_id"] == "app-123"
+    assert call_kwargs["payload"]["mode"] == "chat"
diff --git a/api/tests/unit_tests/enterprise/telemetry/test_exporter.py b/api/tests/unit_tests/enterprise/telemetry/test_exporter.py
new file mode 100644
index 0000000000..2c367b4118
--- /dev/null
+++ b/api/tests/unit_tests/enterprise/telemetry/test_exporter.py
@@ -0,0 +1,263 @@
+"""Unit tests for EnterpriseExporter and _ExporterFactory."""
+
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch
+
+from configs.enterprise import EnterpriseTelemetryConfig
+from enterprise.telemetry.exporter import EnterpriseExporter
+
+
+def test_config_api_key_default_empty():
+    """Test that ENTERPRISE_OTLP_API_KEY defaults to empty string."""
+    config = EnterpriseTelemetryConfig()
+    assert config.ENTERPRISE_OTLP_API_KEY == ""
+
+
+@patch("enterprise.telemetry.exporter.GRPCSpanExporter")
+@patch("enterprise.telemetry.exporter.GRPCMetricExporter")
+def test_api_key_only_injects_bearer_header(mock_metric_exporter: MagicMock, mock_span_exporter: MagicMock) -> None:
+    """Test that API key alone injects Bearer authorization header."""
+    mock_config = SimpleNamespace(
+        ENTERPRISE_OTLP_ENDPOINT="https://collector.example.com",
+        ENTERPRISE_OTLP_HEADERS="",
+        ENTERPRISE_OTLP_PROTOCOL="grpc",
+        ENTERPRISE_SERVICE_NAME="dify",
+        ENTERPRISE_OTEL_SAMPLING_RATE=1.0,
+        ENTERPRISE_INCLUDE_CONTENT=True,
+        ENTERPRISE_OTLP_API_KEY="test-secret-key",
+    )
+
+    EnterpriseExporter(mock_config)
+
+    # Verify span exporter was called with Bearer header
+    assert mock_span_exporter.call_args is not None
+    headers = mock_span_exporter.call_args.kwargs.get("headers")
+    assert headers is not None
+    assert ("authorization", "Bearer test-secret-key") in headers
+
+
+@patch("enterprise.telemetry.exporter.GRPCSpanExporter")
+@patch("enterprise.telemetry.exporter.GRPCMetricExporter")
+def test_empty_api_key_no_auth_header(mock_metric_exporter: MagicMock, mock_span_exporter: MagicMock) -> None:
+    """Test that empty API key does not inject authorization header."""
+    mock_config = SimpleNamespace(
+        ENTERPRISE_OTLP_ENDPOINT="https://collector.example.com",
+        ENTERPRISE_OTLP_HEADERS="",
+        ENTERPRISE_OTLP_PROTOCOL="grpc",
+        ENTERPRISE_SERVICE_NAME="dify",
+        ENTERPRISE_OTEL_SAMPLING_RATE=1.0,
+        ENTERPRISE_INCLUDE_CONTENT=True,
+        ENTERPRISE_OTLP_API_KEY="",
+    )
+
+    EnterpriseExporter(mock_config)
+
+    # Verify span exporter was called without authorization header
+    assert mock_span_exporter.call_args is not None
+    headers = mock_span_exporter.call_args.kwargs.get("headers")
+    # Headers should be None or not contain authorization
+    if headers is not None:
+        assert not any(key == "authorization" for key, _ in headers)
+
+
+@patch("enterprise.telemetry.exporter.GRPCSpanExporter")
+@patch("enterprise.telemetry.exporter.GRPCMetricExporter")
+def test_api_key_and_custom_headers_merge(mock_metric_exporter: MagicMock, mock_span_exporter: MagicMock) -> None:
+    """Test that API key and custom headers are merged correctly."""
+    mock_config = SimpleNamespace(
+        ENTERPRISE_OTLP_ENDPOINT="https://collector.example.com",
+        ENTERPRISE_OTLP_HEADERS="x-custom=foo",
+        ENTERPRISE_OTLP_PROTOCOL="grpc",
+        ENTERPRISE_SERVICE_NAME="dify",
+        ENTERPRISE_OTEL_SAMPLING_RATE=1.0,
+        ENTERPRISE_INCLUDE_CONTENT=True,
+        ENTERPRISE_OTLP_API_KEY="test-key",
+    )
+
+    EnterpriseExporter(mock_config)
+
+    # Verify both headers are present
+    assert mock_span_exporter.call_args is not None
+    headers = mock_span_exporter.call_args.kwargs.get("headers")
+    assert headers is not None
+    assert ("authorization", "Bearer test-key") in headers
+    assert ("x-custom", "foo") in headers
+
+
+@patch("enterprise.telemetry.exporter.logger")
+@patch("enterprise.telemetry.exporter.GRPCSpanExporter")
+@patch("enterprise.telemetry.exporter.GRPCMetricExporter")
+def test_api_key_overrides_conflicting_header(
+    mock_metric_exporter: MagicMock, mock_span_exporter: MagicMock, mock_logger: MagicMock
+) -> None:
+    """Test that API key overrides conflicting authorization header and logs warning."""
+    mock_config = SimpleNamespace(
+        ENTERPRISE_OTLP_ENDPOINT="https://collector.example.com",
+        ENTERPRISE_OTLP_HEADERS="authorization=Basic old",
+        ENTERPRISE_OTLP_PROTOCOL="grpc",
+        ENTERPRISE_SERVICE_NAME="dify",
+        ENTERPRISE_OTEL_SAMPLING_RATE=1.0,
+        ENTERPRISE_INCLUDE_CONTENT=True,
+        ENTERPRISE_OTLP_API_KEY="test-key",
+    )
+
+    EnterpriseExporter(mock_config)
+
+    # Verify Bearer header takes precedence
+    assert mock_span_exporter.call_args is not None
+    headers = mock_span_exporter.call_args.kwargs.get("headers")
+    assert headers is not None
+    assert ("authorization", "Bearer test-key") in headers
+    # Verify old authorization header is not present
+    assert ("authorization", "Basic old") not in headers
+
+    # Verify warning was logged
+    mock_logger.warning.assert_called_once()
+    assert mock_logger.warning.call_args is not None
+    warning_message = mock_logger.warning.call_args[0][0]
+    assert "ENTERPRISE_OTLP_API_KEY is set" in warning_message
+    assert "authorization" in warning_message
+
+
+@patch("enterprise.telemetry.exporter.GRPCSpanExporter")
+@patch("enterprise.telemetry.exporter.GRPCMetricExporter")
+def test_https_endpoint_uses_secure_grpc(mock_metric_exporter: MagicMock, mock_span_exporter: MagicMock) -> None:
+    """Test that https:// endpoint enables TLS (insecure=False) for gRPC."""
+    mock_config = SimpleNamespace(
+        ENTERPRISE_OTLP_ENDPOINT="https://collector.example.com",
+        ENTERPRISE_OTLP_HEADERS="",
+        ENTERPRISE_OTLP_PROTOCOL="grpc",
+        ENTERPRISE_SERVICE_NAME="dify",
+        ENTERPRISE_OTEL_SAMPLING_RATE=1.0,
+        ENTERPRISE_INCLUDE_CONTENT=True,
+        ENTERPRISE_OTLP_API_KEY="test-key",
+    )
+
+    EnterpriseExporter(mock_config)
+
+    # Verify insecure=False for both exporters (https:// scheme)
+    assert mock_span_exporter.call_args is not None
+    assert mock_span_exporter.call_args.kwargs["insecure"] is False
+
+    assert mock_metric_exporter.call_args is not None
+    assert mock_metric_exporter.call_args.kwargs["insecure"] is False
+
+
+@patch("enterprise.telemetry.exporter.GRPCSpanExporter")
+@patch("enterprise.telemetry.exporter.GRPCMetricExporter")
+def test_http_endpoint_uses_insecure_grpc(mock_metric_exporter: MagicMock, mock_span_exporter: MagicMock) -> None:
+    """Test that http:// endpoint uses insecure gRPC (insecure=True)."""
+    mock_config = SimpleNamespace(
+        ENTERPRISE_OTLP_ENDPOINT="http://collector.example.com",
+        ENTERPRISE_OTLP_HEADERS="",
+        ENTERPRISE_OTLP_PROTOCOL="grpc",
+        ENTERPRISE_SERVICE_NAME="dify",
+        ENTERPRISE_OTEL_SAMPLING_RATE=1.0,
+        ENTERPRISE_INCLUDE_CONTENT=True,
+        ENTERPRISE_OTLP_API_KEY="",
+    )
+
+    EnterpriseExporter(mock_config)
+
+    # Verify insecure=True for both exporters (http:// scheme)
+    assert mock_span_exporter.call_args is not None
+    assert mock_span_exporter.call_args.kwargs["insecure"] is True
+
+    assert mock_metric_exporter.call_args is not None
+    assert mock_metric_exporter.call_args.kwargs["insecure"] is True
+
+
+@patch("enterprise.telemetry.exporter.HTTPSpanExporter")
+@patch("enterprise.telemetry.exporter.HTTPMetricExporter")
+def test_insecure_not_passed_to_http_exporters(mock_metric_exporter: MagicMock, mock_span_exporter: MagicMock) -> None:
+    """Test that insecure parameter is not passed to HTTP exporters."""
+    mock_config = SimpleNamespace(
+        ENTERPRISE_OTLP_ENDPOINT="http://collector.example.com",
+        ENTERPRISE_OTLP_HEADERS="",
+        ENTERPRISE_OTLP_PROTOCOL="http",
+        ENTERPRISE_SERVICE_NAME="dify",
+        ENTERPRISE_OTEL_SAMPLING_RATE=1.0,
+        ENTERPRISE_INCLUDE_CONTENT=True,
+        ENTERPRISE_OTLP_API_KEY="test-key",
+    )
+
+    EnterpriseExporter(mock_config)
+
+    # Verify insecure kwarg is NOT in HTTP exporter calls
+    assert mock_span_exporter.call_args is not None
+    assert "insecure" not in mock_span_exporter.call_args.kwargs
+
+    assert mock_metric_exporter.call_args is not None
+    assert "insecure" not in mock_metric_exporter.call_args.kwargs
+
+
+@patch("enterprise.telemetry.exporter.GRPCSpanExporter")
+@patch("enterprise.telemetry.exporter.GRPCMetricExporter")
+def test_api_key_with_special_chars_preserved(mock_metric_exporter: MagicMock, mock_span_exporter: MagicMock) -> None:
+    """Test that API key with special characters is preserved without mangling."""
+    special_key = "abc+def/ghi=jkl=="
+    mock_config = SimpleNamespace(
+        ENTERPRISE_OTLP_ENDPOINT="https://collector.example.com",
+        ENTERPRISE_OTLP_HEADERS="",
+        ENTERPRISE_OTLP_PROTOCOL="grpc",
+        ENTERPRISE_SERVICE_NAME="dify",
+        ENTERPRISE_OTEL_SAMPLING_RATE=1.0,
+        ENTERPRISE_INCLUDE_CONTENT=True,
+        ENTERPRISE_OTLP_API_KEY=special_key,
+    )
+
+    EnterpriseExporter(mock_config)
+
+    # Verify special characters are preserved in Bearer header
+    assert mock_span_exporter.call_args is not None
+    headers = mock_span_exporter.call_args.kwargs.get("headers")
+    assert headers is not None
+    assert ("authorization", f"Bearer {special_key}") in headers
+
+
+@patch("enterprise.telemetry.exporter.GRPCSpanExporter")
+@patch("enterprise.telemetry.exporter.GRPCMetricExporter")
+def test_no_scheme_localhost_uses_insecure(mock_metric_exporter: MagicMock, mock_span_exporter: MagicMock) -> None:
+    """Test that endpoint without scheme defaults to insecure for localhost."""
+    mock_config = SimpleNamespace(
+        ENTERPRISE_OTLP_ENDPOINT="localhost:4317",
+        ENTERPRISE_OTLP_HEADERS="",
+        ENTERPRISE_OTLP_PROTOCOL="grpc",
+        ENTERPRISE_SERVICE_NAME="dify",
+        ENTERPRISE_OTEL_SAMPLING_RATE=1.0,
+        ENTERPRISE_INCLUDE_CONTENT=True,
+        ENTERPRISE_OTLP_API_KEY="",
+    )
+
+    EnterpriseExporter(mock_config)
+
+    # Verify insecure=True for localhost without scheme
+    assert mock_span_exporter.call_args is not None
+    assert mock_span_exporter.call_args.kwargs["insecure"] is True
+
+    assert mock_metric_exporter.call_args is not None
+    assert mock_metric_exporter.call_args.kwargs["insecure"] is True
+
+
+@patch("enterprise.telemetry.exporter.GRPCSpanExporter")
+@patch("enterprise.telemetry.exporter.GRPCMetricExporter")
+def test_no_scheme_production_uses_insecure(mock_metric_exporter: MagicMock, mock_span_exporter: MagicMock) -> None:
+    """Test that endpoint without scheme defaults to insecure (not https://)."""
+    mock_config = SimpleNamespace(
+        ENTERPRISE_OTLP_ENDPOINT="collector.example.com:4317",
+        ENTERPRISE_OTLP_HEADERS="",
+        ENTERPRISE_OTLP_PROTOCOL="grpc",
+        ENTERPRISE_SERVICE_NAME="dify",
+        ENTERPRISE_OTEL_SAMPLING_RATE=1.0,
+        ENTERPRISE_INCLUDE_CONTENT=True,
+        ENTERPRISE_OTLP_API_KEY="",
+    )
+
+    EnterpriseExporter(mock_config)
+
+    # Verify insecure=True for any endpoint without https:// scheme
+    assert mock_span_exporter.call_args is not None
+    assert mock_span_exporter.call_args.kwargs["insecure"] is True
+
+    assert mock_metric_exporter.call_args is not None
+    assert mock_metric_exporter.call_args.kwargs["insecure"] is True
diff --git a/api/tests/unit_tests/enterprise/telemetry/test_gateway.py b/api/tests/unit_tests/enterprise/telemetry/test_gateway.py
new file mode 100644
index 0000000000..7e6ae64693
--- /dev/null
+++ b/api/tests/unit_tests/enterprise/telemetry/test_gateway.py
@@ -0,0 +1,272 @@
+from __future__ import annotations
+
+import sys
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from core.ops.entities.trace_entity import TraceTaskName
+from core.telemetry.gateway import (
+    CASE_ROUTING,
+    CASE_TO_TRACE_TASK,
+    PAYLOAD_SIZE_THRESHOLD_BYTES,
+    emit,
+)
+from enterprise.telemetry.contracts import SignalType, TelemetryCase, TelemetryEnvelope
+
+
+class TestCaseRoutingTable:
+    def test_all_cases_have_routing(self) -> None:
+        for case in TelemetryCase:
+            assert case in CASE_ROUTING, f"Missing routing for {case}"
+
+    def test_trace_cases(self) -> None:
+        trace_cases = [
+            TelemetryCase.WORKFLOW_RUN,
+            TelemetryCase.MESSAGE_RUN,
+            TelemetryCase.NODE_EXECUTION,
+            TelemetryCase.DRAFT_NODE_EXECUTION,
+            TelemetryCase.PROMPT_GENERATION,
+        ]
+        for case in trace_cases:
+            assert CASE_ROUTING[case].signal_type is SignalType.TRACE, f"{case} should be trace"
+
+    def test_metric_log_cases(self) -> None:
+        metric_log_cases = [
+            TelemetryCase.APP_CREATED,
+            TelemetryCase.APP_UPDATED,
+            TelemetryCase.APP_DELETED,
+            TelemetryCase.FEEDBACK_CREATED,
+        ]
+        for case in metric_log_cases:
+            assert CASE_ROUTING[case].signal_type is SignalType.METRIC_LOG, f"{case} should be metric_log"
+
+    def test_ce_eligible_cases(self) -> None:
+        ce_eligible_cases = [
+            TelemetryCase.WORKFLOW_RUN,
+            TelemetryCase.MESSAGE_RUN,
+            TelemetryCase.TOOL_EXECUTION,
+            TelemetryCase.MODERATION_CHECK,
+            TelemetryCase.SUGGESTED_QUESTION,
+            TelemetryCase.DATASET_RETRIEVAL,
+            TelemetryCase.GENERATE_NAME,
+        ]
+        for case in ce_eligible_cases:
+            assert CASE_ROUTING[case].ce_eligible is True, f"{case} should be CE eligible"
+
+    def test_enterprise_only_cases(self) -> None:
+        enterprise_only_cases = [
+            TelemetryCase.NODE_EXECUTION,
+            TelemetryCase.DRAFT_NODE_EXECUTION,
+            TelemetryCase.PROMPT_GENERATION,
+        ]
+        for case in enterprise_only_cases:
+            assert CASE_ROUTING[case].ce_eligible is False, f"{case} should be enterprise-only"
+
+    def test_trace_cases_have_task_name_mapping(self) -> None:
+        trace_cases = [c for c in TelemetryCase if CASE_ROUTING[c].signal_type is SignalType.TRACE]
+        for case in trace_cases:
+            assert case in CASE_TO_TRACE_TASK, f"Missing TraceTaskName mapping for {case}"
+
+
+@pytest.fixture
+def mock_ops_trace_manager():
+    mock_module = MagicMock()
+    mock_trace_task_class = MagicMock()
+    mock_trace_task_class.return_value = MagicMock()
+    mock_module.TraceTask = mock_trace_task_class
+    mock_module.TraceQueueManager = MagicMock()
+
+    mock_trace_entity = MagicMock()
+    mock_trace_task_name = MagicMock()
+    mock_trace_task_name.return_value = "workflow"
+    mock_trace_entity.TraceTaskName = mock_trace_task_name
+
+    with (
+        patch.dict(sys.modules, {"core.ops.ops_trace_manager": mock_module}),
+        patch.dict(sys.modules, {"core.ops.entities.trace_entity": mock_trace_entity}),
+    ):
+        yield mock_module, mock_trace_entity
+
+
+class TestGatewayTraceRouting:
+    @pytest.fixture
+    def mock_trace_manager(self) -> MagicMock:
+        return MagicMock()
+
+    @patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=True)
+    def test_trace_case_routes_to_trace_manager(
+        self,
+        mock_ee_enabled: MagicMock,
+        mock_trace_manager: MagicMock,
+        mock_ops_trace_manager: tuple[MagicMock, MagicMock],
+    ) -> None:
+        context = {"app_id": "app-123", "user_id": "user-456", "tenant_id": "tenant-789"}
+        payload = {"workflow_run_id": "run-abc"}
+
+        emit(TelemetryCase.WORKFLOW_RUN, context, payload, mock_trace_manager)
+
+        mock_trace_manager.add_trace_task.assert_called_once()
+
+    @patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=False)
+    def test_ce_eligible_trace_enqueued_when_ee_disabled(
+        self,
+        mock_ee_enabled: MagicMock,
+        mock_trace_manager: MagicMock,
+        mock_ops_trace_manager: tuple[MagicMock, MagicMock],
+    ) -> None:
+        context = {"app_id": "app-123", "user_id": "user-456"}
+        payload = {"workflow_run_id": "run-abc"}
+
+        emit(TelemetryCase.WORKFLOW_RUN, context, payload, mock_trace_manager)
+
+        mock_trace_manager.add_trace_task.assert_called_once()
+
+    @patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=False)
+    def test_enterprise_only_trace_dropped_when_ee_disabled(
+        self,
+        mock_ee_enabled: MagicMock,
+        mock_trace_manager: MagicMock,
+        mock_ops_trace_manager: tuple[MagicMock, MagicMock],
+    ) -> None:
+        context = {"app_id": "app-123", "user_id": "user-456"}
+        payload = {"node_id": "node-abc"}
+
+        emit(TelemetryCase.NODE_EXECUTION, context, payload, mock_trace_manager)
+
+        mock_trace_manager.add_trace_task.assert_not_called()
+
+    @patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=True)
+    def test_enterprise_only_trace_enqueued_when_ee_enabled(
+        self,
+        mock_ee_enabled: MagicMock,
+        mock_trace_manager: MagicMock,
+        mock_ops_trace_manager: tuple[MagicMock, MagicMock],
+    ) -> None:
+        context = {"app_id": "app-123", "user_id": "user-456"}
+        payload = {"node_id": "node-abc"}
+
+        emit(TelemetryCase.NODE_EXECUTION, context, payload, mock_trace_manager)
+
+        mock_trace_manager.add_trace_task.assert_called_once()
+
+
+class TestGatewayMetricLogRouting:
+    @patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=True)
+    @patch("tasks.enterprise_telemetry_task.process_enterprise_telemetry.delay")
+    def test_metric_case_routes_to_celery_task(
+        self,
+        mock_delay: MagicMock,
+        mock_ee_enabled: MagicMock,
+    ) -> None:
+        context = {"tenant_id": "tenant-123"}
+        payload = {"app_id": "app-abc", "name": "My App"}
+
+        emit(TelemetryCase.APP_CREATED, context, payload)
+
+        mock_delay.assert_called_once()
+        envelope_json = mock_delay.call_args[0][0]
+        envelope = TelemetryEnvelope.model_validate_json(envelope_json)
+        assert envelope.case == TelemetryCase.APP_CREATED
+        assert envelope.tenant_id == "tenant-123"
+        assert envelope.payload["app_id"] == "app-abc"
+
+    @patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=True)
+    @patch("tasks.enterprise_telemetry_task.process_enterprise_telemetry.delay")
+    def test_envelope_has_unique_event_id(
+        self,
+        mock_delay: MagicMock,
+        mock_ee_enabled: MagicMock,
+    ) -> None:
+        context = {"tenant_id": "tenant-123"}
+        payload = {"app_id": "app-abc"}
+
+        emit(TelemetryCase.APP_CREATED, context, payload)
+        emit(TelemetryCase.APP_CREATED, context, payload)
+
+        assert mock_delay.call_count == 2
+        envelope1 = TelemetryEnvelope.model_validate_json(mock_delay.call_args_list[0][0][0])
+        envelope2 = TelemetryEnvelope.model_validate_json(mock_delay.call_args_list[1][0][0])
+        assert envelope1.event_id != envelope2.event_id
+
+
+class TestGatewayPayloadSizing:
+    @patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=True)
+    @patch("tasks.enterprise_telemetry_task.process_enterprise_telemetry.delay")
+    def test_small_payload_inlined(
+        self,
+        mock_delay: MagicMock,
+        mock_ee_enabled: MagicMock,
+    ) -> None:
+        context = {"tenant_id": "tenant-123"}
+        payload = {"key": "small_value"}
+
+        emit(TelemetryCase.APP_CREATED, context, payload)
+
+        envelope_json = mock_delay.call_args[0][0]
+        envelope = TelemetryEnvelope.model_validate_json(envelope_json)
+        assert envelope.payload == payload
+        assert envelope.metadata is None
+
+    @patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=True)
+    @patch("core.telemetry.gateway.storage")
+    @patch("tasks.enterprise_telemetry_task.process_enterprise_telemetry.delay")
+    def test_large_payload_stored(
+        self,
+        mock_delay: MagicMock,
+        mock_storage: MagicMock,
+        mock_ee_enabled: MagicMock,
+    ) -> None:
+        context = {"tenant_id": "tenant-123"}
+        large_value = "x" * (PAYLOAD_SIZE_THRESHOLD_BYTES + 1000)
+        payload = {"key": large_value}
+
+        emit(TelemetryCase.APP_CREATED, context, payload)
+
+        mock_storage.save.assert_called_once()
+        storage_key = mock_storage.save.call_args[0][0]
+        assert storage_key.startswith("telemetry/tenant-123/")
+
+        envelope_json = mock_delay.call_args[0][0]
+        envelope = TelemetryEnvelope.model_validate_json(envelope_json)
+        assert envelope.payload == {}
+        assert envelope.metadata is not None
+        assert envelope.metadata["payload_ref"] == storage_key
+
+    @patch("core.telemetry.gateway.is_enterprise_telemetry_enabled", return_value=True)
+    @patch("core.telemetry.gateway.storage")
+    @patch("tasks.enterprise_telemetry_task.process_enterprise_telemetry.delay")
+    def test_large_payload_fallback_on_storage_error(
+        self,
+        mock_delay: MagicMock,
+        mock_storage: MagicMock,
+        mock_ee_enabled: MagicMock,
+    ) -> None:
+        mock_storage.save.side_effect = Exception("Storage failure")
+        context = {"tenant_id": "tenant-123"}
+        large_value = "x" * (PAYLOAD_SIZE_THRESHOLD_BYTES + 1000)
+        payload = {"key": large_value}
+
+        emit(TelemetryCase.APP_CREATED, context, payload)
+
+        envelope_json = mock_delay.call_args[0][0]
+        envelope = TelemetryEnvelope.model_validate_json(envelope_json)
+        assert envelope.payload == payload
+        assert envelope.metadata is None
+
+
+class TestTraceTaskNameMapping:
+    def test_workflow_run_mapping(self) -> None:
+        assert CASE_TO_TRACE_TASK[TelemetryCase.WORKFLOW_RUN] is TraceTaskName.WORKFLOW_TRACE
+
+    def test_message_run_mapping(self) -> None:
+        assert CASE_TO_TRACE_TASK[TelemetryCase.MESSAGE_RUN] is TraceTaskName.MESSAGE_TRACE
+
+    def test_node_execution_mapping(self) -> None:
+        assert CASE_TO_TRACE_TASK[TelemetryCase.NODE_EXECUTION] is TraceTaskName.NODE_EXECUTION_TRACE
+
+    def test_draft_node_execution_mapping(self) -> None:
+        assert CASE_TO_TRACE_TASK[TelemetryCase.DRAFT_NODE_EXECUTION] is TraceTaskName.DRAFT_NODE_EXECUTION_TRACE
+
+    def test_prompt_generation_mapping(self) -> None:
+        assert CASE_TO_TRACE_TASK[TelemetryCase.PROMPT_GENERATION] is TraceTaskName.PROMPT_GENERATION_TRACE
diff --git a/api/tests/unit_tests/enterprise/telemetry/test_metric_handler.py b/api/tests/unit_tests/enterprise/telemetry/test_metric_handler.py
new file mode 100644
index 0000000000..19822fd69f
--- /dev/null
+++ b/api/tests/unit_tests/enterprise/telemetry/test_metric_handler.py
@@ -0,0 +1,507 @@
+"""Unit tests for EnterpriseMetricHandler."""
+
+import json
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from enterprise.telemetry.contracts import TelemetryCase, TelemetryEnvelope
+from enterprise.telemetry.metric_handler import EnterpriseMetricHandler
+
+
+@pytest.fixture
+def mock_redis():
+    with patch("enterprise.telemetry.metric_handler.redis_client") as mock:
+        yield mock
+
+
+@pytest.fixture
+def sample_envelope():
+    return TelemetryEnvelope(
+        case=TelemetryCase.APP_CREATED,
+        tenant_id="test-tenant",
+        event_id="test-event-123",
+        payload={"app_id": "app-123", "name": "Test App"},
+    )
+
+
+def test_dispatch_app_created(sample_envelope, mock_redis):
+    mock_redis.set.return_value = True
+
+    handler = EnterpriseMetricHandler()
+    with patch.object(handler, "_on_app_created") as mock_handler:
+        handler.handle(sample_envelope)
+        mock_handler.assert_called_once_with(sample_envelope)
+
+
+def test_dispatch_app_updated(mock_redis):
+    mock_redis.set.return_value = True
+    envelope = TelemetryEnvelope(
+        case=TelemetryCase.APP_UPDATED,
+        tenant_id="test-tenant",
+        event_id="test-event-456",
+        payload={},
+    )
+
+    handler = EnterpriseMetricHandler()
+    with patch.object(handler, "_on_app_updated") as mock_handler:
+        handler.handle(envelope)
+        mock_handler.assert_called_once_with(envelope)
+
+
+def test_dispatch_app_deleted(mock_redis):
+    mock_redis.set.return_value = True
+    envelope = TelemetryEnvelope(
+        case=TelemetryCase.APP_DELETED,
+        tenant_id="test-tenant",
+        event_id="test-event-789",
+        payload={},
+    )
+
+    handler = EnterpriseMetricHandler()
+    with patch.object(handler, "_on_app_deleted") as mock_handler:
+        handler.handle(envelope)
+        mock_handler.assert_called_once_with(envelope)
+
+
+def test_dispatch_feedback_created(mock_redis):
+    mock_redis.set.return_value = True
+    envelope = TelemetryEnvelope(
+        case=TelemetryCase.FEEDBACK_CREATED,
+        tenant_id="test-tenant",
+        event_id="test-event-abc",
+        payload={},
+    )
+
+    handler = EnterpriseMetricHandler()
+    with patch.object(handler, "_on_feedback_created") as mock_handler:
+        handler.handle(envelope)
+        mock_handler.assert_called_once_with(envelope)
+
+
+def test_dispatch_message_run(mock_redis):
+    mock_redis.set.return_value = True
+    envelope = TelemetryEnvelope(
+        case=TelemetryCase.MESSAGE_RUN,
+        tenant_id="test-tenant",
+        event_id="test-event-msg",
+        payload={},
+    )
+
+    handler = EnterpriseMetricHandler()
+    with patch.object(handler, "_on_message_run") as mock_handler:
+        handler.handle(envelope)
+        mock_handler.assert_called_once_with(envelope)
+
+
+def test_dispatch_tool_execution(mock_redis):
+    mock_redis.set.return_value = True
+    envelope = TelemetryEnvelope(
+        case=TelemetryCase.TOOL_EXECUTION,
+        tenant_id="test-tenant",
+        event_id="test-event-tool",
+        payload={},
+    )
+
+    handler = EnterpriseMetricHandler()
+    with patch.object(handler, "_on_tool_execution") as mock_handler:
+        handler.handle(envelope)
+        mock_handler.assert_called_once_with(envelope)
+
+
+def test_dispatch_moderation_check(mock_redis):
+    mock_redis.set.return_value = True
+    envelope = TelemetryEnvelope(
+        case=TelemetryCase.MODERATION_CHECK,
+        tenant_id="test-tenant",
+        event_id="test-event-mod",
+        payload={},
+    )
+
+    handler = EnterpriseMetricHandler()
+    with patch.object(handler, "_on_moderation_check") as mock_handler:
+        handler.handle(envelope)
+        mock_handler.assert_called_once_with(envelope)
+
+
+def test_dispatch_suggested_question(mock_redis):
+    mock_redis.set.return_value = True
+    envelope = TelemetryEnvelope(
+        case=TelemetryCase.SUGGESTED_QUESTION,
+        tenant_id="test-tenant",
+        event_id="test-event-sq",
+        payload={},
+    )
+
+    handler = EnterpriseMetricHandler()
+    with patch.object(handler, "_on_suggested_question") as mock_handler:
+        handler.handle(envelope)
+        mock_handler.assert_called_once_with(envelope)
+
+
+def test_dispatch_dataset_retrieval(mock_redis):
+    mock_redis.set.return_value = True
+    envelope = TelemetryEnvelope(
+        case=TelemetryCase.DATASET_RETRIEVAL,
+        tenant_id="test-tenant",
+        event_id="test-event-ds",
+        payload={},
+    )
+
+    handler = EnterpriseMetricHandler()
+    with patch.object(handler, "_on_dataset_retrieval") as mock_handler:
+        handler.handle(envelope)
+        mock_handler.assert_called_once_with(envelope)
+
+
+def test_dispatch_generate_name(mock_redis):
+    mock_redis.set.return_value = True
+    envelope = TelemetryEnvelope(
+        case=TelemetryCase.GENERATE_NAME,
+        tenant_id="test-tenant",
+        event_id="test-event-gn",
+        payload={},
+    )
+
+    handler = EnterpriseMetricHandler()
+    with patch.object(handler, "_on_generate_name") as mock_handler:
+        handler.handle(envelope)
+        mock_handler.assert_called_once_with(envelope)
+
+
+def test_dispatch_prompt_generation(mock_redis):
+    mock_redis.set.return_value = True
+    envelope = TelemetryEnvelope(
+        case=TelemetryCase.PROMPT_GENERATION,
+        tenant_id="test-tenant",
+        event_id="test-event-pg",
+        payload={},
+    )
+
+    handler = EnterpriseMetricHandler()
+    with patch.object(handler, "_on_prompt_generation") as mock_handler:
+        handler.handle(envelope)
+        mock_handler.assert_called_once_with(envelope)
+
+
+def test_all_known_cases_have_handlers(mock_redis):
+    mock_redis.set.return_value = True
+    handler = EnterpriseMetricHandler()
+
+    for case in TelemetryCase:
+        envelope = TelemetryEnvelope(
+            case=case,
+            tenant_id="test-tenant",
+            event_id=f"test-{case.value}",
+            payload={},
+        )
+        handler.handle(envelope)
+
+
+def test_idempotency_duplicate(sample_envelope, mock_redis):
+    mock_redis.set.return_value = None
+
+    handler = EnterpriseMetricHandler()
+    with patch.object(handler, "_on_app_created") as mock_handler:
+        handler.handle(sample_envelope)
+        mock_handler.assert_not_called()
+
+
+def test_idempotency_first_seen(sample_envelope, mock_redis):
+    mock_redis.set.return_value = True
+
+    handler = EnterpriseMetricHandler()
+    is_dup = handler._is_duplicate(sample_envelope)
+
+    assert is_dup is False
+    mock_redis.set.assert_called_once_with(
+        "telemetry:dedup:test-tenant:test-event-123",
+        b"1",
+        nx=True,
+        ex=3600,
+    )
+
+
+def test_idempotency_redis_failure_fails_open(sample_envelope, mock_redis, caplog):
mock_redis.set.side_effect = Exception("Redis unavailable") + + handler = EnterpriseMetricHandler() + is_dup = handler._is_duplicate(sample_envelope) + + assert is_dup is False + assert "Redis unavailable for deduplication check" in caplog.text + + +def test_rehydration_uses_payload(sample_envelope): + handler = EnterpriseMetricHandler() + payload = handler._rehydrate(sample_envelope) + + assert payload == {"app_id": "app-123", "name": "Test App"} + + +def test_rehydration_from_storage(): + """Verify _rehydrate loads payload from object storage via payload_ref.""" + stored_data = {"app_id": "app-stored", "mode": "workflow"} + envelope = TelemetryEnvelope( + case=TelemetryCase.APP_CREATED, + tenant_id="test-tenant", + event_id="test-event-fb", + payload={}, + metadata={"payload_ref": "telemetry/test-tenant/test-event-fb.json"}, + ) + + handler = EnterpriseMetricHandler() + with patch("enterprise.telemetry.metric_handler.storage") as mock_storage: + mock_storage.load.return_value = json.dumps(stored_data).encode("utf-8") + payload = handler._rehydrate(envelope) + + assert payload == stored_data + mock_storage.load.assert_called_once_with("telemetry/test-tenant/test-event-fb.json") + + +def test_rehydration_storage_failure_emits_degraded_event(): + """Verify _rehydrate emits degraded event when storage load fails.""" + envelope = TelemetryEnvelope( + case=TelemetryCase.APP_CREATED, + tenant_id="test-tenant", + event_id="test-event-fail", + payload={}, + metadata={"payload_ref": "telemetry/test-tenant/test-event-fail.json"}, + ) + + handler = EnterpriseMetricHandler() + with ( + patch("enterprise.telemetry.metric_handler.storage") as mock_storage, + patch("enterprise.telemetry.telemetry_log.emit_metric_only_event") as mock_emit, + ): + mock_storage.load.side_effect = Exception("Storage unavailable") + payload = handler._rehydrate(envelope) + + from enterprise.telemetry.entities import EnterpriseTelemetryEvent + + assert payload == {} + mock_emit.assert_called_once() + call_args = mock_emit.call_args + assert call_args[1]["event_name"] == EnterpriseTelemetryEvent.REHYDRATION_FAILED + assert call_args[1]["attributes"]["rehydration_failed"] is True + + +def test_rehydration_emits_degraded_event_on_empty_payload(): + """Verify _rehydrate emits degraded event when payload is empty and no ref exists.""" + envelope = TelemetryEnvelope( + case=TelemetryCase.APP_CREATED, + tenant_id="test-tenant", + event_id="test-event-empty", + payload={}, + ) + + handler = EnterpriseMetricHandler() + with patch("enterprise.telemetry.telemetry_log.emit_metric_only_event") as mock_emit: + payload = handler._rehydrate(envelope) + + from enterprise.telemetry.entities import EnterpriseTelemetryEvent + + assert payload == {} + mock_emit.assert_called_once() + call_args = mock_emit.call_args + assert call_args[1]["event_name"] == EnterpriseTelemetryEvent.REHYDRATION_FAILED + assert call_args[1]["attributes"]["rehydration_failed"] is True + + +def test_on_app_created_emits_correct_event(mock_redis): + mock_redis.set.return_value = True + envelope = TelemetryEnvelope( + case=TelemetryCase.APP_CREATED, + tenant_id="tenant-123", + event_id="event-456", + payload={"app_id": "app-789", "mode": "chat"}, + ) + + handler = EnterpriseMetricHandler() + with ( + patch("extensions.ext_enterprise_telemetry.get_enterprise_exporter") as mock_get_exporter, + patch("enterprise.telemetry.telemetry_log.emit_metric_only_event") as mock_emit, + ): + mock_exporter = MagicMock() + mock_get_exporter.return_value = mock_exporter + + 
handler._on_app_created(envelope) + + from enterprise.telemetry.entities import EnterpriseTelemetryEvent + + mock_emit.assert_called_once_with( + event_name=EnterpriseTelemetryEvent.APP_CREATED, + attributes={ + "dify.app.id": "app-789", + "dify.tenant_id": "tenant-123", + "dify.event.id": "event-456", + "dify.app.mode": "chat", + }, + tenant_id="tenant-123", + ) + from enterprise.telemetry.entities import EnterpriseTelemetryCounter + + mock_exporter.increment_counter.assert_called_once() + call_args = mock_exporter.increment_counter.call_args + assert call_args[0][0] == EnterpriseTelemetryCounter.APP_CREATED + assert call_args[0][1] == 1 + assert call_args[0][2]["tenant_id"] == "tenant-123" + assert call_args[0][2]["app_id"] == "app-789" + assert call_args[0][2]["mode"] == "chat" + + +def test_on_app_updated_emits_correct_event(mock_redis): + mock_redis.set.return_value = True + envelope = TelemetryEnvelope( + case=TelemetryCase.APP_UPDATED, + tenant_id="tenant-123", + event_id="event-456", + payload={"app_id": "app-789"}, + ) + + handler = EnterpriseMetricHandler() + with ( + patch("extensions.ext_enterprise_telemetry.get_enterprise_exporter") as mock_get_exporter, + patch("enterprise.telemetry.telemetry_log.emit_metric_only_event") as mock_emit, + ): + mock_exporter = MagicMock() + mock_get_exporter.return_value = mock_exporter + + handler._on_app_updated(envelope) + + from enterprise.telemetry.entities import EnterpriseTelemetryEvent + + mock_emit.assert_called_once_with( + event_name=EnterpriseTelemetryEvent.APP_UPDATED, + attributes={ + "dify.app.id": "app-789", + "dify.tenant_id": "tenant-123", + "dify.event.id": "event-456", + }, + tenant_id="tenant-123", + ) + from enterprise.telemetry.entities import EnterpriseTelemetryCounter + + mock_exporter.increment_counter.assert_called_once() + call_args = mock_exporter.increment_counter.call_args + assert call_args[0][0] == EnterpriseTelemetryCounter.APP_UPDATED + assert call_args[0][1] == 1 + assert call_args[0][2]["tenant_id"] == "tenant-123" + assert call_args[0][2]["app_id"] == "app-789" + + +def test_on_app_deleted_emits_correct_event(mock_redis): + mock_redis.set.return_value = True + envelope = TelemetryEnvelope( + case=TelemetryCase.APP_DELETED, + tenant_id="tenant-123", + event_id="event-456", + payload={"app_id": "app-789"}, + ) + + handler = EnterpriseMetricHandler() + with ( + patch("extensions.ext_enterprise_telemetry.get_enterprise_exporter") as mock_get_exporter, + patch("enterprise.telemetry.telemetry_log.emit_metric_only_event") as mock_emit, + ): + mock_exporter = MagicMock() + mock_get_exporter.return_value = mock_exporter + + handler._on_app_deleted(envelope) + + from enterprise.telemetry.entities import EnterpriseTelemetryEvent + + mock_emit.assert_called_once_with( + event_name=EnterpriseTelemetryEvent.APP_DELETED, + attributes={ + "dify.app.id": "app-789", + "dify.tenant_id": "tenant-123", + "dify.event.id": "event-456", + }, + tenant_id="tenant-123", + ) + from enterprise.telemetry.entities import EnterpriseTelemetryCounter + + mock_exporter.increment_counter.assert_called_once() + call_args = mock_exporter.increment_counter.call_args + assert call_args[0][0] == EnterpriseTelemetryCounter.APP_DELETED + assert call_args[0][1] == 1 + assert call_args[0][2]["tenant_id"] == "tenant-123" + assert call_args[0][2]["app_id"] == "app-789" + + +def test_on_feedback_created_emits_correct_event(mock_redis): + mock_redis.set.return_value = True + envelope = TelemetryEnvelope( + case=TelemetryCase.FEEDBACK_CREATED, + 
tenant_id="tenant-123", + event_id="event-456", + payload={ + "message_id": "msg-001", + "app_id": "app-789", + "conversation_id": "conv-123", + "from_end_user_id": "user-456", + "from_account_id": None, + "rating": "like", + "from_source": "api", + "content": "Great!", + }, + ) + + handler = EnterpriseMetricHandler() + with ( + patch("extensions.ext_enterprise_telemetry.get_enterprise_exporter") as mock_get_exporter, + patch("enterprise.telemetry.telemetry_log.emit_metric_only_event") as mock_emit, + ): + mock_exporter = MagicMock() + mock_exporter.include_content = True + mock_get_exporter.return_value = mock_exporter + + handler._on_feedback_created(envelope) + + mock_emit.assert_called_once() + call_args = mock_emit.call_args + assert call_args[1]["event_name"] == "dify.feedback.created" + assert call_args[1]["attributes"]["dify.message.id"] == "msg-001" + assert call_args[1]["attributes"]["dify.feedback.content"] == "Great!" + assert call_args[1]["tenant_id"] == "tenant-123" + assert call_args[1]["user_id"] == "user-456" + + mock_exporter.increment_counter.assert_called_once() + counter_args = mock_exporter.increment_counter.call_args + assert counter_args[0][2]["app_id"] == "app-789" + assert counter_args[0][2]["rating"] == "like" + + +def test_on_feedback_created_without_content(mock_redis): + mock_redis.set.return_value = True + envelope = TelemetryEnvelope( + case=TelemetryCase.FEEDBACK_CREATED, + tenant_id="tenant-123", + event_id="event-456", + payload={ + "message_id": "msg-001", + "app_id": "app-789", + "conversation_id": "conv-123", + "from_end_user_id": "user-456", + "from_account_id": None, + "rating": "like", + "from_source": "api", + "content": "Great!", + }, + ) + + handler = EnterpriseMetricHandler() + with ( + patch("extensions.ext_enterprise_telemetry.get_enterprise_exporter") as mock_get_exporter, + patch("enterprise.telemetry.telemetry_log.emit_metric_only_event") as mock_emit, + ): + mock_exporter = MagicMock() + mock_exporter.include_content = False + mock_get_exporter.return_value = mock_exporter + + handler._on_feedback_created(envelope) + + mock_emit.assert_called_once() + call_args = mock_emit.call_args + assert "dify.feedback.content" not in call_args[1]["attributes"] diff --git a/api/tests/unit_tests/libs/test_cron_compatibility.py b/api/tests/unit_tests/libs/test_cron_compatibility.py index 61103d7935..6f3a94f6dc 100644 --- a/api/tests/unit_tests/libs/test_cron_compatibility.py +++ b/api/tests/unit_tests/libs/test_cron_compatibility.py @@ -294,7 +294,7 @@ class TestFrontendBackendIntegration(unittest.TestCase): def test_schedule_service_integration(self): """Test integration with ScheduleService patterns.""" - from dify_graph.nodes.trigger_schedule.entities import VisualConfig + from core.workflow.nodes.trigger_schedule.entities import VisualConfig from services.trigger.schedule_service import ScheduleService # Test enhanced syntax through visual config conversion diff --git a/api/tests/unit_tests/models/test_account_models.py b/api/tests/unit_tests/models/test_account_models.py index cc311d447f..1726fc2e8b 100644 --- a/api/tests/unit_tests/models/test_account_models.py +++ b/api/tests/unit_tests/models/test_account_models.py @@ -98,7 +98,7 @@ class TestAccountModelValidation: ) # Assert - assert account.status == "active" + assert account.status == AccountStatus.ACTIVE def test_account_get_status_method(self): """Test the get_status method returns AccountStatus enum.""" @@ -106,7 +106,7 @@ class TestAccountModelValidation: account = Account( name="Test 
User", email="test@example.com", - status="pending", + status=AccountStatus.PENDING, ) # Act diff --git a/api/tests/unit_tests/models/test_workflow_models.py b/api/tests/unit_tests/models/test_workflow_models.py index f66f0b657d..4fcef34549 100644 --- a/api/tests/unit_tests/models/test_workflow_models.py +++ b/api/tests/unit_tests/models/test_workflow_models.py @@ -15,7 +15,7 @@ from uuid import uuid4 import pytest from dify_graph.enums import ( - NodeType, + BuiltinNodeTypes, WorkflowExecutionStatus, WorkflowNodeExecutionStatus, ) @@ -471,7 +471,7 @@ class TestNodeExecutionRelationships: workflow_run_id=workflow_run_id, index=1, node_id="start", - node_type=NodeType.START.value, + node_type=BuiltinNodeTypes.START, title="Start Node", status=WorkflowNodeExecutionStatus.SUCCEEDED.value, created_by_role=CreatorUserRole.ACCOUNT.value, @@ -484,7 +484,7 @@ class TestNodeExecutionRelationships: assert node_execution.workflow_id == workflow_id assert node_execution.workflow_run_id == workflow_run_id assert node_execution.node_id == "start" - assert node_execution.node_type == NodeType.START.value + assert node_execution.node_type == BuiltinNodeTypes.START assert node_execution.index == 1 def test_node_execution_with_predecessor_relationship(self): @@ -503,7 +503,7 @@ class TestNodeExecutionRelationships: index=2, predecessor_node_id=predecessor_node_id, node_id=current_node_id, - node_type=NodeType.LLM.value, + node_type=BuiltinNodeTypes.LLM, title="LLM Node", status=WorkflowNodeExecutionStatus.RUNNING.value, created_by_role=CreatorUserRole.ACCOUNT.value, @@ -526,7 +526,7 @@ class TestNodeExecutionRelationships: workflow_run_id=None, # Single-step has no workflow run index=1, node_id="llm_test", - node_type=NodeType.LLM.value, + node_type=BuiltinNodeTypes.LLM, title="Test LLM", status=WorkflowNodeExecutionStatus.SUCCEEDED.value, created_by_role=CreatorUserRole.ACCOUNT.value, @@ -553,7 +553,7 @@ class TestNodeExecutionRelationships: workflow_run_id=str(uuid4()), index=1, node_id="llm_1", - node_type=NodeType.LLM.value, + node_type=BuiltinNodeTypes.LLM, title="LLM Node", status=WorkflowNodeExecutionStatus.SUCCEEDED.value, created_by_role=CreatorUserRole.ACCOUNT.value, @@ -579,7 +579,7 @@ class TestNodeExecutionRelationships: workflow_run_id=str(uuid4()), index=1, node_id="code_1", - node_type=NodeType.CODE.value, + node_type=BuiltinNodeTypes.CODE, title="Code Node", status=WorkflowNodeExecutionStatus.RUNNING.value, created_by_role=CreatorUserRole.ACCOUNT.value, @@ -610,7 +610,7 @@ class TestNodeExecutionRelationships: workflow_run_id=str(uuid4()), index=3, node_id="code_1", - node_type=NodeType.CODE.value, + node_type=BuiltinNodeTypes.CODE, title="Code Node", status=WorkflowNodeExecutionStatus.FAILED.value, error=error_message, @@ -641,7 +641,7 @@ class TestNodeExecutionRelationships: workflow_run_id=str(uuid4()), index=1, node_id="llm_1", - node_type=NodeType.LLM.value, + node_type=BuiltinNodeTypes.LLM, title="LLM Node", status=WorkflowNodeExecutionStatus.SUCCEEDED.value, created_by_role=CreatorUserRole.ACCOUNT.value, @@ -664,7 +664,7 @@ class TestNodeExecutionRelationships: workflow_run_id=str(uuid4()), index=1, node_id="start", - node_type=NodeType.START.value, + node_type=BuiltinNodeTypes.START, title="Start", status=WorkflowNodeExecutionStatus.SUCCEEDED.value, created_by_role=CreatorUserRole.ACCOUNT.value, @@ -682,12 +682,12 @@ class TestNodeExecutionRelationships: """Test node execution with different node types.""" # Test various node types node_types = [ - (NodeType.START, "Start Node"), - 
(NodeType.LLM, "LLM Node"), - (NodeType.CODE, "Code Node"), - (NodeType.TOOL, "Tool Node"), - (NodeType.IF_ELSE, "Conditional Node"), - (NodeType.END, "End Node"), + (BuiltinNodeTypes.START, "Start Node"), + (BuiltinNodeTypes.LLM, "LLM Node"), + (BuiltinNodeTypes.CODE, "Code Node"), + (BuiltinNodeTypes.TOOL, "Tool Node"), + (BuiltinNodeTypes.IF_ELSE, "Conditional Node"), + (BuiltinNodeTypes.END, "End Node"), ] for node_type, title in node_types: @@ -699,8 +699,8 @@ class TestNodeExecutionRelationships: triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN.value, workflow_run_id=str(uuid4()), index=1, - node_id=f"{node_type.value}_1", - node_type=node_type.value, + node_id=f"{node_type}_1", + node_type=node_type, title=title, status=WorkflowNodeExecutionStatus.SUCCEEDED.value, created_by_role=CreatorUserRole.ACCOUNT.value, @@ -708,7 +708,7 @@ class TestNodeExecutionRelationships: ) # Assert - assert node_execution.node_type == node_type.value + assert node_execution.node_type == node_type assert node_execution.title == title @@ -1004,7 +1004,7 @@ class TestGraphConfigurationValidation: workflow_run_id=str(uuid4()), index=1, node_id="start", - node_type=NodeType.START.value, + node_type=BuiltinNodeTypes.START, title="Start", status=WorkflowNodeExecutionStatus.SUCCEEDED.value, created_by_role=CreatorUserRole.ACCOUNT.value, @@ -1029,7 +1029,7 @@ class TestGraphConfigurationValidation: workflow_run_id=str(uuid4()), index=1, node_id="start", - node_type=NodeType.START.value, + node_type=BuiltinNodeTypes.START, title="Start", status=WorkflowNodeExecutionStatus.SUCCEEDED.value, created_by_role=CreatorUserRole.ACCOUNT.value, diff --git a/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_repository.py b/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_repository.py index 06703b8e38..086d1ac52e 100644 --- a/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_repository.py +++ b/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_repository.py @@ -17,7 +17,7 @@ from dify_graph.entities import ( WorkflowNodeExecution, ) from dify_graph.enums import ( - NodeType, + BuiltinNodeTypes, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus, ) @@ -230,7 +230,7 @@ def test_to_db_model(repository): index=1, predecessor_node_id="test-predecessor-id", node_id="test-node-id", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, title="Test Node", inputs={"input_key": "input_value"}, process_data={"process_key": "process_value"}, @@ -298,7 +298,7 @@ def test_to_domain_model(repository): db_model.predecessor_node_id = "test-predecessor-id" db_model.node_execution_id = "test-node-execution-id" db_model.node_id = "test-node-id" - db_model.node_type = NodeType.START + db_model.node_type = BuiltinNodeTypes.START db_model.title = "Test Node" db_model.inputs = json.dumps(inputs_dict) db_model.process_data = json.dumps(process_data_dict) @@ -324,7 +324,7 @@ def test_to_domain_model(repository): assert domain_model.predecessor_node_id == db_model.predecessor_node_id assert domain_model.node_execution_id == db_model.node_execution_id assert domain_model.node_id == db_model.node_id - assert domain_model.node_type == NodeType(db_model.node_type) + assert domain_model.node_type == db_model.node_type assert domain_model.title == db_model.title assert domain_model.inputs == inputs_dict assert domain_model.process_data == process_data_dict diff --git 
a/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_workflow_node_execution_repository.py b/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_workflow_node_execution_repository.py index 95a7751273..e01fb8456f 100644 --- a/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_workflow_node_execution_repository.py +++ b/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_workflow_node_execution_repository.py @@ -12,7 +12,7 @@ from core.repositories.sqlalchemy_workflow_node_execution_repository import ( SQLAlchemyWorkflowNodeExecutionRepository, ) from dify_graph.entities.workflow_node_execution import WorkflowNodeExecution -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from models import Account, WorkflowNodeExecutionModel, WorkflowNodeExecutionTriggeredFrom @@ -62,7 +62,7 @@ class TestSQLAlchemyWorkflowNodeExecutionRepositoryProcessData: workflow_id="test-workflow-id", index=1, node_id="test-node-id", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, title="Test Node", process_data=process_data, created_at=datetime.now(), diff --git a/api/tests/unit_tests/services/retention/workflow_run/test_restore_archived_workflow_run.py b/api/tests/unit_tests/services/retention/workflow_run/test_restore_archived_workflow_run.py index 6097bcbd61..4bfdba87a0 100644 --- a/api/tests/unit_tests/services/retention/workflow_run/test_restore_archived_workflow_run.py +++ b/api/tests/unit_tests/services/retention/workflow_run/test_restore_archived_workflow_run.py @@ -13,6 +13,7 @@ from datetime import datetime from unittest.mock import Mock, create_autospec, patch import pytest +from sqlalchemy import Column, Integer, MetaData, String, Table from libs.archive_storage import ArchiveStorageNotConfiguredError from models.trigger import WorkflowTriggerLog @@ -127,10 +128,41 @@ class WorkflowRunRestoreTestDataFactory: if tables_data is None: tables_data = { - "workflow_runs": [{"id": "run-123", "tenant_id": "tenant-123"}], + "workflow_runs": [ + { + "id": "run-123", + "tenant_id": "tenant-123", + "app_id": "app-123", + "workflow_id": "workflow-123", + "type": "workflow", + "triggered_from": "app", + "version": "1", + "status": "succeeded", + "created_by_role": "account", + "created_by": "user-123", + } + ], "workflow_app_logs": [ - {"id": "log-1", "workflow_run_id": "run-123"}, - {"id": "log-2", "workflow_run_id": "run-123"}, + { + "id": "log-1", + "tenant_id": "tenant-123", + "app_id": "app-123", + "workflow_id": "workflow-123", + "workflow_run_id": "run-123", + "created_from": "app", + "created_by_role": "account", + "created_by": "user-123", + }, + { + "id": "log-2", + "tenant_id": "tenant-123", + "app_id": "app-123", + "workflow_id": "workflow-123", + "workflow_run_id": "run-123", + "created_from": "app", + "created_by_role": "account", + "created_by": "user-123", + }, ], } @@ -406,14 +438,48 @@ class TestGetModelColumnInfo: assert "created_by" in column_names assert "status" in column_names - # WorkflowRun model has no required columns (all have defaults or are auto-generated) - assert required_columns == set() + # Columns without defaults should be required for restore inserts. 
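+        # (Auto-generated or defaulted columns such as id and created_at must stay optional; see below.)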
+        assert {
+            "tenant_id",
+            "app_id",
+            "workflow_id",
+            "type",
+            "triggered_from",
+            "version",
+            "status",
+            "created_by_role",
+            "created_by",
+        }.issubset(required_columns)
+        assert "id" not in required_columns
+        assert "created_at" not in required_columns
 
         # Check columns with defaults or server defaults
         assert "id" in non_nullable_with_default
         assert "created_at" in non_nullable_with_default
         assert "elapsed_time" in non_nullable_with_default
         assert "total_tokens" in non_nullable_with_default
+        assert "tenant_id" not in non_nullable_with_default
+
+    def test_non_pk_auto_autoincrement_column_is_still_required(self):
+        """`autoincrement='auto'` should not mark non-PK columns as defaulted."""
+        restore = WorkflowRunRestore()
+
+        test_table = Table(
+            "test_autoincrement",
+            MetaData(),
+            Column("id", Integer, primary_key=True, autoincrement=True),
+            Column("required_field", String(255), nullable=False),
+            Column("defaulted_field", String(255), nullable=False, default="x"),
+        )
+
+        class MockModel:
+            __table__ = test_table
+
+        _, required_columns, non_nullable_with_default = restore._get_model_column_info(MockModel)
+
+        assert required_columns == {"required_field"}
+        assert "id" in non_nullable_with_default
+        assert "defaulted_field" in non_nullable_with_default
 
 
 # ---------------------------------------------------------------------------
@@ -465,7 +531,32 @@ class TestRestoreTableRecords:
         mock_stmt.on_conflict_do_nothing.return_value = mock_stmt
         mock_pg_insert.return_value = mock_stmt
 
-        records = [{"id": "test1", "tenant_id": "tenant-123"}, {"id": "test2", "tenant_id": "tenant-123"}]
+        records = [
+            {
+                "id": "test1",
+                "tenant_id": "tenant-123",
+                "app_id": "app-123",
+                "workflow_id": "workflow-123",
+                "type": "workflow",
+                "triggered_from": "app",
+                "version": "1",
+                "status": "succeeded",
+                "created_by_role": "account",
+                "created_by": "user-123",
+            },
+            {
+                "id": "test2",
+                "tenant_id": "tenant-123",
+                "app_id": "app-123",
+                "workflow_id": "workflow-123",
+                "type": "workflow",
+                "triggered_from": "app",
+                "version": "1",
+                "status": "succeeded",
+                "created_by_role": "account",
+                "created_by": "user-123",
+            },
+        ]
 
         result = restore._restore_table_records(mock_session, "workflow_runs", records, schema_version="1.0")
@@ -477,8 +568,7 @@ class TestRestoreTableRecords:
         restore = WorkflowRunRestore()
         mock_session = Mock()
 
-        # Since WorkflowRun has no required columns, we need to test with a different model
-        # Let's test with a mock model that has required columns
+        # Use a dedicated mock model to isolate required-column validation behavior.
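+        # WorkflowRun itself now has required columns, but a mock keeps this test independent of the schema.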
         mock_model = Mock()
 
         # Mock a required column
@@ -965,6 +1055,13 @@ class TestIntegration:
                     "id": "run-123",
                     "tenant_id": "tenant-123",
                     "app_id": "app-123",
+                    "workflow_id": "workflow-123",
+                    "type": "workflow",
+                    "triggered_from": "app",
+                    "version": "1",
+                    "status": "succeeded",
+                    "created_by_role": "account",
+                    "created_by": "user-123",
                     "created_at": "2024-01-01T12:00:00",
                 }
             ],
diff --git a/api/tests/unit_tests/services/test_app_dsl_service.py b/api/tests/unit_tests/services/test_app_dsl_service.py
index 33d26f4bcb..b9ab03455d 100644
--- a/api/tests/unit_tests/services/test_app_dsl_service.py
+++ b/api/tests/unit_tests/services/test_app_dsl_service.py
@@ -5,7 +5,12 @@ from unittest.mock import MagicMock
 import pytest
 import yaml
 
-from dify_graph.enums import NodeType
+from core.trigger.constants import (
+    TRIGGER_PLUGIN_NODE_TYPE,
+    TRIGGER_SCHEDULE_NODE_TYPE,
+    TRIGGER_WEBHOOK_NODE_TYPE,
+)
+from dify_graph.enums import BuiltinNodeTypes
 from models import Account, AppMode
 from models.model import IconType
 from services import app_dsl_service
@@ -522,7 +527,7 @@
         "conversation_variables": [{"y": 2}],
         "graph": {
             "nodes": [
-                {"data": {"type": NodeType.KNOWLEDGE_RETRIEVAL, "dataset_ids": ["enc-1", "enc-2"]}},
+                {"data": {"type": BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL, "dataset_ids": ["enc-1", "enc-2"]}},
             ]
         },
         "features": {},
@@ -667,21 +672,59 @@
     assert model_calls == [True]
 
 
+def test_export_dsl_preserves_icon_and_icon_type(monkeypatch):
+    monkeypatch.setattr(AppDslService, "_append_workflow_export_data", lambda **_kwargs: None)
+
+    emoji_app = SimpleNamespace(
+        mode=AppMode.WORKFLOW.value,
+        tenant_id="tenant-1",
+        name="Emoji App",
+        icon="🎨",
+        icon_type=IconType.EMOJI,
+        icon_background="#FF5733",
+        description="App with emoji icon",
+        use_icon_as_answer_icon=True,
+        app_model_config=None,
+    )
+    yaml_output = AppDslService.export_dsl(emoji_app)
+    data = yaml.safe_load(yaml_output)
+    assert data["app"]["icon"] == "🎨"
+    assert data["app"]["icon_type"] == "emoji"
+    assert data["app"]["icon_background"] == "#FF5733"
+
+    image_app = SimpleNamespace(
+        mode=AppMode.WORKFLOW.value,
+        tenant_id="tenant-1",
+        name="Image App",
+        icon="https://example.com/icon.png",
+        icon_type=IconType.IMAGE,
+        icon_background="#FFEAD5",
+        description="App with image icon",
+        use_icon_as_answer_icon=False,
+        app_model_config=None,
+    )
+    yaml_output = AppDslService.export_dsl(image_app)
+    data = yaml.safe_load(yaml_output)
+    assert data["app"]["icon"] == "https://example.com/icon.png"
+    assert data["app"]["icon_type"] == "image"
+    assert data["app"]["icon_background"] == "#FFEAD5"
+
+
 def test_append_workflow_export_data_filters_and_overrides(monkeypatch):
     workflow_dict = {
         "graph": {
             "nodes": [
-                {"data": {"type": NodeType.KNOWLEDGE_RETRIEVAL, "dataset_ids": ["d1", "d2"]}},
-                {"data": {"type": NodeType.TOOL, "credential_id": "secret"}},
+                {"data": {"type": BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL, "dataset_ids": ["d1", "d2"]}},
+                {"data": {"type": BuiltinNodeTypes.TOOL, "credential_id": "secret"}},
                 {
                     "data": {
-                        "type": NodeType.AGENT,
+                        "type": BuiltinNodeTypes.AGENT,
                         "agent_parameters": {"tools": {"value": [{"credential_id": "secret"}]}},
                     }
                 },
-                {"data": {"type": NodeType.TRIGGER_SCHEDULE.value, "config": {"x": 1}}},
-                {"data": {"type": NodeType.TRIGGER_WEBHOOK.value, "webhook_url": "x", "webhook_debug_url": "y"}},
-                {"data": {"type": NodeType.TRIGGER_PLUGIN.value, "subscription_id": "s"}},
+                {"data": {"type": TRIGGER_SCHEDULE_NODE_TYPE, "config": {"x": 1}}},
+                {"data": {"type": TRIGGER_WEBHOOK_NODE_TYPE, "webhook_url": "x", "webhook_debug_url": "y"}},
+                {"data": {"type": TRIGGER_PLUGIN_NODE_TYPE, "subscription_id": "s"}},
             ]
         }
     }
@@ -809,11 +852,11 @@ def test_extract_dependencies_from_workflow_graph_covers_all_node_types(monkeypa
     graph = {
         "nodes": [
-            {"data": {"type": NodeType.TOOL}},
-            {"data": {"type": NodeType.LLM}},
-            {"data": {"type": NodeType.QUESTION_CLASSIFIER}},
-            {"data": {"type": NodeType.PARAMETER_EXTRACTOR}},
-            {"data": {"type": NodeType.KNOWLEDGE_RETRIEVAL}},
+            {"data": {"type": BuiltinNodeTypes.TOOL}},
+            {"data": {"type": BuiltinNodeTypes.LLM}},
+            {"data": {"type": BuiltinNodeTypes.QUESTION_CLASSIFIER}},
+            {"data": {"type": BuiltinNodeTypes.PARAMETER_EXTRACTOR}},
+            {"data": {"type": BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL}},
             {"data": {"type": "unknown"}},
         ]
     }
@@ -826,7 +869,9 @@ def test_extract_dependencies_from_workflow_graph_handles_exceptions(monkeypatch
     monkeypatch.setattr(
         app_dsl_service.ToolNodeData, "model_validate", lambda _d: (_ for _ in ()).throw(ValueError("bad"))
     )
-    deps = AppDslService._extract_dependencies_from_workflow_graph({"nodes": [{"data": {"type": NodeType.TOOL}}]})
+    deps = AppDslService._extract_dependencies_from_workflow_graph(
+        {"nodes": [{"data": {"type": BuiltinNodeTypes.TOOL}}]}
+    )
     assert deps == []
diff --git a/api/tests/unit_tests/services/test_schedule_service.py b/api/tests/unit_tests/services/test_schedule_service.py
index 5e3dd157e6..e28965ea2c 100644
--- a/api/tests/unit_tests/services/test_schedule_service.py
+++ b/api/tests/unit_tests/services/test_schedule_service.py
@@ -5,8 +5,8 @@ from unittest.mock import MagicMock, Mock, patch
 import pytest
 from sqlalchemy.orm import Session
 
-from dify_graph.nodes.trigger_schedule.entities import ScheduleConfig, SchedulePlanUpdate, VisualConfig
-from dify_graph.nodes.trigger_schedule.exc import ScheduleConfigError
+from core.workflow.nodes.trigger_schedule.entities import ScheduleConfig, SchedulePlanUpdate, VisualConfig
+from core.workflow.nodes.trigger_schedule.exc import ScheduleConfigError
 from events.event_handlers.sync_workflow_schedule_when_app_published import (
     sync_schedule_from_workflow,
 )
@@ -136,7 +136,7 @@ class TestScheduleService(unittest.TestCase):
     def test_update_schedule_not_found(self):
         """Test updating a non-existent schedule raises exception."""
-        from dify_graph.nodes.trigger_schedule.exc import ScheduleNotFoundError
+        from core.workflow.nodes.trigger_schedule.exc import ScheduleNotFoundError
 
         mock_session = MagicMock(spec=Session)
         mock_session.get.return_value = None
@@ -172,7 +172,7 @@ class TestScheduleService(unittest.TestCase):
     def test_delete_schedule_not_found(self):
         """Test deleting a non-existent schedule raises exception."""
-        from dify_graph.nodes.trigger_schedule.exc import ScheduleNotFoundError
+        from core.workflow.nodes.trigger_schedule.exc import ScheduleNotFoundError
 
         mock_session = MagicMock(spec=Session)
         mock_session.get.return_value = None
diff --git a/api/tests/unit_tests/services/test_workflow_service.py b/api/tests/unit_tests/services/test_workflow_service.py
index 8820a1acc0..57c0464dc6 100644
--- a/api/tests/unit_tests/services/test_workflow_service.py
+++ b/api/tests/unit_tests/services/test_workflow_service.py
@@ -14,7 +14,7 @@
 
 import pytest
 
-from dify_graph.enums import NodeType
+from dify_graph.enums import BuiltinNodeTypes
 from dify_graph.nodes.http_request import HTTP_REQUEST_CONFIG_FILTER_KEY, HttpRequestNode, HttpRequestNodeConfig
 from libs.datetime_utils import naive_utc_now
 from models.model import App, AppMode
@@ -134,7 +134,7 @@ class TestWorkflowAssociatedDataFactory:
             return (
                 (node["id"], node["data"])
                 for node in nodes
-                if node.get("data", {}).get("type") == specific_node_type.value
+                if node.get("data", {}).get("type") == str(specific_node_type)
             )
         # Return all nodes if no filter specified
         return ((node["id"], node["data"]) for node in nodes)
@@ -179,7 +179,7 @@ class TestWorkflowAssociatedDataFactory:
                 {
                     "id": "start",
                     "data": {
-                        "type": NodeType.START.value,
+                        "type": BuiltinNodeTypes.START,
                         "title": "START",
                         "variables": [],
                     },
@@ -204,7 +204,7 @@ class TestWorkflowAssociatedDataFactory:
                 {
                     "id": "llm-1",
                     "data": {
-                        "type": NodeType.LLM.value,
+                        "type": BuiltinNodeTypes.LLM,
                         "title": "LLM",
                         "model": {
                             "provider": "openai",
@@ -1001,12 +1001,12 @@ class TestWorkflowService:
        Used by the UI to populate the node palette and provide sensible
        defaults when users add new nodes to their workflow.
        """
-        with patch("services.workflow_service.NODE_TYPE_CLASSES_MAPPING") as mock_mapping:
+        with patch("services.workflow_service.get_node_type_classes_mapping") as mock_mapping:
             # Mock node class with default config
             mock_node_class = MagicMock()
             mock_node_class.get_default_config.return_value = {"type": "llm", "config": {}}
-            mock_mapping.items.return_value = [(NodeType.LLM, {"latest": mock_node_class})]
+            mock_mapping.return_value = {BuiltinNodeTypes.LLM: {"latest": mock_node_class}}
 
             with patch("services.workflow_service.LATEST_VERSION", "latest"):
                 result = workflow_service.get_default_block_configs()
@@ -1025,7 +1025,7 @@ class TestWorkflowService:
         )
 
         with (
-            patch("services.workflow_service.NODE_TYPE_CLASSES_MAPPING") as mock_mapping,
+            patch("services.workflow_service.get_node_type_classes_mapping") as mock_mapping,
             patch("services.workflow_service.LATEST_VERSION", "latest"),
             patch(
                 "services.workflow_service.build_http_request_config",
@@ -1036,10 +1036,10 @@ class TestWorkflowService:
             mock_http_node_class.get_default_config.return_value = {"type": "http-request", "config": {}}
             mock_llm_node_class = MagicMock()
             mock_llm_node_class.get_default_config.return_value = {"type": "llm", "config": {}}
-            mock_mapping.items.return_value = [
-                (NodeType.HTTP_REQUEST, {"latest": mock_http_node_class}),
-                (NodeType.LLM, {"latest": mock_llm_node_class}),
-            ]
+            mock_mapping.return_value = {
+                BuiltinNodeTypes.HTTP_REQUEST: {"latest": mock_http_node_class},
+                BuiltinNodeTypes.LLM: {"latest": mock_llm_node_class},
+            }
 
             result = workflow_service.get_default_block_configs()
@@ -1060,7 +1060,7 @@ class TestWorkflowService:
        This includes default values for all required and optional parameters.
        """
         with (
-            patch("services.workflow_service.NODE_TYPE_CLASSES_MAPPING") as mock_mapping,
+            patch("services.workflow_service.get_node_type_classes_mapping") as mock_mapping,
             patch("services.workflow_service.LATEST_VERSION", "latest"),
         ):
             # Mock node class with default config
@@ -1068,23 +1068,21 @@ class TestWorkflowService:
             mock_node_class = MagicMock()
             mock_config = {"type": "llm", "config": {"provider": "openai"}}
             mock_node_class.get_default_config.return_value = mock_config
 
-            # Create a mock mapping that includes NodeType.LLM
-            mock_mapping.__contains__.return_value = True
-            mock_mapping.__getitem__.return_value = {"latest": mock_node_class}
+            # Create a mock mapping that includes BuiltinNodeTypes.LLM
+            mock_mapping.return_value = {BuiltinNodeTypes.LLM: {"latest": mock_node_class}}
 
-            result = workflow_service.get_default_block_config(NodeType.LLM.value)
+            result = workflow_service.get_default_block_config(BuiltinNodeTypes.LLM)
 
             assert result == mock_config
             mock_node_class.get_default_config.assert_called_once()
 
     def test_get_default_block_config_invalid_node_type(self, workflow_service):
         """Test get_default_block_config returns empty dict for invalid node type."""
-        with patch("services.workflow_service.NODE_TYPE_CLASSES_MAPPING") as mock_mapping:
-            # Mock mapping to not contain the node type
-            mock_mapping.__contains__.return_value = False
+        with patch("services.workflow_service.get_node_type_classes_mapping") as mock_mapping:
+            mock_mapping.return_value = {}
 
             # Use a valid NodeType but one that's not in the mapping
-            result = workflow_service.get_default_block_config(NodeType.LLM.value)
+            result = workflow_service.get_default_block_config(BuiltinNodeTypes.LLM)
 
             assert result == {}
@@ -1100,7 +1098,7 @@ class TestWorkflowService:
         )
 
         with (
-            patch("services.workflow_service.NODE_TYPE_CLASSES_MAPPING") as mock_mapping,
+            patch("services.workflow_service.get_node_type_classes_mapping") as mock_mapping,
             patch("services.workflow_service.LATEST_VERSION", "latest"),
             patch(
                 "services.workflow_service.build_http_request_config",
@@ -1110,10 +1108,9 @@ class TestWorkflowService:
             mock_node_class = MagicMock()
             expected = {"type": "http-request", "config": {}}
             mock_node_class.get_default_config.return_value = expected
-            mock_mapping.__contains__.return_value = True
-            mock_mapping.__getitem__.return_value = {"latest": mock_node_class}
+            mock_mapping.return_value = {BuiltinNodeTypes.HTTP_REQUEST: {"latest": mock_node_class}}
 
-            result = workflow_service.get_default_block_config(NodeType.HTTP_REQUEST.value)
+            result = workflow_service.get_default_block_config(BuiltinNodeTypes.HTTP_REQUEST)
 
             assert result == expected
             mock_build_config.assert_called_once()
@@ -1132,18 +1129,17 @@ class TestWorkflowService:
         )
 
         with (
-            patch("services.workflow_service.NODE_TYPE_CLASSES_MAPPING") as mock_mapping,
+            patch("services.workflow_service.get_node_type_classes_mapping") as mock_mapping,
             patch("services.workflow_service.LATEST_VERSION", "latest"),
             patch("services.workflow_service.build_http_request_config") as mock_build_config,
         ):
             mock_node_class = MagicMock()
             expected = {"type": "http-request", "config": {}}
             mock_node_class.get_default_config.return_value = expected
-            mock_mapping.__contains__.return_value = True
-            mock_mapping.__getitem__.return_value = {"latest": mock_node_class}
+            mock_mapping.return_value = {BuiltinNodeTypes.HTTP_REQUEST: {"latest": mock_node_class}}
 
             result = workflow_service.get_default_block_config(
-                NodeType.HTTP_REQUEST.value,
+                BuiltinNodeTypes.HTTP_REQUEST,
                 filters={HTTP_REQUEST_CONFIG_FILTER_KEY: provided_config},
             )
@@ -1155,14 +1151,14 @@ class TestWorkflowService:
     def test_get_default_block_config_http_request_malformed_config_raises_value_error(self, workflow_service):
         with (
             patch(
-                "services.workflow_service.NODE_TYPE_CLASSES_MAPPING",
-                {NodeType.HTTP_REQUEST: {"latest": HttpRequestNode}},
+                "services.workflow_service.get_node_type_classes_mapping",
+                return_value={BuiltinNodeTypes.HTTP_REQUEST: {"latest": HttpRequestNode}},
             ),
             patch("services.workflow_service.LATEST_VERSION", "latest"),
         ):
             with pytest.raises(ValueError, match="http_request_config must be an HttpRequestNodeConfig instance"):
                 workflow_service.get_default_block_config(
-                    NodeType.HTTP_REQUEST.value,
+                    BuiltinNodeTypes.HTTP_REQUEST,
                     filters={HTTP_REQUEST_CONFIG_FILTER_KEY: "invalid"},
                 )
diff --git a/api/tests/unit_tests/services/workflow/test_workflow_draft_variable_service.py b/api/tests/unit_tests/services/workflow/test_workflow_draft_variable_service.py
index 4042e05565..9f3874b8f1 100644
--- a/api/tests/unit_tests/services/workflow/test_workflow_draft_variable_service.py
+++ b/api/tests/unit_tests/services/workflow/test_workflow_draft_variable_service.py
@@ -8,7 +8,7 @@ from sqlalchemy import Engine
 from sqlalchemy.orm import Session
 
 from dify_graph.constants import SYSTEM_VARIABLE_NODE_ID
-from dify_graph.enums import NodeType
+from dify_graph.enums import BuiltinNodeTypes
 from dify_graph.variables.segments import StringSegment
 from dify_graph.variables.types import SegmentType
 from libs.uuid_utils import uuidv7
@@ -54,12 +54,12 @@ class TestDraftVariableSaver:
             session=mock_session,
             app_id=test_app_id,
             node_id="test_node_id",
-            node_type=NodeType.START,
+            node_type=BuiltinNodeTypes.START,
             node_execution_id="test_execution_id",
             user=mock_user,
         )
-        assert saver._should_variable_be_visible("123_456", NodeType.IF_ELSE, "output") == False
-        assert saver._should_variable_be_visible("123", NodeType.START, "output") == True
+        assert saver._should_variable_be_visible("123_456", BuiltinNodeTypes.IF_ELSE, "output") == False
+        assert saver._should_variable_be_visible("123", BuiltinNodeTypes.START, "output") == True
 
     def test__normalize_variable_for_start_node(self):
         @dataclasses.dataclass(frozen=True)
@@ -102,7 +102,7 @@ class TestDraftVariableSaver:
             session=mock_session,
             app_id=test_app_id,
             node_id=_NODE_ID,
-            node_type=NodeType.START,
+            node_type=BuiltinNodeTypes.START,
             node_execution_id="test_execution_id",
             user=mock_user,
         )
@@ -134,7 +134,7 @@ class TestDraftVariableSaver:
             session=mock_session,
             app_id="test-app-id",
             node_id="test-node-id",
-            node_type=NodeType.LLM,
+            node_type=BuiltinNodeTypes.LLM,
             node_execution_id="test-execution-id",
             user=mock_user,
         )
@@ -331,7 +331,7 @@ class TestWorkflowDraftVariableService:
         mock_node_config = {"type": "test_node"}
         with (
             patch.object(workflow, "get_node_config_by_id", return_value=mock_node_config, autospec=True),
-            patch.object(workflow, "get_node_type_from_node_config", return_value=NodeType.LLM, autospec=True),
+            patch.object(workflow, "get_node_type_from_node_config", return_value=BuiltinNodeTypes.LLM, autospec=True),
         ):
             result = service._reset_node_var_or_sys_var(workflow, variable)
diff --git a/api/tests/unit_tests/services/workflow/test_workflow_human_input_delivery.py b/api/tests/unit_tests/services/workflow/test_workflow_human_input_delivery.py
index fcdd1c2368..c890ab6a65 100644
--- a/api/tests/unit_tests/services/workflow/test_workflow_human_input_delivery.py
+++ b/api/tests/unit_tests/services/workflow/test_workflow_human_input_delivery.py
@@ -6,7 +6,7 @@ import pytest
 from sqlalchemy.orm import sessionmaker
 
 from dify_graph.entities.graph_config import NodeConfigDict, NodeConfigDictAdapter
-from dify_graph.enums import NodeType
+from dify_graph.enums import BuiltinNodeTypes
 from dify_graph.nodes.human_input.entities import (
     EmailDeliveryConfig,
     EmailDeliveryMethod,
@@ -31,7 +31,7 @@ def _build_node_config(delivery_methods: list[EmailDeliveryMethod]) -> NodeConfi
         inputs=[],
         user_actions=[],
     ).model_dump(mode="json")
-    node_data["type"] = NodeType.HUMAN_INPUT.value
+    node_data["type"] = BuiltinNodeTypes.HUMAN_INPUT
 
     return NodeConfigDictAdapter.validate_python({"id": "node-1", "data": node_data})
diff --git a/api/tests/unit_tests/services/workflow/test_workflow_service.py b/api/tests/unit_tests/services/workflow/test_workflow_service.py
index 9ee8f88e71..ed26bcec01 100644
--- a/api/tests/unit_tests/services/workflow/test_workflow_service.py
+++ b/api/tests/unit_tests/services/workflow/test_workflow_service.py
@@ -5,7 +5,7 @@ from unittest.mock import MagicMock
 import pytest
 
 from dify_graph.entities.graph_config import NodeConfigDictAdapter
-from dify_graph.enums import NodeType
+from dify_graph.enums import BuiltinNodeTypes
 from dify_graph.nodes.human_input.entities import FormInput, HumanInputNodeData, UserAction
 from dify_graph.nodes.human_input.enums import FormInputType
 from models.model import App
@@ -209,7 +209,7 @@ class TestWorkflowService:
         workflow = MagicMock()
 
         node_config = NodeConfigDictAdapter.validate_python(
-            {"id": "node-1", "data": {"type": NodeType.HUMAN_INPUT.value}}
+            {"id": "node-1", "data": {"type": BuiltinNodeTypes.HUMAN_INPUT}}
         )
         workflow.get_node_config_by_id.return_value = node_config
         workflow.get_enclosing_node_type_and_id.return_value = None
@@ -279,7 +279,7 @@ class TestWorkflowService:
         workflow = MagicMock()
         workflow.get_node_config_by_id.return_value = NodeConfigDictAdapter.validate_python(
-            {"id": "node-1", "data": {"type": NodeType.HUMAN_INPUT.value}}
+            {"id": "node-1", "data": {"type": BuiltinNodeTypes.HUMAN_INPUT}}
         )
 
         service.get_draft_workflow = MagicMock(return_value=workflow)  # type: ignore[method-assign]
@@ -312,7 +312,7 @@ class TestWorkflowService:
         # Mock node config
         mock_workflow.get_node_config_by_id.return_value = NodeConfigDictAdapter.validate_python(
-            {"id": "node-1", "data": {"type": NodeType.LLM.value}}
+            {"id": "node-1", "data": {"type": BuiltinNodeTypes.LLM}}
         )
         mock_workflow.get_enclosing_node_type_and_id.return_value = None
@@ -379,7 +379,7 @@ class TestWorkflowService:
         mock_workflow.environment_variables = []
         mock_workflow.conversation_variables = []
         mock_workflow.get_node_config_by_id.return_value = NodeConfigDictAdapter.validate_python(
-            {"id": "node-1", "data": {"type": NodeType.LLM.value}}
+            {"id": "node-1", "data": {"type": BuiltinNodeTypes.LLM}}
         )
         mock_workflow.get_enclosing_node_type_and_id.return_value = None
diff --git a/api/tests/unit_tests/tasks/test_enterprise_telemetry_task.py b/api/tests/unit_tests/tasks/test_enterprise_telemetry_task.py
new file mode 100644
index 0000000000..b48c69a146
--- /dev/null
+++ b/api/tests/unit_tests/tasks/test_enterprise_telemetry_task.py
@@ -0,0 +1,69 @@
+"""Unit tests for enterprise telemetry Celery task."""
+
+import json
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from enterprise.telemetry.contracts import TelemetryCase, TelemetryEnvelope
+from tasks.enterprise_telemetry_task import process_enterprise_telemetry
+
+
+@pytest.fixture
+def sample_envelope_json():
+    envelope = TelemetryEnvelope(
+        case=TelemetryCase.APP_CREATED,
+        tenant_id="test-tenant",
+        event_id="test-event-123",
+        payload={"app_id": "app-123"},
+    )
+    return envelope.model_dump_json()
+
+
+def test_process_enterprise_telemetry_success(sample_envelope_json):
+    with patch("tasks.enterprise_telemetry_task.EnterpriseMetricHandler") as mock_handler_class:
+        mock_handler = MagicMock()
+        mock_handler_class.return_value = mock_handler
+
+        process_enterprise_telemetry(sample_envelope_json)
+
+        mock_handler.handle.assert_called_once()
+        call_args = mock_handler.handle.call_args[0][0]
+        assert isinstance(call_args, TelemetryEnvelope)
+        assert call_args.case == TelemetryCase.APP_CREATED
+        assert call_args.tenant_id == "test-tenant"
+        assert call_args.event_id == "test-event-123"
+
+
+def test_process_enterprise_telemetry_invalid_json(caplog):
+    invalid_json = "not valid json"
+
+    process_enterprise_telemetry(invalid_json)
+
+    assert "Failed to process enterprise telemetry envelope" in caplog.text
+
+
+def test_process_enterprise_telemetry_handler_exception(sample_envelope_json, caplog):
+    with patch("tasks.enterprise_telemetry_task.EnterpriseMetricHandler") as mock_handler_class:
+        mock_handler = MagicMock()
+        mock_handler.handle.side_effect = Exception("Handler error")
+        mock_handler_class.return_value = mock_handler
+
+        process_enterprise_telemetry(sample_envelope_json)
+
+        assert "Failed to process enterprise telemetry envelope" in caplog.text
+
+
+def test_process_enterprise_telemetry_validation_error(caplog):
+    invalid_envelope = json.dumps(
+        {
+            "case": "INVALID_CASE",
+            "tenant_id": "test-tenant",
+            "event_id": "test-event",
+            "payload": {},
+        }
+    )
+
+    process_enterprise_telemetry(invalid_envelope)
+
+    assert "Failed to process enterprise telemetry envelope" in caplog.text
diff --git a/api/tests/unit_tests/tasks/test_workflow_node_execution_tasks.py b/api/tests/unit_tests/tasks/test_workflow_node_execution_tasks.py
index 54be8379d5..a223f0119e 100644
--- a/api/tests/unit_tests/tasks/test_workflow_node_execution_tasks.py
+++ b/api/tests/unit_tests/tasks/test_workflow_node_execution_tasks.py
@@ -15,7 +15,7 @@
 #     WorkflowNodeExecution,
 #     WorkflowNodeExecutionStatus,
 # )
-# from dify_graph.enums import NodeType
+# from dify_graph.enums import BuiltinNodeTypes
 # from libs.datetime_utils import naive_utc_now
 # from models import WorkflowNodeExecutionModel
 # from models.enums import ExecutionOffLoadType
@@ -41,7 +41,7 @@
 #         workflow_execution_id=str(uuid4()),
 #         index=1,
 #         node_id="test_node",
-#         node_type=NodeType.LLM,
+#         node_type=BuiltinNodeTypes.LLM,
 #         title="Test Node",
 #         inputs={"input_key": "input_value"},
 #         outputs={"output_key": "output_value"},
@@ -134,7 +134,7 @@
 #         workflow_execution_id=str(uuid4()),
 #         index=1,
 #         node_id="test_node",
-#         node_type=NodeType.LLM,
+#         node_type=BuiltinNodeTypes.LLM,
 #         title="Test Node",
 #         inputs=large_data,
 #         outputs=large_data,
diff --git a/api/uv.lock b/api/uv.lock
index 1d03d5a360..cdc36bf462 100644
--- a/api/uv.lock
+++ b/api/uv.lock
@@ -3,17 +3,29 @@ revision = 3
 requires-python = ">=3.11, <3.13"
 resolution-markers = [
     "python_full_version >= '3.12.4' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'",
-    "python_full_version >= '3.12.4' and platform_python_implementation != 'PyPy' and sys_platform != 'linux'",
+    "python_full_version >= '3.12.4' and platform_python_implementation != 'PyPy' and sys_platform == 'win32'",
+    "python_full_version >= '3.12.4' and platform_python_implementation != 'PyPy' and sys_platform == 'emscripten'",
+    "python_full_version >= '3.12.4' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
     "python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'",
-    "python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_python_implementation != 'PyPy' and sys_platform != 'linux'",
+    "python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_python_implementation != 'PyPy' and sys_platform == 'win32'",
+    "python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_python_implementation != 'PyPy' and sys_platform == 'emscripten'",
+    "python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
     "python_full_version >= '3.12.4' and platform_python_implementation == 'PyPy' and sys_platform == 'linux'",
-    "python_full_version >= '3.12.4' and platform_python_implementation == 'PyPy' and sys_platform != 'linux'",
+    "python_full_version >= '3.12.4' and platform_python_implementation == 'PyPy' and sys_platform == 'win32'",
+    "python_full_version >= '3.12.4' and platform_python_implementation == 'PyPy' and sys_platform == 'emscripten'",
+    "python_full_version >= '3.12.4' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
     "python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_python_implementation == 'PyPy' and sys_platform == 'linux'",
-    "python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_python_implementation == 'PyPy' and sys_platform != 'linux'",
+    "python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_python_implementation == 'PyPy' and sys_platform == 'win32'",
+    "python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_python_implementation == 'PyPy' and sys_platform == 'emscripten'",
+    "python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
     "python_full_version < '3.12' and platform_python_implementation != 'PyPy' and sys_platform == 'linux'",
-    "python_full_version < '3.12' and platform_python_implementation != 'PyPy' and sys_platform != 'linux'",
+    "python_full_version < '3.12' and platform_python_implementation != 'PyPy' and sys_platform == 'win32'",
+    "python_full_version < '3.12' and platform_python_implementation != 'PyPy' and sys_platform == 'emscripten'",
+    "python_full_version < '3.12' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
     "python_full_version < '3.12' and platform_python_implementation == 'PyPy' and sys_platform == 'linux'",
-    "python_full_version < '3.12' and platform_python_implementation == 'PyPy' and sys_platform != 'linux'",
+    "python_full_version < '3.12' and platform_python_implementation == 'PyPy' and sys_platform == 'win32'",
+    "python_full_version < '3.12' and platform_python_implementation == 'PyPy' and sys_platform == 'emscripten'",
+    "python_full_version < '3.12' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
 ]
 
 [[package]]
@@ -467,7 +479,7 @@
 [[package]]
 name = "azure-identity"
-version = "1.25.2"
+version = "1.25.3"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "azure-core" },
@@ -476,9 +488,9 @@ dependencies = [
     { name = "msal-extensions" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/c2/3a/439a32a5e23e45f6a91f0405949dc66cfe6834aba15a430aebfc063a81e7/azure_identity-1.25.2.tar.gz", hash = "sha256:030dbaa720266c796221c6cdbd1999b408c079032c919fef725fcc348a540fe9", size = 284709, upload-time = "2026-02-11T01:55:42.323Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c5/0e/3a63efb48aa4a5ae2cfca61ee152fbcb668092134d3eb8bfda472dd5c617/azure_identity-1.25.3.tar.gz", hash = "sha256:ab23c0d63015f50b630ef6c6cf395e7262f439ce06e5d07a64e874c724f8d9e6", size = 286304, upload-time = "2026-03-13T01:12:20.892Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/9b/77/f658c76f9e9a52c784bd836aaca6fd5b9aae176f1f53273e758a2bcda695/azure_identity-1.25.2-py3-none-any.whl", hash = "sha256:1b40060553d01a72ba0d708b9a46d0f61f56312e215d8896d836653ffdc6753d", size = 191423, upload-time = "2026-02-11T01:55:44.245Z" },
+    { url = "https://files.pythonhosted.org/packages/49/9a/417b3a533e01953a7c618884df2cb05a71e7b68bdbce4fbdb62349d2a2e8/azure_identity-1.25.3-py3-none-any.whl", hash = "sha256:f4d0b956a8146f30333e071374171f3cfa7bdb8073adb8c3814b65567aa7447c", size = 192138, upload-time = "2026-03-13T01:12:22.951Z" },
 ]
@@ -623,14 +635,15 @@ wheels = [
 [[package]]
 name = "beautifulsoup4"
-version = "4.12.2"
+version = "4.14.3"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "soupsieve" },
+    { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/af/0b/44c39cf3b18a9280950ad63a579ce395dda4c32193ee9da7ff0aed547094/beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da", size = 505113, upload-time = "2023-04-07T15:02:49.038Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c3/b0/1c6a16426d389813b48d95e26898aff79abbde42ad353958ad95cc8c9b21/beautifulsoup4-4.14.3.tar.gz", hash = "sha256:6292b1c5186d356bba669ef9f7f051757099565ad9ada5dd630bd9de5fa7fb86", size = 627737, upload-time = "2025-11-30T15:08:26.084Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/57/f4/a69c20ee4f660081a7dedb1ac57f29be9378e04edfcb90c526b923d4bebc/beautifulsoup4-4.12.2-py3-none-any.whl", hash = "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a", size = 142979, upload-time = "2023-04-07T15:02:50.77Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/39/47f9197bdd44df24d67ac8893641e16f386c984a0619ef2ee4c51fbbc019/beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb", size = 107721, upload-time = "2025-11-30T15:08:24.087Z" },
 ]
@@ -651,18 +664,43 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/10/cb/f2ad4230dc2eb1a74edf38f1a38b9b52277f75bef262d8908e60d957e13c/blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc", size = 8458, upload-time = "2024-11-08T17:25:46.184Z" },
 ]
 
+[[package]]
+name = "blis"
+version = "1.3.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "numpy" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/d0/d0/d8cc8c9a4488a787e7fa430f6055e5bd1ddb22c340a751d9e901b82e2efe/blis-1.3.3.tar.gz", hash = 
"sha256:034d4560ff3cc43e8aa37e188451b0440e3261d989bb8a42ceee865607715ecd", size = 2644873, upload-time = "2025-11-17T12:28:30.511Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a1/0a/a4c8736bc497d386b0ffc76d321f478c03f1a4725e52092f93b38beb3786/blis-1.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e10c8d3e892b1dbdff365b9d00e08291876fc336915bf1a5e9f188ed087e1a91", size = 6925522, upload-time = "2025-11-17T12:27:29.199Z" }, + { url = "https://files.pythonhosted.org/packages/83/5a/3437009282f23684ecd3963a8b034f9307cdd2bf4484972e5a6b096bf9ac/blis-1.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:66e6249564f1db22e8af1e0513ff64134041fa7e03c8dd73df74db3f4d8415a7", size = 1232787, upload-time = "2025-11-17T12:27:30.996Z" }, + { url = "https://files.pythonhosted.org/packages/d1/0e/82221910d16259ce3017c1442c468a3f206a4143a96fbba9f5b5b81d62e8/blis-1.3.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7260da065958b4e5475f62f44895ef9d673b0f47dcf61b672b22b7dae1a18505", size = 2844596, upload-time = "2025-11-17T12:27:32.601Z" }, + { url = "https://files.pythonhosted.org/packages/6c/93/ab547f1a5c23e20bca16fbcf04021c32aac3f969be737ea4980509a7ca90/blis-1.3.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e9327a6ca67de8ae76fe071e8584cc7f3b2e8bfadece4961d40f2826e1cda2df", size = 11377746, upload-time = "2025-11-17T12:27:35.342Z" }, + { url = "https://files.pythonhosted.org/packages/6e/a6/7733820aa62da32526287a63cd85c103b2b323b186c8ee43b7772ff7017c/blis-1.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c4ae70629cf302035d268858a10ca4eb6242a01b2dc8d64422f8e6dcb8a8ee74", size = 3041954, upload-time = "2025-11-17T12:27:37.479Z" }, + { url = "https://files.pythonhosted.org/packages/87/53/e39d67fd3296b649772780ca6aab081412838ecb54e0b0c6432d01626a50/blis-1.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:45866a9027d43b93e8b59980a23c5d7358b6536fc04606286e39fdcfce1101c2", size = 14251222, upload-time = "2025-11-17T12:27:39.705Z" }, + { url = "https://files.pythonhosted.org/packages/ea/44/b749f8777b020b420bceaaf60f66432fc30cc904ca5b69640ec9cbef11ed/blis-1.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:27f82b8633030f8d095d2b412dffa7eb6dbc8ee43813139909a20012e54422ea", size = 6171233, upload-time = "2025-11-17T12:27:41.921Z" }, + { url = "https://files.pythonhosted.org/packages/16/d1/429cf0cf693d4c7dc2efed969bd474e315aab636e4a95f66c4ed7264912d/blis-1.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2a1c74e100665f8e918ebdbae2794576adf1f691680b5cdb8b29578432f623ef", size = 6929663, upload-time = "2025-11-17T12:27:44.482Z" }, + { url = "https://files.pythonhosted.org/packages/11/69/363c8df8d98b3cc97be19aad6aabb2c9c53f372490d79316bdee92d476e7/blis-1.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3f6c595185176ce021316263e1a1d636a3425b6c48366c1fd712d08d0b71849a", size = 1230939, upload-time = "2025-11-17T12:27:46.19Z" }, + { url = "https://files.pythonhosted.org/packages/96/2a/fbf65d906d823d839076c5150a6f8eb5ecbc5f9135e0b6510609bda1e6b7/blis-1.3.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d734b19fba0be7944f272dfa7b443b37c61f9476d9ab054a9ac53555ceadd2e0", size = 2818835, upload-time = "2025-11-17T12:27:48.167Z" }, + { url = "https://files.pythonhosted.org/packages/d5/ad/58deaa3ad856dd3cc96493e40ffd2ed043d18d4d304f85a65cde1ccbf644/blis-1.3.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:1ef6d6e2b599a3a2788eb6d9b443533961265aa4ec49d574ed4bb846e548dcdb", size = 11366550, upload-time = "2025-11-17T12:27:49.958Z" }, + { url = "https://files.pythonhosted.org/packages/78/82/816a7adfe1f7acc8151f01ec86ef64467a3c833932d8f19f8e06613b8a4e/blis-1.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8c888438ae99c500422d50698e3028b65caa8ebb44e24204d87fda2df64058f7", size = 3023686, upload-time = "2025-11-17T12:27:52.062Z" }, + { url = "https://files.pythonhosted.org/packages/1e/e2/0e93b865f648b5519360846669a35f28ee8f4e1d93d054f6850d8afbabde/blis-1.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8177879fd3590b5eecdd377f9deafb5dc8af6d684f065bd01553302fb3fcf9a7", size = 14250939, upload-time = "2025-11-17T12:27:53.847Z" }, + { url = "https://files.pythonhosted.org/packages/20/07/fb43edc2ff0a6a367e4a94fc39eb3b85aa1e55e24cc857af2db145ce9f0d/blis-1.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:f20f7ad69aaffd1ce14fe77de557b6df9b61e0c9e582f75a843715d836b5c8af", size = 6192759, upload-time = "2025-11-17T12:27:56.176Z" }, +] + [[package]] name = "boto3" -version = "1.42.65" +version = "1.42.68" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "botocore" }, { name = "jmespath" }, { name = "s3transfer" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1e/c9/8ff8a901cf62374f1289cf36391f855e1702c70f545c28d1b57608a84ff2/boto3-1.42.65.tar.gz", hash = "sha256:c740af6bdaebcc1a00f3827a5729050bf6fc820ee148bf7d06f28db11c80e2a1", size = 112805, upload-time = "2026-03-10T19:44:58.255Z" } +sdist = { url = "https://files.pythonhosted.org/packages/06/ae/60c642aa5413e560b671da825329f510b29a77274ed0f580bde77562294d/boto3-1.42.68.tar.gz", hash = "sha256:3f349f967ab38c23425626d130962bcb363e75f042734fe856ea8c5a00eef03c", size = 112761, upload-time = "2026-03-13T19:32:17.137Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/46/bb/ace5921655df51e3c9b787b3f0bd6aa25548e5cf1dabae02e53fa88f2d98/boto3-1.42.65-py3-none-any.whl", hash = "sha256:cc7f2e0aec6c68ee5b10232cf3e01326acf6100bc785a770385b61a0474b31f4", size = 140556, upload-time = "2026-03-10T19:44:55.433Z" }, + { url = "https://files.pythonhosted.org/packages/fb/f6/dc6e993479dbb597d68223fbf61cb026511737696b15bd7d2a33e9b2c24f/boto3-1.42.68-py3-none-any.whl", hash = "sha256:dbff353eb7dc93cbddd7926ed24793e0174c04adbe88860dfa639568442e4962", size = 140556, upload-time = "2026-03-13T19:32:14.951Z" }, ] [[package]] @@ -686,16 +724,16 @@ bedrock-runtime = [ [[package]] name = "botocore" -version = "1.42.65" +version = "1.42.68" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jmespath" }, { name = "python-dateutil" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/2e/81/2c832e2117d24da4fe800861e8ddd19bbaa308623b1198eb2c2cc6fcd3d4/botocore-1.42.65.tar.gz", hash = "sha256:7d52c148df07f70c375eeda58f99b439c7c7836c25df74cccfba3bb6e12444d2", size = 14970239, upload-time = "2026-03-10T19:44:43.686Z" } +sdist = { url = "https://files.pythonhosted.org/packages/3f/22/87502d5fbbfa8189406a617b30b1e2a3dc0ab2669f7268e91b385c1c1c7a/botocore-1.42.68.tar.gz", hash = "sha256:3951c69e12ac871dda245f48dac5c7dd88ea1bfdd74a8879ec356cf2874b806a", size = 14994514, upload-time = "2026-03-13T19:32:03.577Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8e/9e/2ca03a55408c0820d7f0a04ae52bc6dfc7e4fff1f007a90135a68e056c93/botocore-1.42.65-py3-none-any.whl", hash = "sha256:0283c332ce00cbd1b894e86b7bed89dd624a5ca3a4ee62ec4db3898d16652e98", size = 
14644794, upload-time = "2026-03-10T19:44:37.442Z" }, + { url = "https://files.pythonhosted.org/packages/3c/2a/1428f6594799780fe6ee845d8e6aeffafe026cd16a70c878684e2dcbbfc8/botocore-1.42.68-py3-none-any.whl", hash = "sha256:9df7da26374601f890e2f115bfa573d65bf15b25fe136bb3aac809f6145f52ab", size = 14668816, upload-time = "2026-03-13T19:31:58.572Z" }, ] [[package]] @@ -818,9 +856,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fb/2b/a64c2d25a37aeb921fddb929111413049fc5f8b9a4c1aefaffaafe768d54/cachetools-5.3.3-py3-none-any.whl", hash = "sha256:0abad1021d3f8325b2fc1d2e9c8b9c9d57b04c3932657a72465447332c24d945", size = 9325, upload-time = "2024-02-26T20:33:20.308Z" }, ] +[[package]] +name = "catalogue" +version = "2.0.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/38/b4/244d58127e1cdf04cf2dc7d9566f0d24ef01d5ce21811bab088ecc62b5ea/catalogue-2.0.10.tar.gz", hash = "sha256:4f56daa940913d3f09d589c191c74e5a6d51762b3a9e37dd53b7437afd6cda15", size = 19561, upload-time = "2023-09-25T06:29:24.962Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/96/d32b941a501ab566a16358d68b6eb4e4acc373fab3c3c4d7d9e649f7b4bb/catalogue-2.0.10-py3-none-any.whl", hash = "sha256:58c2de0020aa90f4a2da7dfad161bf7b3b054c86a5f09fcedc0b2b740c109a9f", size = 17325, upload-time = "2023-09-25T06:29:23.337Z" }, +] + [[package]] name = "celery" -version = "5.5.3" +version = "5.6.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "billiard" }, @@ -830,11 +877,12 @@ dependencies = [ { name = "click-repl" }, { name = "kombu" }, { name = "python-dateutil" }, + { name = "tzlocal" }, { name = "vine" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bb/7d/6c289f407d219ba36d8b384b42489ebdd0c84ce9c413875a8aae0c85f35b/celery-5.5.3.tar.gz", hash = "sha256:6c972ae7968c2b5281227f01c3a3f984037d21c5129d07bf3550cc2afc6b10a5", size = 1667144, upload-time = "2025-06-01T11:08:12.563Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8f/9d/3d13596519cfa7207a6f9834f4b082554845eb3cd2684b5f8535d50c7c44/celery-5.6.2.tar.gz", hash = "sha256:4a8921c3fcf2ad76317d3b29020772103581ed2454c4c042cc55dcc43585009b", size = 1718802, upload-time = "2026-01-04T12:35:58.012Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c9/af/0dcccc7fdcdf170f9a1585e5e96b6fb0ba1749ef6be8c89a6202284759bd/celery-5.5.3-py3-none-any.whl", hash = "sha256:0b5761a07057acee94694464ca482416b959568904c9dfa41ce8413a7d65d525", size = 438775, upload-time = "2025-06-01T11:08:09.94Z" }, + { url = "https://files.pythonhosted.org/packages/dd/bd/9ecd619e456ae4ba73b6583cc313f26152afae13e9a82ac4fe7f8856bfd1/celery-5.6.2-py3-none-any.whl", hash = "sha256:3ffafacbe056951b629c7abcf9064c4a2366de0bdfc9fdba421b97ebb68619a5", size = 445502, upload-time = "2026-01-04T12:35:55.894Z" }, ] [[package]] @@ -1106,7 +1154,7 @@ wheels = [ [[package]] name = "clickzetta-connector-python" -version = "0.8.107" +version = "0.8.106" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "future" }, @@ -1120,7 +1168,16 @@ dependencies = [ { name = "urllib3" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/19/b4/91dfe25592bbcaf7eede05849c77d09d43a2656943585bbcf7ba4cc604bc/clickzetta_connector_python-0.8.107-py3-none-any.whl", hash = "sha256:7f28752bfa0a50e89ed218db0540c02c6bfbfdae3589ac81cf28523d7caa93b0", size = 76864, upload-time = "2025-12-01T07:56:39.177Z" }, + { url = 
"https://files.pythonhosted.org/packages/23/38/749c708619f402d4d582dfa73fbeb64ade77b1f250a93bd064d2a1aa3776/clickzetta_connector_python-0.8.106-py3-none-any.whl", hash = "sha256:120d6700051d97609dbd6655c002ab3bc260b7c8e67d39dfc7191e749563f7b4", size = 78121, upload-time = "2025-10-29T02:38:15.014Z" }, +] + +[[package]] +name = "cloudpathlib" +version = "0.23.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f4/18/2ac35d6b3015a0c74e923d94fc69baf8307f7c3233de015d69f99e17afa8/cloudpathlib-0.23.0.tar.gz", hash = "sha256:eb38a34c6b8a048ecfd2b2f60917f7cbad4a105b7c979196450c2f541f4d6b4b", size = 53126, upload-time = "2025-10-07T22:47:56.278Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ae/8a/c4bb04426d608be4a3171efa2e233d2c59a5c8937850c10d098e126df18e/cloudpathlib-0.23.0-py3-none-any.whl", hash = "sha256:8520b3b01468fee77de37ab5d50b1b524ea6b4a8731c35d1b7407ac0cd716002", size = 62755, upload-time = "2025-10-07T22:47:54.905Z" }, ] [[package]] @@ -1167,6 +1224,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a7/06/3d6badcf13db419e25b07041d9c7b4a2c331d3f4e7134445ec5df57714cd/coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934", size = 46018, upload-time = "2021-06-11T10:22:42.561Z" }, ] +[[package]] +name = "confection" +version = "0.1.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "srsly" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/51/d3/57c6631159a1b48d273b40865c315cf51f89df7a9d1101094ef12e3a37c2/confection-0.1.5.tar.gz", hash = "sha256:8e72dd3ca6bd4f48913cd220f10b8275978e740411654b6e8ca6d7008c590f0e", size = 38924, upload-time = "2024-05-31T16:17:01.559Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/00/3106b1854b45bd0474ced037dfe6b73b90fe68a68968cef47c23de3d43d2/confection-0.1.5-py3-none-any.whl", hash = "sha256:e29d3c3f8eac06b3f77eb9dfb4bf2fc6bcc9622a98ca00a698e3d019c6430b14", size = 35451, upload-time = "2024-05-31T16:16:59.075Z" }, +] + [[package]] name = "cos-python-sdk-v5" version = "1.9.41" @@ -1342,6 +1412,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bc/58/6b3d24e6b9bc474a2dcdee65dfd1f008867015408a271562e4b690561a4d/cryptography-46.0.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:8456928655f856c6e1533ff59d5be76578a7157224dbd9ce6872f25055ab9ab7", size = 3407605, upload-time = "2026-02-10T19:18:29.233Z" }, ] +[[package]] +name = "cymem" +version = "2.0.13" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c0/8f/2f0fbb32535c3731b7c2974c569fb9325e0a38ed5565a08e1139a3b71e82/cymem-2.0.13.tar.gz", hash = "sha256:1c91a92ae8c7104275ac26bd4d29b08ccd3e7faff5893d3858cb6fadf1bc1588", size = 12320, upload-time = "2025-11-14T14:58:36.902Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/64/1db41f7576a6b69f70367e3c15e968fd775ba7419e12059c9966ceb826f8/cymem-2.0.13-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:673183466b0ff2e060d97ec5116711d44200b8f7be524323e080d215ee2d44a5", size = 43587, upload-time = "2025-11-14T14:57:22.39Z" }, + { url = "https://files.pythonhosted.org/packages/81/13/57f936fc08551323aab3f92ff6b7f4d4b89d5b4e495c870a67cb8d279757/cymem-2.0.13-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bee2791b3f6fc034ce41268851462bf662ff87e8947e35fb6dd0115b4644a61f", size = 43139, upload-time = "2025-11-14T14:57:23.363Z" 
}, + { url = "https://files.pythonhosted.org/packages/32/a6/9345754be51e0479aa387b7b6cffc289d0fd3201aaeb8dade4623abd1e02/cymem-2.0.13-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f3aee3adf16272bca81c5826eed55ba3c938add6d8c9e273f01c6b829ecfde22", size = 245063, upload-time = "2025-11-14T14:57:24.839Z" }, + { url = "https://files.pythonhosted.org/packages/d6/01/6bc654101526fa86e82bf6b05d99b2cd47c30a333cfe8622c26c0592beb2/cymem-2.0.13-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:30c4e75a3a1d809e89106b0b21803eb78e839881aa1f5b9bd27b454bc73afde3", size = 244496, upload-time = "2025-11-14T14:57:26.42Z" }, + { url = "https://files.pythonhosted.org/packages/c4/fb/853b7b021e701a1f41687f3704d5f469aeb2a4f898c3fbb8076806885955/cymem-2.0.13-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ec99efa03cf8ec11c8906aa4d4cc0c47df393bc9095c9dd64b89b9b43e220b04", size = 243287, upload-time = "2025-11-14T14:57:27.542Z" }, + { url = "https://files.pythonhosted.org/packages/d4/2b/0e4664cafc581de2896d75000651fd2ce7094d33263f466185c28ffc96e4/cymem-2.0.13-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c90a6ecba994a15b17a3f45d7ec74d34081df2f73bd1b090e2adc0317e4e01b6", size = 248287, upload-time = "2025-11-14T14:57:29.055Z" }, + { url = "https://files.pythonhosted.org/packages/21/0f/f94c6950edbfc2aafb81194fc40b6cacc8e994e9359d3cb4328c5705b9b5/cymem-2.0.13-cp311-cp311-win_amd64.whl", hash = "sha256:ce821e6ba59148ed17c4567113b8683a6a0be9c9ac86f14e969919121efb61a5", size = 40116, upload-time = "2025-11-14T14:57:30.592Z" }, + { url = "https://files.pythonhosted.org/packages/00/df/2455eff6ac0381ff165db6883b311f7016e222e3dd62185517f8e8187ed0/cymem-2.0.13-cp311-cp311-win_arm64.whl", hash = "sha256:0dca715e708e545fd1d97693542378a00394b20a37779c1ae2c8bdbb43acef79", size = 36349, upload-time = "2025-11-14T14:57:31.573Z" }, + { url = "https://files.pythonhosted.org/packages/c9/52/478a2911ab5028cb710b4900d64aceba6f4f882fcb13fd8d40a456a1b6dc/cymem-2.0.13-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e8afbc5162a0fe14b6463e1c4e45248a1b2fe2cbcecc8a5b9e511117080da0eb", size = 43745, upload-time = "2025-11-14T14:57:32.52Z" }, + { url = "https://files.pythonhosted.org/packages/f9/71/f0f8adee945524774b16af326bd314a14a478ed369a728a22834e6785a18/cymem-2.0.13-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c9251d889348fe79a75e9b3e4d1b5fa651fca8a64500820685d73a3acc21b6a8", size = 42927, upload-time = "2025-11-14T14:57:33.827Z" }, + { url = "https://files.pythonhosted.org/packages/62/6d/159780fe162ff715d62b809246e5fc20901cef87ca28b67d255a8d741861/cymem-2.0.13-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:742fc19764467a49ed22e56a4d2134c262d73a6c635409584ae3bf9afa092c33", size = 258346, upload-time = "2025-11-14T14:57:34.917Z" }, + { url = "https://files.pythonhosted.org/packages/eb/12/678d16f7aa1996f947bf17b8cfb917ea9c9674ef5e2bd3690c04123d5680/cymem-2.0.13-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f190a92fe46197ee64d32560eb121c2809bb843341733227f51538ce77b3410d", size = 260843, upload-time = "2025-11-14T14:57:36.503Z" }, + { url = "https://files.pythonhosted.org/packages/31/5d/0dd8c167c08cd85e70d274b7235cfe1e31b3cebc99221178eaf4bbb95c6f/cymem-2.0.13-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d670329ee8dbbbf241b7c08069fe3f1d3a1a3e2d69c7d05ea008a7010d826298", size = 254607, upload-time = "2025-11-14T14:57:38.036Z" }, + { url = 
"https://files.pythonhosted.org/packages/b7/c9/d6514a412a1160aa65db539836b3d47f9b59f6675f294ec34ae32f867c82/cymem-2.0.13-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a84ba3178d9128b9ffb52ce81ebab456e9fe959125b51109f5b73ebdfc6b60d6", size = 262421, upload-time = "2025-11-14T14:57:39.265Z" }, + { url = "https://files.pythonhosted.org/packages/dd/fe/3ee37d02ca4040f2fb22d34eb415198f955862b5dd47eee01df4c8f5454c/cymem-2.0.13-cp312-cp312-win_amd64.whl", hash = "sha256:2ff1c41fd59b789579fdace78aa587c5fc091991fa59458c382b116fc36e30dc", size = 40176, upload-time = "2025-11-14T14:57:40.706Z" }, + { url = "https://files.pythonhosted.org/packages/94/fb/1b681635bfd5f2274d0caa8f934b58435db6c091b97f5593738065ddb786/cymem-2.0.13-cp312-cp312-win_arm64.whl", hash = "sha256:6bbd701338df7bf408648191dff52472a9b334f71bcd31a21a41d83821050f67", size = 35959, upload-time = "2025-11-14T14:57:41.682Z" }, +] + [[package]] name = "databricks-sdk" version = "0.73.0" @@ -1356,19 +1450,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a7/27/b822b474aaefb684d11df358d52e012699a2a8af231f9b47c54b73f280cb/databricks_sdk-0.73.0-py3-none-any.whl", hash = "sha256:a4d3cfd19357a2b459d2dc3101454d7f0d1b62865ce099c35d0c342b66ac64ff", size = 753896, upload-time = "2025-11-05T06:52:56.451Z" }, ] -[[package]] -name = "dataclasses-json" -version = "0.6.7" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "marshmallow" }, - { name = "typing-inspect" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/64/a4/f71d9cf3a5ac257c993b5ca3f93df5f7fb395c725e7f1e6479d2514173c3/dataclasses_json-0.6.7.tar.gz", hash = "sha256:b6b3e528266ea45b9535223bc53ca645f5208833c29229e847b3f26a1cc55fc0", size = 32227, upload-time = "2024-06-09T16:20:19.103Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a", size = 28686, upload-time = "2024-06-09T16:20:16.715Z" }, -] - [[package]] name = "dateparser" version = "1.2.2" @@ -1609,6 +1690,7 @@ vdb = [ { name = "clickzetta-connector-python" }, { name = "couchbase" }, { name = "elasticsearch" }, + { name = "holo-search-sdk" }, { name = "intersystems-irispython" }, { name = "mo-vector" }, { name = "mysql-connector-python" }, @@ -1634,12 +1716,12 @@ requires-dist = [ { name = "aliyun-log-python-sdk", specifier = "~=0.9.37" }, { name = "apscheduler", specifier = ">=3.11.0" }, { name = "arize-phoenix-otel", specifier = "~=0.15.0" }, - { name = "azure-identity", specifier = "==1.25.2" }, - { name = "beautifulsoup4", specifier = "==4.12.2" }, - { name = "boto3", specifier = "==1.42.65" }, + { name = "azure-identity", specifier = "==1.25.3" }, + { name = "beautifulsoup4", specifier = "==4.14.3" }, + { name = "boto3", specifier = "==1.42.68" }, { name = "bs4", specifier = "~=0.0.1" }, { name = "cachetools", specifier = "~=5.3.0" }, - { name = "celery", specifier = "~=5.5.2" }, + { name = "celery", specifier = "~=5.6.2" }, { name = "charset-normalizer", specifier = ">=3.4.4" }, { name = "croniter", specifier = ">=6.0.0" }, { name = "fastopenapi", extras = ["flask"], specifier = ">=0.7.0" }, @@ -1667,8 +1749,8 @@ requires-dist = [ { name = "jsonschema", specifier = ">=4.25.1" }, { name = "langfuse", specifier = "~=2.51.3" }, { name = "langsmith", specifier = "~=0.7.16" }, - { name = "litellm", specifier = "==1.82.1" }, - { name = "markdown", specifier = "~=3.8.1" 
}, + { name = "litellm", specifier = "==1.82.2" }, + { name = "markdown", specifier = "~=3.10.2" }, { name = "mlflow-skinny", specifier = ">=3.0.0" }, { name = "numpy", specifier = "~=1.26.4" }, { name = "openpyxl", specifier = "~=3.1.5" }, @@ -1691,33 +1773,33 @@ requires-dist = [ { name = "opentelemetry-util-http", specifier = "==0.49b0" }, { name = "opik", specifier = "~=1.10.37" }, { name = "packaging", specifier = "~=23.2" }, - { name = "pandas", extras = ["excel", "output-formatting", "performance"], specifier = "~=2.2.2" }, + { name = "pandas", extras = ["excel", "output-formatting", "performance"], specifier = "~=3.0.1" }, { name = "psycogreen", specifier = "~=1.0.2" }, { name = "psycopg2-binary", specifier = "~=2.9.6" }, { name = "pycryptodome", specifier = "==3.23.0" }, { name = "pydantic", specifier = "~=2.12.5" }, { name = "pydantic-extra-types", specifier = "~=2.11.0" }, { name = "pydantic-settings", specifier = "~=2.13.1" }, - { name = "pyjwt", specifier = "~=2.11.0" }, - { name = "pypdfium2", specifier = "==5.2.0" }, + { name = "pyjwt", specifier = "~=2.12.0" }, + { name = "pypdfium2", specifier = "==5.6.0" }, { name = "python-docx", specifier = "~=1.2.0" }, - { name = "python-dotenv", specifier = "==1.0.1" }, + { name = "python-dotenv", specifier = "==1.2.2" }, { name = "pyyaml", specifier = "~=6.0.1" }, { name = "readabilipy", specifier = "~=0.3.0" }, { name = "redis", extras = ["hiredis"], specifier = "~=7.3.0" }, - { name = "resend", specifier = "~=2.9.0" }, + { name = "resend", specifier = "~=2.23.0" }, { name = "sendgrid", specifier = "~=6.12.3" }, - { name = "sentry-sdk", extras = ["flask"], specifier = "~=2.28.0" }, + { name = "sentry-sdk", extras = ["flask"], specifier = "~=2.54.0" }, { name = "sqlalchemy", specifier = "~=2.0.29" }, - { name = "sseclient-py", specifier = "~=1.8.0" }, - { name = "starlette", specifier = "==0.49.1" }, + { name = "sseclient-py", specifier = "~=1.9.0" }, + { name = "starlette", specifier = "==0.52.1" }, { name = "tiktoken", specifier = "~=0.12.0" }, { name = "transformers", specifier = "~=5.3.0" }, - { name = "unstructured", extras = ["docx", "epub", "md", "ppt", "pptx"], specifier = "~=0.18.18" }, + { name = "unstructured", extras = ["docx", "epub", "md", "ppt", "pptx"], specifier = "~=0.21.5" }, { name = "weave", specifier = ">=0.52.16" }, { name = "weaviate-client", specifier = "==4.17.0" }, { name = "webvtt-py", specifier = "~=0.5.1" }, - { name = "yarl", specifier = "~=1.18.3" }, + { name = "yarl", specifier = "~=1.23.0" }, ] [package.metadata.requires-dev] @@ -1727,7 +1809,7 @@ dev = [ { name = "celery-types", specifier = ">=0.23.0" }, { name = "coverage", specifier = "~=7.13.4" }, { name = "dotenv-linter", specifier = "~=0.7.0" }, - { name = "faker", specifier = "~=40.8.0" }, + { name = "faker", specifier = "~=40.11.0" }, { name = "hypothesis", specifier = ">=6.131.15" }, { name = "import-linter", specifier = ">=2.3" }, { name = "lxml-stubs", specifier = "~=0.5.1" }, @@ -1744,7 +1826,7 @@ dev = [ { name = "ruff", specifier = "~=0.15.5" }, { name = "scipy-stubs", specifier = ">=1.15.3.0" }, { name = "sseclient-py", specifier = ">=1.8.0" }, - { name = "testcontainers", specifier = "~=4.13.2" }, + { name = "testcontainers", specifier = "~=4.14.1" }, { name = "types-aiofiles", specifier = "~=25.1.0" }, { name = "types-beautifulsoup4", specifier = "~=4.12.0" }, { name = "types-cachetools", specifier = "~=6.2.0" }, @@ -1809,6 +1891,7 @@ vdb = [ { name = "clickzetta-connector-python", specifier = ">=0.8.102" }, { name = "couchbase", 
specifier = "~=4.3.0" }, { name = "elasticsearch", specifier = "==8.14.0" }, + { name = "holo-search-sdk", specifier = ">=0.4.1" }, { name = "intersystems-irispython", specifier = ">=5.1.0" }, { name = "mo-vector", specifier = "~=0.1.13" }, { name = "mysql-connector-python", specifier = ">=9.3.0" }, @@ -1977,29 +2060,30 @@ wheels = [ [[package]] name = "faker" -version = "40.8.0" +version = "40.11.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "tzdata", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/70/03/14428edc541467c460d363f6e94bee9acc271f3e62470630fc9a647d0cf2/faker-40.8.0.tar.gz", hash = "sha256:936a3c9be6c004433f20aa4d99095df5dec82b8c7ad07459756041f8c1728875", size = 1956493, upload-time = "2026-03-04T16:18:48.161Z" } +sdist = { url = "https://files.pythonhosted.org/packages/94/dc/b68e5378e5a7db0ab776efcdd53b6fe374b29d703e156fd5bb4c5437069e/faker-40.11.0.tar.gz", hash = "sha256:7c419299103b13126bd02ec14bd2b47b946edb5a5eedf305e66a193b25f9a734", size = 1957570, upload-time = "2026-03-13T14:36:11.844Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4c/3b/c6348f1e285e75b069085b18110a4e6325b763a5d35d5e204356fc7c20b3/faker-40.8.0-py3-none-any.whl", hash = "sha256:eb21bdba18f7a8375382eb94fb436fce07046893dc94cb20817d28deb0c3d579", size = 1989124, upload-time = "2026-03-04T16:18:46.45Z" }, + { url = "https://files.pythonhosted.org/packages/b1/fa/a86c6ba66f0308c95b9288b1e3eaccd934b545646f63494a86f1ec2f8c8e/faker-40.11.0-py3-none-any.whl", hash = "sha256:0e9816c950528d2a37d74863f3ef389ea9a3a936cbcde0b11b8499942e25bf90", size = 1989457, upload-time = "2026-03-13T14:36:09.792Z" }, ] [[package]] name = "fastapi" -version = "0.122.0" +version = "0.135.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-doc" }, { name = "pydantic" }, { name = "starlette" }, { name = "typing-extensions" }, + { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b2/de/3ee97a4f6ffef1fb70bf20561e4f88531633bb5045dc6cebc0f8471f764d/fastapi-0.122.0.tar.gz", hash = "sha256:cd9b5352031f93773228af8b4c443eedc2ac2aa74b27780387b853c3726fb94b", size = 346436, upload-time = "2025-11-24T19:17:47.95Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/7b/f8e0211e9380f7195ba3f3d40c292594fd81ba8ec4629e3854c353aaca45/fastapi-0.135.1.tar.gz", hash = "sha256:d04115b508d936d254cea545b7312ecaa58a7b3a0f84952535b4c9afae7668cd", size = 394962, upload-time = "2026-03-01T18:18:29.369Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7a/93/aa8072af4ff37b795f6bbf43dcaf61115f40f49935c7dbb180c9afc3f421/fastapi-0.122.0-py3-none-any.whl", hash = "sha256:a456e8915dfc6c8914a50d9651133bd47ec96d331c5b44600baa635538a30d67", size = 110671, upload-time = "2025-11-24T19:17:45.96Z" }, + { url = "https://files.pythonhosted.org/packages/e4/72/42e900510195b23a56bde950d26a51f8b723846bfcaa0286e90287f0422b/fastapi-0.135.1-py3-none-any.whl", hash = "sha256:46e2fc5745924b7c840f71ddd277382af29ce1cdb7d5eab5bf697e3fb9999c9e", size = 116999, upload-time = "2026-03-01T18:18:30.831Z" }, ] [[package]] @@ -2051,11 +2135,11 @@ wheels = [ [[package]] name = "fickling" -version = "0.1.9" +version = "0.1.10" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/25/bd/ca7127df0201596b0b30f9ab3d36e565bb9d6f8f4da1560758b817e81b65/fickling-0.1.9.tar.gz", hash = 
"sha256:bb518c2fd833555183bc46b6903bb4022f3ae0436a69c3fb149cfc75eebaac33", size = 336940, upload-time = "2026-03-03T23:32:19.449Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9f/06/1818b8f52267599e54041349c553d5894e17ec8a539a246eb3f9eaf05629/fickling-0.1.10.tar.gz", hash = "sha256:8c8b76abd29936f1a5932e4087b8c8becb2d7ab1cf08549e63519ebcb2f71644", size = 338062, upload-time = "2026-03-13T16:34:29.287Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/92/49/c597bad508c74917901432b41ae5a8f036839a7fb8d0d29a89765f5d3643/fickling-0.1.9-py3-none-any.whl", hash = "sha256:ccc3ce3b84733406ade2fe749717f6e428047335157c6431eefd3e7e970a06d1", size = 52786, upload-time = "2026-03-03T23:32:17.533Z" }, + { url = "https://files.pythonhosted.org/packages/05/86/620960dff970da5311f05e25fc045dac8495557d51030e5a0827084b18fd/fickling-0.1.10-py3-none-any.whl", hash = "sha256:962c35c38ece1b3632fc119c0f4cb1eebc02dc6d65bfd93a1803afd42ca91d25", size = 52853, upload-time = "2026-03-13T16:34:27.821Z" }, ] [[package]] @@ -2878,6 +2962,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f5/a9/55a4ac9c16fdf32e92e9e22c49f61affe5135e177ca19b014484e28950f7/hiredis-3.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:04ec150e95eea3de9ff8bac754978aa17b8bf30a86d4ab2689862020945396b0", size = 22379, upload-time = "2025-10-14T16:32:22.916Z" }, ] +[[package]] +name = "holo-search-sdk" +version = "0.4.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "psycopg", extra = ["binary"] }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0b/b8/70a4999dabbba15e98d201a7399aab76ab96931ad1a27392ba5252cc9165/holo_search_sdk-0.4.1.tar.gz", hash = "sha256:9aea98b6078b9202abb568ed69d798d5e0505d2b4cc3a136a6aa84402bcd2133", size = 56701, upload-time = "2026-01-28T01:44:57.645Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/30/3059a979272f90a96f31b167443cc27675e8cc8f970a3ac0cb80bf803c70/holo_search_sdk-0.4.1-py3-none-any.whl", hash = "sha256:ef1059895ea936ff6a087f68dac92bd1ae0320e51ec5b1d4e7bed7a5dd6beb45", size = 32647, upload-time = "2026-01-28T01:44:56.098Z" }, +] + [[package]] name = "hpack" version = "4.1.0" @@ -3088,6 +3186,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, ] +[[package]] +name = "installer" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/05/18/ceeb4e3ab3aa54495775775b38ae42b10a92f42ce42dfa44da684289b8c8/installer-0.7.0.tar.gz", hash = "sha256:a26d3e3116289bb08216e0d0f7d925fcef0b0194eedfa0c944bcaaa106c4b631", size = 474349, upload-time = "2023-03-17T20:39:38.871Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/ca/1172b6638d52f2d6caa2dd262ec4c811ba59eee96d54a7701930726bce18/installer-0.7.0-py3-none-any.whl", hash = "sha256:05d1933f0a5ba7d8d6296bb6d5018e7c94fa473ceb10cf198a92ccea19c27b53", size = 453838, upload-time = "2023-03-17T20:39:36.219Z" }, +] + [[package]] name = "intersystems-irispython" version = "5.3.1" @@ -3252,7 +3359,7 @@ wheels = [ [[package]] name = "kombu" -version = "5.5.4" +version = "5.6.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "amqp" }, @@ -3260,9 +3367,9 @@ 
dependencies = [ { name = "tzdata" }, { name = "vine" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/0f/d3/5ff936d8319ac86b9c409f1501b07c426e6ad41966fedace9ef1b966e23f/kombu-5.5.4.tar.gz", hash = "sha256:886600168275ebeada93b888e831352fe578168342f0d1d5833d88ba0d847363", size = 461992, upload-time = "2025-06-01T10:19:22.281Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b6/a5/607e533ed6c83ae1a696969b8e1c137dfebd5759a2e9682e26ff1b97740b/kombu-5.6.2.tar.gz", hash = "sha256:8060497058066c6f5aed7c26d7cd0d3b574990b09de842a8c5aaed0b92cc5a55", size = 472594, upload-time = "2025-12-29T20:30:07.779Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ef/70/a07dcf4f62598c8ad579df241af55ced65bed76e42e45d3c368a6d82dbc1/kombu-5.5.4-py3-none-any.whl", hash = "sha256:a12ed0557c238897d8e518f1d1fdf84bd1516c5e305af2dacd85c2015115feb8", size = 210034, upload-time = "2025-06-01T10:19:20.436Z" }, + { url = "https://files.pythonhosted.org/packages/fb/0f/834427d8c03ff1d7e867d3db3d176470c64871753252b21b4f4897d1fa45/kombu-5.6.2-py3-none-any.whl", hash = "sha256:efcfc559da324d41d61ca311b0c64965ea35b4c55cc04ee36e55386145dace93", size = 214219, upload-time = "2025-12-29T20:30:05.74Z" }, ] [[package]] @@ -3316,7 +3423,7 @@ wheels = [ [[package]] name = "langsmith" -version = "0.7.16" +version = "0.7.17" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpx" }, @@ -3329,9 +3436,9 @@ dependencies = [ { name = "xxhash" }, { name = "zstandard" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/4d/18/b240d33e32d3f71a3c3375781cb11f3be6b27c275acdcf18c08a65a560cc/langsmith-0.7.16.tar.gz", hash = "sha256:87267d32c1220ec34bd0074d3d04b57c7394328a39a02182b62ab4ae09d28144", size = 1115428, upload-time = "2026-03-09T21:11:16.985Z" } +sdist = { url = "https://files.pythonhosted.org/packages/71/79/81041dde07a974e728db7def23c1c7255950b8874102925cc77093bc847d/langsmith-0.7.17.tar.gz", hash = "sha256:6c1b0c2863cdd6636d2a58b8d5b1b80060703d98cac2593f4233e09ac25b5a9d", size = 1132228, upload-time = "2026-03-12T20:41:10.808Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d0/a8/4202ca65561213ec84ca3800b1d4e5d37a1441cddeec533367ecbca7f408/langsmith-0.7.16-py3-none-any.whl", hash = "sha256:c84a7a06938025fe0aad992acc546dd75ce3f757ba8ee5b00ad914911d4fc02e", size = 347538, upload-time = "2026-03-09T21:11:15.02Z" }, + { url = "https://files.pythonhosted.org/packages/34/31/62689d57f4d25792bd6a3c05c868771899481be2f3e31f9e71d31e1ac4ab/langsmith-0.7.17-py3-none-any.whl", hash = "sha256:cbec10460cb6c6ecc94c18c807be88a9984838144ae6c4693c9f859f378d7d02", size = 359147, upload-time = "2026-03-12T20:41:08.758Z" }, ] [[package]] @@ -3379,7 +3486,7 @@ wheels = [ [[package]] name = "litellm" -version = "1.82.1" +version = "1.82.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -3395,9 +3502,9 @@ dependencies = [ { name = "tiktoken" }, { name = "tokenizers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/34/bd/6251e9a965ae2d7bc3342ae6c1a2d25dd265d354c502e63225451b135016/litellm-1.82.1.tar.gz", hash = "sha256:bc8427cdccc99e191e08e36fcd631c93b27328d1af789839eb3ac01a7d281890", size = 17197496, upload-time = "2026-03-10T09:10:04.438Z" } +sdist = { url = "https://files.pythonhosted.org/packages/60/12/010a86643f12ac0b004032d5927c260094299a84ed38b5ed20a8f8c7e3c4/litellm-1.82.2.tar.gz", hash = "sha256:f5f4c4049f344a88bf80b2e421bb927807687c99624515d7ff4152d533ec9dcb", size = 17353218, upload-time = 
"2026-03-13T21:24:24.5Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/57/77/0c6eca2cb049793ddf8ce9cdcd5123a35666c4962514788c4fc90edf1d3b/litellm-1.82.1-py3-none-any.whl", hash = "sha256:a9ec3fe42eccb1611883caaf8b1bf33c9f4e12163f94c7d1004095b14c379eb2", size = 15341896, upload-time = "2026-03-10T09:10:00.702Z" }, + { url = "https://files.pythonhosted.org/packages/96/e4/87e3ca82a8bf6e6bfffb42a539a1350dd6ced1b7169397bd439ba56fde10/litellm-1.82.2-py3-none-any.whl", hash = "sha256:641ed024774fa3d5b4dd9347f0efb1e31fa422fba2a6500aabedee085d1194cb", size = 15524224, upload-time = "2026-03-13T21:24:21.288Z" }, ] [[package]] @@ -3513,11 +3620,11 @@ wheels = [ [[package]] name = "markdown" -version = "3.8.1" +version = "3.10.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/db/7c/0738e5ff0adccd0b4e02c66d0446c03a3c557e02bb49b7c263d7ab56c57d/markdown-3.8.1.tar.gz", hash = "sha256:a2e2f01cead4828ee74ecca9623045f62216aef2212a7685d6eb9163f590b8c1", size = 361280, upload-time = "2025-06-18T14:50:49.618Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2b/f4/69fa6ed85ae003c2378ffa8f6d2e3234662abd02c10d216c0ba96081a238/markdown-3.10.2.tar.gz", hash = "sha256:994d51325d25ad8aa7ce4ebaec003febcce822c3f8c911e3b17c52f7f589f950", size = 368805, upload-time = "2026-02-09T14:57:26.942Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/50/34/3d1ff0cb4843a33817d06800e9383a2b2a2df4d508e37f53a40e829905d9/markdown-3.8.1-py3-none-any.whl", hash = "sha256:46cc0c0f1e5211ab2e9d453582f0b28a1bfaf058a9f7d5c50386b99b588d8811", size = 106642, upload-time = "2025-06-18T14:50:48.52Z" }, + { url = "https://files.pythonhosted.org/packages/de/1f/77fa3081e4f66ca3576c896ae5d31c3002ac6607f9747d2e3aa49227e464/markdown-3.10.2-py3-none-any.whl", hash = "sha256:e91464b71ae3ee7afd3017d9f358ef0baf158fd9a298db92f1d4761133824c36", size = 108180, upload-time = "2026-02-09T14:57:25.787Z" }, ] [[package]] @@ -3562,18 +3669,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906, upload-time = "2025-09-27T18:36:40.689Z" }, ] -[[package]] -name = "marshmallow" -version = "3.26.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "packaging" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/55/79/de6c16cc902f4fc372236926b0ce2ab7845268dcc30fb2fbb7f71b418631/marshmallow-3.26.2.tar.gz", hash = "sha256:bbe2adb5a03e6e3571b573f42527c6fe926e17467833660bebd11593ab8dfd57", size = 222095, upload-time = "2025-12-22T06:53:53.309Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/be/2f/5108cb3ee4ba6501748c4908b908e55f42a5b66245b4cfe0c99326e1ef6e/marshmallow-3.26.2-py3-none-any.whl", hash = "sha256:013fa8a3c4c276c24d26d84ce934dc964e2aa794345a0f8c7e5a7191482c8a73", size = 50964, upload-time = "2025-12-22T06:53:51.801Z" }, -] - [[package]] name = "mdurl" version = "0.1.2" @@ -3588,7 +3683,7 @@ name = "milvus-lite" version = "2.5.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "tqdm" }, + { name = "tqdm", marker = "sys_platform != 'win32'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/a9/b2/acc5024c8e8b6a0b034670b8e8af306ebd633ede777dcbf557eac4785937/milvus_lite-2.5.1-py3-none-macosx_10_9_x86_64.whl", hash = 
"sha256:6b014453200ba977be37ba660cb2d021030375fa6a35bc53c2e1d92980a0c512", size = 27934713, upload-time = "2025-06-30T04:23:37.028Z" }, @@ -3692,16 +3787,16 @@ wheels = [ [[package]] name = "msal" -version = "1.34.0" +version = "1.35.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cryptography" }, { name = "pyjwt", extra = ["crypto"] }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/cf/0e/c857c46d653e104019a84f22d4494f2119b4fe9f896c92b4b864b3b045cc/msal-1.34.0.tar.gz", hash = "sha256:76ba83b716ea5a6d75b0279c0ac353a0e05b820ca1f6682c0eb7f45190c43c2f", size = 153961, upload-time = "2025-09-22T23:05:48.989Z" } +sdist = { url = "https://files.pythonhosted.org/packages/3c/aa/5a646093ac218e4a329391d5a31e5092a89db7d2ef1637a90b82cd0b6f94/msal-1.35.1.tar.gz", hash = "sha256:70cac18ab80a053bff86219ba64cfe3da1f307c74b009e2da57ef040eb1b5656", size = 165658, upload-time = "2026-03-04T23:38:51.812Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c2/dc/18d48843499e278538890dc709e9ee3dea8375f8be8e82682851df1b48b5/msal-1.34.0-py3-none-any.whl", hash = "sha256:f669b1644e4950115da7a176441b0e13ec2975c29528d8b9e81316023676d6e1", size = 116987, upload-time = "2025-09-22T23:05:47.294Z" }, + { url = "https://files.pythonhosted.org/packages/96/86/16815fddf056ca998853c6dc525397edf0b43559bb4073a80d2bc7fe8009/msal-1.35.1-py3-none-any.whl", hash = "sha256:8f4e82f34b10c19e326ec69f44dc6b30171f2f7098f3720ea8a9f0c11832caa3", size = 119909, upload-time = "2026-03-04T23:38:50.452Z" }, ] [[package]] @@ -3761,6 +3856,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/da/7d22601b625e241d4f23ef1ebff8acfc60da633c9e7e7922e24d10f592b3/multidict-6.7.0-py3-none-any.whl", hash = "sha256:394fc5c42a333c9ffc3e421a4c85e08580d990e08b99f6bf35b4132114c5dcb3", size = 12317, upload-time = "2025-10-06T14:52:29.272Z" }, ] +[[package]] +name = "murmurhash" +version = "1.0.15" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/23/2e/88c147931ea9725d634840d538622e94122bceaf346233349b7b5c62964b/murmurhash-1.0.15.tar.gz", hash = "sha256:58e2b27b7847f9e2a6edf10b47a8c8dd70a4705f45dccb7bf76aeadacf56ba01", size = 13291, upload-time = "2025-11-14T09:51:15.272Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6b/ca/77d3e69924a8eb4508bb4f0ad34e46adbeedeb93616a71080e61e53dad71/murmurhash-1.0.15-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f32307fb9347680bb4fe1cbef6362fb39bd994f1b59abd8c09ca174e44199081", size = 27397, upload-time = "2025-11-14T09:50:03.077Z" }, + { url = "https://files.pythonhosted.org/packages/e6/53/a936f577d35b245d47b310f29e5e9f09fcac776c8c992f1ab51a9fb0cee2/murmurhash-1.0.15-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:539d8405885d1d19c005f3a2313b47e8e54b0ee89915eb8dfbb430b194328e6c", size = 27692, upload-time = "2025-11-14T09:50:04.144Z" }, + { url = "https://files.pythonhosted.org/packages/4d/64/5f8cfd1fd9cbeb43fcff96672f5bd9e7e1598d1c970f808ecd915490dc20/murmurhash-1.0.15-cp311-cp311-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c4cd739a00f5a4602201b74568ddabae46ec304719d9be752fd8f534a9464b5e", size = 128396, upload-time = "2025-11-14T09:50:05.268Z" }, + { url = "https://files.pythonhosted.org/packages/ac/10/d9ce29d559a75db0d8a3f13ea12c7f541ec9de2afca38dc70418b890eedb/murmurhash-1.0.15-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:44d211bcc3ec203c47dac06f48ee871093fcbdffa6652a6cc5ea7180306680a8", size = 128687, upload-time = "2025-11-14T09:50:06.527Z" }, + { url = "https://files.pythonhosted.org/packages/48/cd/dc97ab7e68cdfa1537a56e36dbc846c5a66701cc39ecee2d4399fe61996c/murmurhash-1.0.15-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:f9bf47101354fb1dc4b2e313192566f04ba295c28a37e2f71c692759acc1ba3c", size = 128198, upload-time = "2025-11-14T09:50:08.062Z" }, + { url = "https://files.pythonhosted.org/packages/53/73/32f2aaa22c1e4afae337106baf0c938abf36a6cc879cfee83a00461bbbf7/murmurhash-1.0.15-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3c69b4d3bcd6233782a78907fe10b9b7a796bdc5d28060cf097d067bec280a5d", size = 127214, upload-time = "2025-11-14T09:50:09.265Z" }, + { url = "https://files.pythonhosted.org/packages/82/ed/812103a7f353eba2d83655b08205e13a38c93b4db0692f94756e1eb44516/murmurhash-1.0.15-cp311-cp311-win_amd64.whl", hash = "sha256:e43a69496342ce530bdd670264cb7c8f45490b296e4764c837ce577e3c7ebd53", size = 25241, upload-time = "2025-11-14T09:50:10.373Z" }, + { url = "https://files.pythonhosted.org/packages/eb/5f/2c511bdd28f7c24da37a00116ffd0432b65669d098f0d0260c66ac0ffdc2/murmurhash-1.0.15-cp311-cp311-win_arm64.whl", hash = "sha256:f3e99a6ee36ef5372df5f138e3d9c801420776d3641a34a49e5c2555f44edba7", size = 23216, upload-time = "2025-11-14T09:50:11.651Z" }, + { url = "https://files.pythonhosted.org/packages/b6/46/be8522d3456fdccf1b8b049c6d82e7a3c1114c4fc2cfe14b04cba4b3e701/murmurhash-1.0.15-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d37e3ae44746bca80b1a917c2ea625cf216913564ed43f69d2888e5df97db0cb", size = 27884, upload-time = "2025-11-14T09:50:13.133Z" }, + { url = "https://files.pythonhosted.org/packages/ed/cc/630449bf4f6178d7daf948ce46ad00b25d279065fc30abd8d706be3d87e0/murmurhash-1.0.15-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0861cb11039409eaf46878456b7d985ef17b6b484103a6fc367b2ecec846891d", size = 27855, upload-time = "2025-11-14T09:50:14.859Z" }, + { url = "https://files.pythonhosted.org/packages/ff/30/ea8f601a9bf44db99468696efd59eb9cff1157cd55cb586d67116697583f/murmurhash-1.0.15-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5a301decfaccfec70fe55cb01dde2a012c3014a874542eaa7cc73477bb749616", size = 134088, upload-time = "2025-11-14T09:50:15.958Z" }, + { url = "https://files.pythonhosted.org/packages/c9/de/c40ce8c0877d406691e735b8d6e9c815f36a82b499d358313db5dbe219d7/murmurhash-1.0.15-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:32c6fde7bd7e9407003370a07b5f4addacabe1556ad3dc2cac246b7a2bba3400", size = 133978, upload-time = "2025-11-14T09:50:17.572Z" }, + { url = "https://files.pythonhosted.org/packages/47/84/bd49963ecd84ebab2fe66595e2d1ed41d5e8b5153af5dc930f0bd827007c/murmurhash-1.0.15-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5d8b43a7011540dc3c7ce66f2134df9732e2bc3bbb4a35f6458bc755e48bde26", size = 132956, upload-time = "2025-11-14T09:50:18.742Z" }, + { url = "https://files.pythonhosted.org/packages/4f/7c/2530769c545074417c862583f05f4245644599f1e9ff619b3dfe2969aafc/murmurhash-1.0.15-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:43bf4541892ecd95963fcd307bf1c575fc0fee1682f41c93007adee71ca2bb40", size = 134184, upload-time = "2025-11-14T09:50:19.941Z" }, + { url = "https://files.pythonhosted.org/packages/84/a4/b249b042f5afe34d14ada2dc4afc777e883c15863296756179652e081c44/murmurhash-1.0.15-cp312-cp312-win_amd64.whl", hash = 
"sha256:f4ac15a2089dc42e6eb0966622d42d2521590a12c92480aafecf34c085302cca", size = 25647, upload-time = "2025-11-14T09:50:21.049Z" }, + { url = "https://files.pythonhosted.org/packages/13/bf/028179259aebc18fd4ba5cae2601d1d47517427a537ab44336446431a215/murmurhash-1.0.15-cp312-cp312-win_arm64.whl", hash = "sha256:4a70ca4ae19e600d9be3da64d00710e79dde388a4d162f22078d64844d0ebdda", size = 23338, upload-time = "2025-11-14T09:50:22.359Z" }, +] + [[package]] name = "mypy" version = "1.19.1" @@ -4396,7 +4515,7 @@ wheels = [ [[package]] name = "opik" -version = "1.10.37" +version = "1.10.39" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "boto3-stubs", extra = ["bedrock-runtime"] }, @@ -4415,9 +4534,9 @@ dependencies = [ { name = "tqdm" }, { name = "uuid6" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/dc/1a/89816f503ffdfa32756732bba6c1b98863db0ef0477b3fd458b34858166f/opik-1.10.37.tar.gz", hash = "sha256:410ccec8fd9710ea9a006b83ccff176704745d9a78c87b828108c9bc6fbf0502", size = 776960, upload-time = "2026-03-11T13:38:14.717Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/0f/b1e00a18cac16b4f36bf6cecc2de962fda810a9416d1159c48f46b81f5ec/opik-1.10.39.tar.gz", hash = "sha256:4d808eb2137070fc5d92a3bed3c3100d9cccfb35f4f0b71ea9990733f293dbb2", size = 780312, upload-time = "2026-03-12T14:08:25.746Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0a/39/01580bb6249e0c0db26b8abc3e5f4975b57565944d8d5eeb1c1beaa8d43d/opik-1.10.37-py3-none-any.whl", hash = "sha256:84d85bc0429fa4d0ef43babc720bb1dddcf275d348fe0e6deb82bdf3010ec87e", size = 1312332, upload-time = "2026-03-11T13:38:12.924Z" }, + { url = "https://files.pythonhosted.org/packages/e1/24/0f4404907a98b4aec4508504570a78a61a3a8b5e451c67326632695ba8e6/opik-1.10.39-py3-none-any.whl", hash = "sha256:a72d735b9afac62e5262294b2f704aca89ec31f5c9beda17504815f7423870c3", size = 1317833, upload-time = "2026-03-12T14:08:23.954Z" }, ] [[package]] @@ -4531,30 +4650,31 @@ wheels = [ [[package]] name = "pandas" -version = "2.2.3" +version = "3.0.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy" }, { name = "python-dateutil" }, - { name = "pytz" }, - { name = "tzdata" }, + { name = "tzdata", marker = "sys_platform == 'emscripten' or sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9c/d6/9f8431bacc2e19dca897724cd097b1bb224a6ad5433784a44b587c7c13af/pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667", size = 4399213, upload-time = "2024-09-20T13:10:04.827Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2e/0c/b28ed414f080ee0ad153f848586d61d1878f91689950f037f976ce15f6c8/pandas-3.0.1.tar.gz", hash = "sha256:4186a699674af418f655dbd420ed87f50d56b4cd6603784279d9eef6627823c8", size = 4641901, upload-time = "2026-02-17T22:20:16.434Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a8/44/d9502bf0ed197ba9bf1103c9867d5904ddcaf869e52329787fc54ed70cc8/pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039", size = 12602222, upload-time = "2024-09-20T13:08:56.254Z" }, - { url = "https://files.pythonhosted.org/packages/52/11/9eac327a38834f162b8250aab32a6781339c69afe7574368fffe46387edf/pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd", size = 11321274, upload-time = "2024-09-20T13:08:58.645Z" }, - { url = 
"https://files.pythonhosted.org/packages/45/fb/c4beeb084718598ba19aa9f5abbc8aed8b42f90930da861fcb1acdb54c3a/pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698", size = 15579836, upload-time = "2024-09-20T19:01:57.571Z" }, - { url = "https://files.pythonhosted.org/packages/cd/5f/4dba1d39bb9c38d574a9a22548c540177f78ea47b32f99c0ff2ec499fac5/pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc", size = 13058505, upload-time = "2024-09-20T13:09:01.501Z" }, - { url = "https://files.pythonhosted.org/packages/b9/57/708135b90391995361636634df1f1130d03ba456e95bcf576fada459115a/pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3", size = 16744420, upload-time = "2024-09-20T19:02:00.678Z" }, - { url = "https://files.pythonhosted.org/packages/86/4a/03ed6b7ee323cf30404265c284cee9c65c56a212e0a08d9ee06984ba2240/pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32", size = 14440457, upload-time = "2024-09-20T13:09:04.105Z" }, - { url = "https://files.pythonhosted.org/packages/ed/8c/87ddf1fcb55d11f9f847e3c69bb1c6f8e46e2f40ab1a2d2abadb2401b007/pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5", size = 11617166, upload-time = "2024-09-20T13:09:06.917Z" }, - { url = "https://files.pythonhosted.org/packages/17/a3/fb2734118db0af37ea7433f57f722c0a56687e14b14690edff0cdb4b7e58/pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9", size = 12529893, upload-time = "2024-09-20T13:09:09.655Z" }, - { url = "https://files.pythonhosted.org/packages/e1/0c/ad295fd74bfac85358fd579e271cded3ac969de81f62dd0142c426b9da91/pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4", size = 11363475, upload-time = "2024-09-20T13:09:14.718Z" }, - { url = "https://files.pythonhosted.org/packages/c6/2a/4bba3f03f7d07207481fed47f5b35f556c7441acddc368ec43d6643c5777/pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3", size = 15188645, upload-time = "2024-09-20T19:02:03.88Z" }, - { url = "https://files.pythonhosted.org/packages/38/f8/d8fddee9ed0d0c0f4a2132c1dfcf0e3e53265055da8df952a53e7eaf178c/pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319", size = 12739445, upload-time = "2024-09-20T13:09:17.621Z" }, - { url = "https://files.pythonhosted.org/packages/20/e8/45a05d9c39d2cea61ab175dbe6a2de1d05b679e8de2011da4ee190d7e748/pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8", size = 16359235, upload-time = "2024-09-20T19:02:07.094Z" }, - { url = "https://files.pythonhosted.org/packages/1d/99/617d07a6a5e429ff90c90da64d428516605a1ec7d7bea494235e1c3882de/pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a", size = 14056756, upload-time = "2024-09-20T13:09:20.474Z" }, - { url = 
"https://files.pythonhosted.org/packages/29/d4/1244ab8edf173a10fd601f7e13b9566c1b525c4f365d6bee918e68381889/pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13", size = 11504248, upload-time = "2024-09-20T13:09:23.137Z" }, + { url = "https://files.pythonhosted.org/packages/ff/07/c7087e003ceee9b9a82539b40414ec557aa795b584a1a346e89180853d79/pandas-3.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:de09668c1bf3b925c07e5762291602f0d789eca1b3a781f99c1c78f6cac0e7ea", size = 10323380, upload-time = "2026-02-17T22:18:16.133Z" }, + { url = "https://files.pythonhosted.org/packages/c1/27/90683c7122febeefe84a56f2cde86a9f05f68d53885cebcc473298dfc33e/pandas-3.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:24ba315ba3d6e5806063ac6eb717504e499ce30bd8c236d8693a5fd3f084c796", size = 9923455, upload-time = "2026-02-17T22:18:19.13Z" }, + { url = "https://files.pythonhosted.org/packages/0e/f1/ed17d927f9950643bc7631aa4c99ff0cc83a37864470bc419345b656a41f/pandas-3.0.1-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:406ce835c55bac912f2a0dcfaf27c06d73c6b04a5dde45f1fd3169ce31337389", size = 10753464, upload-time = "2026-02-17T22:18:21.134Z" }, + { url = "https://files.pythonhosted.org/packages/2e/7c/870c7e7daec2a6c7ff2ac9e33b23317230d4e4e954b35112759ea4a924a7/pandas-3.0.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:830994d7e1f31dd7e790045235605ab61cff6c94defc774547e8b7fdfbff3dc7", size = 11255234, upload-time = "2026-02-17T22:18:24.175Z" }, + { url = "https://files.pythonhosted.org/packages/5c/39/3653fe59af68606282b989c23d1a543ceba6e8099cbcc5f1d506a7bae2aa/pandas-3.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a64ce8b0f2de1d2efd2ae40b0abe7f8ae6b29fbfb3812098ed5a6f8e235ad9bf", size = 11767299, upload-time = "2026-02-17T22:18:26.824Z" }, + { url = "https://files.pythonhosted.org/packages/9b/31/1daf3c0c94a849c7a8dab8a69697b36d313b229918002ba3e409265c7888/pandas-3.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9832c2c69da24b602c32e0c7b1b508a03949c18ba08d4d9f1c1033426685b447", size = 12333292, upload-time = "2026-02-17T22:18:28.996Z" }, + { url = "https://files.pythonhosted.org/packages/1f/67/af63f83cd6ca603a00fe8530c10a60f0879265b8be00b5930e8e78c5b30b/pandas-3.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:84f0904a69e7365f79a0c77d3cdfccbfb05bf87847e3a51a41e1426b0edb9c79", size = 9892176, upload-time = "2026-02-17T22:18:31.79Z" }, + { url = "https://files.pythonhosted.org/packages/79/ab/9c776b14ac4b7b4140788eca18468ea39894bc7340a408f1d1e379856a6b/pandas-3.0.1-cp311-cp311-win_arm64.whl", hash = "sha256:4a68773d5a778afb31d12e34f7dd4612ab90de8c6fb1d8ffe5d4a03b955082a1", size = 9151328, upload-time = "2026-02-17T22:18:35.721Z" }, + { url = "https://files.pythonhosted.org/packages/37/51/b467209c08dae2c624873d7491ea47d2b47336e5403309d433ea79c38571/pandas-3.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:476f84f8c20c9f5bc47252b66b4bb25e1a9fc2fa98cead96744d8116cb85771d", size = 10344357, upload-time = "2026-02-17T22:18:38.262Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f1/e2567ffc8951ab371db2e40b2fe068e36b81d8cf3260f06ae508700e5504/pandas-3.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0ab749dfba921edf641d4036c4c21c0b3ea70fea478165cb98a998fb2a261955", size = 9884543, upload-time = "2026-02-17T22:18:41.476Z" }, + { url = 
"https://files.pythonhosted.org/packages/d7/39/327802e0b6d693182403c144edacbc27eb82907b57062f23ef5a4c4a5ea7/pandas-3.0.1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b8e36891080b87823aff3640c78649b91b8ff6eea3c0d70aeabd72ea43ab069b", size = 10396030, upload-time = "2026-02-17T22:18:43.822Z" }, + { url = "https://files.pythonhosted.org/packages/3d/fe/89d77e424365280b79d99b3e1e7d606f5165af2f2ecfaf0c6d24c799d607/pandas-3.0.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:532527a701281b9dd371e2f582ed9094f4c12dd9ffb82c0c54ee28d8ac9520c4", size = 10876435, upload-time = "2026-02-17T22:18:45.954Z" }, + { url = "https://files.pythonhosted.org/packages/b5/a6/2a75320849dd154a793f69c951db759aedb8d1dd3939eeacda9bdcfa1629/pandas-3.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:356e5c055ed9b0da1580d465657bc7d00635af4fd47f30afb23025352ba764d1", size = 11405133, upload-time = "2026-02-17T22:18:48.533Z" }, + { url = "https://files.pythonhosted.org/packages/58/53/1d68fafb2e02d7881df66aa53be4cd748d25cbe311f3b3c85c93ea5d30ca/pandas-3.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9d810036895f9ad6345b8f2a338dd6998a74e8483847403582cab67745bff821", size = 11932065, upload-time = "2026-02-17T22:18:50.837Z" }, + { url = "https://files.pythonhosted.org/packages/75/08/67cc404b3a966b6df27b38370ddd96b3b023030b572283d035181854aac5/pandas-3.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:536232a5fe26dd989bd633e7a0c450705fdc86a207fec7254a55e9a22950fe43", size = 9741627, upload-time = "2026-02-17T22:18:53.905Z" }, + { url = "https://files.pythonhosted.org/packages/86/4f/caf9952948fb00d23795f09b893d11f1cacb384e666854d87249530f7cbe/pandas-3.0.1-cp312-cp312-win_arm64.whl", hash = "sha256:0f463ebfd8de7f326d38037c7363c6dacb857c5881ab8961fb387804d6daf2f7", size = 9052483, upload-time = "2026-02-17T22:18:57.31Z" }, ] [package.optional-dependencies] @@ -4762,6 +4882,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/05/0c/8b6b20b0be71725e6e8a32dcd460cdbf62fe6df9bc656a650150dc98fedd/posthog-7.0.1-py3-none-any.whl", hash = "sha256:efe212d8d88a9ba80a20c588eab4baf4b1a5e90e40b551160a5603bb21e96904", size = 145234, upload-time = "2025-11-15T12:44:21.247Z" }, ] +[[package]] +name = "preshed" +version = "3.0.12" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cymem" }, + { name = "murmurhash" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bf/34/eb4f5f0f678e152a96e826da867d2f41c4b18a2d589e40e1dd3347219e91/preshed-3.0.12.tar.gz", hash = "sha256:b73f9a8b54ee1d44529cc6018356896cff93d48f755f29c134734d9371c0d685", size = 15027, upload-time = "2025-11-17T13:00:33.621Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/54/d1e02d0a0ea348fb6a769506166e366abfe87ee917c2f11f7139c7acbf10/preshed-3.0.12-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bc45fda3fd4ae1ae15c37f18f0777cf389ce9184ef8884b39b18894416fd1341", size = 128439, upload-time = "2025-11-17T12:59:21.317Z" }, + { url = "https://files.pythonhosted.org/packages/8c/cb/685ca57ca6e438345b3f6c20226705a0e056a3de399a5bf8a9ee89b3dd2b/preshed-3.0.12-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75d6e628bc78c022dbb9267242715718f862c3105927732d166076ff009d65de", size = 124544, upload-time = "2025-11-17T12:59:22.944Z" }, + { url = 
"https://files.pythonhosted.org/packages/f8/07/018fcd3bf298304e1570065cf80601ac16acd29f799578fd47b715dd3ca2/preshed-3.0.12-cp311-cp311-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b901cff5c814facf7a864b0a4c14a16d45fa1379899a585b3fb48ee36a2dccdb", size = 824728, upload-time = "2025-11-17T12:59:24.614Z" }, + { url = "https://files.pythonhosted.org/packages/79/dc/d888b328fcedae530df53396d9fc0006026aa8793fec54d7d34f57f31ff5/preshed-3.0.12-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d1099253bf73dd3c39313280bd5331841f769637b27ddb576ff362c4e7bad298", size = 825969, upload-time = "2025-11-17T12:59:26.493Z" }, + { url = "https://files.pythonhosted.org/packages/21/51/f19933301f42ece1ffef1f7f4c370d09f0351c43c528e66fac24560e44d2/preshed-3.0.12-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1af4a049ffe9d0246e5dc10d6f54820ed064c40e5c3f7b6526127c664008297c", size = 842346, upload-time = "2025-11-17T12:59:28.092Z" }, + { url = "https://files.pythonhosted.org/packages/51/46/025f60fd3d51bf60606a0f8f0cd39c40068b9b5e4d249bca1682e4ff09c3/preshed-3.0.12-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:57159bcedca0cb4c99390f8a6e730f8659fdb663a5a3efcd9c4531e0f54b150e", size = 865504, upload-time = "2025-11-17T12:59:29.648Z" }, + { url = "https://files.pythonhosted.org/packages/88/b5/2e6ee5ab19b03e7983fc5e1850c812fb71dc178dd140d6aca3b45306bdf7/preshed-3.0.12-cp311-cp311-win_amd64.whl", hash = "sha256:8fe9cf1745e203e5aa58b8700436f78da1dcf0f0e2efb0054b467effd9d7d19d", size = 117736, upload-time = "2025-11-17T12:59:30.974Z" }, + { url = "https://files.pythonhosted.org/packages/1e/17/8a0a8f4b01e71b5fb7c5cd4c9fec04d7b852d42f1f9e096b01e7d2b16b17/preshed-3.0.12-cp311-cp311-win_arm64.whl", hash = "sha256:12d880f8786cb6deac34e99b8b07146fb92d22fbca0023208e03325f5944606b", size = 105127, upload-time = "2025-11-17T12:59:32.171Z" }, + { url = "https://files.pythonhosted.org/packages/4b/f7/ff3aca937eeaee19c52c45ddf92979546e52ed0686e58be4bc09c47e7d88/preshed-3.0.12-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2779861f5d69480493519ed123a622a13012d1182126779036b99d9d989bf7e9", size = 129958, upload-time = "2025-11-17T12:59:33.391Z" }, + { url = "https://files.pythonhosted.org/packages/80/24/fd654a9c0f5f3ed1a9b1d8a392f063ae9ca29ad0b462f0732ae0147f7cee/preshed-3.0.12-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ffe1fd7d92f51ed34383e20d8b734780c814ca869cfdb7e07f2d31651f90cdf4", size = 124550, upload-time = "2025-11-17T12:59:34.688Z" }, + { url = "https://files.pythonhosted.org/packages/71/49/8271c7f680696f4b0880f44357d2a903d649cb9f6e60a1efc97a203104df/preshed-3.0.12-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:91893404858502cc4e856d338fef3d2a4a552135f79a1041c24eb919817c19db", size = 874987, upload-time = "2025-11-17T12:59:36.062Z" }, + { url = "https://files.pythonhosted.org/packages/a3/a5/ca200187ca1632f1e2c458b72f1bd100fa8b55deecd5d72e1e4ebf09e98c/preshed-3.0.12-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9e06e8f2ba52f183eb9817a616cdebe84a211bb859a2ffbc23f3295d0b189638", size = 866499, upload-time = "2025-11-17T12:59:37.586Z" }, + { url = "https://files.pythonhosted.org/packages/87/a1/943b61f850c44899910c21996cb542d0ef5931744c6d492fdfdd8457e693/preshed-3.0.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bbe8b8a2d4f9af14e8a39ecca524b9de6defc91d8abcc95eb28f42da1c23272c", size = 878064, upload-time = 
"2025-11-17T12:59:39.651Z" }, + { url = "https://files.pythonhosted.org/packages/3e/75/d7fff7f1fa3763619aa85d6ba70493a5d9c6e6ea7958a6e8c9d3e6e88bbe/preshed-3.0.12-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5d0aaac9c5862f5471fddd0c931dc64d3af2efc5fe3eb48b50765adb571243b9", size = 900540, upload-time = "2025-11-17T12:59:41.384Z" }, + { url = "https://files.pythonhosted.org/packages/e4/12/a2285b78bd097a1e53fb90a1743bc8ce0d35e5b65b6853f3b3c47da398ca/preshed-3.0.12-cp312-cp312-win_amd64.whl", hash = "sha256:0eb8d411afcb1e3b12a0602fb6a0e33140342a732a795251a0ce452aba401dc0", size = 118298, upload-time = "2025-11-17T12:59:42.65Z" }, + { url = "https://files.pythonhosted.org/packages/0b/34/4e8443fe99206a2fcfc63659969a8f8c8ab184836533594a519f3899b1ad/preshed-3.0.12-cp312-cp312-win_arm64.whl", hash = "sha256:dcd3d12903c9f720a39a5c5f1339f7f46e3ab71279fb7a39776768fb840b6077", size = 104746, upload-time = "2025-11-17T12:59:43.934Z" }, +] + [[package]] name = "prompt-toolkit" version = "3.0.52" @@ -4841,16 +4989,18 @@ wheels = [ [[package]] name = "psutil" -version = "7.1.3" +version = "7.2.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e1/88/bdd0a41e5857d5d703287598cbf08dad90aed56774ea52ae071bae9071b6/psutil-7.1.3.tar.gz", hash = "sha256:6c86281738d77335af7aec228328e944b30930899ea760ecf33a4dba66be5e74", size = 489059, upload-time = "2025-11-02T12:25:54.619Z" } +sdist = { url = "https://files.pythonhosted.org/packages/aa/c6/d1ddf4abb55e93cebc4f2ed8b5d6dbad109ecb8d63748dd2b20ab5e57ebe/psutil-7.2.2.tar.gz", hash = "sha256:0746f5f8d406af344fd547f1c8daa5f5c33dbc293bb8d6a16d80b4bb88f59372", size = 493740, upload-time = "2026-01-28T18:14:54.428Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ef/94/46b9154a800253e7ecff5aaacdf8ebf43db99de4a2dfa18575b02548654e/psutil-7.1.3-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:2bdbcd0e58ca14996a42adf3621a6244f1bb2e2e528886959c72cf1e326677ab", size = 238359, upload-time = "2025-11-02T12:26:25.284Z" }, - { url = "https://files.pythonhosted.org/packages/68/3a/9f93cff5c025029a36d9a92fef47220ab4692ee7f2be0fba9f92813d0cb8/psutil-7.1.3-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:bc31fa00f1fbc3c3802141eede66f3a2d51d89716a194bf2cd6fc68310a19880", size = 239171, upload-time = "2025-11-02T12:26:27.23Z" }, - { url = "https://files.pythonhosted.org/packages/ce/b1/5f49af514f76431ba4eea935b8ad3725cdeb397e9245ab919dbc1d1dc20f/psutil-7.1.3-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3bb428f9f05c1225a558f53e30ccbad9930b11c3fc206836242de1091d3e7dd3", size = 263261, upload-time = "2025-11-02T12:26:29.48Z" }, - { url = "https://files.pythonhosted.org/packages/e0/95/992c8816a74016eb095e73585d747e0a8ea21a061ed3689474fabb29a395/psutil-7.1.3-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:56d974e02ca2c8eb4812c3f76c30e28836fffc311d55d979f1465c1feeb2b68b", size = 264635, upload-time = "2025-11-02T12:26:31.74Z" }, - { url = "https://files.pythonhosted.org/packages/55/4c/c3ed1a622b6ae2fd3c945a366e64eb35247a31e4db16cf5095e269e8eb3c/psutil-7.1.3-cp37-abi3-win_amd64.whl", hash = "sha256:f39c2c19fe824b47484b96f9692932248a54c43799a84282cfe58d05a6449efd", size = 247633, upload-time = "2025-11-02T12:26:33.887Z" }, - { url = "https://files.pythonhosted.org/packages/c9/ad/33b2ccec09bf96c2b2ef3f9a6f66baac8253d7565d8839e024a6b905d45d/psutil-7.1.3-cp37-abi3-win_arm64.whl", hash = 
"sha256:bd0d69cee829226a761e92f28140bec9a5ee9d5b4fb4b0cc589068dbfff559b1", size = 244608, upload-time = "2025-11-02T12:26:36.136Z" }, + { url = "https://files.pythonhosted.org/packages/e7/36/5ee6e05c9bd427237b11b3937ad82bb8ad2752d72c6969314590dd0c2f6e/psutil-7.2.2-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ed0cace939114f62738d808fdcecd4c869222507e266e574799e9c0faa17d486", size = 129090, upload-time = "2026-01-28T18:15:22.168Z" }, + { url = "https://files.pythonhosted.org/packages/80/c4/f5af4c1ca8c1eeb2e92ccca14ce8effdeec651d5ab6053c589b074eda6e1/psutil-7.2.2-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:1a7b04c10f32cc88ab39cbf606e117fd74721c831c98a27dc04578deb0c16979", size = 129859, upload-time = "2026-01-28T18:15:23.795Z" }, + { url = "https://files.pythonhosted.org/packages/b5/70/5d8df3b09e25bce090399cf48e452d25c935ab72dad19406c77f4e828045/psutil-7.2.2-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:076a2d2f923fd4821644f5ba89f059523da90dc9014e85f8e45a5774ca5bc6f9", size = 155560, upload-time = "2026-01-28T18:15:25.976Z" }, + { url = "https://files.pythonhosted.org/packages/63/65/37648c0c158dc222aba51c089eb3bdfa238e621674dc42d48706e639204f/psutil-7.2.2-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b0726cecd84f9474419d67252add4ac0cd9811b04d61123054b9fb6f57df6e9e", size = 156997, upload-time = "2026-01-28T18:15:27.794Z" }, + { url = "https://files.pythonhosted.org/packages/8e/13/125093eadae863ce03c6ffdbae9929430d116a246ef69866dad94da3bfbc/psutil-7.2.2-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fd04ef36b4a6d599bbdb225dd1d3f51e00105f6d48a28f006da7f9822f2606d8", size = 148972, upload-time = "2026-01-28T18:15:29.342Z" }, + { url = "https://files.pythonhosted.org/packages/04/78/0acd37ca84ce3ddffaa92ef0f571e073faa6d8ff1f0559ab1272188ea2be/psutil-7.2.2-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b58fabe35e80b264a4e3bb23e6b96f9e45a3df7fb7eed419ac0e5947c61e47cc", size = 148266, upload-time = "2026-01-28T18:15:31.597Z" }, + { url = "https://files.pythonhosted.org/packages/b4/90/e2159492b5426be0c1fef7acba807a03511f97c5f86b3caeda6ad92351a7/psutil-7.2.2-cp37-abi3-win_amd64.whl", hash = "sha256:eb7e81434c8d223ec4a219b5fc1c47d0417b12be7ea866e24fb5ad6e84b3d988", size = 137737, upload-time = "2026-01-28T18:15:33.849Z" }, + { url = "https://files.pythonhosted.org/packages/8c/c7/7bb2e321574b10df20cbde462a94e2b71d05f9bbda251ef27d104668306a/psutil-7.2.2-cp37-abi3-win_arm64.whl", hash = "sha256:8c233660f575a5a89e6d4cb65d9f938126312bca76d8fe087b947b3a1aaac9ee", size = 134617, upload-time = "2026-01-28T18:15:36.514Z" }, ] [[package]] @@ -4859,6 +5009,53 @@ version = "1.0.2" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/eb/72/4a7965cf54e341006ad74cdc72cd6572c789bc4f4e3fadc78672f1fbcfbd/psycogreen-1.0.2.tar.gz", hash = "sha256:c429845a8a49cf2f76b71265008760bcd7c7c77d80b806db4dc81116dbcd130d", size = 5411, upload-time = "2020-02-22T19:55:22.02Z" } +[[package]] +name = "psycopg" +version = "3.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, + { name = "tzdata", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d3/b6/379d0a960f8f435ec78720462fd94c4863e7a31237cf81bf76d0af5883bf/psycopg-3.3.3.tar.gz", hash = "sha256:5e9a47458b3c1583326513b2556a2a9473a1001a56c9efe9e587245b43148dd9", size = 165624, upload-time = 
"2026-02-18T16:52:16.546Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/5b/181e2e3becb7672b502f0ed7f16ed7352aca7c109cfb94cf3878a9186db9/psycopg-3.3.3-py3-none-any.whl", hash = "sha256:f96525a72bcfade6584ab17e89de415ff360748c766f0106959144dcbb38c698", size = 212768, upload-time = "2026-02-18T16:46:27.365Z" }, +] + +[package.optional-dependencies] +binary = [ + { name = "psycopg-binary", marker = "implementation_name != 'pypy'" }, +] + +[[package]] +name = "psycopg-binary" +version = "3.3.3" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/be/c0/b389119dd754483d316805260f3e73cdcad97925839107cc7a296f6132b1/psycopg_binary-3.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a89bb9ee11177b2995d87186b1d9fa892d8ea725e85eab28c6525e4cc14ee048", size = 4609740, upload-time = "2026-02-18T16:47:51.093Z" }, + { url = "https://files.pythonhosted.org/packages/cf/e3/9976eef20f61840285174d360da4c820a311ab39d6b82fa09fbb545be825/psycopg_binary-3.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9f7d0cf072c6fbac3795b08c98ef9ea013f11db609659dcfc6b1f6cc31f9e181", size = 4676837, upload-time = "2026-02-18T16:47:55.523Z" }, + { url = "https://files.pythonhosted.org/packages/9f/f2/d28ba2f7404fd7f68d41e8a11df86313bd646258244cb12a8dd83b868a97/psycopg_binary-3.3.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:90eecd93073922f085967f3ed3a98ba8c325cbbc8c1a204e300282abd2369e13", size = 5497070, upload-time = "2026-02-18T16:47:59.929Z" }, + { url = "https://files.pythonhosted.org/packages/de/2f/6c5c54b815edeb30a281cfcea96dc93b3bb6be939aea022f00cab7aa1420/psycopg_binary-3.3.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:dac7ee2f88b4d7bb12837989ca354c38d400eeb21bce3b73dac02622f0a3c8d6", size = 5172410, upload-time = "2026-02-18T16:48:05.665Z" }, + { url = "https://files.pythonhosted.org/packages/51/75/8206c7008b57de03c1ada46bd3110cc3743f3fd9ed52031c4601401d766d/psycopg_binary-3.3.3-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b62cf8784eb6d35beaee1056d54caf94ec6ecf2b7552395e305518ab61eb8fd2", size = 6763408, upload-time = "2026-02-18T16:48:13.541Z" }, + { url = "https://files.pythonhosted.org/packages/d4/5a/ea1641a1e6c8c8b3454b0fcb43c3045133a8b703e6e824fae134088e63bd/psycopg_binary-3.3.3-cp311-cp311-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a39f34c9b18e8f6794cca17bfbcd64572ca2482318db644268049f8c738f35a6", size = 5006255, upload-time = "2026-02-18T16:48:22.176Z" }, + { url = "https://files.pythonhosted.org/packages/aa/fb/538df099bf55ae1637d52d7ccb6b9620b535a40f4c733897ac2b7bb9e14c/psycopg_binary-3.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:883d68d48ca9ff3cb3d10c5fdebea02c79b48eecacdddbf7cce6e7cdbdc216b8", size = 4532694, upload-time = "2026-02-18T16:48:27.338Z" }, + { url = "https://files.pythonhosted.org/packages/a1/d1/00780c0e187ea3c13dfc53bd7060654b2232cd30df562aac91a5f1c545ac/psycopg_binary-3.3.3-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:cab7bc3d288d37a80aa8c0820033250c95e40b1c2b5c57cf59827b19c2a8b69d", size = 4222833, upload-time = "2026-02-18T16:48:31.221Z" }, + { url = "https://files.pythonhosted.org/packages/7a/34/a07f1ff713c51d64dc9f19f2c32be80299a2055d5d109d5853662b922cb4/psycopg_binary-3.3.3-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:56c767007ca959ca32f796b42379fc7e1ae2ed085d29f20b05b3fc394f3715cc", size = 3952818, upload-time = "2026-02-18T16:48:35.869Z" }, 
+ { url = "https://files.pythonhosted.org/packages/d3/67/d33f268a7759b4445f3c9b5a181039b01af8c8263c865c1be7a6444d4749/psycopg_binary-3.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:da2f331a01af232259a21573a01338530c6016dcfad74626c01330535bcd8628", size = 4258061, upload-time = "2026-02-18T16:48:41.365Z" }, + { url = "https://files.pythonhosted.org/packages/b4/3b/0d8d2c5e8e29ccc07d28c8af38445d9d9abcd238d590186cac82ee71fc84/psycopg_binary-3.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:19f93235ece6dbfc4036b5e4f6d8b13f0b8f2b3eeb8b0bd2936d406991bcdd40", size = 3558915, upload-time = "2026-02-18T16:48:46.679Z" }, + { url = "https://files.pythonhosted.org/packages/90/15/021be5c0cbc5b7c1ab46e91cc3434eb42569f79a0592e67b8d25e66d844d/psycopg_binary-3.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6698dbab5bcef8fdb570fc9d35fd9ac52041771bfcfe6fd0fc5f5c4e36f1e99d", size = 4591170, upload-time = "2026-02-18T16:48:55.594Z" }, + { url = "https://files.pythonhosted.org/packages/f1/54/a60211c346c9a2f8c6b272b5f2bbe21f6e11800ce7f61e99ba75cf8b63e1/psycopg_binary-3.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:329ff393441e75f10b673ae99ab45276887993d49e65f141da20d915c05aafd8", size = 4670009, upload-time = "2026-02-18T16:49:03.608Z" }, + { url = "https://files.pythonhosted.org/packages/c1/53/ac7c18671347c553362aadbf65f92786eef9540676ca24114cc02f5be405/psycopg_binary-3.3.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:eb072949b8ebf4082ae24289a2b0fd724da9adc8f22743409d6fd718ddb379df", size = 5469735, upload-time = "2026-02-18T16:49:10.128Z" }, + { url = "https://files.pythonhosted.org/packages/7f/c3/4f4e040902b82a344eff1c736cde2f2720f127fe939c7e7565706f96dd44/psycopg_binary-3.3.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:263a24f39f26e19ed7fc982d7859a36f17841b05bebad3eb47bb9cd2dd785351", size = 5152919, upload-time = "2026-02-18T16:49:16.335Z" }, + { url = "https://files.pythonhosted.org/packages/0c/e7/d929679c6a5c212bcf738806c7c89f5b3d0919f2e1685a0e08d6ff877945/psycopg_binary-3.3.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5152d50798c2fa5bd9b68ec68eb68a1b71b95126c1d70adaa1a08cd5eefdc23d", size = 6738785, upload-time = "2026-02-18T16:49:22.687Z" }, + { url = "https://files.pythonhosted.org/packages/69/b0/09703aeb69a9443d232d7b5318d58742e8ca51ff79f90ffe6b88f1db45e7/psycopg_binary-3.3.3-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9d6a1e56dd267848edb824dbeb08cf5bac649e02ee0b03ba883ba3f4f0bd54f2", size = 4979008, upload-time = "2026-02-18T16:49:27.313Z" }, + { url = "https://files.pythonhosted.org/packages/cc/a6/e662558b793c6e13a7473b970fee327d635270e41eded3090ef14045a6a5/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73eaaf4bb04709f545606c1db2f65f4000e8a04cdbf3e00d165a23004692093e", size = 4508255, upload-time = "2026-02-18T16:49:31.575Z" }, + { url = "https://files.pythonhosted.org/packages/5f/7f/0f8b2e1d5e0093921b6f324a948a5c740c1447fbb45e97acaf50241d0f39/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:162e5675efb4704192411eaf8e00d07f7960b679cd3306e7efb120bb8d9456cc", size = 4189166, upload-time = "2026-02-18T16:49:35.801Z" }, + { url = "https://files.pythonhosted.org/packages/92/ec/ce2e91c33bc8d10b00c87e2f6b0fb570641a6a60042d6a9ae35658a3a797/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:fab6b5e37715885c69f5d091f6ff229be71e235f272ebaa35158d5a46fd548a0", size = 
3924544, upload-time = "2026-02-18T16:49:41.129Z" }, + { url = "https://files.pythonhosted.org/packages/c5/2f/7718141485f73a924205af60041c392938852aa447a94c8cbd222ff389a1/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a4aab31bd6d1057f287c96c0effca3a25584eb9cc702f282ecb96ded7814e830", size = 4235297, upload-time = "2026-02-18T16:49:46.726Z" }, + { url = "https://files.pythonhosted.org/packages/57/f9/1add717e2643a003bbde31b1b220172e64fbc0cb09f06429820c9173f7fc/psycopg_binary-3.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:59aa31fe11a0e1d1bcc2ce37ed35fe2ac84cd65bb9036d049b1a1c39064d0f14", size = 3547659, upload-time = "2026-02-18T16:49:52.999Z" }, +] + [[package]] name = "psycopg2-binary" version = "2.9.11" @@ -4909,27 +5106,27 @@ wheels = [ [[package]] name = "pyarrow" -version = "17.0.0" +version = "14.0.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/27/4e/ea6d43f324169f8aec0e57569443a38bab4b398d09769ca64f7b4d467de3/pyarrow-17.0.0.tar.gz", hash = "sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28", size = 1112479, upload-time = "2024-07-17T10:41:25.092Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/8b/d18b7eb6fb22e5ed6ffcbc073c85dae635778dbd1270a6cf5d750b031e84/pyarrow-14.0.2.tar.gz", hash = "sha256:36cef6ba12b499d864d1def3e990f97949e0b79400d08b7cf74504ffbd3eb025", size = 1063645, upload-time = "2023-12-18T15:43:41.625Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f9/46/ce89f87c2936f5bb9d879473b9663ce7a4b1f4359acc2f0eb39865eaa1af/pyarrow-17.0.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:1c8856e2ef09eb87ecf937104aacfa0708f22dfeb039c363ec99735190ffb977", size = 29028748, upload-time = "2024-07-16T10:30:02.609Z" }, - { url = "https://files.pythonhosted.org/packages/8d/8e/ce2e9b2146de422f6638333c01903140e9ada244a2a477918a368306c64c/pyarrow-17.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2e19f569567efcbbd42084e87f948778eb371d308e137a0f97afe19bb860ccb3", size = 27190965, upload-time = "2024-07-16T10:30:10.718Z" }, - { url = "https://files.pythonhosted.org/packages/3b/c8/5675719570eb1acd809481c6d64e2136ffb340bc387f4ca62dce79516cea/pyarrow-17.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b244dc8e08a23b3e352899a006a26ae7b4d0da7bb636872fa8f5884e70acf15", size = 39269081, upload-time = "2024-07-16T10:30:18.878Z" }, - { url = "https://files.pythonhosted.org/packages/5e/78/3931194f16ab681ebb87ad252e7b8d2c8b23dad49706cadc865dff4a1dd3/pyarrow-17.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b72e87fe3e1db343995562f7fff8aee354b55ee83d13afba65400c178ab2597", size = 39864921, upload-time = "2024-07-16T10:30:27.008Z" }, - { url = "https://files.pythonhosted.org/packages/d8/81/69b6606093363f55a2a574c018901c40952d4e902e670656d18213c71ad7/pyarrow-17.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:dc5c31c37409dfbc5d014047817cb4ccd8c1ea25d19576acf1a001fe07f5b420", size = 38740798, upload-time = "2024-07-16T10:30:34.814Z" }, - { url = "https://files.pythonhosted.org/packages/4c/21/9ca93b84b92ef927814cb7ba37f0774a484c849d58f0b692b16af8eebcfb/pyarrow-17.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e3343cb1e88bc2ea605986d4b94948716edc7a8d14afd4e2c097232f729758b4", size = 39871877, upload-time = "2024-07-16T10:30:42.672Z" }, - { url = 
"https://files.pythonhosted.org/packages/30/d1/63a7c248432c71c7d3ee803e706590a0b81ce1a8d2b2ae49677774b813bb/pyarrow-17.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:a27532c38f3de9eb3e90ecab63dfda948a8ca859a66e3a47f5f42d1e403c4d03", size = 25151089, upload-time = "2024-07-16T10:30:49.279Z" }, - { url = "https://files.pythonhosted.org/packages/d4/62/ce6ac1275a432b4a27c55fe96c58147f111d8ba1ad800a112d31859fae2f/pyarrow-17.0.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:9b8a823cea605221e61f34859dcc03207e52e409ccf6354634143e23af7c8d22", size = 29019418, upload-time = "2024-07-16T10:30:55.573Z" }, - { url = "https://files.pythonhosted.org/packages/8e/0a/dbd0c134e7a0c30bea439675cc120012337202e5fac7163ba839aa3691d2/pyarrow-17.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f1e70de6cb5790a50b01d2b686d54aaf73da01266850b05e3af2a1bc89e16053", size = 27152197, upload-time = "2024-07-16T10:31:02.036Z" }, - { url = "https://files.pythonhosted.org/packages/cb/05/3f4a16498349db79090767620d6dc23c1ec0c658a668d61d76b87706c65d/pyarrow-17.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0071ce35788c6f9077ff9ecba4858108eebe2ea5a3f7cf2cf55ebc1dbc6ee24a", size = 39263026, upload-time = "2024-07-16T10:31:10.351Z" }, - { url = "https://files.pythonhosted.org/packages/c2/0c/ea2107236740be8fa0e0d4a293a095c9f43546a2465bb7df34eee9126b09/pyarrow-17.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:757074882f844411fcca735e39aae74248a1531367a7c80799b4266390ae51cc", size = 39880798, upload-time = "2024-07-16T10:31:17.66Z" }, - { url = "https://files.pythonhosted.org/packages/f6/b0/b9164a8bc495083c10c281cc65064553ec87b7537d6f742a89d5953a2a3e/pyarrow-17.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9ba11c4f16976e89146781a83833df7f82077cdab7dc6232c897789343f7891a", size = 38715172, upload-time = "2024-07-16T10:31:25.965Z" }, - { url = "https://files.pythonhosted.org/packages/f1/c4/9625418a1413005e486c006e56675334929fad864347c5ae7c1b2e7fe639/pyarrow-17.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b0c6ac301093b42d34410b187bba560b17c0330f64907bfa4f7f7f2444b0cf9b", size = 39874508, upload-time = "2024-07-16T10:31:33.721Z" }, - { url = "https://files.pythonhosted.org/packages/ae/49/baafe2a964f663413be3bd1cf5c45ed98c5e42e804e2328e18f4570027c1/pyarrow-17.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:392bc9feabc647338e6c89267635e111d71edad5fcffba204425a7c8d13610d7", size = 25099235, upload-time = "2024-07-16T10:31:40.893Z" }, + { url = "https://files.pythonhosted.org/packages/94/8a/411ef0b05483076b7f548c74ccaa0f90c1e60d3875db71a821f6ffa8cf42/pyarrow-14.0.2-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:87482af32e5a0c0cce2d12eb3c039dd1d853bd905b04f3f953f147c7a196915b", size = 26904455, upload-time = "2023-12-18T15:40:43.477Z" }, + { url = "https://files.pythonhosted.org/packages/6c/6c/882a57798877e3a49ba54d8e0540bea24aed78fb42e1d860f08c3449c75e/pyarrow-14.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:059bd8f12a70519e46cd64e1ba40e97eae55e0cbe1695edd95384653d7626b23", size = 23997116, upload-time = "2023-12-18T15:40:48.533Z" }, + { url = "https://files.pythonhosted.org/packages/ec/3f/ef47fe6192ce4d82803a073db449b5292135406c364a7fc49dfbcd34c987/pyarrow-14.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f16111f9ab27e60b391c5f6d197510e3ad6654e73857b4e394861fc79c37200", size = 35944575, upload-time = "2023-12-18T15:40:55.128Z" }, + { url = 
"https://files.pythonhosted.org/packages/1a/90/2021e529d7f234a3909f419d4341d53382541ef77d957fa274a99c533b18/pyarrow-14.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06ff1264fe4448e8d02073f5ce45a9f934c0f3db0a04460d0b01ff28befc3696", size = 38079719, upload-time = "2023-12-18T15:41:02.565Z" }, + { url = "https://files.pythonhosted.org/packages/30/a9/474caf5fd54a6d5315aaf9284c6e8f5d071ca825325ad64c53137b646e1f/pyarrow-14.0.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:6dd4f4b472ccf4042f1eab77e6c8bce574543f54d2135c7e396f413046397d5a", size = 35429706, upload-time = "2023-12-18T15:41:09.955Z" }, + { url = "https://files.pythonhosted.org/packages/d9/f8/cfba56f5353e51c19b0c240380ce39483f4c76e5c4aee5a000f3d75b72da/pyarrow-14.0.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:32356bfb58b36059773f49e4e214996888eeea3a08893e7dbde44753799b2a02", size = 38001476, upload-time = "2023-12-18T15:41:16.372Z" }, + { url = "https://files.pythonhosted.org/packages/43/3f/7bdf7dc3b3b0cfdcc60760e7880954ba99ccd0bc1e0df806f3dd61bc01cd/pyarrow-14.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:52809ee69d4dbf2241c0e4366d949ba035cbcf48409bf404f071f624ed313a2b", size = 24576230, upload-time = "2023-12-18T15:41:22.561Z" }, + { url = "https://files.pythonhosted.org/packages/69/5b/d8ab6c20c43b598228710e4e4a6cba03a01f6faa3d08afff9ce76fd0fd47/pyarrow-14.0.2-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:c87824a5ac52be210d32906c715f4ed7053d0180c1060ae3ff9b7e560f53f944", size = 26819585, upload-time = "2023-12-18T15:41:27.59Z" }, + { url = "https://files.pythonhosted.org/packages/2d/29/bed2643d0dd5e9570405244a61f6db66c7f4704a6e9ce313f84fa5a3675a/pyarrow-14.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a25eb2421a58e861f6ca91f43339d215476f4fe159eca603c55950c14f378cc5", size = 23965222, upload-time = "2023-12-18T15:41:32.449Z" }, + { url = "https://files.pythonhosted.org/packages/2a/34/da464632e59a8cdd083370d69e6c14eae30221acb284f671c6bc9273fadd/pyarrow-14.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c1da70d668af5620b8ba0a23f229030a4cd6c5f24a616a146f30d2386fec422", size = 35942036, upload-time = "2023-12-18T15:41:38.767Z" }, + { url = "https://files.pythonhosted.org/packages/a8/ff/cbed4836d543b29f00d2355af67575c934999ff1d43e3f438ab0b1b394f1/pyarrow-14.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2cc61593c8e66194c7cdfae594503e91b926a228fba40b5cf25cc593563bcd07", size = 38089266, upload-time = "2023-12-18T15:41:47.617Z" }, + { url = "https://files.pythonhosted.org/packages/38/41/345011cb831d3dbb2dab762fc244c745a5df94b199223a99af52a5f7dff6/pyarrow-14.0.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:78ea56f62fb7c0ae8ecb9afdd7893e3a7dbeb0b04106f5c08dbb23f9c0157591", size = 35404468, upload-time = "2023-12-18T15:41:54.49Z" }, + { url = "https://files.pythonhosted.org/packages/fd/af/2fc23ca2068ff02068d8dabf0fb85b6185df40ec825973470e613dbd8790/pyarrow-14.0.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:37c233ddbce0c67a76c0985612fef27c0c92aef9413cf5aa56952f359fcb7379", size = 38003134, upload-time = "2023-12-18T15:42:01.593Z" }, + { url = "https://files.pythonhosted.org/packages/95/1f/9d912f66a87e3864f694e000977a6a70a644ea560289eac1d733983f215d/pyarrow-14.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:e4b123ad0f6add92de898214d404e488167b87b5dd86e9a434126bc2b7a5578d", size = 25043754, upload-time = "2023-12-18T15:42:07.108Z" }, ] [[package]] @@ -5089,11 +5286,11 @@ wheels = [ 
[[package]] name = "pyjwt" -version = "2.11.0" +version = "2.12.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5c/5a/b46fa56bf322901eee5b0454a34343cdbdae202cd421775a8ee4e42fd519/pyjwt-2.11.0.tar.gz", hash = "sha256:35f95c1f0fbe5d5ba6e43f00271c275f7a1a4db1dab27bf708073b75318ea623", size = 98019, upload-time = "2026-01-30T19:59:55.694Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a8/10/e8192be5f38f3e8e7e046716de4cae33d56fd5ae08927a823bb916be36c1/pyjwt-2.12.0.tar.gz", hash = "sha256:2f62390b667cd8257de560b850bb5a883102a388829274147f1d724453f8fb02", size = 102511, upload-time = "2026-03-12T17:15:30.831Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6f/01/c26ce75ba460d5cd503da9e13b21a33804d38c2165dec7b716d06b13010c/pyjwt-2.11.0-py3-none-any.whl", hash = "sha256:94a6bde30eb5c8e04fee991062b534071fd1439ef58d2adc9ccb823e7bcd0469", size = 28224, upload-time = "2026-01-30T19:59:54.539Z" }, + { url = "https://files.pythonhosted.org/packages/15/70/70f895f404d363d291dcf62c12c85fdd47619ad9674ac0f53364d035925a/pyjwt-2.12.0-py3-none-any.whl", hash = "sha256:9bb459d1bdd0387967d287f5656bf7ec2b9a26645d1961628cda1764e087fd6e", size = 29700, upload-time = "2026-03-12T17:15:29.257Z" }, ] [package.optional-dependencies] @@ -5160,12 +5357,17 @@ wheels = [ ] [[package]] -name = "pypandoc" -version = "1.16.2" +name = "pypandoc-binary" +version = "1.17" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0b/18/9f5f70567b97758625335209b98d5cb857e19aa1a9306e9749567a240634/pypandoc-1.16.2.tar.gz", hash = "sha256:7a72a9fbf4a5dc700465e384c3bb333d22220efc4e972cb98cf6fc723cdca86b", size = 31477, upload-time = "2025-11-13T16:30:29.608Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/e9/b145683854189bba84437ea569bfa786f408c8dc5bc16d8eb0753f5583bf/pypandoc-1.16.2-py3-none-any.whl", hash = "sha256:c200c1139c8e3247baf38d1e9279e85d9f162499d1999c6aa8418596558fe79b", size = 19451, upload-time = "2025-11-13T16:30:07.66Z" }, + { url = "https://files.pythonhosted.org/packages/80/85/681a54111f0948821a5cf87ce30a88bb0a3f6848af5112c912abac4a2b77/pypandoc_binary-1.17-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:734726dc618ef276343e272e1a6b4567e59c2ef9ef41d5533042deac3b0531f1", size = 25553945, upload-time = "2026-03-14T22:38:47.91Z" }, + { url = "https://files.pythonhosted.org/packages/15/58/8fd107c68522957868c1e785fbea7595608df118e440e424d189668294df/pypandoc_binary-1.17-py3-none-macosx_11_0_arm64.whl", hash = "sha256:fcfd28f347ed998dda28823fc6bc24f9310e7fdf3ddceaf925bf0563a100ab5b", size = 25553944, upload-time = "2026-03-14T22:38:50.74Z" }, + { url = "https://files.pythonhosted.org/packages/f4/27/ac1078239aae14b94c51975b7f46ad8e099e47d7ae26c175a5486b1c0099/pypandoc_binary-1.17-py3-none-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d6b620b21c9374e3e48aabd518492bf0776b148442ee28816f6aaf52da3d4387", size = 34460960, upload-time = "2026-03-14T22:38:53.391Z" }, + { url = "https://files.pythonhosted.org/packages/8d/7f/1e5612b52900ebe590862dabeadf546f739b27527dcd8bfd632f8adac1be/pypandoc_binary-1.17-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9ada156cb980cd54fd6534231788e668c00dbb591cbd24f0be0bd86812eb8788", size = 36867598, upload-time = "2026-03-14T22:38:56.351Z" }, + { url = 
"https://files.pythonhosted.org/packages/3b/31/a5a867159c4080e5d368f4a53540a727501a2f31affc297dc8e0fced96a7/pypandoc_binary-1.17-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:2f439dcd211183bb3460253ca4511101df6e1acf4a01f45f5617e1fa2ad24279", size = 36867584, upload-time = "2026-03-14T22:38:59.087Z" }, + { url = "https://files.pythonhosted.org/packages/0d/2d/6a51cd4e54bdf132c19416801077c34bd40ba182e85d843360d36ae03a2d/pypandoc_binary-1.17-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:f6e6d3e4cfafbe23189a08db3d41f8def260bacd6e7e382bceadab7ba1f17da6", size = 34460949, upload-time = "2026-03-14T22:39:01.71Z" }, + { url = "https://files.pythonhosted.org/packages/c6/b9/f47b77ba75ed5d47ec85fcc2ecfbf7f78e3a73347f3a09836634d930de98/pypandoc_binary-1.17-py3-none-win_amd64.whl", hash = "sha256:76fae066cd2d7e78fb97f0ec8e9e36f437b07187b689b0b415ca18216f8f898a", size = 40891661, upload-time = "2026-03-14T22:39:04.782Z" }, ] [[package]] @@ -5188,31 +5390,31 @@ wheels = [ [[package]] name = "pypdfium2" -version = "5.2.0" +version = "5.6.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f6/ab/73c7d24e4eac9ba952569403b32b7cca9412fc5b9bef54fdbd669551389f/pypdfium2-5.2.0.tar.gz", hash = "sha256:43863625231ce999c1ebbed6721a88de818b2ab4d909c1de558d413b9a400256", size = 269999, upload-time = "2025-12-12T13:20:15.353Z" } +sdist = { url = "https://files.pythonhosted.org/packages/3b/01/be763b9081c7eb823196e7d13d9c145bf75ac43f3c1466de81c21c24b381/pypdfium2-5.6.0.tar.gz", hash = "sha256:bcb9368acfe3547054698abbdae68ba0cbd2d3bda8e8ee437e061deef061976d", size = 270714, upload-time = "2026-03-08T01:05:06.5Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fb/0c/9108ae5266ee4cdf495f99205c44d4b5c83b4eb227c2b610d35c9e9fe961/pypdfium2-5.2.0-py3-none-android_23_arm64_v8a.whl", hash = "sha256:1ba4187a45ce4cf08f2a8c7e0f8970c36b9aa1770c8a3412a70781c1d80fb145", size = 2763268, upload-time = "2025-12-12T13:19:37.354Z" }, - { url = "https://files.pythonhosted.org/packages/35/8c/55f5c8a2c6b293f5c020be4aa123eaa891e797c514e5eccd8cb042740d37/pypdfium2-5.2.0-py3-none-android_23_armeabi_v7a.whl", hash = "sha256:80c55e10a8c9242f0901d35a9a306dd09accce8e497507bb23fcec017d45fe2e", size = 2301821, upload-time = "2025-12-12T13:19:39.484Z" }, - { url = "https://files.pythonhosted.org/packages/5e/7d/efa013e3795b41c59dd1e472f7201c241232c3a6553be4917e3a26b9f225/pypdfium2-5.2.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:73523ae69cd95c084c1342096893b2143ea73c36fdde35494780ba431e6a7d6e", size = 2816428, upload-time = "2025-12-12T13:19:41.735Z" }, - { url = "https://files.pythonhosted.org/packages/ec/ae/8c30af6ff2ab41a7cb84753ee79dd1e0a8932c9bda9fe19759d69cbbf115/pypdfium2-5.2.0-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:19c501d22ef5eb98e42416d22cc3ac66d4808b436e3d06686392f24d8d9f708d", size = 2939486, upload-time = "2025-12-12T13:19:43.176Z" }, - { url = "https://files.pythonhosted.org/packages/64/64/454a73c49a04c2c290917ad86184e4da959e9e5aba94b3b046328c89be93/pypdfium2-5.2.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ed15a3f58d6ee4905f0d0a731e30b381b457c30689512589c7f57950b0cdcec", size = 2979235, upload-time = "2025-12-12T13:19:44.635Z" }, - { url = "https://files.pythonhosted.org/packages/4e/29/f1cab8e31192dd367dc7b1afa71f45cfcb8ff0b176f1d2a0f528faf04052/pypdfium2-5.2.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:329cd1e9f068e8729e0d0b79a070d6126f52bc48ff1e40505cb207a5e20ce0ba", size = 
2763001, upload-time = "2025-12-12T13:19:47.598Z" }, - { url = "https://files.pythonhosted.org/packages/bc/5d/e95fad8fdac960854173469c4b6931d5de5e09d05e6ee7d9756f8b95eef0/pypdfium2-5.2.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:325259759886e66619504df4721fef3b8deabf8a233e4f4a66e0c32ebae60c2f", size = 3057024, upload-time = "2025-12-12T13:19:49.179Z" }, - { url = "https://files.pythonhosted.org/packages/f4/32/468591d017ab67f8142d40f4db8163b6d8bb404fe0d22da75a5c661dc144/pypdfium2-5.2.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5683e8f08ab38ed05e0e59e611451ec74332803d4e78f8c45658ea1d372a17af", size = 3448598, upload-time = "2025-12-12T13:19:50.979Z" }, - { url = "https://files.pythonhosted.org/packages/f9/a5/57b4e389b77ab5f7e9361dc7fc03b5378e678ba81b21e791e85350fbb235/pypdfium2-5.2.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da4815426a5adcf03bf4d2c5f26c0ff8109dbfaf2c3415984689931bc6006ef9", size = 2993946, upload-time = "2025-12-12T13:19:53.154Z" }, - { url = "https://files.pythonhosted.org/packages/84/3a/e03e9978f817632aa56183bb7a4989284086fdd45de3245ead35f147179b/pypdfium2-5.2.0-py3-none-manylinux_2_27_s390x.manylinux_2_28_s390x.whl", hash = "sha256:64bf5c039b2c314dab1fd158bfff99db96299a5b5c6d96fc056071166056f1de", size = 3673148, upload-time = "2025-12-12T13:19:54.528Z" }, - { url = "https://files.pythonhosted.org/packages/13/ee/e581506806553afa4b7939d47bf50dca35c1151b8cc960f4542a6eb135ce/pypdfium2-5.2.0-py3-none-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:76b42a17748ac7dc04d5ef04d0561c6a0a4b546d113ec1d101d59650c6a340f7", size = 2964757, upload-time = "2025-12-12T13:19:56.406Z" }, - { url = "https://files.pythonhosted.org/packages/00/be/3715c652aff30f12284523dd337843d0efe3e721020f0ec303a99ffffd8d/pypdfium2-5.2.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:9d4367d471439fae846f0aba91ff9e8d66e524edcf3c8d6e02fe96fa306e13b9", size = 4130319, upload-time = "2025-12-12T13:19:57.889Z" }, - { url = "https://files.pythonhosted.org/packages/b0/0b/28aa2ede9004dd4192266bbad394df0896787f7c7bcfa4d1a6e091ad9a2c/pypdfium2-5.2.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:613f6bb2b47d76b66c0bf2ca581c7c33e3dd9dcb29d65d8c34fef4135f933149", size = 3746488, upload-time = "2025-12-12T13:19:59.469Z" }, - { url = "https://files.pythonhosted.org/packages/bc/04/1b791e1219652bbfc51df6498267d8dcec73ad508b99388b2890902ccd9d/pypdfium2-5.2.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:c03fad3f2fa68d358f5dd4deb07e438482fa26fae439c49d127576d969769ca1", size = 4336534, upload-time = "2025-12-12T13:20:01.28Z" }, - { url = "https://files.pythonhosted.org/packages/4f/e3/6f00f963bb702ffd2e3e2d9c7286bc3bb0bebcdfa96ca897d466f66976c6/pypdfium2-5.2.0-py3-none-musllinux_1_2_ppc64le.whl", hash = "sha256:f10be1900ae21879d02d9f4d58c2d2db3a2e6da611736a8e9decc22d1fb02909", size = 4375079, upload-time = "2025-12-12T13:20:03.117Z" }, - { url = "https://files.pythonhosted.org/packages/3a/2a/7ec2b191b5e1b7716a0dfc14e6860e89bb355fb3b94ed0c1d46db526858c/pypdfium2-5.2.0-py3-none-musllinux_1_2_riscv64.whl", hash = "sha256:97c1a126d30378726872f94866e38c055740cae80313638dafd1cd448d05e7c0", size = 3928648, upload-time = "2025-12-12T13:20:05.041Z" }, - { url = "https://files.pythonhosted.org/packages/bf/c3/c6d972fa095ff3ace76f9d3a91ceaf8a9dbbe0d9a5a84ac1d6178a46630e/pypdfium2-5.2.0-py3-none-musllinux_1_2_s390x.whl", hash = "sha256:c369f183a90781b788af9a357a877bc8caddc24801e8346d0bf23f3295f89f3a", size = 4997772, 
upload-time = "2025-12-12T13:20:06.453Z" }, - { url = "https://files.pythonhosted.org/packages/22/45/2c64584b7a3ca5c4652280a884f4b85b8ed24e27662adeebdc06d991c917/pypdfium2-5.2.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b391f1cceb454934b612a05b54e90f98aafeffe5e73830d71700b17f0812226b", size = 4180046, upload-time = "2025-12-12T13:20:08.715Z" }, - { url = "https://files.pythonhosted.org/packages/d6/99/8d1ff87b626649400e62a2840e6e10fe258443ba518798e071fee4cd86f9/pypdfium2-5.2.0-py3-none-win32.whl", hash = "sha256:c68067938f617c37e4d17b18de7cac231fc7ce0eb7b6653b7283ebe8764d4999", size = 2990175, upload-time = "2025-12-12T13:20:10.241Z" }, - { url = "https://files.pythonhosted.org/packages/93/fc/114fff8895b620aac4984808e93d01b6d7b93e342a1635fcfe2a5f39cf39/pypdfium2-5.2.0-py3-none-win_amd64.whl", hash = "sha256:eb0591b720e8aaeab9475c66d653655ec1be0464b946f3f48a53922e843f0f3b", size = 3098615, upload-time = "2025-12-12T13:20:11.795Z" }, - { url = "https://files.pythonhosted.org/packages/08/97/eb738bff5998760d6e0cbcb7dd04cbf1a95a97b997fac6d4e57562a58992/pypdfium2-5.2.0-py3-none-win_arm64.whl", hash = "sha256:5dd1ef579f19fa3719aee4959b28bda44b1072405756708b5e83df8806a19521", size = 2939479, upload-time = "2025-12-12T13:20:13.815Z" }, + { url = "https://files.pythonhosted.org/packages/9d/b1/129ed0177521a93a892f8a6a215dd3260093e30e77ef7035004bb8af7b6c/pypdfium2-5.6.0-py3-none-android_23_arm64_v8a.whl", hash = "sha256:fb7858c9707708555b4a719b5548a6e7f5d26bc82aef55ae4eb085d7a2190b11", size = 3346059, upload-time = "2026-03-08T01:04:21.37Z" }, + { url = "https://files.pythonhosted.org/packages/86/34/cbdece6886012180a7f2c7b2c360c415cf5e1f83f1973d2c9201dae3506a/pypdfium2-5.6.0-py3-none-android_23_armeabi_v7a.whl", hash = "sha256:6a7e1f4597317786f994bfb947eef480e53933f804a990193ab89eef8243f805", size = 2804418, upload-time = "2026-03-08T01:04:23.384Z" }, + { url = "https://files.pythonhosted.org/packages/6e/f6/9f9e190fe0e5a6b86b82f83bd8b5d3490348766062381140ca5cad8e00b1/pypdfium2-5.6.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:e468c38997573f0e86f03273c2c1fbdea999de52ba43fee96acaa2f6b2ad35f7", size = 3412541, upload-time = "2026-03-08T01:04:25.45Z" }, + { url = "https://files.pythonhosted.org/packages/ee/8d/e57492cb2228ba56ed57de1ff044c8ac114b46905f8b1445c33299ba0488/pypdfium2-5.6.0-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:ad3abddc5805424f962e383253ccad6a0d1d2ebd86afa9a9e1b9ca659773cd0d", size = 3592320, upload-time = "2026-03-08T01:04:27.509Z" }, + { url = "https://files.pythonhosted.org/packages/f9/8a/8ab82e33e9c551494cbe1526ea250ca8cc4e9e98d6a4fc6b6f8d959aa1d1/pypdfium2-5.6.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6b5eb9eae5c45076395454522ca26add72ba8bd1fe473e1e4721aa58521470c", size = 3596450, upload-time = "2026-03-08T01:04:29.183Z" }, + { url = "https://files.pythonhosted.org/packages/f5/b5/602a792282312ccb158cc63849528079d94b0a11efdc61f2a359edfb41e9/pypdfium2-5.6.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:258624da8ef45cdc426e11b33e9d83f9fb723c1c201c6e0f4ab5a85966c6b876", size = 3325442, upload-time = "2026-03-08T01:04:30.886Z" }, + { url = "https://files.pythonhosted.org/packages/81/1f/9e48ec05ed8d19d736c2d1f23c1bd0f20673f02ef846a2576c69e237f15d/pypdfium2-5.6.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e9367451c8a00931d6612db0822525a18c06f649d562cd323a719e46ac19c9bb", size = 3727434, upload-time = "2026-03-08T01:04:33.619Z" }, + { url = 
"https://files.pythonhosted.org/packages/33/90/0efd020928b4edbd65f4f3c2af0c84e20b43a3ada8fa6d04f999a97afe7a/pypdfium2-5.6.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a757869f891eac1cc1372e38a4aa01adac8abc8fe2a8a4e2ebf50595e3bf5937", size = 4139029, upload-time = "2026-03-08T01:04:36.08Z" }, + { url = "https://files.pythonhosted.org/packages/ff/49/a640b288a48dab1752281dd9b72c0679fccea107874e80a65a606b00efa9/pypdfium2-5.6.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:515be355222cc57ae9e62cd5c7c350b8e0c863efc539f80c7d75e2811ba45cb6", size = 3646387, upload-time = "2026-03-08T01:04:38.151Z" }, + { url = "https://files.pythonhosted.org/packages/b0/3b/a344c19c01021eeb5d830c102e4fc9b1602f19c04aa7d11abbe2d188fd8e/pypdfium2-5.6.0-py3-none-manylinux_2_27_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1c4753c7caf7d004211d7f57a21f10d127f5e0e5510a14d24bc073e7220a3ea", size = 3097212, upload-time = "2026-03-08T01:04:40.776Z" }, + { url = "https://files.pythonhosted.org/packages/50/96/e48e13789ace22aeb9b7510904a1b1493ec588196e11bbacc122da330b3d/pypdfium2-5.6.0-py3-none-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c49729090281fdd85775fb8912c10bd19e99178efaa98f145ab06e7ce68554d2", size = 2965026, upload-time = "2026-03-08T01:04:42.857Z" }, + { url = "https://files.pythonhosted.org/packages/cb/06/3100e44d4935f73af8f5d633d3bd40f0d36d606027085a0ef1f0566a6320/pypdfium2-5.6.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:a4a1749a8d4afd62924a8d95cfa4f2e26fc32957ce34ac3b674be6f127ed252e", size = 4131431, upload-time = "2026-03-08T01:04:44.982Z" }, + { url = "https://files.pythonhosted.org/packages/64/ef/d8df63569ce9a66c8496057782eb8af78e0d28667922d62ec958434e3d4b/pypdfium2-5.6.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:36469ebd0fdffb7130ce45ed9c44f8232d91571c89eb851bd1633c64b6f6114f", size = 3747469, upload-time = "2026-03-08T01:04:46.702Z" }, + { url = "https://files.pythonhosted.org/packages/a6/47/fd2c6a67a49fade1acd719fbd11f7c375e7219912923ef2de0ea0ac1544e/pypdfium2-5.6.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:9da900df09be3cf546b637a127a7b6428fb22d705951d731269e25fd3adef457", size = 4337578, upload-time = "2026-03-08T01:04:49.007Z" }, + { url = "https://files.pythonhosted.org/packages/6b/f5/836c83e54b01e09478c4d6bf4912651d6053c932250fcee953f5c72d8e4a/pypdfium2-5.6.0-py3-none-musllinux_1_2_ppc64le.whl", hash = "sha256:45fccd5622233c5ec91a885770ae7dd4004d4320ac05a4ad8fa03a66dea40244", size = 4376104, upload-time = "2026-03-08T01:04:51.04Z" }, + { url = "https://files.pythonhosted.org/packages/6e/7f/b940b6a1664daf8f9bad87c6c99b84effa3611615b8708d10392dc33036c/pypdfium2-5.6.0-py3-none-musllinux_1_2_riscv64.whl", hash = "sha256:282dc030e767cd61bd0299f9d581052b91188e2b87561489057a8e7963e7e0cb", size = 3929824, upload-time = "2026-03-08T01:04:53.544Z" }, + { url = "https://files.pythonhosted.org/packages/88/79/00267d92a6a58c229e364d474f5698efe446e0c7f4f152f58d0138715e99/pypdfium2-5.6.0-py3-none-musllinux_1_2_s390x.whl", hash = "sha256:a1c1dfe950382c76a7bba1ba160ec5e40df8dd26b04a1124ae268fda55bc4cbe", size = 4270201, upload-time = "2026-03-08T01:04:55.81Z" }, + { url = "https://files.pythonhosted.org/packages/e1/ab/b127f38aba41746bdf9ace15ba08411d7ef6ecba1326d529ba414eb1ed50/pypdfium2-5.6.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:43b0341ca6feb6c92e4b7a9eb4813e5466f5f5e8b6baeb14df0a94d5f312c00b", size = 4180793, upload-time = "2026-03-08T01:04:57.961Z" }, + { url = 
"https://files.pythonhosted.org/packages/0e/8c/a01c8e4302448b614d25a85c08298b0d3e9dfbdac5bd1b2f32c9b02e83d9/pypdfium2-5.6.0-py3-none-win32.whl", hash = "sha256:9dfcd4ff49a2b9260d00e38539ab28190d59e785e83030b30ffaf7a29c42155d", size = 3596753, upload-time = "2026-03-08T01:05:00.566Z" }, + { url = "https://files.pythonhosted.org/packages/9b/5f/2d871adf46761bb002a62686545da6348afe838d19af03df65d1ece786a2/pypdfium2-5.6.0-py3-none-win_amd64.whl", hash = "sha256:c6bc8dd63d0568f4b592f3e03de756afafc0e44aa1fe8878cc4aba1b11ae7374", size = 3716526, upload-time = "2026-03-08T01:05:02.433Z" }, + { url = "https://files.pythonhosted.org/packages/3a/80/0d9b162098597fbe3ac2b269b1682c0c3e8db9ba87679603fdd9b19afaa6/pypdfium2-5.6.0-py3-none-win_arm64.whl", hash = "sha256:5538417b199bdcb3207370c88df61f2ba3dac7a3253f82e1aa2708e6376b6f90", size = 3515049, upload-time = "2026-03-08T01:05:04.587Z" }, ] [[package]] @@ -5418,11 +5620,11 @@ wheels = [ [[package]] name = "python-dotenv" -version = "1.0.1" +version = "1.2.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/bc/57/e84d88dfe0aec03b7a2d4327012c1627ab5f03652216c63d49846d7a6c58/python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca", size = 39115, upload-time = "2024-01-23T06:33:00.505Z" } +sdist = { url = "https://files.pythonhosted.org/packages/82/ed/0301aeeac3e5353ef3d94b6ec08bbcabd04a72018415dcb29e588514bba8/python_dotenv-1.2.2.tar.gz", hash = "sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3", size = 50135, upload-time = "2026-03-01T16:00:26.196Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6a/3e/b68c118422ec867fa7ab88444e1274aa40681c606d59ac27de5a5588f082/python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a", size = 19863, upload-time = "2024-01-23T06:32:58.246Z" }, + { url = "https://files.pythonhosted.org/packages/0b/d7/1959b9648791274998a9c3526f6d0ec8fd2233e4d4acce81bbae76b44b2a/python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a", size = 22101, upload-time = "2026-03-01T16:00:25.09Z" }, ] [[package]] @@ -5436,11 +5638,11 @@ wheels = [ [[package]] name = "python-iso639" -version = "2025.11.16" +version = "2026.1.31" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a1/3b/3e07aadeeb7bbb2574d6aa6ccacbc58b17bd2b1fb6c7196bf96ab0e45129/python_iso639-2025.11.16.tar.gz", hash = "sha256:aabe941267898384415a509f5236d7cfc191198c84c5c6f73dac73d9783f5169", size = 174186, upload-time = "2025-11-16T21:53:37.031Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a3/da/701fc47ea3b0579a8ae489d50d5b54f2ef3aeb7768afd31db1d1cfe9f24e/python_iso639-2026.1.31.tar.gz", hash = "sha256:55a1612c15e5fbd3a1fa269a309cbf1e7c13019356e3d6f75bb435ed44c45ddb", size = 174144, upload-time = "2026-01-31T15:04:48.105Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b5/2d/563849c31e58eb2e273fa0c391a7d9987db32f4d9152fe6ecdac0a8ffe93/python_iso639-2025.11.16-py3-none-any.whl", hash = "sha256:65f6ac6c6d8e8207f6175f8bf7fff7db486c6dc5c1d8866c2b77d2a923370896", size = 167818, upload-time = "2025-11-16T21:53:35.36Z" }, + { url = "https://files.pythonhosted.org/packages/5b/3a/03ee682b04099e6b02b591955851b0347deb2e3691ae850112000c54ba12/python_iso639-2026.1.31-py3-none-any.whl", hash = 
"sha256:b2c48fa1300af1299dff4f1e1995ad1059996ed9f22270ea2d6d6bdc5fb03d4c", size = 167757, upload-time = "2026-01-31T15:04:46.458Z" }, ] [[package]] @@ -5730,15 +5932,15 @@ wheels = [ [[package]] name = "resend" -version = "2.9.0" +version = "2.23.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "requests" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1f/2a/535a794e5b64f6ef4abc1342ef1a43465af2111c5185e98b4cca2a6b6b7a/resend-2.9.0.tar.gz", hash = "sha256:e8d4c909a7fe7701119789f848a6befb0a4a668e2182d7bbfe764742f1952bd3", size = 13600, upload-time = "2025-05-06T00:35:20.363Z" } +sdist = { url = "https://files.pythonhosted.org/packages/96/a3/20003e7d14604fef778bd30c69604df3560a657a95a5c29a9688610759b6/resend-2.23.0.tar.gz", hash = "sha256:df613827dcc40eb1c9de2e5ff600cd4081b89b206537dec8067af1a5016d23c7", size = 31416, upload-time = "2026-02-23T19:01:57.603Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/96/81/ba1feb9959bafbcde6466b78d4628405d69cd14613f6eba12b928a77b86a/resend-2.9.0-py2.py3-none-any.whl", hash = "sha256:6607f75e3a9257a219c0640f935b8d1211338190d553eb043c25732affb92949", size = 20173, upload-time = "2025-05-06T00:35:18.963Z" }, + { url = "https://files.pythonhosted.org/packages/e3/35/64df775b8cd95e89798fd7b1b7fcafa975b6b09f559c10c0650e65b33580/resend-2.23.0-py2.py3-none-any.whl", hash = "sha256:eca6d28a1ffd36c1fc489fa83cb6b511f384792c9f07465f7c92d96c8b4d5636", size = 52599, upload-time = "2026-02-23T19:01:55.962Z" }, ] [[package]] @@ -5916,15 +6118,15 @@ wheels = [ [[package]] name = "sentry-sdk" -version = "2.28.0" +version = "2.54.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "certifi" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5e/bb/6a41b2e0e9121bed4d2ec68d50568ab95c49f4744156a9bbb789c866c66d/sentry_sdk-2.28.0.tar.gz", hash = "sha256:14d2b73bc93afaf2a9412490329099e6217761cbab13b6ee8bc0e82927e1504e", size = 325052, upload-time = "2025-05-12T07:53:12.785Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c8/e9/2e3a46c304e7fa21eaa70612f60354e32699c7102eb961f67448e222ad7c/sentry_sdk-2.54.0.tar.gz", hash = "sha256:2620c2575128d009b11b20f7feb81e4e4e8ae08ec1d36cbc845705060b45cc1b", size = 413813, upload-time = "2026-03-02T15:12:41.355Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9b/4e/b1575833094c088dfdef63fbca794518860fcbc8002aadf51ebe8b6a387f/sentry_sdk-2.28.0-py2.py3-none-any.whl", hash = "sha256:51496e6cb3cb625b99c8e08907c67a9112360259b0ef08470e532c3ab184a232", size = 341693, upload-time = "2025-05-12T07:53:10.882Z" }, + { url = "https://files.pythonhosted.org/packages/53/39/be412cc86bc6247b8f69e9383d7950711bd86f8d0a4a4b0fe8fad685bc21/sentry_sdk-2.54.0-py2.py3-none-any.whl", hash = "sha256:fd74e0e281dcda63afff095d23ebcd6e97006102cdc8e78a29f19ecdf796a0de", size = 439198, upload-time = "2026-03-02T15:12:39.546Z" }, ] [package.optional-dependencies] @@ -5961,6 +6163,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] +[[package]] +name = "smart-open" +version = "7.5.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wrapt" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/e8/be/a66598b305763861a9ab15ff0f2fbc44e47b1ce7a776797337a4eef37c66/smart_open-7.5.1.tar.gz", hash = "sha256:3f08e16827c4733699e6b2cc40328a3568f900cb12ad9a3ad233ba6c872d9fe7", size = 54034, upload-time = "2026-02-23T11:01:28.979Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5e/ea/dcdecd68acebb49d3fd560473a43499b1635076f7f1ae8641c060fe7ce74/smart_open-7.5.1-py3-none-any.whl", hash = "sha256:3e07cbbd9c8a908bcb8e25d48becf1a5cbb4886fa975e9f34c672ed171df2318", size = 64108, upload-time = "2026-02-23T11:01:27.429Z" }, +] + [[package]] name = "smmap" version = "5.0.2" @@ -6006,33 +6220,93 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/14/a0/bb38d3b76b8cae341dad93a2dd83ab7462e6dbcdd84d43f54ee60a8dc167/soupsieve-2.8-py3-none-any.whl", hash = "sha256:0cc76456a30e20f5d7f2e14a98a4ae2ee4e5abdc7c5ea0aafe795f344bc7984c", size = 36679, upload-time = "2025-08-27T15:39:50.179Z" }, ] +[[package]] +name = "spacy" +version = "3.8.11" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "catalogue" }, + { name = "cymem" }, + { name = "jinja2" }, + { name = "murmurhash" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "preshed" }, + { name = "pydantic" }, + { name = "requests" }, + { name = "setuptools" }, + { name = "spacy-legacy" }, + { name = "spacy-loggers" }, + { name = "srsly" }, + { name = "thinc" }, + { name = "tqdm" }, + { name = "typer-slim" }, + { name = "wasabi" }, + { name = "weasel" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/59/9f/424244b0e2656afc9ff82fb7a96931a47397bfce5ba382213827b198312a/spacy-3.8.11.tar.gz", hash = "sha256:54e1e87b74a2f9ea807ffd606166bf29ac45e2bd81ff7f608eadc7b05787d90d", size = 1326804, upload-time = "2025-11-17T20:40:03.079Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/74/d3/0c795e6f31ee3535b6e70d08e89fc22247b95b61f94fc8334a01d39bf871/spacy-3.8.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a12d83e8bfba07563300ae5e0086548e41aa4bfe3734c97dda87e0eec813df0d", size = 6487958, upload-time = "2025-11-17T20:38:40.378Z" }, + { url = "https://files.pythonhosted.org/packages/4e/2a/83ca9b4d0a2b31adcf0ced49fa667212d12958f75d4e238618a60eb50b10/spacy-3.8.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e07a50b69500ef376326545353a470f00d1ed7203c76341b97242af976e3681a", size = 6148078, upload-time = "2025-11-17T20:38:42.524Z" }, + { url = "https://files.pythonhosted.org/packages/2c/f0/ff520df18a6152ba2dbf808c964014308e71a48feb4c7563f2a6cd6e668d/spacy-3.8.11-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:718b7bb5e83c76cb841ed6e407f7b40255d0b46af7101a426c20e04af3afd64e", size = 32056451, upload-time = "2025-11-17T20:38:44.92Z" }, + { url = "https://files.pythonhosted.org/packages/9d/3a/6c44c0b9b6a70595888b8d021514ded065548a5b10718ac253bd39f9fd73/spacy-3.8.11-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f860f9d51c1aeb2d61852442b232576e4ca4d239cb3d1b40ac452118b8eb2c68", size = 32302908, upload-time = "2025-11-17T20:38:47.672Z" }, + { url = "https://files.pythonhosted.org/packages/db/77/00e99e00efd4c2456772befc48400c2e19255140660d663e16b6924a0f2e/spacy-3.8.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ff8d928ce70d751b7bb27f60ee5e3a308216efd4ab4517291e6ff05d9b194840", size = 32280936, upload-time = "2025-11-17T20:38:50.893Z" }, + { url = 
"https://files.pythonhosted.org/packages/d8/da/692b51e9e5be2766d2d1fb9a7c8122cfd99c337570e621f09c40ce94ad17/spacy-3.8.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3f3cb91d7d42fafd92b8d5bf9f696571170d2f0747f85724a2c5b997753e33c9", size = 33117270, upload-time = "2025-11-17T20:38:53.596Z" }, + { url = "https://files.pythonhosted.org/packages/9b/13/a542ac9b61d071f3328fda1fd8087b523fb7a4f2c340010bc70b1f762485/spacy-3.8.11-cp311-cp311-win_amd64.whl", hash = "sha256:745c190923584935272188c604e0cc170f4179aace1025814a25d92ee90cf3de", size = 15348350, upload-time = "2025-11-17T20:38:56.833Z" }, + { url = "https://files.pythonhosted.org/packages/23/53/975c16514322f6385d6caa5929771613d69f5458fb24f03e189ba533f279/spacy-3.8.11-cp311-cp311-win_arm64.whl", hash = "sha256:27535d81d9dee0483b66660cadd93d14c1668f55e4faf4386aca4a11a41a8b97", size = 14701913, upload-time = "2025-11-17T20:38:59.507Z" }, + { url = "https://files.pythonhosted.org/packages/51/fb/01eadf4ba70606b3054702dc41fc2ccf7d70fb14514b3cd57f0ff78ebea8/spacy-3.8.11-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:aa1ee8362074c30098feaaf2dd888c829a1a79c4311eec1b117a0a61f16fa6dd", size = 6073726, upload-time = "2025-11-17T20:39:01.679Z" }, + { url = "https://files.pythonhosted.org/packages/3a/f8/07b03a2997fc2621aaeafae00af50f55522304a7da6926b07027bb6d0709/spacy-3.8.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:75a036d04c2cf11d6cb566c0a689860cc5a7a75b439e8fea1b3a6b673dabf25d", size = 5724702, upload-time = "2025-11-17T20:39:03.486Z" }, + { url = "https://files.pythonhosted.org/packages/13/0c/c4fa0f379dbe3258c305d2e2df3760604a9fcd71b34f8f65c23e43f4cf55/spacy-3.8.11-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cb599d2747d4a59a5f90e8a453c149b13db382a8297925cf126333141dbc4f7", size = 32727774, upload-time = "2025-11-17T20:39:05.894Z" }, + { url = "https://files.pythonhosted.org/packages/ce/8e/6a4ba82bed480211ebdf5341b0f89e7271b454307525ac91b5e447825914/spacy-3.8.11-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:94632e302ad2fb79dc285bf1e9e4d4a178904d5c67049e0e02b7fb4a77af85c4", size = 33215053, upload-time = "2025-11-17T20:39:08.588Z" }, + { url = "https://files.pythonhosted.org/packages/a6/bc/44d863d248e9d7358c76a0aa8b3f196b8698df520650ed8de162e18fbffb/spacy-3.8.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:aeca6cf34009d48cda9fb1bbfb532469e3d643817241a73e367b34ab99a5806f", size = 32074195, upload-time = "2025-11-17T20:39:11.601Z" }, + { url = "https://files.pythonhosted.org/packages/6f/7d/0b115f3f16e1dd2d3f99b0f89497867fc11c41aed94f4b7a4367b4b54136/spacy-3.8.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:368a79b8df925b15d89dccb5e502039446fb2ce93cf3020e092d5b962c3349b9", size = 32996143, upload-time = "2025-11-17T20:39:14.705Z" }, + { url = "https://files.pythonhosted.org/packages/7d/48/7e9581b476df76aaf9ee182888d15322e77c38b0bbbd5e80160ba0bddd4c/spacy-3.8.11-cp312-cp312-win_amd64.whl", hash = "sha256:88d65941a87f58d75afca1785bd64d01183a92f7269dcbcf28bd9d6f6a77d1a7", size = 14217511, upload-time = "2025-11-17T20:39:17.316Z" }, + { url = "https://files.pythonhosted.org/packages/7b/1f/307a16f32f90aa5ee7ad8d29ff8620a57132b80a4c8c536963d46d192e1a/spacy-3.8.11-cp312-cp312-win_arm64.whl", hash = "sha256:97b865d6d3658e2ab103a67d6c8a2d678e193e84a07f40d9938565b669ceee39", size = 13614446, upload-time = "2025-11-17T20:39:19.748Z" }, +] + +[[package]] +name = "spacy-legacy" +version = "3.0.12" +source = { registry = "https://pypi.org/simple" } +sdist = { 
url = "https://files.pythonhosted.org/packages/d9/79/91f9d7cc8db5642acad830dcc4b49ba65a7790152832c4eceb305e46d681/spacy-legacy-3.0.12.tar.gz", hash = "sha256:b37d6e0c9b6e1d7ca1cf5bc7152ab64a4c4671f59c85adaf7a3fcb870357a774", size = 23806, upload-time = "2023-01-23T09:04:15.104Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c3/55/12e842c70ff8828e34e543a2c7176dac4da006ca6901c9e8b43efab8bc6b/spacy_legacy-3.0.12-py2.py3-none-any.whl", hash = "sha256:476e3bd0d05f8c339ed60f40986c07387c0a71479245d6d0f4298dbd52cda55f", size = 29971, upload-time = "2023-01-23T09:04:13.45Z" }, +] + +[[package]] +name = "spacy-loggers" +version = "1.0.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/67/3d/926db774c9c98acf66cb4ed7faf6c377746f3e00b84b700d0868b95d0712/spacy-loggers-1.0.5.tar.gz", hash = "sha256:d60b0bdbf915a60e516cc2e653baeff946f0cfc461b452d11a4d5458c6fe5f24", size = 20811, upload-time = "2023-09-11T12:26:52.323Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/33/78/d1a1a026ef3af911159398c939b1509d5c36fe524c7b644f34a5146c4e16/spacy_loggers-1.0.5-py3-none-any.whl", hash = "sha256:196284c9c446cc0cdb944005384270d775fdeaf4f494d8e269466cfa497ef645", size = 22343, upload-time = "2023-09-11T12:26:50.586Z" }, +] + [[package]] name = "sqlalchemy" -version = "2.0.44" +version = "2.0.48" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "greenlet", marker = "platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64'" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f0/f2/840d7b9496825333f532d2e3976b8eadbf52034178aac53630d09fe6e1ef/sqlalchemy-2.0.44.tar.gz", hash = "sha256:0ae7454e1ab1d780aee69fd2aae7d6b8670a581d8847f2d1e0f7ddfbf47e5a22", size = 9819830, upload-time = "2025-10-10T14:39:12.935Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1f/73/b4a9737255583b5fa858e0bb8e116eb94b88c910164ed2ed719147bde3de/sqlalchemy-2.0.48.tar.gz", hash = "sha256:5ca74f37f3369b45e1f6b7b06afb182af1fd5dde009e4ffd831830d98cbe5fe7", size = 9886075, upload-time = "2026-03-02T15:28:51.474Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e3/81/15d7c161c9ddf0900b076b55345872ed04ff1ed6a0666e5e94ab44b0163c/sqlalchemy-2.0.44-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0fe3917059c7ab2ee3f35e77757062b1bea10a0b6ca633c58391e3f3c6c488dd", size = 2140517, upload-time = "2025-10-10T15:36:15.64Z" }, - { url = "https://files.pythonhosted.org/packages/d4/d5/4abd13b245c7d91bdf131d4916fd9e96a584dac74215f8b5bc945206a974/sqlalchemy-2.0.44-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:de4387a354ff230bc979b46b2207af841dc8bf29847b6c7dbe60af186d97aefa", size = 2130738, upload-time = "2025-10-10T15:36:16.91Z" }, - { url = "https://files.pythonhosted.org/packages/cb/3c/8418969879c26522019c1025171cefbb2a8586b6789ea13254ac602986c0/sqlalchemy-2.0.44-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3678a0fb72c8a6a29422b2732fe423db3ce119c34421b5f9955873eb9b62c1e", size = 3304145, upload-time = "2025-10-10T15:34:19.569Z" }, - { url = "https://files.pythonhosted.org/packages/94/2d/fdb9246d9d32518bda5d90f4b65030b9bf403a935cfe4c36a474846517cb/sqlalchemy-2.0.44-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:3cf6872a23601672d61a68f390e44703442639a12ee9dd5a88bbce52a695e46e", size = 3304511, upload-time = "2025-10-10T15:47:05.088Z" }, - { url = "https://files.pythonhosted.org/packages/7d/fb/40f2ad1da97d5c83f6c1269664678293d3fe28e90ad17a1093b735420549/sqlalchemy-2.0.44-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:329aa42d1be9929603f406186630135be1e7a42569540577ba2c69952b7cf399", size = 3235161, upload-time = "2025-10-10T15:34:21.193Z" }, - { url = "https://files.pythonhosted.org/packages/95/cb/7cf4078b46752dca917d18cf31910d4eff6076e5b513c2d66100c4293d83/sqlalchemy-2.0.44-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:70e03833faca7166e6a9927fbee7c27e6ecde436774cd0b24bbcc96353bce06b", size = 3261426, upload-time = "2025-10-10T15:47:07.196Z" }, - { url = "https://files.pythonhosted.org/packages/f8/3b/55c09b285cb2d55bdfa711e778bdffdd0dc3ffa052b0af41f1c5d6e582fa/sqlalchemy-2.0.44-cp311-cp311-win32.whl", hash = "sha256:253e2f29843fb303eca6b2fc645aca91fa7aa0aa70b38b6950da92d44ff267f3", size = 2105392, upload-time = "2025-10-10T15:38:20.051Z" }, - { url = "https://files.pythonhosted.org/packages/c7/23/907193c2f4d680aedbfbdf7bf24c13925e3c7c292e813326c1b84a0b878e/sqlalchemy-2.0.44-cp311-cp311-win_amd64.whl", hash = "sha256:7a8694107eb4308a13b425ca8c0e67112f8134c846b6e1f722698708741215d5", size = 2130293, upload-time = "2025-10-10T15:38:21.601Z" }, - { url = "https://files.pythonhosted.org/packages/62/c4/59c7c9b068e6813c898b771204aad36683c96318ed12d4233e1b18762164/sqlalchemy-2.0.44-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:72fea91746b5890f9e5e0997f16cbf3d53550580d76355ba2d998311b17b2250", size = 2139675, upload-time = "2025-10-10T16:03:31.064Z" }, - { url = "https://files.pythonhosted.org/packages/d6/ae/eeb0920537a6f9c5a3708e4a5fc55af25900216bdb4847ec29cfddf3bf3a/sqlalchemy-2.0.44-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:585c0c852a891450edbb1eaca8648408a3cc125f18cf433941fa6babcc359e29", size = 2127726, upload-time = "2025-10-10T16:03:35.934Z" }, - { url = "https://files.pythonhosted.org/packages/d8/d5/2ebbabe0379418eda8041c06b0b551f213576bfe4c2f09d77c06c07c8cc5/sqlalchemy-2.0.44-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b94843a102efa9ac68a7a30cd46df3ff1ed9c658100d30a725d10d9c60a2f44", size = 3327603, upload-time = "2025-10-10T15:35:28.322Z" }, - { url = "https://files.pythonhosted.org/packages/45/e5/5aa65852dadc24b7d8ae75b7efb8d19303ed6ac93482e60c44a585930ea5/sqlalchemy-2.0.44-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:119dc41e7a7defcefc57189cfa0e61b1bf9c228211aba432b53fb71ef367fda1", size = 3337842, upload-time = "2025-10-10T15:43:45.431Z" }, - { url = "https://files.pythonhosted.org/packages/41/92/648f1afd3f20b71e880ca797a960f638d39d243e233a7082c93093c22378/sqlalchemy-2.0.44-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0765e318ee9179b3718c4fd7ba35c434f4dd20332fbc6857a5e8df17719c24d7", size = 3264558, upload-time = "2025-10-10T15:35:29.93Z" }, - { url = "https://files.pythonhosted.org/packages/40/cf/e27d7ee61a10f74b17740918e23cbc5bc62011b48282170dc4c66da8ec0f/sqlalchemy-2.0.44-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2e7b5b079055e02d06a4308d0481658e4f06bc7ef211567edc8f7d5dce52018d", size = 3301570, upload-time = "2025-10-10T15:43:48.407Z" }, - { url = "https://files.pythonhosted.org/packages/3b/3d/3116a9a7b63e780fb402799b6da227435be878b6846b192f076d2f838654/sqlalchemy-2.0.44-cp312-cp312-win32.whl", hash = 
"sha256:846541e58b9a81cce7dee8329f352c318de25aa2f2bbe1e31587eb1f057448b4", size = 2103447, upload-time = "2025-10-10T15:03:21.678Z" }, - { url = "https://files.pythonhosted.org/packages/25/83/24690e9dfc241e6ab062df82cc0df7f4231c79ba98b273fa496fb3dd78ed/sqlalchemy-2.0.44-cp312-cp312-win_amd64.whl", hash = "sha256:7cbcb47fd66ab294703e1644f78971f6f2f1126424d2b300678f419aa73c7b6e", size = 2130912, upload-time = "2025-10-10T15:03:24.656Z" }, - { url = "https://files.pythonhosted.org/packages/9c/5e/6a29fa884d9fb7ddadf6b69490a9d45fded3b38541713010dad16b77d015/sqlalchemy-2.0.44-py3-none-any.whl", hash = "sha256:19de7ca1246fbef9f9d1bff8f1ab25641569df226364a0e40457dc5457c54b05", size = 1928718, upload-time = "2025-10-10T15:29:45.32Z" }, + { url = "https://files.pythonhosted.org/packages/d7/6d/b8b78b5b80f3c3ab3f7fa90faa195ec3401f6d884b60221260fd4d51864c/sqlalchemy-2.0.48-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b4c575df7368b3b13e0cebf01d4679f9a28ed2ae6c1cd0b1d5beffb6b2007dc", size = 2157184, upload-time = "2026-03-02T15:38:28.161Z" }, + { url = "https://files.pythonhosted.org/packages/21/4b/4f3d4a43743ab58b95b9ddf5580a265b593d017693df9e08bd55780af5bb/sqlalchemy-2.0.48-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e83e3f959aaa1c9df95c22c528096d94848a1bc819f5d0ebf7ee3df0ca63db6c", size = 3313555, upload-time = "2026-03-02T15:58:57.21Z" }, + { url = "https://files.pythonhosted.org/packages/21/dd/3b7c53f1dbbf736fd27041aee68f8ac52226b610f914085b1652c2323442/sqlalchemy-2.0.48-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6f7b7243850edd0b8b97043f04748f31de50cf426e939def5c16bedb540698f7", size = 3313057, upload-time = "2026-03-02T15:52:29.366Z" }, + { url = "https://files.pythonhosted.org/packages/d9/cc/3e600a90ae64047f33313d7d32e5ad025417f09d2ded487e8284b5e21a15/sqlalchemy-2.0.48-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:82745b03b4043e04600a6b665cb98697c4339b24e34d74b0a2ac0a2488b6f94d", size = 3265431, upload-time = "2026-03-02T15:58:59.096Z" }, + { url = "https://files.pythonhosted.org/packages/8b/19/780138dacfe3f5024f4cf96e4005e91edf6653d53d3673be4844578faf1d/sqlalchemy-2.0.48-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e5e088bf43f6ee6fec7dbf1ef7ff7774a616c236b5c0cb3e00662dd71a56b571", size = 3287646, upload-time = "2026-03-02T15:52:31.569Z" }, + { url = "https://files.pythonhosted.org/packages/40/fd/f32ced124f01a23151f4777e4c705f3a470adc7bd241d9f36a7c941a33bf/sqlalchemy-2.0.48-cp311-cp311-win32.whl", hash = "sha256:9c7d0a77e36b5f4b01ca398482230ab792061d243d715299b44a0b55c89fe617", size = 2116956, upload-time = "2026-03-02T15:46:54.535Z" }, + { url = "https://files.pythonhosted.org/packages/58/d5/dd767277f6feef12d05651538f280277e661698f617fa4d086cce6055416/sqlalchemy-2.0.48-cp311-cp311-win_amd64.whl", hash = "sha256:583849c743e0e3c9bb7446f5b5addeacedc168d657a69b418063dfdb2d90081c", size = 2141627, upload-time = "2026-03-02T15:46:55.849Z" }, + { url = "https://files.pythonhosted.org/packages/ef/91/a42ae716f8925e9659df2da21ba941f158686856107a61cc97a95e7647a3/sqlalchemy-2.0.48-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:348174f228b99f33ca1f773e85510e08927620caa59ffe7803b37170df30332b", size = 2155737, upload-time = "2026-03-02T15:49:13.207Z" }, + { url = "https://files.pythonhosted.org/packages/b9/52/f75f516a1f3888f027c1cfb5d22d4376f4b46236f2e8669dcb0cddc60275/sqlalchemy-2.0.48-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", 
hash = "sha256:53667b5f668991e279d21f94ccfa6e45b4e3f4500e7591ae59a8012d0f010dcb", size = 3337020, upload-time = "2026-03-02T15:50:34.547Z" }, + { url = "https://files.pythonhosted.org/packages/37/9a/0c28b6371e0cdcb14f8f1930778cb3123acfcbd2c95bb9cf6b4a2ba0cce3/sqlalchemy-2.0.48-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:34634e196f620c7a61d18d5cf7dc841ca6daa7961aed75d532b7e58b309ac894", size = 3349983, upload-time = "2026-03-02T15:53:25.542Z" }, + { url = "https://files.pythonhosted.org/packages/1c/46/0aee8f3ff20b1dcbceb46ca2d87fcc3d48b407925a383ff668218509d132/sqlalchemy-2.0.48-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:546572a1793cc35857a2ffa1fe0e58571af1779bcc1ffa7c9fb0839885ed69a9", size = 3279690, upload-time = "2026-03-02T15:50:36.277Z" }, + { url = "https://files.pythonhosted.org/packages/ce/8c/a957bc91293b49181350bfd55e6dfc6e30b7f7d83dc6792d72043274a390/sqlalchemy-2.0.48-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:07edba08061bc277bfdc772dd2a1a43978f5a45994dd3ede26391b405c15221e", size = 3314738, upload-time = "2026-03-02T15:53:27.519Z" }, + { url = "https://files.pythonhosted.org/packages/4b/44/1d257d9f9556661e7bdc83667cc414ba210acfc110c82938cb3611eea58f/sqlalchemy-2.0.48-cp312-cp312-win32.whl", hash = "sha256:908a3fa6908716f803b86896a09a2c4dde5f5ce2bb07aacc71ffebb57986ce99", size = 2115546, upload-time = "2026-03-02T15:54:31.591Z" }, + { url = "https://files.pythonhosted.org/packages/f2/af/c3c7e1f3a2b383155a16454df62ae8c62a30dd238e42e68c24cebebbfae6/sqlalchemy-2.0.48-cp312-cp312-win_amd64.whl", hash = "sha256:68549c403f79a8e25984376480959975212a670405e3913830614432b5daa07a", size = 2142484, upload-time = "2026-03-02T15:54:34.072Z" }, + { url = "https://files.pythonhosted.org/packages/46/2c/9664130905f03db57961b8980b05cab624afd114bf2be2576628a9f22da4/sqlalchemy-2.0.48-py3-none-any.whl", hash = "sha256:a66fe406437dd65cacd96a72689a3aaaecaebbcd62d81c5ac1c0fdbeac835096", size = 1940202, upload-time = "2026-03-02T15:52:43.285Z" }, ] [[package]] @@ -6054,25 +6328,49 @@ wheels = [ ] [[package]] -name = "sseclient-py" -version = "1.8.0" +name = "srsly" +version = "2.5.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e8/ed/3df5ab8bb0c12f86c28d0cadb11ed1de44a92ed35ce7ff4fd5518a809325/sseclient-py-1.8.0.tar.gz", hash = "sha256:c547c5c1a7633230a38dc599a21a2dc638f9b5c297286b48b46b935c71fac3e8", size = 7791, upload-time = "2023-09-01T19:39:20.45Z" } +dependencies = [ + { name = "catalogue" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cf/77/5633c4ba65e3421b72b5b4bd93aa328360b351b3a1e5bf3c90eb224668e5/srsly-2.5.2.tar.gz", hash = "sha256:4092bc843c71b7595c6c90a0302a197858c5b9fe43067f62ae6a45bc3baa1c19", size = 492055, upload-time = "2025-11-17T14:11:02.543Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/49/58/97655efdfeb5b4eeab85b1fc5d3fa1023661246c2ab2a26ea8e47402d4f2/sseclient_py-1.8.0-py2.py3-none-any.whl", hash = "sha256:4ecca6dc0b9f963f8384e9d7fd529bf93dd7d708144c4fb5da0e0a1a926fee83", size = 8828, upload-time = "2023-09-01T19:39:17.627Z" }, + { url = "https://files.pythonhosted.org/packages/59/6e/2e3d07b38c1c2e98487f0af92f93b392c6741062d85c65cdc18c7b77448a/srsly-2.5.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e7e07babdcece2405b32c9eea25ef415749f214c889545e38965622bb66837ce", size = 655286, upload-time = "2025-11-17T14:09:52.468Z" }, + { url = 
"https://files.pythonhosted.org/packages/a1/e7/587bcade6b72f919133e587edf60e06039d88049aef9015cd0bdea8df189/srsly-2.5.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1718fe40b73e5cc73b14625233f57e15fb23643d146f53193e8fe653a49e9a0f", size = 653094, upload-time = "2025-11-17T14:09:53.837Z" }, + { url = "https://files.pythonhosted.org/packages/8d/24/5c3aabe292cb4eb906c828f2866624e3a65603ef0a73e964e486ff146b84/srsly-2.5.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7b07e6103db7dd3199c0321935b0c8b9297fd6e018a66de97dc836068440111", size = 1141286, upload-time = "2025-11-17T14:09:55.535Z" }, + { url = "https://files.pythonhosted.org/packages/2a/fe/2cbdcef2495e0c40dafb96da205d9ab3b9e59f64938277800bf65f923281/srsly-2.5.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f2dedf03b2ae143dd70039f097d128fb901deba2482c3a749ac0a985ac735aad", size = 1144667, upload-time = "2025-11-17T14:09:57.24Z" }, + { url = "https://files.pythonhosted.org/packages/91/7c/9a2c9d8141daf7b7a6f092c2be403421a0ab280e7c03cc62c223f37fdf47/srsly-2.5.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9d5be1d8b79a4c4180073461425cb49c8924a184ab49d976c9c81a7bf87731d9", size = 1103935, upload-time = "2025-11-17T14:09:58.576Z" }, + { url = "https://files.pythonhosted.org/packages/f1/ad/8ae727430368fedbb1a7fa41b62d7a86237558bc962c5c5a9aa8bfa82548/srsly-2.5.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c8e42d6bcddda2e6fc1a8438cc050c4a36d0e457a63bcc7117d23c5175dfedec", size = 1117985, upload-time = "2025-11-17T14:10:00.348Z" }, + { url = "https://files.pythonhosted.org/packages/60/69/d6afaef1a8d5192fd802752115c7c3cc104493a7d604b406112b8bc2b610/srsly-2.5.2-cp311-cp311-win_amd64.whl", hash = "sha256:e7362981e687eead00248525c3ef3b8ddd95904c93362c481988d91b26b6aeef", size = 654148, upload-time = "2025-11-17T14:10:01.772Z" }, + { url = "https://files.pythonhosted.org/packages/8f/1c/21f658d98d602a559491b7886c7ca30245c2cd8987ff1b7709437c0f74b1/srsly-2.5.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6f92b4f883e6be4ca77f15980b45d394d310f24903e25e1b2c46df783c7edcce", size = 656161, upload-time = "2025-11-17T14:10:03.181Z" }, + { url = "https://files.pythonhosted.org/packages/2f/a2/bc6fd484ed703857043ae9abd6c9aea9152f9480a6961186ee6c1e0c49e8/srsly-2.5.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ac4790a54b00203f1af5495b6b8ac214131139427f30fcf05cf971dde81930eb", size = 653237, upload-time = "2025-11-17T14:10:04.636Z" }, + { url = "https://files.pythonhosted.org/packages/ab/ea/e3895da29a15c8d325e050ad68a0d1238eece1d2648305796adf98dcba66/srsly-2.5.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ce5c6b016050857a7dd365c9dcdd00d96e7ac26317cfcb175db387e403de05bf", size = 1174418, upload-time = "2025-11-17T14:10:05.945Z" }, + { url = "https://files.pythonhosted.org/packages/a6/a5/21996231f53ee97191d0746c3a672ba33a4d86a19ffad85a1c0096c91c5f/srsly-2.5.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:539c6d0016e91277b5e9be31ebed03f03c32580d49c960e4a92c9003baecf69e", size = 1183089, upload-time = "2025-11-17T14:10:07.335Z" }, + { url = "https://files.pythonhosted.org/packages/7b/df/eb17aa8e4a828e8df7aa7dc471295529d9126e6b710f1833ebe0d8568a8e/srsly-2.5.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9f24b2c4f4c29da04083f09158543eb3f8893ba0ac39818693b3b259ee8044f0", size = 1122594, upload-time = "2025-11-17T14:10:08.899Z" }, + { url = 
"https://files.pythonhosted.org/packages/80/74/1654a80e6c8ec3ee32370ea08a78d3651e0ba1c4d6e6be31c9efdb9a2d10/srsly-2.5.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d34675047460a3f6999e43478f40d9b43917ea1e93a75c41d05bf7648f3e872d", size = 1139594, upload-time = "2025-11-17T14:10:10.286Z" }, + { url = "https://files.pythonhosted.org/packages/73/aa/8393344ca7f0e81965febba07afc5cad68335ed0426408d480b861ab915b/srsly-2.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:81fd133ba3c66c07f0e3a889d2b4c852984d71ea833a665238a9d47d8e051ba5", size = 654750, upload-time = "2025-11-17T14:10:11.637Z" }, +] + +[[package]] +name = "sseclient-py" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4d/2e/59920f7d66b7f9932a3d83dd0ec53fab001be1e058bf582606fe414a5198/sseclient_py-1.9.0-py3-none-any.whl", hash = "sha256:340062b1587fc2880892811e2ab5b176d98ef3eee98b3672ff3a3ba1e8ed0f6f", size = 8351, upload-time = "2026-01-02T23:39:30.995Z" }, ] [[package]] name = "starlette" -version = "0.49.1" +version = "0.52.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1b/3f/507c21db33b66fb027a332f2cb3abbbe924cc3a79ced12f01ed8645955c9/starlette-0.49.1.tar.gz", hash = "sha256:481a43b71e24ed8c43b11ea02f5353d77840e01480881b8cb5a26b8cae64a8cb", size = 2654703, upload-time = "2025-10-28T17:34:10.928Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c4/68/79977123bb7be889ad680d79a40f339082c1978b5cfcf62c2d8d196873ac/starlette-0.52.1.tar.gz", hash = "sha256:834edd1b0a23167694292e94f597773bc3f89f362be6effee198165a35d62933", size = 2653702, upload-time = "2026-01-18T13:34:11.062Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/51/da/545b75d420bb23b5d494b0517757b351963e974e79933f01e05c929f20a6/starlette-0.49.1-py3-none-any.whl", hash = "sha256:d92ce9f07e4a3caa3ac13a79523bd18e3bc0042bb8ff2d759a8e7dd0e1859875", size = 74175, upload-time = "2025-10-28T17:34:09.13Z" }, + { url = "https://files.pythonhosted.org/packages/81/0d/13d1d239a25cbfb19e740db83143e95c772a1fe10202dda4b76792b114dd/starlette-0.52.1-py3-none-any.whl", hash = "sha256:0029d43eb3d273bc4f83a08720b4912ea4b071087a3b48db01b7c839f7954d74", size = 74272, upload-time = "2026-01-18T13:34:09.188Z" }, ] [[package]] @@ -6226,7 +6524,7 @@ wheels = [ [[package]] name = "testcontainers" -version = "4.13.3" +version = "4.14.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "docker" }, @@ -6235,9 +6533,47 @@ dependencies = [ { name = "urllib3" }, { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fc/b3/c272537f3ea2f312555efeb86398cc382cd07b740d5f3c730918c36e64e1/testcontainers-4.13.3.tar.gz", hash = "sha256:9d82a7052c9a53c58b69e1dc31da8e7a715e8b3ec1c4df5027561b47e2efe646", size = 79064, upload-time = "2025-11-14T05:08:47.584Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8b/02/ef62dec9e4f804189c44df23f0b86897c738d38e9c48282fcd410308632f/testcontainers-4.14.1.tar.gz", hash = "sha256:316f1bb178d829c003acd650233e3ff3c59a833a08d8661c074f58a4fbd42a64", size = 80148, upload-time = "2026-01-31T23:13:46.915Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/73/27/c2f24b19dafa197c514abe70eda69bc031c5152c6b1f1e5b20099e2ceedd/testcontainers-4.13.3-py3-none-any.whl", hash = "sha256:063278c4805ffa6dd85e56648a9da3036939e6c0ac1001e851c9276b19b05970", size = 124784, 
upload-time = "2025-11-14T05:08:46.053Z" }, + { url = "https://files.pythonhosted.org/packages/c8/31/5e7b23f9e43ff7fd46d243808d70c5e8daf3bc08ecf5a7fb84d5e38f7603/testcontainers-4.14.1-py3-none-any.whl", hash = "sha256:03dfef4797b31c82e7b762a454b6afec61a2a512ad54af47ab41e4fa5415f891", size = 125640, upload-time = "2026-01-31T23:13:45.464Z" }, +] + +[[package]] +name = "thinc" +version = "8.3.10" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "blis" }, + { name = "catalogue" }, + { name = "confection" }, + { name = "cymem" }, + { name = "murmurhash" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "preshed" }, + { name = "pydantic" }, + { name = "setuptools" }, + { name = "srsly" }, + { name = "wasabi" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2f/3a/2d0f0be132b9faaa6d56f04565ae122684273e4bf4eab8dee5f48dc00f68/thinc-8.3.10.tar.gz", hash = "sha256:5a75109f4ee1c968fc055ce651a17cb44b23b000d9e95f04a4d047ab3cb3e34e", size = 194196, upload-time = "2025-11-17T17:21:46.435Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/43/01b662540888140b5e9f76c957c7118c203cb91f17867ce78fc4f2d3800f/thinc-8.3.10-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:72793e0bd3f0f391ca36ab0996b3c21db7045409bd3740840e7d6fcd9a044d81", size = 818632, upload-time = "2025-11-17T17:20:49.123Z" }, + { url = "https://files.pythonhosted.org/packages/f0/ba/e0edcc84014bdde1bc9a082408279616a061566a82b5e3b90b9e64f33c1b/thinc-8.3.10-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4b13311acb061e04e3a0c4bd677b85ec2971e3a3674558252443b5446e378256", size = 770622, upload-time = "2025-11-17T17:20:50.467Z" }, + { url = "https://files.pythonhosted.org/packages/f3/51/0558f8cb69c13e1114428726a3fb36fe1adc5821a62ccd3fa7b7c1a5bd9a/thinc-8.3.10-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9ffddcf311fb7c998eb8988d22c618dc0f33b26303853c0445edb8a69819ac60", size = 4094652, upload-time = "2025-11-17T17:20:52.104Z" }, + { url = "https://files.pythonhosted.org/packages/a0/c9/bb78601f74f9bcadb2d3d4d5b057c4dc3f2e52d9771bad3d93a4e38a9dc1/thinc-8.3.10-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9b1e0511e8421f20abe4f22d8c8073a0d7ce4a31597cc7a404fdbad72bf38058", size = 4124379, upload-time = "2025-11-17T17:20:53.781Z" }, + { url = "https://files.pythonhosted.org/packages/f6/3e/961e1b9794111c89f2ceadfef5692aba5097bec4aaaf89f1b8a04c5bc961/thinc-8.3.10-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e31e49441dfad8fd64b8ca5f5c9b8c33ee87a553bf79c830a15b4cd02efcc444", size = 5094221, upload-time = "2025-11-17T17:20:55.466Z" }, + { url = "https://files.pythonhosted.org/packages/e5/de/da163a1533faaef5b17dd11dfb9ffd9fd5627dbef56e1160da6edbe1b224/thinc-8.3.10-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9de5dd73ce7135dcf41d68625d35cd9f5cf8e5f55a3932001a188b45057c3379", size = 5262834, upload-time = "2025-11-17T17:20:57.459Z" }, + { url = "https://files.pythonhosted.org/packages/4c/4e/449d29e33f7ddda6ba1b9e06de3ea5155c2dc33c21f438f8faafebde4e13/thinc-8.3.10-cp311-cp311-win_amd64.whl", hash = "sha256:b6d64e390a1996d489872b9d99a584142542aba59ebdc60f941f473732582f6f", size = 1791864, upload-time = "2025-11-17T17:20:59.817Z" }, + { url = "https://files.pythonhosted.org/packages/4a/b3/68038d88d45d83a501c3f19bd654d275b7ac730c807f52bbb46f35f591bc/thinc-8.3.10-cp311-cp311-win_arm64.whl", hash = "sha256:3991b6ad72e611dfbfb58235de5b67bcc9f61426127cc023607f97e8c5f43e0e", size = 1717563, upload-time = 
"2025-11-17T17:21:01.634Z" }, + { url = "https://files.pythonhosted.org/packages/d3/34/ba3b386d92edf50784b60ee34318d47c7f49c198268746ef7851c5bbe8cf/thinc-8.3.10-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:51bc6ef735bdbcab75ab2916731b8f61f94c66add6f9db213d900d3c6a244f95", size = 794509, upload-time = "2025-11-17T17:21:03.21Z" }, + { url = "https://files.pythonhosted.org/packages/07/f3/9f52d18115cd9d8d7b2590d226cb2752d2a5ffec61576b19462b48410184/thinc-8.3.10-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4f48b4d346915f98e9722c0c50ef911cc16c6790a2b7afebc6e1a2c96a6ce6c6", size = 741084, upload-time = "2025-11-17T17:21:04.568Z" }, + { url = "https://files.pythonhosted.org/packages/ad/9c/129c2b740c4e3d3624b6fb3dec1577ef27cb804bc1647f9bc3e1801ea20c/thinc-8.3.10-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5003f4db2db22cc8d686db8db83509acc3c50f4c55ebdcb2bbfcc1095096f7d2", size = 3846337, upload-time = "2025-11-17T17:21:06.079Z" }, + { url = "https://files.pythonhosted.org/packages/22/d2/738cf188dea8240c2be081c83ea47270fea585eba446171757d2cdb9b675/thinc-8.3.10-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b12484c3ed0632331fada2c334680dd6bc35972d0717343432dfc701f04a9b4c", size = 3901216, upload-time = "2025-11-17T17:21:07.842Z" }, + { url = "https://files.pythonhosted.org/packages/22/92/32f66eb9b1a29b797bf378a0874615d810d79eefca1d6c736c5ca3f8b918/thinc-8.3.10-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8677c446d3f9b97a465472c58683b785b25dfcf26c683e3f4e8f8c7c188e4362", size = 4827286, upload-time = "2025-11-17T17:21:09.62Z" }, + { url = "https://files.pythonhosted.org/packages/c4/5f/7ceae1e1f2029efd67ed88e23cd6dc13a5ee647cdc2b35113101b2a62c10/thinc-8.3.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:759c385ac08dcf950238b60b96a28f9c04618861141766928dff4a51b1679b25", size = 5024421, upload-time = "2025-11-17T17:21:11.199Z" }, + { url = "https://files.pythonhosted.org/packages/0b/66/30f9d8d41049b78bc614213d492792fbcfeb1b28642adf661c42110a7ebd/thinc-8.3.10-cp312-cp312-win_amd64.whl", hash = "sha256:bf3f188c3fa1fdcefd547d1f90a1245c29025d6d0e3f71d7fdf21dad210b990c", size = 1718631, upload-time = "2025-11-17T17:21:12.965Z" }, + { url = "https://files.pythonhosted.org/packages/f8/44/32e2a5018a1165a304d25eb9b1c74e5310da19a533a35331e8d824dc6a88/thinc-8.3.10-cp312-cp312-win_arm64.whl", hash = "sha256:234b7e57a6ef4e0260d99f4e8fdc328ed12d0ba9bbd98fdaa567294a17700d1c", size = 1642224, upload-time = "2025-11-17T17:21:14.371Z" }, ] [[package]] @@ -6353,14 +6689,14 @@ sdist = { url = "https://files.pythonhosted.org/packages/9a/b3/13451226f564f88d9 [[package]] name = "tqdm" -version = "4.67.1" +version = "4.67.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737, upload-time = "2024-11-24T20:12:22.481Z" } +sdist = { url = "https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598, upload-time = "2026-02-03T17:35:53.048Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" }, + { url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" }, ] [[package]] @@ -6398,6 +6734,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/64/7713ffe4b5983314e9d436a90d5bd4f63b6054e2aca783a3cfc44cb95bbf/typer-0.20.0-py3-none-any.whl", hash = "sha256:5b463df6793ec1dca6213a3cf4c0f03bc6e322ac5e16e13ddd622a889489784a", size = 47028, upload-time = "2025-10-20T17:03:47.617Z" }, ] +[[package]] +name = "typer-slim" +version = "0.21.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-doc" }, + { name = "click" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a5/ca/0d9d822fd8a4c7e830cba36a2557b070d4b4a9558a0460377a61f8fb315d/typer_slim-0.21.2.tar.gz", hash = "sha256:78f20d793036a62aaf9c3798306142b08261d4b2a941c6e463081239f062a2f9", size = 120497, upload-time = "2026-02-10T19:33:45.836Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/03/e09325cfc40a33a82b31ba1a3f1d97e85246736856a45a43b19fcb48b1c2/typer_slim-0.21.2-py3-none-any.whl", hash = "sha256:4705082bb6c66c090f60e47c8be09a93158c139ce0aa98df7c6c47e723395e5f", size = 56790, upload-time = "2026-02-10T19:33:47.221Z" }, +] + [[package]] name = "types-aiofiles" version = "25.1.0.20251011" @@ -6851,19 +7200,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, ] -[[package]] -name = "typing-inspect" -version = "0.9.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "mypy-extensions" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/dc/74/1789779d91f1961fa9438e9a8710cdae6bd138c80d7303996933d117264a/typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78", size = 13825, upload-time = "2023-05-24T20:25:47.612Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/65/f3/107a22063bf27bdccf2024833d3445f4eea42b2e598abfbd46f6a63b6cb0/typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f", size = 8827, upload-time = "2023-05-24T20:25:45.287Z" }, -] - [[package]] name = "typing-inspection" version = "0.4.2" @@ -6927,19 +7263,18 @@ wheels = [ [[package]] name = "unstructured" -version = "0.18.31" +version = "0.21.5" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "backoff" }, { name = "beautifulsoup4" }, { name = "charset-normalizer" }, - { name = "dataclasses-json" }, { name = "emoji" }, + { name = "filelock" }, { name = "filetype" }, { name = "html5lib" }, + { name = "installer" }, { name = "langdetect" }, { name = "lxml" }, - { name = "nltk" }, { name = "numba" }, { name = "numpy" }, { name = "psutil" }, @@ -6947,15 +7282,17 @@ dependencies = [ { name = "python-magic" }, 
{ name = "python-oxmsg" }, { name = "rapidfuzz" }, + { name = "regex" }, { name = "requests" }, + { name = "spacy" }, { name = "tqdm" }, { name = "typing-extensions" }, { name = "unstructured-client" }, { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a9/5f/64285bd69a538bc28753f1423fcaa9d64cd79a9e7c097171b1f0d27e9cdb/unstructured-0.18.31.tar.gz", hash = "sha256:af4bbe32d1894ae6e755f0da6fc0dd307a1d0adeebe0e7cc6278f6cf744339ca", size = 1707700, upload-time = "2026-01-27T15:33:05.378Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b3/e6/fbef61517d130af1def3b81681e253a5679f19de2f04e439afbbf1f021e0/unstructured-0.21.5.tar.gz", hash = "sha256:3e220d0c2b9c8ec12c99767162b95ab0acfca75e979b82c66c15ca15caa60139", size = 1501811, upload-time = "2026-02-24T15:29:27.84Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c8/4a/9c43f39d9e443c9bc3f2e379b305bca27110adc653b071221b3132c18de5/unstructured-0.18.31-py3-none-any.whl", hash = "sha256:fab4641176cb9b192ed38048758aa0d9843121d03626d18f42275afb31e5b2d3", size = 1794889, upload-time = "2026-01-27T15:33:03.136Z" }, + { url = "https://files.pythonhosted.org/packages/b8/b6/7e6dd60bde81d5a4d4ddf426f566a5d1b4c30490053caed69e47f55c676f/unstructured-0.21.5-py3-none-any.whl", hash = "sha256:d88a277c368462b69a8843b9cb22476f3cc4d0a58455536520359387224b3366", size = 1554925, upload-time = "2026-02-24T15:29:26.009Z" }, ] [package.optional-dependencies] @@ -6963,7 +7300,7 @@ docx = [ { name = "python-docx" }, ] epub = [ - { name = "pypandoc" }, + { name = "pypandoc-binary" }, ] md = [ { name = "markdown" }, @@ -7170,6 +7507,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/30/20/6c091d451e2a07689bfbfaeb7592d488011420e721de170884fedd68c644/wandb-0.23.1-py3-none-win_arm64.whl", hash = "sha256:8aee7f3bb573f2c0acf860f497ca9c684f9b35f2ca51011ba65af3d4592b77c1", size = 20137463, upload-time = "2025-12-03T02:25:08.317Z" }, ] +[[package]] +name = "wasabi" +version = "1.1.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ac/f9/054e6e2f1071e963b5e746b48d1e3727470b2a490834d18ad92364929db3/wasabi-1.1.3.tar.gz", hash = "sha256:4bb3008f003809db0c3e28b4daf20906ea871a2bb43f9914197d540f4f2e0878", size = 30391, upload-time = "2024-05-31T16:56:18.99Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/06/7c/34330a89da55610daa5f245ddce5aab81244321101614751e7537f125133/wasabi-1.1.3-py3-none-any.whl", hash = "sha256:f76e16e8f7e79f8c4c8be49b4024ac725713ab10cd7f19350ad18a8e3f71728c", size = 27880, upload-time = "2024-05-31T16:56:16.699Z" }, +] + [[package]] name = "watchfiles" version = "1.1.1" @@ -7220,6 +7569,26 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/af/b5/123f13c975e9f27ab9c0770f514345bd406d0e8d3b7a0723af9d43f710af/wcwidth-0.2.14-py2.py3-none-any.whl", hash = "sha256:a7bb560c8aee30f9957e5f9895805edd20602f2d7f720186dfd906e82b4982e1", size = 37286, upload-time = "2025-09-22T16:29:51.641Z" }, ] +[[package]] +name = "weasel" +version = "0.4.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cloudpathlib" }, + { name = "confection" }, + { name = "packaging" }, + { name = "pydantic" }, + { name = "requests" }, + { name = "smart-open" }, + { name = "srsly" }, + { name = "typer-slim" }, + { name = "wasabi" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/09/d7/edd9c24e60cf8e5de130aa2e8af3b01521f4d0216c371d01212f580d0d8e/weasel-0.4.3.tar.gz", hash = "sha256:f293d6174398e8f478c78481e00c503ee4b82ea7a3e6d0d6a01e46a6b1396845", size = 38733, upload-time = "2025-11-13T23:52:28.193Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/74/a148b41572656904a39dfcfed3f84dd1066014eed94e209223ae8e9d088d/weasel-0.4.3-py3-none-any.whl", hash = "sha256:08f65b5d0dbded4879e08a64882de9b9514753d9eaa4c4e2a576e33666ac12cf", size = 50757, upload-time = "2025-11-13T23:52:26.982Z" }, +] + [[package]] name = "weave" version = "0.52.17" @@ -7446,48 +7815,52 @@ wheels = [ [[package]] name = "yarl" -version = "1.18.3" +version = "1.23.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "idna" }, { name = "multidict" }, { name = "propcache" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b7/9d/4b94a8e6d2b51b599516a5cb88e5bc99b4d8d4583e468057eaa29d5f0918/yarl-1.18.3.tar.gz", hash = "sha256:ac1801c45cbf77b6c99242eeff4fffb5e4e73a800b5c4ad4fc0be5def634d2e1", size = 181062, upload-time = "2024-12-01T20:35:23.292Z" } +sdist = { url = "https://files.pythonhosted.org/packages/23/6e/beb1beec874a72f23815c1434518bfc4ed2175065173fb138c3705f658d4/yarl-1.23.0.tar.gz", hash = "sha256:53b1ea6ca88ebd4420379c330aea57e258408dd0df9af0992e5de2078dc9f5d5", size = 194676, upload-time = "2026-03-01T22:07:53.373Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/40/93/282b5f4898d8e8efaf0790ba6d10e2245d2c9f30e199d1a85cae9356098c/yarl-1.18.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8503ad47387b8ebd39cbbbdf0bf113e17330ffd339ba1144074da24c545f0069", size = 141555, upload-time = "2024-12-01T20:33:08.819Z" }, - { url = "https://files.pythonhosted.org/packages/6d/9c/0a49af78df099c283ca3444560f10718fadb8a18dc8b3edf8c7bd9fd7d89/yarl-1.18.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:02ddb6756f8f4517a2d5e99d8b2f272488e18dd0bfbc802f31c16c6c20f22193", size = 94351, upload-time = "2024-12-01T20:33:10.609Z" }, - { url = "https://files.pythonhosted.org/packages/5a/a1/205ab51e148fdcedad189ca8dd587794c6f119882437d04c33c01a75dece/yarl-1.18.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:67a283dd2882ac98cc6318384f565bffc751ab564605959df4752d42483ad889", size = 92286, upload-time = "2024-12-01T20:33:12.322Z" }, - { url = "https://files.pythonhosted.org/packages/ed/fe/88b690b30f3f59275fb674f5f93ddd4a3ae796c2b62e5bb9ece8a4914b83/yarl-1.18.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d980e0325b6eddc81331d3f4551e2a333999fb176fd153e075c6d1c2530aa8a8", size = 340649, upload-time = "2024-12-01T20:33:13.842Z" }, - { url = "https://files.pythonhosted.org/packages/07/eb/3b65499b568e01f36e847cebdc8d7ccb51fff716dbda1ae83c3cbb8ca1c9/yarl-1.18.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b643562c12680b01e17239be267bc306bbc6aac1f34f6444d1bded0c5ce438ca", size = 356623, upload-time = "2024-12-01T20:33:15.535Z" }, - { url = "https://files.pythonhosted.org/packages/33/46/f559dc184280b745fc76ec6b1954de2c55595f0ec0a7614238b9ebf69618/yarl-1.18.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c017a3b6df3a1bd45b9fa49a0f54005e53fbcad16633870104b66fa1a30a29d8", size = 354007, upload-time = "2024-12-01T20:33:17.518Z" }, - { url = 
"https://files.pythonhosted.org/packages/af/ba/1865d85212351ad160f19fb99808acf23aab9a0f8ff31c8c9f1b4d671fc9/yarl-1.18.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75674776d96d7b851b6498f17824ba17849d790a44d282929c42dbb77d4f17ae", size = 344145, upload-time = "2024-12-01T20:33:20.071Z" }, - { url = "https://files.pythonhosted.org/packages/94/cb/5c3e975d77755d7b3d5193e92056b19d83752ea2da7ab394e22260a7b824/yarl-1.18.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ccaa3a4b521b780a7e771cc336a2dba389a0861592bbce09a476190bb0c8b4b3", size = 336133, upload-time = "2024-12-01T20:33:22.515Z" }, - { url = "https://files.pythonhosted.org/packages/19/89/b77d3fd249ab52a5c40859815765d35c91425b6bb82e7427ab2f78f5ff55/yarl-1.18.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2d06d3005e668744e11ed80812e61efd77d70bb7f03e33c1598c301eea20efbb", size = 347967, upload-time = "2024-12-01T20:33:24.139Z" }, - { url = "https://files.pythonhosted.org/packages/35/bd/f6b7630ba2cc06c319c3235634c582a6ab014d52311e7d7c22f9518189b5/yarl-1.18.3-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:9d41beda9dc97ca9ab0b9888cb71f7539124bc05df02c0cff6e5acc5a19dcc6e", size = 346397, upload-time = "2024-12-01T20:33:26.205Z" }, - { url = "https://files.pythonhosted.org/packages/18/1a/0b4e367d5a72d1f095318344848e93ea70da728118221f84f1bf6c1e39e7/yarl-1.18.3-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ba23302c0c61a9999784e73809427c9dbedd79f66a13d84ad1b1943802eaaf59", size = 350206, upload-time = "2024-12-01T20:33:27.83Z" }, - { url = "https://files.pythonhosted.org/packages/b5/cf/320fff4367341fb77809a2d8d7fe75b5d323a8e1b35710aafe41fdbf327b/yarl-1.18.3-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:6748dbf9bfa5ba1afcc7556b71cda0d7ce5f24768043a02a58846e4a443d808d", size = 362089, upload-time = "2024-12-01T20:33:29.565Z" }, - { url = "https://files.pythonhosted.org/packages/57/cf/aadba261d8b920253204085268bad5e8cdd86b50162fcb1b10c10834885a/yarl-1.18.3-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:0b0cad37311123211dc91eadcb322ef4d4a66008d3e1bdc404808992260e1a0e", size = 366267, upload-time = "2024-12-01T20:33:31.449Z" }, - { url = "https://files.pythonhosted.org/packages/54/58/fb4cadd81acdee6dafe14abeb258f876e4dd410518099ae9a35c88d8097c/yarl-1.18.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0fb2171a4486bb075316ee754c6d8382ea6eb8b399d4ec62fde2b591f879778a", size = 359141, upload-time = "2024-12-01T20:33:33.79Z" }, - { url = "https://files.pythonhosted.org/packages/9a/7a/4c571597589da4cd5c14ed2a0b17ac56ec9ee7ee615013f74653169e702d/yarl-1.18.3-cp311-cp311-win32.whl", hash = "sha256:61b1a825a13bef4a5f10b1885245377d3cd0bf87cba068e1d9a88c2ae36880e1", size = 84402, upload-time = "2024-12-01T20:33:35.689Z" }, - { url = "https://files.pythonhosted.org/packages/ae/7b/8600250b3d89b625f1121d897062f629883c2f45339623b69b1747ec65fa/yarl-1.18.3-cp311-cp311-win_amd64.whl", hash = "sha256:b9d60031cf568c627d028239693fd718025719c02c9f55df0a53e587aab951b5", size = 91030, upload-time = "2024-12-01T20:33:37.511Z" }, - { url = "https://files.pythonhosted.org/packages/33/85/bd2e2729752ff4c77338e0102914897512e92496375e079ce0150a6dc306/yarl-1.18.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1dd4bdd05407ced96fed3d7f25dbbf88d2ffb045a0db60dbc247f5b3c5c25d50", size = 142644, upload-time = "2024-12-01T20:33:39.204Z" }, - { url = 
"https://files.pythonhosted.org/packages/ff/74/1178322cc0f10288d7eefa6e4a85d8d2e28187ccab13d5b844e8b5d7c88d/yarl-1.18.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7c33dd1931a95e5d9a772d0ac5e44cac8957eaf58e3c8da8c1414de7dd27c576", size = 94962, upload-time = "2024-12-01T20:33:40.808Z" }, - { url = "https://files.pythonhosted.org/packages/be/75/79c6acc0261e2c2ae8a1c41cf12265e91628c8c58ae91f5ff59e29c0787f/yarl-1.18.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:25b411eddcfd56a2f0cd6a384e9f4f7aa3efee14b188de13048c25b5e91f1640", size = 92795, upload-time = "2024-12-01T20:33:42.322Z" }, - { url = "https://files.pythonhosted.org/packages/6b/32/927b2d67a412c31199e83fefdce6e645247b4fb164aa1ecb35a0f9eb2058/yarl-1.18.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:436c4fc0a4d66b2badc6c5fc5ef4e47bb10e4fd9bf0c79524ac719a01f3607c2", size = 332368, upload-time = "2024-12-01T20:33:43.956Z" }, - { url = "https://files.pythonhosted.org/packages/19/e5/859fca07169d6eceeaa4fde1997c91d8abde4e9a7c018e371640c2da2b71/yarl-1.18.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e35ef8683211db69ffe129a25d5634319a677570ab6b2eba4afa860f54eeaf75", size = 342314, upload-time = "2024-12-01T20:33:46.046Z" }, - { url = "https://files.pythonhosted.org/packages/08/75/76b63ccd91c9e03ab213ef27ae6add2e3400e77e5cdddf8ed2dbc36e3f21/yarl-1.18.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:84b2deecba4a3f1a398df819151eb72d29bfeb3b69abb145a00ddc8d30094512", size = 341987, upload-time = "2024-12-01T20:33:48.352Z" }, - { url = "https://files.pythonhosted.org/packages/1a/e1/a097d5755d3ea8479a42856f51d97eeff7a3a7160593332d98f2709b3580/yarl-1.18.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00e5a1fea0fd4f5bfa7440a47eff01d9822a65b4488f7cff83155a0f31a2ecba", size = 336914, upload-time = "2024-12-01T20:33:50.875Z" }, - { url = "https://files.pythonhosted.org/packages/0b/42/e1b4d0e396b7987feceebe565286c27bc085bf07d61a59508cdaf2d45e63/yarl-1.18.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d0e883008013c0e4aef84dcfe2a0b172c4d23c2669412cf5b3371003941f72bb", size = 325765, upload-time = "2024-12-01T20:33:52.641Z" }, - { url = "https://files.pythonhosted.org/packages/7e/18/03a5834ccc9177f97ca1bbb245b93c13e58e8225276f01eedc4cc98ab820/yarl-1.18.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5a3f356548e34a70b0172d8890006c37be92995f62d95a07b4a42e90fba54272", size = 344444, upload-time = "2024-12-01T20:33:54.395Z" }, - { url = "https://files.pythonhosted.org/packages/c8/03/a713633bdde0640b0472aa197b5b86e90fbc4c5bc05b727b714cd8a40e6d/yarl-1.18.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:ccd17349166b1bee6e529b4add61727d3f55edb7babbe4069b5764c9587a8cc6", size = 340760, upload-time = "2024-12-01T20:33:56.286Z" }, - { url = "https://files.pythonhosted.org/packages/eb/99/f6567e3f3bbad8fd101886ea0276c68ecb86a2b58be0f64077396cd4b95e/yarl-1.18.3-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b958ddd075ddba5b09bb0be8a6d9906d2ce933aee81100db289badbeb966f54e", size = 346484, upload-time = "2024-12-01T20:33:58.375Z" }, - { url = "https://files.pythonhosted.org/packages/8e/a9/84717c896b2fc6cb15bd4eecd64e34a2f0a9fd6669e69170c73a8b46795a/yarl-1.18.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c7d79f7d9aabd6011004e33b22bc13056a3e3fb54794d138af57f5ee9d9032cb", size = 359864, upload-time = "2024-12-01T20:34:00.22Z" }, - { url = 
"https://files.pythonhosted.org/packages/1e/2e/d0f5f1bef7ee93ed17e739ec8dbcb47794af891f7d165fa6014517b48169/yarl-1.18.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:4891ed92157e5430874dad17b15eb1fda57627710756c27422200c52d8a4e393", size = 364537, upload-time = "2024-12-01T20:34:03.54Z" }, - { url = "https://files.pythonhosted.org/packages/97/8a/568d07c5d4964da5b02621a517532adb8ec5ba181ad1687191fffeda0ab6/yarl-1.18.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ce1af883b94304f493698b00d0f006d56aea98aeb49d75ec7d98cd4a777e9285", size = 357861, upload-time = "2024-12-01T20:34:05.73Z" }, - { url = "https://files.pythonhosted.org/packages/7d/e3/924c3f64b6b3077889df9a1ece1ed8947e7b61b0a933f2ec93041990a677/yarl-1.18.3-cp312-cp312-win32.whl", hash = "sha256:f91c4803173928a25e1a55b943c81f55b8872f0018be83e3ad4938adffb77dd2", size = 84097, upload-time = "2024-12-01T20:34:07.664Z" }, - { url = "https://files.pythonhosted.org/packages/34/45/0e055320daaabfc169b21ff6174567b2c910c45617b0d79c68d7ab349b02/yarl-1.18.3-cp312-cp312-win_amd64.whl", hash = "sha256:7e2ee16578af3b52ac2f334c3b1f92262f47e02cc6193c598502bd46f5cd1477", size = 90399, upload-time = "2024-12-01T20:34:09.61Z" }, - { url = "https://files.pythonhosted.org/packages/f5/4b/a06e0ec3d155924f77835ed2d167ebd3b211a7b0853da1cf8d8414d784ef/yarl-1.18.3-py3-none-any.whl", hash = "sha256:b57f4f58099328dfb26c6a771d09fb20dbbae81d20cfb66141251ea063bd101b", size = 45109, upload-time = "2024-12-01T20:35:20.834Z" }, + { url = "https://files.pythonhosted.org/packages/a2/aa/60da938b8f0997ba3a911263c40d82b6f645a67902a490b46f3355e10fae/yarl-1.23.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:b35d13d549077713e4414f927cdc388d62e543987c572baee613bf82f11a4b99", size = 123641, upload-time = "2026-03-01T22:04:42.841Z" }, + { url = "https://files.pythonhosted.org/packages/24/84/e237607faf4e099dbb8a4f511cfd5efcb5f75918baad200ff7380635631b/yarl-1.23.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cbb0fef01f0c6b38cb0f39b1f78fc90b807e0e3c86a7ff3ce74ad77ce5c7880c", size = 86248, upload-time = "2026-03-01T22:04:44.757Z" }, + { url = "https://files.pythonhosted.org/packages/b2/0d/71ceabc14c146ba8ee3804ca7b3d42b1664c8440439de5214d366fec7d3a/yarl-1.23.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dc52310451fc7c629e13c4e061cbe2dd01684d91f2f8ee2821b083c58bd72432", size = 85988, upload-time = "2026-03-01T22:04:46.365Z" }, + { url = "https://files.pythonhosted.org/packages/8c/6c/4a90d59c572e46b270ca132aca66954f1175abd691f74c1ef4c6711828e2/yarl-1.23.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b2c6b50c7b0464165472b56b42d4c76a7b864597007d9c085e8b63e185cf4a7a", size = 100566, upload-time = "2026-03-01T22:04:47.639Z" }, + { url = "https://files.pythonhosted.org/packages/49/fb/c438fb5108047e629f6282a371e6e91cf3f97ee087c4fb748a1f32ceef55/yarl-1.23.0-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:aafe5dcfda86c8af00386d7781d4c2181b5011b7be3f2add5e99899ea925df05", size = 92079, upload-time = "2026-03-01T22:04:48.925Z" }, + { url = "https://files.pythonhosted.org/packages/d9/13/d269aa1aed3e4f50a5a103f96327210cc5fa5dd2d50882778f13c7a14606/yarl-1.23.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:9ee33b875f0b390564c1fb7bc528abf18c8ee6073b201c6ae8524aca778e2d83", size = 108741, upload-time = "2026-03-01T22:04:50.838Z" }, + { url = 
"https://files.pythonhosted.org/packages/85/fb/115b16f22c37ea4437d323e472945bea97301c8ec6089868fa560abab590/yarl-1.23.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4c41e021bc6d7affb3364dc1e1e5fa9582b470f283748784bd6ea0558f87f42c", size = 108099, upload-time = "2026-03-01T22:04:52.499Z" }, + { url = "https://files.pythonhosted.org/packages/9a/64/c53487d9f4968045b8afa51aed7ca44f58b2589e772f32745f3744476c82/yarl-1.23.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:99c8a9ed30f4164bc4c14b37a90208836cbf50d4ce2a57c71d0f52c7fb4f7598", size = 102678, upload-time = "2026-03-01T22:04:55.176Z" }, + { url = "https://files.pythonhosted.org/packages/85/59/cd98e556fbb2bf8fab29c1a722f67ad45c5f3447cac798ab85620d1e70af/yarl-1.23.0-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f2af5c81a1f124609d5f33507082fc3f739959d4719b56877ab1ee7e7b3d602b", size = 100803, upload-time = "2026-03-01T22:04:56.588Z" }, + { url = "https://files.pythonhosted.org/packages/9e/c0/b39770b56d4a9f0bb5f77e2f1763cd2d75cc2f6c0131e3b4c360348fcd65/yarl-1.23.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6b41389c19b07c760c7e427a3462e8ab83c4bb087d127f0e854c706ce1b9215c", size = 100163, upload-time = "2026-03-01T22:04:58.492Z" }, + { url = "https://files.pythonhosted.org/packages/e7/64/6980f99ab00e1f0ff67cb84766c93d595b067eed07439cfccfc8fb28c1a6/yarl-1.23.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:1dc702e42d0684f42d6519c8d581e49c96cefaaab16691f03566d30658ee8788", size = 93859, upload-time = "2026-03-01T22:05:00.268Z" }, + { url = "https://files.pythonhosted.org/packages/38/69/912e6c5e146793e5d4b5fe39ff5b00f4d22463dfd5a162bec565ac757673/yarl-1.23.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:0e40111274f340d32ebcc0a5668d54d2b552a6cca84c9475859d364b380e3222", size = 108202, upload-time = "2026-03-01T22:05:02.273Z" }, + { url = "https://files.pythonhosted.org/packages/59/97/35ca6767524687ad64e5f5c31ad54bc76d585585a9fcb40f649e7e82ffed/yarl-1.23.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:4764a6a7588561a9aef92f65bda2c4fb58fe7c675c0883862e6df97559de0bfb", size = 99866, upload-time = "2026-03-01T22:05:03.597Z" }, + { url = "https://files.pythonhosted.org/packages/d3/1c/1a3387ee6d73589f6f2a220ae06f2984f6c20b40c734989b0a44f5987308/yarl-1.23.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:03214408cfa590df47728b84c679ae4ef00be2428e11630277be0727eba2d7cc", size = 107852, upload-time = "2026-03-01T22:05:04.986Z" }, + { url = "https://files.pythonhosted.org/packages/a4/b8/35c0750fcd5a3f781058bfd954515dd4b1eab45e218cbb85cf11132215f1/yarl-1.23.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:170e26584b060879e29fac213e4228ef063f39128723807a312e5c7fec28eff2", size = 102919, upload-time = "2026-03-01T22:05:06.397Z" }, + { url = "https://files.pythonhosted.org/packages/e5/1c/9a1979aec4a81896d597bcb2177827f2dbee3f5b7cc48b2d0dadb644b41d/yarl-1.23.0-cp311-cp311-win32.whl", hash = "sha256:51430653db848d258336cfa0244427b17d12db63d42603a55f0d4546f50f25b5", size = 82602, upload-time = "2026-03-01T22:05:08.444Z" }, + { url = "https://files.pythonhosted.org/packages/93/22/b85eca6fa2ad9491af48c973e4c8cf6b103a73dbb271fe3346949449fca0/yarl-1.23.0-cp311-cp311-win_amd64.whl", hash = "sha256:bf49a3ae946a87083ef3a34c8f677ae4243f5b824bfc4c69672e72b3d6719d46", size = 87461, upload-time = "2026-03-01T22:05:10.145Z" }, + { url = 
"https://files.pythonhosted.org/packages/93/95/07e3553fe6f113e6864a20bdc53a78113cda3b9ced8784ee52a52c9f80d8/yarl-1.23.0-cp311-cp311-win_arm64.whl", hash = "sha256:b39cb32a6582750b6cc77bfb3c49c0f8760dc18dc96ec9fb55fbb0f04e08b928", size = 82336, upload-time = "2026-03-01T22:05:11.554Z" }, + { url = "https://files.pythonhosted.org/packages/88/8a/94615bc31022f711add374097ad4144d569e95ff3c38d39215d07ac153a0/yarl-1.23.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1932b6b8bba8d0160a9d1078aae5838a66039e8832d41d2992daa9a3a08f7860", size = 124737, upload-time = "2026-03-01T22:05:12.897Z" }, + { url = "https://files.pythonhosted.org/packages/e3/6f/c6554045d59d64052698add01226bc867b52fe4a12373415d7991fdca95d/yarl-1.23.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:411225bae281f114067578891bc75534cfb3d92a3b4dfef7a6ca78ba354e6069", size = 87029, upload-time = "2026-03-01T22:05:14.376Z" }, + { url = "https://files.pythonhosted.org/packages/19/2a/725ecc166d53438bc88f76822ed4b1e3b10756e790bafd7b523fe97c322d/yarl-1.23.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:13a563739ae600a631c36ce096615fe307f131344588b0bc0daec108cdb47b25", size = 86310, upload-time = "2026-03-01T22:05:15.71Z" }, + { url = "https://files.pythonhosted.org/packages/99/30/58260ed98e6ff7f90ba84442c1ddd758c9170d70327394a6227b310cd60f/yarl-1.23.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9cbf44c5cb4a7633d078788e1b56387e3d3cf2b8139a3be38040b22d6c3221c8", size = 97587, upload-time = "2026-03-01T22:05:17.384Z" }, + { url = "https://files.pythonhosted.org/packages/76/0a/8b08aac08b50682e65759f7f8dde98ae8168f72487e7357a5d684c581ef9/yarl-1.23.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:53ad387048f6f09a8969631e4de3f1bf70c50e93545d64af4f751b2498755072", size = 92528, upload-time = "2026-03-01T22:05:18.804Z" }, + { url = "https://files.pythonhosted.org/packages/52/07/0b7179101fe5f8385ec6c6bb5d0cb9f76bd9fb4a769591ab6fb5cdbfc69a/yarl-1.23.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4a59ba56f340334766f3a4442e0efd0af895fae9e2b204741ef885c446b3a1a8", size = 105339, upload-time = "2026-03-01T22:05:20.235Z" }, + { url = "https://files.pythonhosted.org/packages/d3/8a/36d82869ab5ec829ca8574dfcb92b51286fcfb1e9c7a73659616362dc880/yarl-1.23.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:803a3c3ce4acc62eaf01eaca1208dcf0783025ef27572c3336502b9c232005e7", size = 105061, upload-time = "2026-03-01T22:05:22.268Z" }, + { url = "https://files.pythonhosted.org/packages/66/3e/868e5c3364b6cee19ff3e1a122194fa4ce51def02c61023970442162859e/yarl-1.23.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a3d2bff8f37f8d0f96c7ec554d16945050d54462d6e95414babaa18bfafc7f51", size = 100132, upload-time = "2026-03-01T22:05:23.638Z" }, + { url = "https://files.pythonhosted.org/packages/cf/26/9c89acf82f08a52cb52d6d39454f8d18af15f9d386a23795389d1d423823/yarl-1.23.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c75eb09e8d55bceb4367e83496ff8ef2bc7ea6960efb38e978e8073ea59ecb67", size = 99289, upload-time = "2026-03-01T22:05:25.749Z" }, + { url = "https://files.pythonhosted.org/packages/6f/54/5b0db00d2cb056922356104468019c0a132e89c8d3ab67d8ede9f4483d2a/yarl-1.23.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:877b0738624280e34c55680d6054a307aa94f7d52fa0e3034a9cc6e790871da7", size = 96950, upload-time = "2026-03-01T22:05:27.318Z" }, + { url = "https://files.pythonhosted.org/packages/f6/40/10fa93811fd439341fad7e0718a86aca0de9548023bbb403668d6555acab/yarl-1.23.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b5405bb8f0e783a988172993cfc627e4d9d00432d6bbac65a923041edacf997d", size = 93960, upload-time = "2026-03-01T22:05:28.738Z" }, + { url = "https://files.pythonhosted.org/packages/bc/d2/8ae2e6cd77d0805f4526e30ec43b6f9a3dfc542d401ac4990d178e4bf0cf/yarl-1.23.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1c3a3598a832590c5a3ce56ab5576361b5688c12cb1d39429cf5dba30b510760", size = 104703, upload-time = "2026-03-01T22:05:30.438Z" }, + { url = "https://files.pythonhosted.org/packages/2f/0c/b3ceacf82c3fe21183ce35fa2acf5320af003d52bc1fcf5915077681142e/yarl-1.23.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:8419ebd326430d1cbb7efb5292330a2cf39114e82df5cc3d83c9a0d5ebeaf2f2", size = 98325, upload-time = "2026-03-01T22:05:31.835Z" }, + { url = "https://files.pythonhosted.org/packages/9d/e0/12900edd28bdab91a69bd2554b85ad7b151f64e8b521fe16f9ad2f56477a/yarl-1.23.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:be61f6fff406ca40e3b1d84716fde398fc08bc63dd96d15f3a14230a0973ed86", size = 105067, upload-time = "2026-03-01T22:05:33.358Z" }, + { url = "https://files.pythonhosted.org/packages/15/61/74bb1182cf79c9bbe4eb6b1f14a57a22d7a0be5e9cedf8e2d5c2086474c3/yarl-1.23.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3ceb13c5c858d01321b5d9bb65e4cf37a92169ea470b70fec6f236b2c9dd7e34", size = 100285, upload-time = "2026-03-01T22:05:35.4Z" }, + { url = "https://files.pythonhosted.org/packages/69/7f/cd5ef733f2550de6241bd8bd8c3febc78158b9d75f197d9c7baa113436af/yarl-1.23.0-cp312-cp312-win32.whl", hash = "sha256:fffc45637bcd6538de8b85f51e3df3223e4ad89bccbfca0481c08c7fc8b7ed7d", size = 82359, upload-time = "2026-03-01T22:05:36.811Z" }, + { url = "https://files.pythonhosted.org/packages/f5/be/25216a49daeeb7af2bec0db22d5e7df08ed1d7c9f65d78b14f3b74fd72fc/yarl-1.23.0-cp312-cp312-win_amd64.whl", hash = "sha256:f69f57305656a4852f2a7203efc661d8c042e6cc67f7acd97d8667fb448a426e", size = 87674, upload-time = "2026-03-01T22:05:38.171Z" }, + { url = "https://files.pythonhosted.org/packages/d2/35/aeab955d6c425b227d5b7247eafb24f2653fedc32f95373a001af5dfeb9e/yarl-1.23.0-cp312-cp312-win_arm64.whl", hash = "sha256:6e87a6e8735b44816e7db0b2fbc9686932df473c826b0d9743148432e10bb9b9", size = 81879, upload-time = "2026-03-01T22:05:40.006Z" }, + { url = "https://files.pythonhosted.org/packages/69/68/c8739671f5699c7dc470580a4f821ef37c32c4cb0b047ce223a7f115757f/yarl-1.23.0-py3-none-any.whl", hash = "sha256:a2df6afe50dea8ae15fa34c9f824a3ee958d785fd5d089063d960bae1daa0a3f", size = 48288, upload-time = "2026-03-01T22:07:51.388Z" }, ] [[package]] diff --git a/dev/pyrefly-check-local b/dev/pyrefly-check-local index 80f90927bb..8fa5f121fc 100755 --- a/dev/pyrefly-check-local +++ b/dev/pyrefly-check-local @@ -10,6 +10,8 @@ EXCLUDES_FILE="api/pyrefly-local-excludes.txt" pyrefly_args=( "--summary=none" + "--use-ignore-files=false" + "--disable-project-excludes-heuristics=true" "--project-excludes=.venv" "--project-excludes=migrations/" "--project-excludes=tests/" diff --git a/dev/pytest/pytest_vdb.sh b/dev/pytest/pytest_vdb.sh index 3c11a079cc..126aebf7bd 100755 --- a/dev/pytest/pytest_vdb.sh +++ b/dev/pytest/pytest_vdb.sh @@ -21,3 +21,4 @@ pytest --timeout "${PYTEST_TIMEOUT}" api/tests/integration_tests/vdb/chroma \ 
api/tests/integration_tests/vdb/oceanbase \ api/tests/integration_tests/vdb/tidb_vector \ api/tests/integration_tests/vdb/huawei \ + api/tests/integration_tests/vdb/hologres \ diff --git a/docker/.env.example b/docker/.env.example index 399242cea3..4ef856b431 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -541,7 +541,7 @@ SUPABASE_URL=your-server-url # ------------------------------ # The type of vector store to use. -# Supported values are `weaviate`, `oceanbase`, `seekdb`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `opengauss`, `tablestore`, `vastbase`, `tidb`, `tidb_on_qdrant`, `baidu`, `lindorm`, `huawei_cloud`, `upstash`, `matrixone`, `clickzetta`, `alibabacloud_mysql`, `iris`. +# Supported values are `weaviate`, `oceanbase`, `seekdb`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `opengauss`, `tablestore`, `vastbase`, `tidb`, `tidb_on_qdrant`, `baidu`, `lindorm`, `huawei_cloud`, `upstash`, `matrixone`, `clickzetta`, `alibabacloud_mysql`, `iris`, `hologres`. VECTOR_STORE=weaviate # Prefix used to create collection name in vector database VECTOR_INDEX_NAME_PREFIX=Vector_index @@ -605,6 +605,20 @@ COUCHBASE_PASSWORD=password COUCHBASE_BUCKET_NAME=Embeddings COUCHBASE_SCOPE_NAME=_default +# Hologres configurations, only available when VECTOR_STORE is `hologres` +# access_key_id is used as the PG username, access_key_secret is used as the PG password +HOLOGRES_HOST= +HOLOGRES_PORT=80 +HOLOGRES_DATABASE= +HOLOGRES_ACCESS_KEY_ID= +HOLOGRES_ACCESS_KEY_SECRET= +HOLOGRES_SCHEMA=public +HOLOGRES_TOKENIZER=jieba +HOLOGRES_DISTANCE_METHOD=Cosine +HOLOGRES_BASE_QUANTIZATION_TYPE=rabitq +HOLOGRES_MAX_DEGREE=64 +HOLOGRES_EF_CONSTRUCTION=400 + # pgvector configurations, only available when VECTOR_STORE is `pgvector` PGVECTOR_HOST=pgvector PGVECTOR_PORT=5432 diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 8ab3af9788..d5dfdbd16b 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -215,6 +215,17 @@ x-shared-env: &shared-api-worker-env COUCHBASE_PASSWORD: ${COUCHBASE_PASSWORD:-password} COUCHBASE_BUCKET_NAME: ${COUCHBASE_BUCKET_NAME:-Embeddings} COUCHBASE_SCOPE_NAME: ${COUCHBASE_SCOPE_NAME:-_default} + HOLOGRES_HOST: ${HOLOGRES_HOST:-} + HOLOGRES_PORT: ${HOLOGRES_PORT:-80} + HOLOGRES_DATABASE: ${HOLOGRES_DATABASE:-} + HOLOGRES_ACCESS_KEY_ID: ${HOLOGRES_ACCESS_KEY_ID:-} + HOLOGRES_ACCESS_KEY_SECRET: ${HOLOGRES_ACCESS_KEY_SECRET:-} + HOLOGRES_SCHEMA: ${HOLOGRES_SCHEMA:-public} + HOLOGRES_TOKENIZER: ${HOLOGRES_TOKENIZER:-jieba} + HOLOGRES_DISTANCE_METHOD: ${HOLOGRES_DISTANCE_METHOD:-Cosine} + HOLOGRES_BASE_QUANTIZATION_TYPE: ${HOLOGRES_BASE_QUANTIZATION_TYPE:-rabitq} + HOLOGRES_MAX_DEGREE: ${HOLOGRES_MAX_DEGREE:-64} + HOLOGRES_EF_CONSTRUCTION: ${HOLOGRES_EF_CONSTRUCTION:-400} PGVECTOR_HOST: ${PGVECTOR_HOST:-pgvector} PGVECTOR_PORT: ${PGVECTOR_PORT:-5432} PGVECTOR_USER: ${PGVECTOR_USER:-postgres} diff --git a/web/__tests__/share/text-generation-index-flow.test.tsx b/web/__tests__/share/text-generation-index-flow.test.tsx new file mode 100644 index 0000000000..3292474bec --- /dev/null +++ b/web/__tests__/share/text-generation-index-flow.test.tsx @@ -0,0 +1,235 @@ +import type { AccessMode } from '@/models/access-control' +import { fireEvent, render, screen, waitFor 
} from '@testing-library/react'
+import * as React from 'react'
+import TextGeneration from '@/app/components/share/text-generation'
+
+const useSearchParamsMock = vi.fn(() => new URLSearchParams())
+
+vi.mock('next/navigation', () => ({
+  useSearchParams: () => useSearchParamsMock(),
+}))
+
+vi.mock('@/hooks/use-breakpoints', () => ({
+  default: vi.fn(() => 'pc'),
+  MediaType: { pc: 'pc', pad: 'pad', mobile: 'mobile' },
+}))
+
+vi.mock('@/hooks/use-app-favicon', () => ({
+  useAppFavicon: vi.fn(),
+}))
+
+vi.mock('@/hooks/use-document-title', () => ({
+  default: vi.fn(),
+}))
+
+vi.mock('@/i18n-config/client', () => ({
+  changeLanguage: vi.fn(() => Promise.resolve()),
+}))
+
+vi.mock('@/app/components/share/text-generation/run-once', () => ({
+  default: ({
+    inputs,
+    onInputsChange,
+    onSend,
+    runControl,
+  }: {
+    inputs: Record<string, any>
+    onInputsChange: (inputs: Record<string, any>) => void
+    onSend: () => void
+    runControl?: { isStopping: boolean } | null
+  }) => (
+    <div data-testid="run-once-mock">
+      <span data-testid="run-once-input-name">{String(inputs.name ?? '')}</span>
+      <button type="button" onClick={() => onInputsChange({ ...inputs, name: 'Gamma' })}>change-inputs</button>
+      <button type="button" onClick={onSend}>run-once</button>
+      <span>{runControl ? 'stop-ready' : 'idle'}</span>
+    </div>
+  ),
+}))
+
+vi.mock('@/app/components/share/text-generation/run-batch', () => ({
+  default: ({ onSend }: { onSend: (data: string[][]) => void }) => (
+    <button type="button" onClick={() => onSend([['Name'], ['Alpha'], ['Beta']])}>run-batch</button>
+  ),
+}))
+
+vi.mock('@/app/components/app/text-generate/saved-items', () => ({
+  default: ({ list }: { list: { id: string }[] }) =>
+    <div data-testid="saved-items-mock">{list.length}</div>,
+}))
+
+vi.mock('@/app/components/share/text-generation/menu-dropdown', () => ({
+  default: () => <div data-testid="menu-dropdown-mock" />,
+}))
+
+vi.mock('@/app/components/share/text-generation/result', () => {
+  const MockResult = ({
+    isCallBatchAPI,
+    onRunControlChange,
+    onRunStart,
+    taskId,
+  }: {
+    isCallBatchAPI: boolean
+    onRunControlChange?: (control: { onStop: () => void, isStopping: boolean } | null) => void
+    onRunStart: () => void
+    taskId?: number
+  }) => {
+    const runControlRef = React.useRef(false)
+
+    React.useEffect(() => {
+      onRunStart()
+    }, [onRunStart])
+
+    React.useEffect(() => {
+      if (!isCallBatchAPI && !runControlRef.current) {
+        runControlRef.current = true
+        onRunControlChange?.({ onStop: vi.fn(), isStopping: false })
+      }
+    }, [isCallBatchAPI, onRunControlChange])
+
+    return <div data-testid={isCallBatchAPI ? `result-task-${taskId}` : 'result-single'} />
+ } + + return { + default: MockResult, + } +}) + +const fetchSavedMessageMock = vi.fn() + +vi.mock('@/service/share', async () => { + const actual = await vi.importActual('@/service/share') + return { + ...actual, + fetchSavedMessage: (...args: Parameters) => fetchSavedMessageMock(...args), + removeMessage: vi.fn(), + saveMessage: vi.fn(), + } +}) + +const mockSystemFeatures = { + branding: { + enabled: false, + workspace_logo: null, + }, +} + +const mockWebAppState = { + appInfo: { + app_id: 'app-123', + site: { + title: 'Text Generation', + description: 'Share description', + default_language: 'en-US', + icon_type: 'emoji', + icon: 'robot', + icon_background: '#fff', + icon_url: '', + }, + custom_config: { + remove_webapp_brand: false, + replace_webapp_logo: '', + }, + }, + appParams: { + user_input_form: [ + { + 'text-input': { + label: 'Name', + variable: 'name', + required: true, + max_length: 48, + default: '', + hide: false, + }, + }, + ], + more_like_this: { + enabled: true, + }, + file_upload: { + enabled: false, + number_limits: 2, + detail: 'low', + allowed_upload_methods: ['local_file'], + }, + text_to_speech: { + enabled: true, + }, + system_parameters: { + image_file_size_limit: 10, + }, + }, + webAppAccessMode: 'public' as AccessMode, +} + +vi.mock('@/context/global-public-context', () => ({ + useGlobalPublicStore: (selector: (state: { systemFeatures: typeof mockSystemFeatures }) => unknown) => + selector({ systemFeatures: mockSystemFeatures }), +})) + +vi.mock('@/context/web-app-context', () => ({ + useWebAppStore: (selector: (state: typeof mockWebAppState) => unknown) => selector(mockWebAppState), +})) + +describe('TextGeneration', () => { + beforeEach(() => { + vi.clearAllMocks() + useSearchParamsMock.mockReturnValue(new URLSearchParams()) + fetchSavedMessageMock.mockResolvedValue({ + data: [{ id: 'saved-1' }, { id: 'saved-2' }], + }) + }) + + it('should switch between create, batch, and saved tabs after app state loads', async () => { + render() + + await waitFor(() => { + expect(screen.getByTestId('run-once-mock')).toBeInTheDocument() + }) + expect(screen.getByTestId('run-once-input-name')).toHaveTextContent('') + + fireEvent.click(screen.getByRole('button', { name: 'change-inputs' })) + await waitFor(() => { + expect(screen.getByTestId('run-once-input-name')).toHaveTextContent('Gamma') + }) + + fireEvent.click(screen.getByTestId('tab-header-item-batch')) + expect(screen.getByRole('button', { name: 'run-batch' })).toBeInTheDocument() + + fireEvent.click(screen.getByTestId('tab-header-item-saved')) + expect(screen.getByTestId('saved-items-mock')).toHaveTextContent('2') + + fireEvent.click(screen.getByTestId('tab-header-item-create')) + expect(screen.getByTestId('run-once-mock')).toBeInTheDocument() + }) + + it('should wire single-run stop control and clear it when batch execution starts', async () => { + render() + + await waitFor(() => { + expect(screen.getByTestId('run-once-mock')).toBeInTheDocument() + }) + + fireEvent.click(screen.getByRole('button', { name: 'run-once' })) + await waitFor(() => { + expect(screen.getByText('stop-ready')).toBeInTheDocument() + }) + expect(screen.getByTestId('result-single')).toBeInTheDocument() + + fireEvent.click(screen.getByTestId('tab-header-item-batch')) + fireEvent.click(screen.getByRole('button', { name: 'run-batch' })) + await waitFor(() => { + expect(screen.getByText('idle')).toBeInTheDocument() + }) + expect(screen.getByTestId('result-task-1')).toBeInTheDocument() + 
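+    // both batch tasks fit in the first BATCH_CONCURRENCY group, so their Result panes mount together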
expect(screen.getByTestId('result-task-2')).toBeInTheDocument() + }) +}) diff --git a/web/app/components/app/overview/settings/index.spec.tsx b/web/app/components/app/overview/settings/index.spec.tsx index d98e02ad57..b849b4f015 100644 --- a/web/app/components/app/overview/settings/index.spec.tsx +++ b/web/app/components/app/overview/settings/index.spec.tsx @@ -6,7 +6,7 @@ import type { ModalContextState } from '@/context/modal-context' import type { ProviderContextState } from '@/context/provider-context' import type { AppDetailResponse } from '@/models/app' import type { AppSSO } from '@/types/app' -import { fireEvent, render, screen, waitFor } from '@testing-library/react' +import { act, fireEvent, render, screen, waitFor } from '@testing-library/react' import { Plan } from '@/app/components/billing/type' import { baseProviderContextValue } from '@/context/provider-context' import { AppModeEnum } from '@/types/app' @@ -131,6 +131,10 @@ describe('SettingsModal', () => { }) }) + afterEach(() => { + vi.useRealTimers() + }) + it('should render the modal and expose the expanded settings section', async () => { renderSettingsModal() expect(screen.getByText('appOverview.overview.appInfo.settings.title')).toBeInTheDocument() @@ -212,4 +216,54 @@ describe('SettingsModal', () => { })) expect(mockOnClose).toHaveBeenCalled() }) + + it('should clear the delayed hide-more timer when the modal unmounts after closing', () => { + vi.useFakeTimers() + const clearTimeoutSpy = vi.spyOn(globalThis, 'clearTimeout') + const { unmount } = renderSettingsModal() + + fireEvent.click(screen.getByText('appOverview.overview.appInfo.settings.more.entry')) + fireEvent.click(screen.getByText('common.operation.cancel')) + unmount() + + expect(clearTimeoutSpy).toHaveBeenCalled() + vi.runAllTimers() + }) + + it('should replace the pending hide-more timer and clear the ref after the timeout completes', async () => { + const hideCallbacks: Array<() => void> = [] + const originalSetTimeout = globalThis.setTimeout + const setTimeoutSpy = vi.spyOn(globalThis, 'setTimeout').mockImplementation((( + callback: TimerHandler, + delay?: number, + ...args: unknown[] + ) => { + if (delay === 200) { + hideCallbacks.push(() => { + if (typeof callback === 'function') + callback(...args) + }) + return hideCallbacks.length as unknown as ReturnType + } + + return originalSetTimeout(callback, delay, ...args) + }) as unknown as typeof setTimeout) + const clearTimeoutSpy = vi.spyOn(globalThis, 'clearTimeout') + renderSettingsModal() + + act(() => { + fireEvent.click(screen.getByText('common.operation.cancel')) + fireEvent.click(screen.getByText('common.operation.cancel')) + }) + + expect(clearTimeoutSpy).toHaveBeenCalled() + expect(hideCallbacks.length).toBeGreaterThanOrEqual(2) + + act(() => { + hideCallbacks.at(-1)?.() + }) + + setTimeoutSpy.mockRestore() + clearTimeoutSpy.mockRestore() + }) }) diff --git a/web/app/components/app/overview/settings/index.tsx b/web/app/components/app/overview/settings/index.tsx index 20461dda7e..f7c9e309ab 100644 --- a/web/app/components/app/overview/settings/index.tsx +++ b/web/app/components/app/overview/settings/index.tsx @@ -6,7 +6,7 @@ import type { AppIconType, AppSSO, Language } from '@/types/app' import { RiArrowRightSLine, RiCloseLine } from '@remixicon/react' import Link from 'next/link' import * as React from 'react' -import { useCallback, useEffect, useState } from 'react' +import { useCallback, useEffect, useRef, useState } from 'react' import { Trans, useTranslation } from 'react-i18next' import 
ActionButton from '@/app/components/base/action-button' import AppIcon from '@/app/components/base/app-icon' @@ -99,6 +99,7 @@ const SettingsModal: FC = ({ const [language, setLanguage] = useState(default_language) const [saveLoading, setSaveLoading] = useState(false) const { t } = useTranslation() + const hideMoreTimerRef = useRef | null>(null) const [showAppIconPicker, setShowAppIconPicker] = useState(false) const [appIcon, setAppIcon] = useState( @@ -137,10 +138,22 @@ const SettingsModal: FC = ({ : { type: 'emoji', icon, background: icon_background! }) }, [appInfo, chat_color_theme, chat_color_theme_inverted, copyright, custom_disclaimer, default_language, description, icon, icon_background, icon_type, icon_url, privacy_policy, show_workflow_steps, title, use_icon_as_answer_icon]) + useEffect(() => { + return () => { + if (hideMoreTimerRef.current) { + clearTimeout(hideMoreTimerRef.current) + hideMoreTimerRef.current = null + } + } + }, []) + const onHide = () => { onClose() - setTimeout(() => { + if (hideMoreTimerRef.current) + clearTimeout(hideMoreTimerRef.current) + hideMoreTimerRef.current = setTimeout(() => { setIsShowMore(false) + hideMoreTimerRef.current = null }, 200) } diff --git a/web/app/components/share/text-generation/__tests__/text-generation-result-panel.spec.tsx b/web/app/components/share/text-generation/__tests__/text-generation-result-panel.spec.tsx new file mode 100644 index 0000000000..60c109feec --- /dev/null +++ b/web/app/components/share/text-generation/__tests__/text-generation-result-panel.spec.tsx @@ -0,0 +1,190 @@ +import type { PromptConfig } from '@/models/debug' +import type { SiteInfo } from '@/models/share' +import type { VisionSettings } from '@/types/app' +import { fireEvent, render, screen } from '@testing-library/react' +import { AppSourceType } from '@/service/share' +import { Resolution, TransferMethod } from '@/types/app' +import TextGenerationResultPanel from '../text-generation-result-panel' +import { TaskStatus } from '../types' + +const resPropsSpy = vi.fn() +const resDownloadPropsSpy = vi.fn() + +vi.mock('@/app/components/share/text-generation/result', () => ({ + default: (props: Record) => { + resPropsSpy(props) + return
<div data-testid={props.isCallBatchAPI ? `res-${props.taskId}` : 'res-single'} />
+  },
+}))
+
+vi.mock('@/app/components/share/text-generation/run-batch/res-download', () => ({
+  default: (props: Record<string, any>) => {
+    resDownloadPropsSpy(props)
+    return <div data-testid="res-download-mock" />
+ }, +})) + +const promptConfig: PromptConfig = { + prompt_template: 'template', + prompt_variables: [ + { key: 'name', name: 'Name', type: 'string', required: true }, + ], +} + +const siteInfo: SiteInfo = { + title: 'Text Generation', + description: 'Share description', + icon_type: 'emoji', + icon: 'robot', +} + +const visionConfig: VisionSettings = { + enabled: false, + number_limits: 2, + detail: Resolution.low, + transfer_methods: [TransferMethod.local_file], +} + +const batchTasks = [ + { + id: 1, + status: TaskStatus.completed, + params: { inputs: { name: 'Alpha' } }, + }, + { + id: 2, + status: TaskStatus.failed, + params: { inputs: { name: 'Beta' } }, + }, +] + +const baseProps = { + allFailedTaskList: [], + allSuccessTaskList: [], + allTaskList: batchTasks, + appId: 'app-123', + appSourceType: AppSourceType.webApp, + completionFiles: [], + controlRetry: 88, + controlSend: 77, + controlStopResponding: 66, + exportRes: [{ 'Name': 'Alpha', 'share.generation.completionResult': 'Done' }], + handleCompleted: vi.fn(), + handleRetryAllFailedTask: vi.fn(), + handleSaveMessage: vi.fn(async () => {}), + inputs: { name: 'Alice' }, + isCallBatchAPI: false, + isPC: true, + isShowResultPanel: true, + isWorkflow: false, + moreLikeThisEnabled: true, + noPendingTask: true, + onHideResultPanel: vi.fn(), + onRunControlChange: vi.fn(), + onRunStart: vi.fn(), + onShowResultPanel: vi.fn(), + promptConfig, + resultExisted: true, + showTaskList: batchTasks, + siteInfo, + textToSpeechEnabled: true, + visionConfig, +} + +describe('TextGenerationResultPanel', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it('should render a single result in run-once mode and pass non-batch props', () => { + render() + + expect(screen.getByTestId('res-single')).toBeInTheDocument() + expect(resPropsSpy).toHaveBeenCalledWith(expect.objectContaining({ + appId: 'app-123', + appSourceType: AppSourceType.webApp, + completionFiles: [], + controlSend: 77, + controlStopResponding: 66, + hideInlineStopButton: true, + inputs: { name: 'Alice' }, + isCallBatchAPI: false, + moreLikeThisEnabled: true, + taskId: undefined, + })) + expect(screen.queryByTestId('res-download-mock')).not.toBeInTheDocument() + }) + + it('should render batch results, download entry, loading area, and retry banner', () => { + const handleRetryAllFailedTask = vi.fn() + + render( + , + ) + + expect(screen.getByTestId('res-1')).toBeInTheDocument() + expect(screen.getByTestId('res-2')).toBeInTheDocument() + expect(resPropsSpy).toHaveBeenNthCalledWith(1, expect.objectContaining({ + inputs: { name: 'Alpha' }, + isError: false, + controlRetry: 0, + taskId: 1, + onRunControlChange: undefined, + })) + expect(resPropsSpy).toHaveBeenNthCalledWith(2, expect.objectContaining({ + inputs: { name: 'Beta' }, + isError: true, + controlRetry: 88, + taskId: 2, + })) + expect(screen.getByText('share.generation.executions:{"num":2}')).toBeInTheDocument() + expect(screen.getByTestId('res-download-mock')).toBeInTheDocument() + expect(resDownloadPropsSpy).toHaveBeenCalledWith(expect.objectContaining({ + isMobile: false, + values: baseProps.exportRes, + })) + expect(screen.getByText('share.generation.batchFailed.info:{"num":1}')).toBeInTheDocument() + expect(screen.getByText('share.generation.batchFailed.retry')).toBeInTheDocument() + expect(screen.getByRole('status', { name: 'appApi.loading' })).toBeInTheDocument() + + fireEvent.click(screen.getByText('share.generation.batchFailed.retry')) + expect(handleRetryAllFailedTask).toHaveBeenCalledTimes(1) + }) + + it('should toggle 
mobile result panel handle between show and hide actions', () => { + const onHideResultPanel = vi.fn() + const onShowResultPanel = vi.fn() + const { rerender } = render( + , + ) + + fireEvent.click(document.querySelector('.cursor-grab') as HTMLElement) + expect(onHideResultPanel).toHaveBeenCalledTimes(1) + + rerender( + , + ) + + fireEvent.click(document.querySelector('.cursor-grab') as HTMLElement) + expect(onShowResultPanel).toHaveBeenCalledTimes(1) + }) +}) diff --git a/web/app/components/share/text-generation/__tests__/text-generation-sidebar.spec.tsx b/web/app/components/share/text-generation/__tests__/text-generation-sidebar.spec.tsx new file mode 100644 index 0000000000..6ff46d94c7 --- /dev/null +++ b/web/app/components/share/text-generation/__tests__/text-generation-sidebar.spec.tsx @@ -0,0 +1,261 @@ +import type { ComponentProps } from 'react' +import type { PromptConfig, SavedMessage } from '@/models/debug' +import type { SiteInfo } from '@/models/share' +import type { VisionSettings } from '@/types/app' +import { fireEvent, render, screen } from '@testing-library/react' +import { AccessMode } from '@/models/access-control' +import { Resolution, TransferMethod } from '@/types/app' +import { defaultSystemFeatures } from '@/types/feature' +import TextGenerationSidebar from '../text-generation-sidebar' + +const runOncePropsSpy = vi.fn() +const runBatchPropsSpy = vi.fn() +const savedItemsPropsSpy = vi.fn() + +vi.mock('@/app/components/share/text-generation/run-once', () => ({ + default: (props: Record) => { + runOncePropsSpy(props) + return
<div data-testid="run-once-mock" />
+  },
+}))
+
+vi.mock('@/app/components/share/text-generation/run-batch', () => ({
+  default: (props: Record<string, any>) => {
+    runBatchPropsSpy(props)
+    return <div data-testid="run-batch-mock" />
+  },
+}))
+
+vi.mock('@/app/components/app/text-generate/saved-items', () => ({
+  default: (props: { onStartCreateContent: () => void, list: Array<{ id: string }> }) => {
+    savedItemsPropsSpy(props)
+    return (
+      <div data-testid="saved-items-mock">
+        <span>{props.list.length}</span>
+        <button type="button" onClick={props.onStartCreateContent}>back-to-create</button>
+      </div>
+    )
+  },
+}))
+
+vi.mock('@/app/components/share/text-generation/menu-dropdown', () => ({
+  default: () => <div data-testid="menu-dropdown-mock" />
, +})) + +const promptConfig: PromptConfig = { + prompt_template: 'template', + prompt_variables: [ + { key: 'name', name: 'Name', type: 'string', required: true }, + ], +} + +const savedMessages: SavedMessage[] = [ + { id: 'saved-1', answer: 'Answer 1' }, + { id: 'saved-2', answer: 'Answer 2' }, +] + +const siteInfo: SiteInfo = { + title: 'Text Generation', + description: 'Share description', + icon_type: 'emoji', + icon: 'robot', + icon_background: '#fff', + icon_url: '', +} + +const visionConfig: VisionSettings = { + enabled: false, + number_limits: 2, + detail: Resolution.low, + transfer_methods: [TransferMethod.local_file], +} + +const baseProps: ComponentProps = { + accessMode: AccessMode.PUBLIC, + allTasksRun: true, + currentTab: 'create', + customConfig: { + remove_webapp_brand: false, + replace_webapp_logo: '', + }, + inputs: { name: 'Alice' }, + inputsRef: { current: { name: 'Alice' } }, + isInstalledApp: false, + isPC: true, + isWorkflow: false, + onBatchSend: vi.fn(), + onInputsChange: vi.fn(), + onRemoveSavedMessage: vi.fn(async () => {}), + onRunOnceSend: vi.fn(), + onTabChange: vi.fn(), + onVisionFilesChange: vi.fn(), + promptConfig, + resultExisted: false, + runControl: null, + savedMessages, + siteInfo, + systemFeatures: defaultSystemFeatures, + textToSpeechConfig: { enabled: true }, + visionConfig, +} + +const renderSidebar = (overrides: Partial = {}) => { + return render() +} + +describe('TextGenerationSidebar', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it('should render create tab content and pass orchestration props to RunOnce', () => { + renderSidebar() + + expect(screen.getByText('Text Generation')).toBeInTheDocument() + expect(screen.getByText('Share description')).toBeInTheDocument() + expect(screen.getByTestId('run-once-mock')).toBeInTheDocument() + expect(runOncePropsSpy).toHaveBeenCalledWith(expect.objectContaining({ + inputs: { name: 'Alice' }, + promptConfig, + runControl: null, + visionConfig, + })) + expect(screen.queryByTestId('saved-items-mock')).not.toBeInTheDocument() + }) + + it('should render batch tab and hide saved tab for workflow apps', () => { + renderSidebar({ + currentTab: 'batch', + isWorkflow: true, + }) + + expect(screen.getByTestId('run-batch-mock')).toBeInTheDocument() + expect(runBatchPropsSpy).toHaveBeenCalledWith(expect.objectContaining({ + vars: promptConfig.prompt_variables, + isAllFinished: true, + })) + expect(screen.queryByTestId('tab-header-item-saved')).not.toBeInTheDocument() + }) + + it('should render saved items and allow switching back to create tab', () => { + const onTabChange = vi.fn() + + renderSidebar({ + currentTab: 'saved', + onTabChange, + }) + + expect(screen.getByTestId('saved-items-mock')).toBeInTheDocument() + expect(savedItemsPropsSpy).toHaveBeenCalledWith(expect.objectContaining({ + list: baseProps.savedMessages, + isShowTextToSpeech: true, + })) + + fireEvent.click(screen.getByRole('button', { name: 'back-to-create' })) + expect(onTabChange).toHaveBeenCalledWith('create') + }) + + it('should prefer workspace branding and hide powered-by block when branding is removed', () => { + const { rerender } = renderSidebar({ + systemFeatures: { + ...defaultSystemFeatures, + branding: { + ...defaultSystemFeatures.branding, + enabled: true, + workspace_logo: 'https://example.com/workspace-logo.png', + }, + }, + }) + + const brandingLogo = screen.getByRole('img', { name: 'logo' }) + expect(brandingLogo).toHaveAttribute('src', 'https://example.com/workspace-logo.png') + + rerender( + , + ) + + 
expect(screen.queryByText('share.chat.poweredBy')).not.toBeInTheDocument() + }) + + it('should render mobile installed-app layout without saved badge when no saved messages exist', () => { + const { container } = renderSidebar({ + accessMode: AccessMode.SPECIFIC_GROUPS_MEMBERS, + isInstalledApp: true, + isPC: false, + resultExisted: false, + savedMessages: [], + siteInfo: { + ...siteInfo, + description: '', + icon_background: '', + }, + }) + + const root = container.firstElementChild as HTMLElement + const header = root.children[0] as HTMLElement + const body = root.children[1] as HTMLElement + + expect(root).toHaveClass('rounded-l-2xl') + expect(root).not.toHaveClass('h-[calc(100%_-_64px)]') + expect(header).toHaveClass('p-4', 'pb-0') + expect(body).toHaveClass('px-4') + expect(screen.queryByText('Share description')).not.toBeInTheDocument() + }) + + it('should render mobile saved tab with compact spacing and no text-to-speech flag', () => { + const { container } = renderSidebar({ + accessMode: AccessMode.SPECIFIC_GROUPS_MEMBERS, + currentTab: 'saved', + isPC: false, + resultExisted: true, + textToSpeechConfig: null, + }) + + const root = container.firstElementChild as HTMLElement + const body = root.children[1] as HTMLElement + const footer = root.children[2] as HTMLElement + + expect(root).toHaveClass('h-[calc(100%_-_64px)]') + expect(body).toHaveClass('px-4') + expect(footer).toHaveClass('px-4', 'rounded-b-2xl') + expect(savedItemsPropsSpy).toHaveBeenCalledWith(expect.objectContaining({ + className: expect.stringContaining('mt-4'), + isShowTextToSpeech: undefined, + })) + }) + + it('should round the mobile panel body and hide branding when the webapp brand is removed', () => { + const { container } = renderSidebar({ + isPC: false, + resultExisted: true, + customConfig: { + remove_webapp_brand: true, + replace_webapp_logo: '', + }, + }) + + const root = container.firstElementChild as HTMLElement + const body = root.children[1] as HTMLElement + + expect(body).toHaveClass('rounded-b-2xl') + expect(screen.queryByText('share.chat.poweredBy')).not.toBeInTheDocument() + }) + + it('should render the custom webapp logo when workspace branding is unavailable', () => { + renderSidebar({ + customConfig: { + remove_webapp_brand: false, + replace_webapp_logo: 'https://example.com/custom-logo.png', + }, + }) + + const brandingLogo = screen.getByRole('img', { name: 'logo' }) + expect(brandingLogo).toHaveAttribute('src', 'https://example.com/custom-logo.png') + }) +}) diff --git a/web/app/components/share/text-generation/hooks/__tests__/use-text-generation-app-state.spec.ts b/web/app/components/share/text-generation/hooks/__tests__/use-text-generation-app-state.spec.ts new file mode 100644 index 0000000000..3339dce403 --- /dev/null +++ b/web/app/components/share/text-generation/hooks/__tests__/use-text-generation-app-state.spec.ts @@ -0,0 +1,298 @@ +import { act, renderHook, waitFor } from '@testing-library/react' +import { AppSourceType } from '@/service/share' +import { useTextGenerationAppState } from '../use-text-generation-app-state' + +const { + changeLanguageMock, + fetchSavedMessageMock, + notifyMock, + removeMessageMock, + saveMessageMock, + useAppFaviconMock, + useDocumentTitleMock, +} = vi.hoisted(() => ({ + changeLanguageMock: vi.fn(() => Promise.resolve()), + fetchSavedMessageMock: vi.fn(), + notifyMock: vi.fn(), + removeMessageMock: vi.fn(), + saveMessageMock: vi.fn(), + useAppFaviconMock: vi.fn(), + useDocumentTitleMock: vi.fn(), +})) + +vi.mock('@/app/components/base/toast', () => ({ + 
default: { + notify: notifyMock, + }, +})) + +vi.mock('@/hooks/use-app-favicon', () => ({ + useAppFavicon: useAppFaviconMock, +})) + +vi.mock('@/hooks/use-document-title', () => ({ + default: useDocumentTitleMock, +})) + +vi.mock('@/i18n-config/client', () => ({ + changeLanguage: changeLanguageMock, +})) + +vi.mock('@/service/share', async () => { + const actual = await vi.importActual('@/service/share') + return { + ...actual, + fetchSavedMessage: (...args: Parameters) => fetchSavedMessageMock(...args), + removeMessage: (...args: Parameters) => removeMessageMock(...args), + saveMessage: (...args: Parameters) => saveMessageMock(...args), + } +}) + +const mockSystemFeatures = { + branding: { + enabled: false, + workspace_logo: null, + }, +} + +const defaultAppInfo = { + app_id: 'app-123', + site: { + title: 'Share title', + description: 'Share description', + default_language: 'en-US', + icon_type: 'emoji', + icon: 'robot', + icon_background: '#fff', + icon_url: '', + }, + custom_config: { + remove_webapp_brand: false, + replace_webapp_logo: '', + }, +} + +type MockAppInfo = Omit & { + custom_config: typeof defaultAppInfo.custom_config | null +} + +const defaultAppParams = { + user_input_form: [ + { + 'text-input': { + label: 'Name', + variable: 'name', + required: true, + max_length: 48, + default: 'Alice', + hide: false, + }, + }, + { + checkbox: { + label: 'Enabled', + variable: 'enabled', + required: false, + default: true, + hide: false, + }, + }, + ], + more_like_this: { + enabled: true, + }, + file_upload: { + enabled: true, + number_limits: 2, + detail: 'low', + allowed_upload_methods: ['local_file'], + }, + text_to_speech: { + enabled: true, + }, + system_parameters: { + image_file_size_limit: 10, + }, +} + +type MockWebAppState = { + appInfo: MockAppInfo | null + appParams: typeof defaultAppParams | null + webAppAccessMode: string +} + +const mockWebAppState: MockWebAppState = { + appInfo: defaultAppInfo, + appParams: defaultAppParams, + webAppAccessMode: 'public', +} + +const resetMockWebAppState = () => { + mockWebAppState.appInfo = { + ...defaultAppInfo, + site: { + ...defaultAppInfo.site, + }, + custom_config: { + ...defaultAppInfo.custom_config, + }, + } + mockWebAppState.appParams = { + ...defaultAppParams, + user_input_form: [...defaultAppParams.user_input_form], + more_like_this: { + enabled: true, + }, + file_upload: { + ...defaultAppParams.file_upload, + allowed_upload_methods: [...defaultAppParams.file_upload.allowed_upload_methods], + }, + text_to_speech: { + ...defaultAppParams.text_to_speech, + }, + system_parameters: { + image_file_size_limit: 10, + }, + } + mockWebAppState.webAppAccessMode = 'public' +} + +vi.mock('@/context/global-public-context', () => ({ + useGlobalPublicStore: (selector: (state: { systemFeatures: typeof mockSystemFeatures }) => unknown) => + selector({ systemFeatures: mockSystemFeatures }), +})) + +vi.mock('@/context/web-app-context', () => ({ + useWebAppStore: (selector: (state: typeof mockWebAppState) => unknown) => selector(mockWebAppState), +})) + +describe('useTextGenerationAppState', () => { + beforeEach(() => { + vi.clearAllMocks() + resetMockWebAppState() + fetchSavedMessageMock.mockResolvedValue({ + data: [{ id: 'saved-1' }], + }) + removeMessageMock.mockResolvedValue(undefined) + saveMessageMock.mockResolvedValue(undefined) + }) + + it('should initialize app state and fetch saved messages for non-workflow web apps', async () => { + const { result } = renderHook(() => useTextGenerationAppState({ + isInstalledApp: false, + isWorkflow: 
false, + })) + + await waitFor(() => { + expect(result.current.appId).toBe('app-123') + expect(result.current.promptConfig?.prompt_variables.map(item => item.name)).toEqual(['Name', 'Enabled']) + expect(result.current.savedMessages).toEqual([{ id: 'saved-1' }]) + }) + + expect(result.current.appSourceType).toBe(AppSourceType.webApp) + expect(result.current.siteInfo?.title).toBe('Share title') + expect(result.current.visionConfig.transfer_methods).toEqual(['local_file']) + expect(result.current.visionConfig.image_file_size_limit).toBe(10) + expect(changeLanguageMock).toHaveBeenCalledWith('en-US') + expect(fetchSavedMessageMock).toHaveBeenCalledWith(AppSourceType.webApp, 'app-123') + expect(useDocumentTitleMock).toHaveBeenCalledWith('Share title') + expect(useAppFaviconMock).toHaveBeenCalledWith(expect.objectContaining({ + enable: true, + icon: 'robot', + })) + }) + + it('should no-op save actions before the app id is initialized', async () => { + mockWebAppState.appInfo = null + mockWebAppState.appParams = null + + const { result } = renderHook(() => useTextGenerationAppState({ + isInstalledApp: false, + isWorkflow: false, + })) + + await act(async () => { + await result.current.fetchSavedMessages('') + await result.current.handleSaveMessage('message-1') + await result.current.handleRemoveSavedMessage('message-1') + }) + + expect(result.current.appId).toBe('') + expect(fetchSavedMessageMock).not.toHaveBeenCalled() + expect(saveMessageMock).not.toHaveBeenCalled() + expect(removeMessageMock).not.toHaveBeenCalled() + expect(notifyMock).not.toHaveBeenCalled() + }) + + it('should fallback to null custom config when the share metadata omits it', async () => { + mockWebAppState.appInfo = { + ...defaultAppInfo, + custom_config: null, + } + + const { result } = renderHook(() => useTextGenerationAppState({ + isInstalledApp: false, + isWorkflow: false, + })) + + await waitFor(() => { + expect(result.current.appId).toBe('app-123') + expect(result.current.customConfig).toBeNull() + }) + }) + + it('should save and remove messages then refresh saved messages', async () => { + const { result } = renderHook(() => useTextGenerationAppState({ + isInstalledApp: false, + isWorkflow: false, + })) + + await waitFor(() => { + expect(result.current.appId).toBe('app-123') + }) + + fetchSavedMessageMock.mockClear() + + await act(async () => { + await result.current.handleSaveMessage('message-1') + }) + + expect(saveMessageMock).toHaveBeenCalledWith('message-1', AppSourceType.webApp, 'app-123') + expect(fetchSavedMessageMock).toHaveBeenCalledWith(AppSourceType.webApp, 'app-123') + expect(notifyMock).toHaveBeenCalledWith({ + type: 'success', + message: 'common.api.saved', + }) + + fetchSavedMessageMock.mockClear() + notifyMock.mockClear() + + await act(async () => { + await result.current.handleRemoveSavedMessage('message-1') + }) + + expect(removeMessageMock).toHaveBeenCalledWith('message-1', AppSourceType.webApp, 'app-123') + expect(fetchSavedMessageMock).toHaveBeenCalledWith(AppSourceType.webApp, 'app-123') + expect(notifyMock).toHaveBeenCalledWith({ + type: 'success', + message: 'common.api.remove', + }) + }) + + it('should skip saved message fetching for workflows and disable favicon for installed apps', async () => { + const { result } = renderHook(() => useTextGenerationAppState({ + isInstalledApp: true, + isWorkflow: true, + })) + + await waitFor(() => { + expect(result.current.appId).toBe('app-123') + }) + + expect(result.current.appSourceType).toBe(AppSourceType.installedApp) + 
expect(fetchSavedMessageMock).not.toHaveBeenCalled() + expect(useAppFaviconMock).toHaveBeenCalledWith(expect.objectContaining({ + enable: false, + })) + }) +}) diff --git a/web/app/components/share/text-generation/hooks/__tests__/use-text-generation-batch.spec.ts b/web/app/components/share/text-generation/hooks/__tests__/use-text-generation-batch.spec.ts new file mode 100644 index 0000000000..3dab88c578 --- /dev/null +++ b/web/app/components/share/text-generation/hooks/__tests__/use-text-generation-batch.spec.ts @@ -0,0 +1,314 @@ +import type { PromptConfig, PromptVariable } from '@/models/debug' +import { act, renderHook } from '@testing-library/react' +import { BATCH_CONCURRENCY } from '@/config' +import { TaskStatus } from '../../types' +import { useTextGenerationBatch } from '../use-text-generation-batch' + +const createVariable = (overrides: Partial): PromptVariable => ({ + key: 'input', + name: 'Input', + type: 'string', + required: true, + ...overrides, +}) + +const createPromptConfig = (): PromptConfig => ({ + prompt_template: 'template', + prompt_variables: [ + createVariable({ key: 'name', name: 'Name', type: 'string', required: true }), + createVariable({ key: 'score', name: 'Score', type: 'number', required: false }), + ], +}) + +const createTranslator = () => vi.fn((key: string) => key) + +const renderBatchHook = (promptConfig: PromptConfig = createPromptConfig()) => { + const notify = vi.fn() + const onStart = vi.fn() + const t = createTranslator() + + const hook = renderHook(() => useTextGenerationBatch({ + promptConfig, + notify, + t, + })) + + return { + ...hook, + notify, + onStart, + t, + } +} + +describe('useTextGenerationBatch', () => { + it('should initialize the first batch group when csv content is valid', () => { + const { result, onStart } = renderBatchHook() + const csvData = [ + ['Name', 'Score'], + ...Array.from({ length: BATCH_CONCURRENCY + 1 }, (_, index) => [`Item ${index + 1}`, '']), + ] + + let isStarted = false + act(() => { + isStarted = result.current.handleRunBatch(csvData, { onStart }) + }) + + expect(isStarted).toBe(true) + expect(onStart).toHaveBeenCalledTimes(1) + expect(result.current.isCallBatchAPI).toBe(true) + expect(result.current.allTaskList).toHaveLength(BATCH_CONCURRENCY + 1) + expect(result.current.allTaskList.slice(0, BATCH_CONCURRENCY).every(task => task.status === TaskStatus.running)).toBe(true) + expect(result.current.allTaskList.at(-1)?.status).toBe(TaskStatus.pending) + expect(result.current.allTaskList[0]?.params.inputs).toEqual({ + name: 'Item 1', + score: undefined, + }) + }) + + it('should reject csv data when the header does not match prompt variables', () => { + const { result, notify, onStart } = renderBatchHook() + + let isStarted = true + act(() => { + isStarted = result.current.handleRunBatch([ + ['Prompt', 'Score'], + ['Hello', '1'], + ], { onStart }) + }) + + expect(isStarted).toBe(false) + expect(onStart).not.toHaveBeenCalled() + expect(notify).toHaveBeenCalledWith({ + type: 'error', + message: 'generation.errorMsg.fileStructNotMatch', + }) + expect(result.current.allTaskList).toEqual([]) + }) + + it('should reject empty batch inputs and rows without executable payload', () => { + const { result, notify, onStart } = renderBatchHook() + + let isStarted = true + act(() => { + isStarted = result.current.handleRunBatch([], { onStart }) + }) + + expect(isStarted).toBe(false) + expect(notify).toHaveBeenLastCalledWith({ + type: 'error', + message: 'generation.errorMsg.empty', + }) + + notify.mockClear() + + act(() => { + 
isStarted = result.current.handleRunBatch([ + ['Name', 'Score'], + ], { onStart }) + }) + + expect(isStarted).toBe(false) + expect(notify).toHaveBeenLastCalledWith({ + type: 'error', + message: 'generation.errorMsg.atLeastOne', + }) + + notify.mockClear() + + act(() => { + isStarted = result.current.handleRunBatch([ + ['Name', 'Score'], + ['', ''], + ], { onStart }) + }) + + expect(isStarted).toBe(false) + expect(notify).toHaveBeenLastCalledWith({ + type: 'error', + message: 'generation.errorMsg.atLeastOne', + }) + }) + + it('should reject csv data when empty rows appear in the middle of the payload', () => { + const { result, notify, onStart } = renderBatchHook() + + let isStarted = true + act(() => { + isStarted = result.current.handleRunBatch([ + ['Name', 'Score'], + ['Alice', '1'], + ['', ''], + ['Bob', '2'], + ['', ''], + ['', ''], + ], { onStart }) + }) + + expect(isStarted).toBe(false) + expect(notify).toHaveBeenCalledWith({ + type: 'error', + message: 'generation.errorMsg.emptyLine', + }) + }) + + it('should reject rows with missing required values', () => { + const { result, notify, onStart } = renderBatchHook() + + let isStarted = true + act(() => { + isStarted = result.current.handleRunBatch([ + ['Name', 'Score'], + ['', '1'], + ], { onStart }) + }) + + expect(isStarted).toBe(false) + expect(notify).toHaveBeenCalledWith({ + type: 'error', + message: 'generation.errorMsg.invalidLine', + }) + }) + + it('should reject rows that exceed the configured max length', () => { + const { result, notify, onStart } = renderBatchHook({ + prompt_template: 'template', + prompt_variables: [ + createVariable({ key: 'name', name: 'Name', type: 'string', required: true, max_length: 3 }), + createVariable({ key: 'score', name: 'Score', type: 'number', required: false }), + ], + }) + + let isStarted = true + act(() => { + isStarted = result.current.handleRunBatch([ + ['Name', 'Score'], + ['Alice', '1'], + ], { onStart }) + }) + + expect(isStarted).toBe(false) + expect(notify).toHaveBeenCalledWith({ + type: 'error', + message: 'generation.errorMsg.moreThanMaxLengthLine', + }) + }) + + it('should promote pending tasks after the current batch group completes', () => { + const { result } = renderBatchHook() + const csvData = [ + ['Name', 'Score'], + ...Array.from({ length: BATCH_CONCURRENCY + 1 }, (_, index) => [`Item ${index + 1}`, `${index + 1}`]), + ] + + act(() => { + result.current.handleRunBatch(csvData, { onStart: vi.fn() }) + }) + + act(() => { + Array.from({ length: BATCH_CONCURRENCY }).forEach((_, index) => { + result.current.handleCompleted(`Result ${index + 1}`, index + 1, true) + }) + }) + + expect(result.current.allTaskList.at(-1)?.status).toBe(TaskStatus.running) + expect(result.current.exportRes.at(0)).toEqual({ + 'Name': 'Item 1', + 'Score': '1', + 'generation.completionResult': 'Result 1', + }) + }) + + it('should block starting a new batch while previous tasks are still running', () => { + const { result, notify, onStart } = renderBatchHook() + const csvData = [ + ['Name', 'Score'], + ...Array.from({ length: BATCH_CONCURRENCY + 1 }, (_, index) => [`Item ${index + 1}`, `${index + 1}`]), + ] + + act(() => { + result.current.handleRunBatch(csvData, { onStart }) + }) + + notify.mockClear() + + let isStarted = true + act(() => { + isStarted = result.current.handleRunBatch(csvData, { onStart }) + }) + + expect(isStarted).toBe(false) + expect(onStart).toHaveBeenCalledTimes(1) + expect(notify).toHaveBeenCalledWith({ + type: 'info', + message: 'errorMessage.waitForBatchResponse', + }) + }) + + 
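+  // A usage sketch in comments (hypothetical rows; it relies only on the hook API already
+  // exercised above): handleRunBatch validates the header row against the prompt variable
+  // names and marks the first GROUP_SIZE rows running, and each handleCompleted(result,
+  // taskId, isSuccess) call records the result and, at a group boundary, promotes the next
+  // pending tasks.
+  //
+  //   const { result } = renderBatchHook()
+  //   act(() => result.current.handleRunBatch([['Name', 'Score'], ['Ada', '1']], { onStart: vi.fn() }))
+  //   act(() => result.current.handleCompleted('answer for Ada', 1, true))
+  //   // result.current.exportRes now pairs each input row with its completion text
+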
it('should ignore completion updates without a task id', () => { + const { result } = renderBatchHook() + + act(() => { + result.current.handleRunBatch([ + ['Name', 'Score'], + ['Alice', '1'], + ], { onStart: vi.fn() }) + }) + + const taskSnapshot = result.current.allTaskList + + act(() => { + result.current.handleCompleted('ignored') + }) + + expect(result.current.allTaskList).toEqual(taskSnapshot) + }) + + it('should expose failed tasks, retry signals, and reset state after batch failures', () => { + const { result } = renderBatchHook() + + act(() => { + result.current.handleRunBatch([ + ['Name', 'Score'], + ['Alice', ''], + ], { onStart: vi.fn() }) + }) + + act(() => { + result.current.handleCompleted({ answer: 'failed' } as unknown as string, 1, false) + }) + + expect(result.current.allFailedTaskList).toEqual([ + expect.objectContaining({ + id: 1, + status: TaskStatus.failed, + }), + ]) + expect(result.current.allTasksFinished).toBe(false) + expect(result.current.allTasksRun).toBe(true) + expect(result.current.noPendingTask).toBe(true) + expect(result.current.exportRes).toEqual([ + { + 'Name': 'Alice', + 'Score': '', + 'generation.completionResult': JSON.stringify({ answer: 'failed' }), + }, + ]) + + act(() => { + result.current.handleRetryAllFailedTask() + }) + + expect(result.current.controlRetry).toBeGreaterThan(0) + + act(() => { + result.current.resetBatchExecution() + }) + + expect(result.current.allTaskList).toEqual([]) + expect(result.current.allFailedTaskList).toEqual([]) + expect(result.current.showTaskList).toEqual([]) + expect(result.current.exportRes).toEqual([]) + expect(result.current.noPendingTask).toBe(true) + }) +}) diff --git a/web/app/components/share/text-generation/hooks/use-text-generation-app-state.ts b/web/app/components/share/text-generation/hooks/use-text-generation-app-state.ts new file mode 100644 index 0000000000..83fca4d0d6 --- /dev/null +++ b/web/app/components/share/text-generation/hooks/use-text-generation-app-state.ts @@ -0,0 +1,158 @@ +import type { TextGenerationCustomConfig } from '../types' +import type { + MoreLikeThisConfig, + PromptConfig, + SavedMessage, + TextToSpeechConfig, +} from '@/models/debug' +import type { SiteInfo } from '@/models/share' +import type { VisionSettings } from '@/types/app' +import { useCallback, useEffect, useState } from 'react' +import { useTranslation } from 'react-i18next' +import Toast from '@/app/components/base/toast' +import { useGlobalPublicStore } from '@/context/global-public-context' +import { useWebAppStore } from '@/context/web-app-context' +import { useAppFavicon } from '@/hooks/use-app-favicon' +import useDocumentTitle from '@/hooks/use-document-title' +import { changeLanguage } from '@/i18n-config/client' +import { AppSourceType, fetchSavedMessage as doFetchSavedMessage, removeMessage, saveMessage } from '@/service/share' +import { Resolution, TransferMethod } from '@/types/app' +import { userInputsFormToPromptVariables } from '@/utils/model-config' + +type UseTextGenerationAppStateOptions = { + isInstalledApp: boolean + isWorkflow: boolean +} + +type ShareAppParams = { + user_input_form: Parameters[0] + more_like_this: MoreLikeThisConfig | null + file_upload: VisionSettings & { + allowed_file_upload_methods?: TransferMethod[] + allowed_upload_methods?: TransferMethod[] + } + text_to_speech: TextToSpeechConfig | null + system_parameters?: Record & { + image_file_size_limit?: number + } +} + +export const useTextGenerationAppState = ({ + isInstalledApp, + isWorkflow, +}: 
UseTextGenerationAppStateOptions) => { + const { notify } = Toast + const { t } = useTranslation() + const appSourceType = isInstalledApp ? AppSourceType.installedApp : AppSourceType.webApp + const systemFeatures = useGlobalPublicStore(s => s.systemFeatures) + const appData = useWebAppStore(s => s.appInfo) + const appParams = useWebAppStore(s => s.appParams) + const accessMode = useWebAppStore(s => s.webAppAccessMode) + + const [appId, setAppId] = useState('') + const [siteInfo, setSiteInfo] = useState(null) + const [customConfig, setCustomConfig] = useState(null) + const [promptConfig, setPromptConfig] = useState(null) + const [moreLikeThisConfig, setMoreLikeThisConfig] = useState(null) + const [textToSpeechConfig, setTextToSpeechConfig] = useState(null) + const [savedMessages, setSavedMessages] = useState([]) + const [visionConfig, setVisionConfig] = useState({ + enabled: false, + number_limits: 2, + detail: Resolution.low, + transfer_methods: [TransferMethod.local_file], + }) + + const fetchSavedMessages = useCallback(async (targetAppId = appId) => { + if (!targetAppId) + return + const res = await doFetchSavedMessage(appSourceType, targetAppId) as { data: SavedMessage[] } + setSavedMessages(res.data) + }, [appId, appSourceType]) + + const handleSaveMessage = useCallback(async (messageId: string) => { + if (!appId) + return + await saveMessage(messageId, appSourceType, appId) + notify({ type: 'success', message: t('api.saved', { ns: 'common' }) }) + await fetchSavedMessages(appId) + }, [appId, appSourceType, fetchSavedMessages, notify, t]) + + const handleRemoveSavedMessage = useCallback(async (messageId: string) => { + if (!appId) + return + await removeMessage(messageId, appSourceType, appId) + notify({ type: 'success', message: t('api.remove', { ns: 'common' }) }) + await fetchSavedMessages(appId) + }, [appId, appSourceType, fetchSavedMessages, notify, t]) + + useEffect(() => { + let cancelled = false + + const initialize = async () => { + if (!appData || !appParams) + return + + const { app_id: nextAppId, site, custom_config } = appData + + setAppId(nextAppId) + setSiteInfo(site as SiteInfo) + setCustomConfig((custom_config || null) as TextGenerationCustomConfig | null) + await changeLanguage(site.default_language) + + const { user_input_form, more_like_this, file_upload, text_to_speech } = appParams as unknown as ShareAppParams + if (cancelled) + return + + setVisionConfig({ + ...file_upload, + transfer_methods: file_upload?.allowed_file_upload_methods || file_upload?.allowed_upload_methods, + image_file_size_limit: appParams?.system_parameters.image_file_size_limit, + fileUploadConfig: appParams?.system_parameters, + } as VisionSettings) + setPromptConfig({ + prompt_template: '', + prompt_variables: userInputsFormToPromptVariables(user_input_form), + } as PromptConfig) + setMoreLikeThisConfig(more_like_this) + setTextToSpeechConfig(text_to_speech) + + if (!isWorkflow) + await fetchSavedMessages(nextAppId) + } + + void initialize() + + return () => { + cancelled = true + } + }, [appData, appParams, fetchSavedMessages, isWorkflow]) + + useDocumentTitle(siteInfo?.title || t('generation.title', { ns: 'share' })) + + useAppFavicon({ + enable: !isInstalledApp, + icon_type: siteInfo?.icon_type, + icon: siteInfo?.icon, + icon_background: siteInfo?.icon_background, + icon_url: siteInfo?.icon_url, + }) + + return { + accessMode, + appId, + appSourceType, + customConfig, + fetchSavedMessages, + handleRemoveSavedMessage, + handleSaveMessage, + moreLikeThisConfig, + promptConfig, + 
savedMessages, + siteInfo, + systemFeatures, + textToSpeechConfig, + visionConfig, + setVisionConfig, + } +} diff --git a/web/app/components/share/text-generation/hooks/use-text-generation-batch.ts b/web/app/components/share/text-generation/hooks/use-text-generation-batch.ts new file mode 100644 index 0000000000..522d2e4681 --- /dev/null +++ b/web/app/components/share/text-generation/hooks/use-text-generation-batch.ts @@ -0,0 +1,270 @@ +import type { Task } from '../types' +import type { PromptConfig } from '@/models/debug' +import { useCallback, useMemo, useRef, useState } from 'react' +import { BATCH_CONCURRENCY } from '@/config' +import { TaskStatus } from '../types' + +type BatchNotify = (payload: { type: 'error' | 'info', message: string }) => void +type BatchTranslate = (key: string, options?: Record) => string + +type UseTextGenerationBatchOptions = { + promptConfig: PromptConfig | null + notify: BatchNotify + t: BatchTranslate +} + +type RunBatchCallbacks = { + onStart: () => void +} + +const GROUP_SIZE = BATCH_CONCURRENCY + +export const useTextGenerationBatch = ({ + promptConfig, + notify, + t, +}: UseTextGenerationBatchOptions) => { + const [isCallBatchAPI, setIsCallBatchAPI] = useState(false) + const [controlRetry, setControlRetry] = useState(0) + const [allTaskList, setAllTaskList] = useState([]) + const [batchCompletionMap, setBatchCompletionMap] = useState>({}) + const allTaskListRef = useRef([]) + const currGroupNumRef = useRef(0) + const batchCompletionResRef = useRef>({}) + + const updateAllTaskList = useCallback((taskList: Task[]) => { + setAllTaskList(taskList) + allTaskListRef.current = taskList + }, []) + + const updateBatchCompletionRes = useCallback((res: Record) => { + batchCompletionResRef.current = res + setBatchCompletionMap(res) + }, []) + + const resetBatchExecution = useCallback(() => { + updateAllTaskList([]) + updateBatchCompletionRes({}) + currGroupNumRef.current = 0 + }, [updateAllTaskList, updateBatchCompletionRes]) + + const checkBatchInputs = useCallback((data: string[][]) => { + if (!data || data.length === 0) { + notify({ type: 'error', message: t('generation.errorMsg.empty', { ns: 'share' }) }) + return false + } + + const promptVariables = promptConfig?.prompt_variables ?? 
[] + const headerData = data[0] + let isMapVarName = true + promptVariables.forEach((item, index) => { + if (!isMapVarName) + return + + if (item.name !== headerData[index]) + isMapVarName = false + }) + + if (!isMapVarName) { + notify({ type: 'error', message: t('generation.errorMsg.fileStructNotMatch', { ns: 'share' }) }) + return false + } + + let payloadData = data.slice(1) + if (payloadData.length === 0) { + notify({ type: 'error', message: t('generation.errorMsg.atLeastOne', { ns: 'share' }) }) + return false + } + + const emptyLineIndexes = payloadData + .filter(item => item.every(value => value === '')) + .map(item => payloadData.indexOf(item)) + if (emptyLineIndexes.length > 0) { + let hasMiddleEmptyLine = false + let startIndex = emptyLineIndexes[0] - 1 + emptyLineIndexes.forEach((index) => { + if (hasMiddleEmptyLine) + return + if (startIndex + 1 !== index) { + hasMiddleEmptyLine = true + return + } + startIndex += 1 + }) + + if (hasMiddleEmptyLine) { + notify({ type: 'error', message: t('generation.errorMsg.emptyLine', { ns: 'share', rowIndex: startIndex + 2 }) }) + return false + } + } + + payloadData = payloadData.filter(item => !item.every(value => value === '')) + if (payloadData.length === 0) { + notify({ type: 'error', message: t('generation.errorMsg.atLeastOne', { ns: 'share' }) }) + return false + } + + let errorRowIndex = 0 + let requiredVarName = '' + let tooLongVarName = '' + let maxLength = 0 + + for (const [index, item] of payloadData.entries()) { + for (const [varIndex, varItem] of promptVariables.entries()) { + const value = item[varIndex] ?? '' + + if (varItem.type === 'string' && varItem.max_length && value.length > varItem.max_length) { + tooLongVarName = varItem.name + maxLength = varItem.max_length + errorRowIndex = index + 1 + break + } + + if (varItem.required && value.trim() === '') { + requiredVarName = varItem.name + errorRowIndex = index + 1 + break + } + } + + if (errorRowIndex !== 0) + break + } + + if (errorRowIndex !== 0) { + if (requiredVarName) { + notify({ + type: 'error', + message: t('generation.errorMsg.invalidLine', { ns: 'share', rowIndex: errorRowIndex + 1, varName: requiredVarName }), + }) + } + + if (tooLongVarName) { + notify({ + type: 'error', + message: t('generation.errorMsg.moreThanMaxLengthLine', { + ns: 'share', + rowIndex: errorRowIndex + 1, + varName: tooLongVarName, + maxLength, + }), + }) + } + + return false + } + + return true + }, [notify, promptConfig, t]) + + const handleRunBatch = useCallback((data: string[][], { onStart }: RunBatchCallbacks) => { + if (!checkBatchInputs(data)) + return false + + const latestTaskList = allTaskListRef.current + const allTasksFinished = latestTaskList.every(task => task.status === TaskStatus.completed) + if (!allTasksFinished && latestTaskList.length > 0) { + notify({ type: 'info', message: t('errorMessage.waitForBatchResponse', { ns: 'appDebug' }) }) + return false + } + + const payloadData = data.filter(item => !item.every(value => value === '')).slice(1) + const promptVariables = promptConfig?.prompt_variables ?? [] + const nextTaskList: Task[] = payloadData.map((item, index) => { + const inputs: Record = {} + promptVariables.forEach((variable, varIndex) => { + const input = item[varIndex] + inputs[variable.key] = input + if (!input) + inputs[variable.key] = variable.type === 'string' || variable.type === 'paragraph' ? '' : undefined + }) + + return { + id: index + 1, + status: index < GROUP_SIZE ? 
+  const handleRunBatch = useCallback((data: string[][], { onStart }: RunBatchCallbacks) => {
+    if (!checkBatchInputs(data))
+      return false
+
+    const latestTaskList = allTaskListRef.current
+    const allTasksFinished = latestTaskList.every(task => task.status === TaskStatus.completed)
+    if (!allTasksFinished && latestTaskList.length > 0) {
+      notify({ type: 'info', message: t('errorMessage.waitForBatchResponse', { ns: 'appDebug' }) })
+      return false
+    }
+
+    const payloadData = data.filter(item => !item.every(value => value === '')).slice(1)
+    const promptVariables = promptConfig?.prompt_variables ?? []
+    const nextTaskList: Task[] = payloadData.map((item, index) => {
+      const inputs: Record<string, string | undefined> = {}
+      promptVariables.forEach((variable, varIndex) => {
+        const input = item[varIndex]
+        inputs[variable.key] = input
+        if (!input)
+          inputs[variable.key] = variable.type === 'string' || variable.type === 'paragraph' ? '' : undefined
+      })
+
+      return {
+        id: index + 1,
+        status: index < GROUP_SIZE ? TaskStatus.running : TaskStatus.pending,
+        params: { inputs },
+      }
+    })
+
+    setIsCallBatchAPI(true)
+    updateAllTaskList(nextTaskList)
+    updateBatchCompletionRes({})
+    currGroupNumRef.current = 0
+    onStart()
+    return true
+  }, [checkBatchInputs, notify, promptConfig, t, updateAllTaskList, updateBatchCompletionRes])
+
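+  // Called once per finished task. When the number of settled tasks reaches a
+  // group boundary (or only a partial group remains), the next GROUP_SIZE
+  // pending tasks are flipped to running, so the batch proceeds group by group;
+  // currGroupNumRef guards against promoting the same group twice.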
+  const handleCompleted = useCallback((completionRes: string, taskId?: number, isSuccess?: boolean) => {
+    if (!taskId)
+      return
+
+    const latestTaskList = allTaskListRef.current
+    const latestBatchCompletionRes = batchCompletionResRef.current
+    const pendingTaskList = latestTaskList.filter(task => task.status === TaskStatus.pending)
+    const runTasksCount = 1 + latestTaskList.filter(task => [TaskStatus.completed, TaskStatus.failed].includes(task.status)).length
+    const shouldStartNextGroup = currGroupNumRef.current !== runTasksCount
+      && pendingTaskList.length > 0
+      && (runTasksCount % GROUP_SIZE === 0 || (latestTaskList.length - runTasksCount < GROUP_SIZE))
+
+    if (shouldStartNextGroup)
+      currGroupNumRef.current = runTasksCount
+
+    const nextPendingTaskIds = shouldStartNextGroup ? pendingTaskList.slice(0, GROUP_SIZE).map(item => item.id) : []
+    updateAllTaskList(latestTaskList.map((task) => {
+      if (task.id === taskId)
+        return { ...task, status: isSuccess ? TaskStatus.completed : TaskStatus.failed }
+      if (shouldStartNextGroup && nextPendingTaskIds.includes(task.id))
+        return { ...task, status: TaskStatus.running }
+      return task
+    }))
+    updateBatchCompletionRes({
+      ...latestBatchCompletionRes,
+      [taskId]: completionRes,
+    })
+  }, [updateAllTaskList, updateBatchCompletionRes])
+
+  const handleRetryAllFailedTask = useCallback(() => {
+    setControlRetry(Date.now())
+  }, [])
+
+  const pendingTaskList = allTaskList.filter(task => task.status === TaskStatus.pending)
+  const showTaskList = allTaskList.filter(task => task.status !== TaskStatus.pending)
+  const allSuccessTaskList = allTaskList.filter(task => task.status === TaskStatus.completed)
+  const allFailedTaskList = allTaskList.filter(task => task.status === TaskStatus.failed)
+  const allTasksFinished = allTaskList.every(task => task.status === TaskStatus.completed)
+  const allTasksRun = allTaskList.every(task => [TaskStatus.completed, TaskStatus.failed].includes(task.status))
+
+  const exportRes = useMemo(() => {
+    return allTaskList.map((task) => {
+      const result: Record<string, string> = {}
+      promptConfig?.prompt_variables.forEach((variable) => {
+        result[variable.name] = String(task.params.inputs[variable.key] ?? '')
+      })
+
+      let completionValue = batchCompletionMap[String(task.id)]
+      if (typeof completionValue === 'object')
+        completionValue = JSON.stringify(completionValue)
+
+      result[t('generation.completionResult', { ns: 'share' })] = completionValue
+      return result
+    })
+  }, [allTaskList, batchCompletionMap, promptConfig, t])
+
+  return {
+    allFailedTaskList,
+    allSuccessTaskList,
+    allTaskList,
+    allTasksFinished,
+    allTasksRun,
+    controlRetry,
+    exportRes,
+    handleCompleted,
+    handleRetryAllFailedTask,
+    handleRunBatch,
+    isCallBatchAPI,
+    noPendingTask: pendingTaskList.length === 0,
+    resetBatchExecution,
+    setIsCallBatchAPI,
+    showTaskList,
+  }
+}
diff --git a/web/app/components/share/text-generation/index.tsx b/web/app/components/share/text-generation/index.tsx
index 87d7a98c22..779358bfc6 100644
--- a/web/app/components/share/text-generation/index.tsx
+++ b/web/app/components/share/text-generation/index.tsx
@@ -1,65 +1,20 @@
 'use client'
 import type { FC } from 'react'
-import type {
-  MoreLikeThisConfig,
-  PromptConfig,
-  SavedMessage,
-  TextToSpeechConfig,
-} from '@/models/debug'
+import type { InputValueTypes, TextGenerationRunControl } from './types'
 import type { InstalledApp } from '@/models/explore'
-import type { SiteInfo } from '@/models/share'
-import type { VisionFile, VisionSettings } from '@/types/app'
-import {
-  RiBookmark3Line,
-  RiErrorWarningFill,
-} from '@remixicon/react'
+import type { VisionFile } from '@/types/app'
 import { useBoolean } from 'ahooks'
 import { useSearchParams } from 'next/navigation'
-import * as React from 'react'
 import { useCallback, useEffect, useRef, useState } from 'react'
 import { useTranslation } from 'react-i18next'
-import SavedItems from '@/app/components/app/text-generate/saved-items'
-import AppIcon from '@/app/components/base/app-icon'
-import Badge from '@/app/components/base/badge'
 import Loading from '@/app/components/base/loading'
-import DifyLogo from '@/app/components/base/logo/dify-logo'
 import Toast from '@/app/components/base/toast'
-import Res from '@/app/components/share/text-generation/result'
-import RunOnce from '@/app/components/share/text-generation/run-once'
-import { appDefaultIconBackground, BATCH_CONCURRENCY } from '@/config'
-import { useGlobalPublicStore } from '@/context/global-public-context'
-import { useWebAppStore } from '@/context/web-app-context'
-import { useAppFavicon } from '@/hooks/use-app-favicon'
 import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints'
-import useDocumentTitle from '@/hooks/use-document-title'
-import { changeLanguage } from '@/i18n-config/client'
-import { AccessMode } from '@/models/access-control'
-import { AppSourceType, fetchSavedMessage as doFetchSavedMessage, removeMessage, saveMessage } from '@/service/share'
-import { Resolution, TransferMethod } from '@/types/app'
 import { cn } from '@/utils/classnames'
-import { userInputsFormToPromptVariables } from '@/utils/model-config'
-import TabHeader from '../../base/tab-header'
-import MenuDropdown from './menu-dropdown'
-import RunBatch from './run-batch'
-import ResDownload from './run-batch/res-download'
-
-const GROUP_SIZE = BATCH_CONCURRENCY // to avoid RPM(Request per minute) limit. The group task finished then the next group.
-enum TaskStatus { - pending = 'pending', - running = 'running', - completed = 'completed', - failed = 'failed', -} - -type TaskParam = { - inputs: Record -} - -type Task = { - id: number - status: TaskStatus - params: TaskParam -} +import { useTextGenerationAppState } from './hooks/use-text-generation-app-state' +import { useTextGenerationBatch } from './hooks/use-text-generation-batch' +import TextGenerationResultPanel from './text-generation-result-panel' +import TextGenerationSidebar from './text-generation-sidebar' export type IMainProps = { isInstalledApp?: boolean @@ -72,8 +27,6 @@ const TextGeneration: FC = ({ isWorkflow = false, }) => { const { notify } = Toast - const appSourceType = isInstalledApp ? AppSourceType.installedApp : AppSourceType.webApp - const { t } = useTranslation() const media = useBreakpoints() const isPC = media === MediaType.pc @@ -81,428 +34,90 @@ const TextGeneration: FC = ({ const searchParams = useSearchParams() const mode = searchParams.get('mode') || 'create' const [currentTab, setCurrentTab] = useState(['create', 'batch'].includes(mode) ? mode : 'create') - - // Notice this situation isCallBatchAPI but not in batch tab - const [isCallBatchAPI, setIsCallBatchAPI] = useState(false) - const isInBatchTab = currentTab === 'batch' - const [inputs, doSetInputs] = useState>({}) + const [inputs, setInputs] = useState>({}) const inputsRef = useRef(inputs) - const setInputs = useCallback((newInputs: Record) => { - doSetInputs(newInputs) - inputsRef.current = newInputs - }, []) - const systemFeatures = useGlobalPublicStore(s => s.systemFeatures) - const [appId, setAppId] = useState('') - const [siteInfo, setSiteInfo] = useState(null) - const [customConfig, setCustomConfig] = useState | null>(null) - const [promptConfig, setPromptConfig] = useState(null) - const [moreLikeThisConfig, setMoreLikeThisConfig] = useState(null) - const [textToSpeechConfig, setTextToSpeechConfig] = useState(null) - - // save message - const [savedMessages, setSavedMessages] = useState([]) - const fetchSavedMessage = useCallback(async () => { - if (!appId) - return - const res: any = await doFetchSavedMessage(appSourceType, appId) - setSavedMessages(res.data) - }, [appSourceType, appId]) - const handleSaveMessage = async (messageId: string) => { - await saveMessage(messageId, appSourceType, appId) - notify({ type: 'success', message: t('api.saved', { ns: 'common' }) }) - fetchSavedMessage() - } - const handleRemoveSavedMessage = async (messageId: string) => { - await removeMessage(messageId, appSourceType, appId) - notify({ type: 'success', message: t('api.remove', { ns: 'common' }) }) - fetchSavedMessage() - } - - // send message task + const [completionFiles, setCompletionFiles] = useState([]) + const [runControl, setRunControl] = useState(null) const [controlSend, setControlSend] = useState(0) const [controlStopResponding, setControlStopResponding] = useState(0) - const [visionConfig, setVisionConfig] = useState({ - enabled: false, - number_limits: 2, - detail: Resolution.low, - transfer_methods: [TransferMethod.local_file], + const [resultExisted, setResultExisted] = useState(false) + const [isShowResultPanel, { setTrue: showResultPanelState, setFalse: hideResultPanel }] = useBoolean(false) + + const updateInputs = useCallback((newInputs: Record) => { + setInputs(newInputs) + inputsRef.current = newInputs + }, []) + + const { + accessMode, + appId, + appSourceType, + customConfig, + handleRemoveSavedMessage, + handleSaveMessage, + moreLikeThisConfig, + promptConfig, + savedMessages, + 
siteInfo, + systemFeatures, + textToSpeechConfig, + visionConfig, + } = useTextGenerationAppState({ + isInstalledApp, + isWorkflow, + }) + + const { + allFailedTaskList, + allSuccessTaskList, + allTaskList, + allTasksRun, + controlRetry, + exportRes, + handleCompleted, + handleRetryAllFailedTask, + handleRunBatch: runBatchExecution, + isCallBatchAPI, + noPendingTask, + resetBatchExecution, + setIsCallBatchAPI, + showTaskList, + } = useTextGenerationBatch({ + promptConfig, + notify, + t, }) - const [completionFiles, setCompletionFiles] = useState([]) - const [runControl, setRunControl] = useState<{ onStop: () => Promise | void, isStopping: boolean } | null>(null) useEffect(() => { if (isCallBatchAPI) setRunControl(null) }, [isCallBatchAPI]) - const handleSend = () => { + const showResultPanel = useCallback(() => { + setTimeout(() => { + showResultPanelState() + }, 0) + }, [showResultPanelState]) + const handleRunStart = useCallback(() => { + setResultExisted(true) + }, []) + + const handleRunOnce = useCallback(() => { setIsCallBatchAPI(false) setControlSend(Date.now()) - - // eslint-disable-next-line ts/no-use-before-define - setAllTaskList([]) // clear batch task running status - - // eslint-disable-next-line ts/no-use-before-define + resetBatchExecution() showResultPanel() - } + }, [resetBatchExecution, setIsCallBatchAPI, showResultPanel]) - const [controlRetry, setControlRetry] = useState(0) - const handleRetryAllFailedTask = () => { - setControlRetry(Date.now()) - } - const [allTaskList, doSetAllTaskList] = useState([]) - const allTaskListRef = useRef([]) - const getLatestTaskList = () => allTaskListRef.current - const setAllTaskList = (taskList: Task[]) => { - doSetAllTaskList(taskList) - allTaskListRef.current = taskList - } - const pendingTaskList = allTaskList.filter(task => task.status === TaskStatus.pending) - const noPendingTask = pendingTaskList.length === 0 - const showTaskList = allTaskList.filter(task => task.status !== TaskStatus.pending) - const currGroupNumRef = useRef(0) - - const setCurrGroupNum = (num: number) => { - currGroupNumRef.current = num - } - const getCurrGroupNum = () => { - return currGroupNumRef.current - } - const allSuccessTaskList = allTaskList.filter(task => task.status === TaskStatus.completed) - const allFailedTaskList = allTaskList.filter(task => task.status === TaskStatus.failed) - const allTasksFinished = allTaskList.every(task => task.status === TaskStatus.completed) - const allTasksRun = allTaskList.every(task => [TaskStatus.completed, TaskStatus.failed].includes(task.status)) - const batchCompletionResRef = useRef>({}) - const setBatchCompletionRes = (res: Record) => { - batchCompletionResRef.current = res - } - const getBatchCompletionRes = () => batchCompletionResRef.current - const exportRes = allTaskList.map((task) => { - const batchCompletionResLatest = getBatchCompletionRes() - const res: Record = {} - const { inputs } = task.params - promptConfig?.prompt_variables.forEach((v) => { - res[v.name] = inputs[v.key] + const handleRunBatch = useCallback((data: string[][]) => { + runBatchExecution(data, { + onStart: () => { + setControlSend(Date.now()) + setControlStopResponding(Date.now()) + showResultPanel() + }, }) - let result = batchCompletionResLatest[task.id] - // task might return multiple fields, should marshal object to string - if (typeof batchCompletionResLatest[task.id] === 'object') - result = JSON.stringify(result) - - res[t('generation.completionResult', { ns: 'share' })] = result - return res - }) - const checkBatchInputs = 
(data: string[][]) => { - if (!data || data.length === 0) { - notify({ type: 'error', message: t('generation.errorMsg.empty', { ns: 'share' }) }) - return false - } - const headerData = data[0] - let isMapVarName = true - promptConfig?.prompt_variables.forEach((item, index) => { - if (!isMapVarName) - return - - if (item.name !== headerData[index]) - isMapVarName = false - }) - - if (!isMapVarName) { - notify({ type: 'error', message: t('generation.errorMsg.fileStructNotMatch', { ns: 'share' }) }) - return false - } - - let payloadData = data.slice(1) - if (payloadData.length === 0) { - notify({ type: 'error', message: t('generation.errorMsg.atLeastOne', { ns: 'share' }) }) - return false - } - - // check middle empty line - const allEmptyLineIndexes = payloadData.filter(item => item.every(i => i === '')).map(item => payloadData.indexOf(item)) - if (allEmptyLineIndexes.length > 0) { - let hasMiddleEmptyLine = false - let startIndex = allEmptyLineIndexes[0] - 1 - allEmptyLineIndexes.forEach((index) => { - if (hasMiddleEmptyLine) - return - - if (startIndex + 1 !== index) { - hasMiddleEmptyLine = true - return - } - startIndex++ - }) - - if (hasMiddleEmptyLine) { - notify({ type: 'error', message: t('generation.errorMsg.emptyLine', { ns: 'share', rowIndex: startIndex + 2 }) }) - return false - } - } - - // check row format - payloadData = payloadData.filter(item => !item.every(i => i === '')) - // after remove empty rows in the end, checked again - if (payloadData.length === 0) { - notify({ type: 'error', message: t('generation.errorMsg.atLeastOne', { ns: 'share' }) }) - return false - } - let errorRowIndex = 0 - let requiredVarName = '' - let moreThanMaxLengthVarName = '' - let maxLength = 0 - payloadData.forEach((item, index) => { - if (errorRowIndex !== 0) - return - - promptConfig?.prompt_variables.forEach((varItem, varIndex) => { - if (errorRowIndex !== 0) - return - if (varItem.type === 'string' && varItem.max_length) { - if (item[varIndex].length > varItem.max_length) { - moreThanMaxLengthVarName = varItem.name - maxLength = varItem.max_length - errorRowIndex = index + 1 - return - } - } - if (!varItem.required) - return - - if (item[varIndex].trim() === '') { - requiredVarName = varItem.name - errorRowIndex = index + 1 - } - }) - }) - - if (errorRowIndex !== 0) { - if (requiredVarName) - notify({ type: 'error', message: t('generation.errorMsg.invalidLine', { ns: 'share', rowIndex: errorRowIndex + 1, varName: requiredVarName }) }) - - if (moreThanMaxLengthVarName) - notify({ type: 'error', message: t('generation.errorMsg.moreThanMaxLengthLine', { ns: 'share', rowIndex: errorRowIndex + 1, varName: moreThanMaxLengthVarName, maxLength }) }) - - return false - } - return true - } - const handleRunBatch = (data: string[][]) => { - if (!checkBatchInputs(data)) - return - if (!allTasksFinished) { - notify({ type: 'info', message: t('errorMessage.waitForBatchResponse', { ns: 'appDebug' }) }) - return - } - - const payloadData = data.filter(item => !item.every(i => i === '')).slice(1) - const varLen = promptConfig?.prompt_variables.length || 0 - setIsCallBatchAPI(true) - const allTaskList: Task[] = payloadData.map((item, i) => { - const inputs: Record = {} - if (varLen > 0) { - item.slice(0, varLen).forEach((input, index) => { - const varSchema = promptConfig?.prompt_variables[index] - inputs[varSchema?.key as string] = input - if (!input) { - if (varSchema?.type === 'string' || varSchema?.type === 'paragraph') - inputs[varSchema?.key as string] = '' - else - inputs[varSchema?.key as string] = 
undefined - } - }) - } - return { - id: i + 1, - status: i < GROUP_SIZE ? TaskStatus.running : TaskStatus.pending, - params: { - inputs, - }, - } - }) - setAllTaskList(allTaskList) - setCurrGroupNum(0) - setControlSend(Date.now()) - // clear run once task status - setControlStopResponding(Date.now()) - - // eslint-disable-next-line ts/no-use-before-define - showResultPanel() - } - const handleCompleted = (completionRes: string, taskId?: number, isSuccess?: boolean) => { - const allTaskListLatest = getLatestTaskList() - const batchCompletionResLatest = getBatchCompletionRes() - const pendingTaskList = allTaskListLatest.filter(task => task.status === TaskStatus.pending) - const runTasksCount = 1 + allTaskListLatest.filter(task => [TaskStatus.completed, TaskStatus.failed].includes(task.status)).length - const needToAddNextGroupTask = (getCurrGroupNum() !== runTasksCount) && pendingTaskList.length > 0 && (runTasksCount % GROUP_SIZE === 0 || (allTaskListLatest.length - runTasksCount < GROUP_SIZE)) - // avoid add many task at the same time - if (needToAddNextGroupTask) - setCurrGroupNum(runTasksCount) - - const nextPendingTaskIds = needToAddNextGroupTask ? pendingTaskList.slice(0, GROUP_SIZE).map(item => item.id) : [] - const newAllTaskList = allTaskListLatest.map((item) => { - if (item.id === taskId) { - return { - ...item, - status: isSuccess ? TaskStatus.completed : TaskStatus.failed, - } - } - if (needToAddNextGroupTask && nextPendingTaskIds.includes(item.id)) { - return { - ...item, - status: TaskStatus.running, - } - } - return item - }) - setAllTaskList(newAllTaskList) - if (taskId) { - setBatchCompletionRes({ - ...batchCompletionResLatest, - [`${taskId}`]: completionRes, - }) - } - } - - const appData = useWebAppStore(s => s.appInfo) - const appParams = useWebAppStore(s => s.appParams) - const accessMode = useWebAppStore(s => s.webAppAccessMode) - useEffect(() => { - (async () => { - if (!appData || !appParams) - return - if (!isWorkflow) - fetchSavedMessage() - const { app_id: appId, site: siteInfo, custom_config } = appData - setAppId(appId) - setSiteInfo(siteInfo as SiteInfo) - setCustomConfig(custom_config) - await changeLanguage(siteInfo.default_language) - - const { user_input_form, more_like_this, file_upload, text_to_speech }: any = appParams - setVisionConfig({ - // legacy of image upload compatible - ...file_upload, - transfer_methods: file_upload?.allowed_file_upload_methods || file_upload?.allowed_upload_methods, - // legacy of image upload compatible - image_file_size_limit: appParams?.system_parameters.image_file_size_limit, - fileUploadConfig: appParams?.system_parameters, - } as any) - const prompt_variables = userInputsFormToPromptVariables(user_input_form) - setPromptConfig({ - prompt_template: '', // placeholder for future - prompt_variables, - } as PromptConfig) - setMoreLikeThisConfig(more_like_this) - setTextToSpeechConfig(text_to_speech) - })() - }, [appData, appParams, fetchSavedMessage, isWorkflow]) - - // Can Use metadata(https://beta.nextjs.org/docs/api-reference/metadata) to set title. But it only works in server side client. 
- useDocumentTitle(siteInfo?.title || t('generation.title', { ns: 'share' })) - - useAppFavicon({ - enable: !isInstalledApp, - icon_type: siteInfo?.icon_type, - icon: siteInfo?.icon, - icon_background: siteInfo?.icon_background, - icon_url: siteInfo?.icon_url, - }) - - const [isShowResultPanel, { setTrue: doShowResultPanel, setFalse: hideResultPanel }] = useBoolean(false) - const showResultPanel = () => { - // fix: useClickAway hideResSidebar will close sidebar - setTimeout(() => { - doShowResultPanel() - }, 0) - } - const [resultExisted, setResultExisted] = useState(false) - - const renderRes = (task?: Task) => ( - setResultExisted(true)} - onRunControlChange={!isCallBatchAPI ? setRunControl : undefined} - hideInlineStopButton={!isCallBatchAPI} - /> - ) - - const renderBatchRes = () => { - return (showTaskList.map(task => renderRes(task))) - } - - const renderResWrap = ( -
- {isCallBatchAPI && ( -
-
{t('generation.executions', { ns: 'share', num: allTaskList.length })}
- {allSuccessTaskList.length > 0 && ( - - )} -
- )} -
- {!isCallBatchAPI ? renderRes() : renderBatchRes()} - {!noPendingTask && ( -
- -
- )} -
- {isCallBatchAPI && allFailedTaskList.length > 0 && ( -
- -
{t('generation.batchFailed.info', { ns: 'share', num: allFailedTaskList.length })}
-
-
{t('generation.batchFailed.retry', { ns: 'share' })}
-
- )} -
- ) + }, [runBatchExecution, showResultPanel]) if (!appId || !siteInfo || !promptConfig) { return ( @@ -511,147 +126,72 @@ const TextGeneration: FC = ({
) } + return ( -
- {/* Left */} -
- {/* header */} -
-
- -
{siteInfo.title}
- -
- {siteInfo.description && ( -
{siteInfo.description}
- )} - , - extra: savedMessages.length > 0 - ? ( - - {savedMessages.length} - - ) - : null, - }] - : []), - ]} - value={currentTab} - onChange={setCurrentTab} - /> -
- {/* form */} -
-
- -
-
- -
- {currentTab === 'saved' && ( - setCurrentTab('create')} - /> - )} -
- {/* powered by */} - {!customConfig?.remove_webapp_brand && ( -
-
{t('chat.poweredBy', { ns: 'share' })}
- { - systemFeatures.branding.enabled && systemFeatures.branding.workspace_logo - ? logo - : customConfig?.replace_webapp_logo - ? logo - : - } -
- )} -
- {/* Result */} -
- {!isPC && ( -
{ - if (isShowResultPanel) - hideResultPanel() - else - showResultPanel() - }} - > -
-
- )} - {renderResWrap} -
+ +
) } diff --git a/web/app/components/share/text-generation/run-once/index.tsx b/web/app/components/share/text-generation/run-once/index.tsx index 6bf0cc0cc1..6c98cb5de5 100644 --- a/web/app/components/share/text-generation/run-once/index.tsx +++ b/web/app/components/share/text-generation/run-once/index.tsx @@ -1,5 +1,6 @@ import type { ChangeEvent, FC, FormEvent } from 'react' import type { InputValueTypes } from '../types' +import type { FileEntity } from '@/app/components/base/file-uploader/types' import type { PromptConfig } from '@/models/debug' import type { SiteInfo } from '@/models/share' import type { VisionFile, VisionSettings } from '@/types/app' @@ -169,7 +170,9 @@ const RunOnce: FC = ({ )} {item.type === 'file' && ( { handleInputsChange({ ...inputsRef.current, [item.key]: files[0] }) }} fileConfig={{ ...item.config, @@ -179,7 +182,7 @@ const RunOnce: FC = ({ )} {item.type === 'file-list' && ( { handleInputsChange({ ...inputsRef.current, [item.key]: files }) }} fileConfig={{ ...item.config, diff --git a/web/app/components/share/text-generation/text-generation-result-panel.tsx b/web/app/components/share/text-generation/text-generation-result-panel.tsx new file mode 100644 index 0000000000..a47ad2858c --- /dev/null +++ b/web/app/components/share/text-generation/text-generation-result-panel.tsx @@ -0,0 +1,195 @@ +import type { FC } from 'react' +import type { InputValueTypes, Task, TextGenerationRunControl } from './types' +import type { PromptConfig } from '@/models/debug' +import type { SiteInfo } from '@/models/share' +import type { AppSourceType } from '@/service/share' +import type { VisionFile, VisionSettings } from '@/types/app' +import { useTranslation } from 'react-i18next' +import Loading from '@/app/components/base/loading' +import Res from '@/app/components/share/text-generation/result' +import { cn } from '@/utils/classnames' +import ResDownload from './run-batch/res-download' +import { TaskStatus } from './types' + +type TextGenerationResultPanelProps = { + allFailedTaskList: Task[] + allSuccessTaskList: Task[] + allTaskList: Task[] + appId: string + appSourceType: AppSourceType + completionFiles: VisionFile[] + controlRetry: number + controlSend: number + controlStopResponding: number + exportRes: Record[] + handleCompleted: (completionRes: string, taskId?: number, isSuccess?: boolean) => void + handleRetryAllFailedTask: () => void + handleSaveMessage: (messageId: string) => Promise + inputs: Record + isCallBatchAPI: boolean + isPC: boolean + isShowResultPanel: boolean + isWorkflow: boolean + moreLikeThisEnabled: boolean + noPendingTask: boolean + onHideResultPanel: () => void + onRunControlChange: (control: TextGenerationRunControl | null) => void + onRunStart: () => void + onShowResultPanel: () => void + promptConfig: PromptConfig + resultExisted: boolean + showTaskList: Task[] + siteInfo: SiteInfo + textToSpeechEnabled: boolean + visionConfig: VisionSettings +} + +const TextGenerationResultPanel: FC = ({ + allFailedTaskList, + allSuccessTaskList, + allTaskList, + appId, + appSourceType, + completionFiles, + controlRetry, + controlSend, + controlStopResponding, + exportRes, + handleCompleted, + handleRetryAllFailedTask, + handleSaveMessage, + inputs, + isCallBatchAPI, + isPC, + isShowResultPanel, + isWorkflow, + moreLikeThisEnabled, + noPendingTask, + onHideResultPanel, + onRunControlChange, + onRunStart, + onShowResultPanel, + promptConfig, + resultExisted, + showTaskList, + siteInfo, + textToSpeechEnabled, + visionConfig, +}) => { + const { t } = useTranslation() 
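+  // Renders the shared <Res> component for one batch task, or for the single
+  // run-once execution when no task is passed.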
+ + const renderResult = (task?: Task) => ( + + ) + + return ( +
+ {!isPC && ( +
{ + if (isShowResultPanel) + onHideResultPanel() + else + onShowResultPanel() + }} + > +
+
+ )} +
+ {isCallBatchAPI && ( +
+
{t('generation.executions', { ns: 'share', num: allTaskList.length })}
+ {allSuccessTaskList.length > 0 && ( + + )} +
+ )} +
+ {isCallBatchAPI ? showTaskList.map(task => renderResult(task)) : renderResult()} + {!noPendingTask && ( +
+ +
+ )} +
+ {isCallBatchAPI && allFailedTaskList.length > 0 && ( +
+ +
{t('generation.batchFailed.info', { ns: 'share', num: allFailedTaskList.length })}
+
+
{t('generation.batchFailed.retry', { ns: 'share' })}
+
+ )} +
+
+ ) +} + +export default TextGenerationResultPanel diff --git a/web/app/components/share/text-generation/text-generation-sidebar.tsx b/web/app/components/share/text-generation/text-generation-sidebar.tsx new file mode 100644 index 0000000000..70b65f59e9 --- /dev/null +++ b/web/app/components/share/text-generation/text-generation-sidebar.tsx @@ -0,0 +1,177 @@ +import type { FC, RefObject } from 'react' +import type { InputValueTypes, TextGenerationCustomConfig, TextGenerationRunControl } from './types' +import type { PromptConfig, SavedMessage, TextToSpeechConfig } from '@/models/debug' +import type { SiteInfo } from '@/models/share' +import type { VisionFile, VisionSettings } from '@/types/app' +import type { SystemFeatures } from '@/types/feature' +import { useTranslation } from 'react-i18next' +import SavedItems from '@/app/components/app/text-generate/saved-items' +import AppIcon from '@/app/components/base/app-icon' +import Badge from '@/app/components/base/badge' +import DifyLogo from '@/app/components/base/logo/dify-logo' +import { appDefaultIconBackground } from '@/config' +import { AccessMode } from '@/models/access-control' +import { cn } from '@/utils/classnames' +import TabHeader from '../../base/tab-header' +import MenuDropdown from './menu-dropdown' +import RunBatch from './run-batch' +import RunOnce from './run-once' + +type TextGenerationSidebarProps = { + accessMode: AccessMode + allTasksRun: boolean + currentTab: string + customConfig: TextGenerationCustomConfig | null + inputs: Record + inputsRef: RefObject> + isInstalledApp: boolean + isPC: boolean + isWorkflow: boolean + onBatchSend: (data: string[][]) => void + onInputsChange: (inputs: Record) => void + onRemoveSavedMessage: (messageId: string) => Promise + onRunOnceSend: () => void + onTabChange: (tab: string) => void + onVisionFilesChange: (files: VisionFile[]) => void + promptConfig: PromptConfig + resultExisted: boolean + runControl: TextGenerationRunControl | null + savedMessages: SavedMessage[] + siteInfo: SiteInfo + systemFeatures: SystemFeatures + textToSpeechConfig: TextToSpeechConfig | null + visionConfig: VisionSettings +} + +const TextGenerationSidebar: FC = ({ + accessMode, + allTasksRun, + currentTab, + customConfig, + inputs, + inputsRef, + isInstalledApp, + isPC, + isWorkflow, + onBatchSend, + onInputsChange, + onRemoveSavedMessage, + onRunOnceSend, + onTabChange, + onVisionFilesChange, + promptConfig, + resultExisted, + runControl, + savedMessages, + siteInfo, + systemFeatures, + textToSpeechConfig, + visionConfig, +}) => { + const { t } = useTranslation() + + return ( +
+
+
+ +
{siteInfo.title}
+ +
+ {siteInfo.description && ( +
{siteInfo.description}
+ )} + , + extra: savedMessages.length > 0 + ? ( + + {savedMessages.length} + + ) + : null, + }] + : []), + ]} + value={currentTab} + onChange={onTabChange} + /> +
+
+
+ +
+
+ +
+ {currentTab === 'saved' && ( + onTabChange('create')} + /> + )} +
+ {!customConfig?.remove_webapp_brand && ( +
+
{t('chat.poweredBy', { ns: 'share' })}
+ {systemFeatures.branding.enabled && systemFeatures.branding.workspace_logo + ? logo + : customConfig?.replace_webapp_logo + ? logo + : } +
+ )} +
+ ) +} + +export default TextGenerationSidebar diff --git a/web/app/components/share/text-generation/types.ts b/web/app/components/share/text-generation/types.ts index 144ced28a2..b2c1a605c3 100644 --- a/web/app/components/share/text-generation/types.ts +++ b/web/app/components/share/text-generation/types.ts @@ -1,3 +1,5 @@ +import type { FileEntity } from '@/app/components/base/file-uploader/types' + type TaskParam = { inputs: Record } @@ -15,5 +17,22 @@ export enum TaskStatus { failed = 'failed', } -// eslint-disable-next-line ts/no-explicit-any -export type InputValueTypes = string | boolean | number | string[] | object | undefined | any +export type InputValueTypes + = | string + | boolean + | number + | string[] + | Record + | FileEntity + | FileEntity[] + | undefined + +export type TextGenerationRunControl = { + onStop: () => Promise | void + isStopping: boolean +} + +export type TextGenerationCustomConfig = Record & { + remove_webapp_brand?: boolean + replace_webapp_logo?: string +} diff --git a/web/eslint-suppressions.json b/web/eslint-suppressions.json index 8f1b202044..041ff78a1b 100644 --- a/web/eslint-suppressions.json +++ b/web/eslint-suppressions.json @@ -5616,12 +5616,6 @@ "app/components/share/text-generation/index.tsx": { "react-hooks-extra/no-direct-set-state-in-use-effect": { "count": 1 - }, - "tailwindcss/enforce-consistent-class-order": { - "count": 6 - }, - "ts/no-explicit-any": { - "count": 8 } }, "app/components/share/text-generation/info-modal.tsx": {