Merge branch 'main' into feat/trigger

lyzno1
2025-10-10 15:09:38 +08:00
124 changed files with 886 additions and 271 deletions

View File

@@ -373,11 +373,11 @@ class HttpConfig(BaseSettings):
     )
     HTTP_REQUEST_MAX_READ_TIMEOUT: int = Field(
-        ge=1, description="Maximum read timeout in seconds for HTTP requests", default=60
+        ge=1, description="Maximum read timeout in seconds for HTTP requests", default=600
     )
     HTTP_REQUEST_MAX_WRITE_TIMEOUT: int = Field(
-        ge=1, description="Maximum write timeout in seconds for HTTP requests", default=20
+        ge=1, description="Maximum write timeout in seconds for HTTP requests", default=600
     )
     HTTP_REQUEST_NODE_MAX_BINARY_SIZE: PositiveInt = Field(
@@ -782,7 +782,7 @@ class MailConfig(BaseSettings):
     MAIL_TEMPLATING_TIMEOUT: int = Field(
         description="""
-        Timeout for email templating in seconds. Used to prevent infinite loops in malicious templates.
+            Timeout for email templating in seconds. Used to prevent infinite loops in malicious templates.
             Only available in sandbox mode.""",
         default=3,
     )
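
As an aside on the pattern these fields use, here is a minimal pydantic-settings sketch (a toy class, not Dify's actual config) showing how the ge=1 bound and the new 600-second default interact with environment overrides:

from pydantic import Field
from pydantic_settings import BaseSettings

class HttpTimeoutConfig(BaseSettings):
    # Same shape as the fields in the hunk above: a validated default that an
    # environment variable of the same name can override.
    HTTP_REQUEST_MAX_READ_TIMEOUT: int = Field(
        ge=1, description="Maximum read timeout in seconds for HTTP requests", default=600
    )

config = HttpTimeoutConfig()
print(config.HTTP_REQUEST_MAX_READ_TIMEOUT)  # 600 unless the env var overrides it
# HTTP_REQUEST_MAX_READ_TIMEOUT=0 would raise a ValidationError because of ge=1.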

View File

@@ -1,4 +1,5 @@
 from configs import dify_config
+from libs.collection_utils import convert_to_lower_and_upper_set
 
 HIDDEN_VALUE = "[__HIDDEN__]"
 UNKNOWN_VALUE = "[__UNKNOWN__]"
@@ -6,24 +7,39 @@ UUID_NIL = "00000000-0000-0000-0000-000000000000"
 DEFAULT_FILE_NUMBER_LIMITS = 3
 
-IMAGE_EXTENSIONS = ["jpg", "jpeg", "png", "webp", "gif", "svg"]
-IMAGE_EXTENSIONS.extend([ext.upper() for ext in IMAGE_EXTENSIONS])
+IMAGE_EXTENSIONS = convert_to_lower_and_upper_set({"jpg", "jpeg", "png", "webp", "gif", "svg"})
 
-VIDEO_EXTENSIONS = ["mp4", "mov", "mpeg", "webm"]
-VIDEO_EXTENSIONS.extend([ext.upper() for ext in VIDEO_EXTENSIONS])
+VIDEO_EXTENSIONS = convert_to_lower_and_upper_set({"mp4", "mov", "mpeg", "webm"})
 
-AUDIO_EXTENSIONS = ["mp3", "m4a", "wav", "amr", "mpga"]
-AUDIO_EXTENSIONS.extend([ext.upper() for ext in AUDIO_EXTENSIONS])
+AUDIO_EXTENSIONS = convert_to_lower_and_upper_set({"mp3", "m4a", "wav", "amr", "mpga"})
 
-_doc_extensions: list[str]
+_doc_extensions: set[str]
 if dify_config.ETL_TYPE == "Unstructured":
-    _doc_extensions = ["txt", "markdown", "md", "mdx", "pdf", "html", "htm", "xlsx", "xls", "vtt", "properties"]
-    _doc_extensions.extend(("doc", "docx", "csv", "eml", "msg", "pptx", "xml", "epub"))
+    _doc_extensions = {
+        "txt",
+        "markdown",
+        "md",
+        "mdx",
+        "pdf",
+        "html",
+        "htm",
+        "xlsx",
+        "xls",
+        "vtt",
+        "properties",
+        "doc",
+        "docx",
+        "csv",
+        "eml",
+        "msg",
+        "pptx",
+        "xml",
+        "epub",
+    }
     if dify_config.UNSTRUCTURED_API_URL:
-        _doc_extensions.append("ppt")
+        _doc_extensions.add("ppt")
 else:
-    _doc_extensions = [
+    _doc_extensions = {
         "txt",
         "markdown",
         "md",
@@ -37,5 +53,5 @@ else:
         "csv",
         "vtt",
         "properties",
-    ]
-DOCUMENT_EXTENSIONS = _doc_extensions + [ext.upper() for ext in _doc_extensions]
+    }
+DOCUMENT_EXTENSIONS: set[str] = convert_to_lower_and_upper_set(_doc_extensions)

View File

@@ -61,9 +61,6 @@ class AppRunner:
         if model_context_tokens is None:
             return -1
 
-        if max_tokens is None:
-            max_tokens = 0
-
         prompt_tokens = model_instance.get_llm_num_tokens(prompt_messages)
 
         if prompt_tokens + max_tokens > model_context_tokens:
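
For context, the removed guard used to default a missing max_tokens to 0 before the budget check. A hedged sketch of the surrounding logic (the two conditions are taken from the hunk; the clamping branch is an assumption about what follows):

def completion_budget(prompt_tokens: int, max_tokens: int, model_context_tokens: int | None) -> int:
    # Unknown context window: mirror the early return in the hunk.
    if model_context_tokens is None:
        return -1
    # If prompt plus requested completion overflows the window, shrink the
    # completion allowance so the request still fits (assumed behavior).
    if prompt_tokens + max_tokens > model_context_tokens:
        return max(model_context_tokens - prompt_tokens, 0)
    return max_tokens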

View File

@@ -1,6 +1,6 @@
 from collections.abc import Generator
 from dataclasses import dataclass, field
-from typing import TypeVar, Union, cast
+from typing import TypeVar, Union
 
 from core.agent.entities import AgentInvokeMessage
 from core.tools.entities.tool_entities import ToolInvokeMessage
@@ -87,7 +87,8 @@ def merge_blob_chunks(
                     ),
                     meta=resp.meta,
                 )
-                yield cast(MessageType, merged_message)
+                assert isinstance(merged_message, (ToolInvokeMessage, AgentInvokeMessage))
+                yield merged_message  # type: ignore
                 # Clean up the buffer
                 del files[chunk_id]
             else:
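
The motivation for this change: typing.cast is unchecked at runtime, whereas assert isinstance performs a real check that also narrows the type for the checker. A standalone sketch of the narrowing pattern (toy classes, not the actual message types):

class ToolMessage: ...
class AgentMessage: ...

def narrow(merged: object) -> ToolMessage | AgentMessage:
    # The runtime check replaces an unchecked cast; the checker accepts the
    # return thanks to isinstance narrowing. In generic code like the hunk
    # above, binding back to the caller's TypeVar still needs a targeted ignore.
    assert isinstance(merged, (ToolMessage, AgentMessage))
    return merged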

View File

@@ -342,10 +342,13 @@ class IterationNode(Node):
         iterator_list_value: Sequence[object],
         iter_run_map: dict[str, float],
     ) -> Generator[NodeEventBase, None, None]:
+        # Flatten the list of lists if all outputs are lists
+        flattened_outputs = self._flatten_outputs_if_needed(outputs)
+
         yield IterationSucceededEvent(
             start_at=started_at,
             inputs=inputs,
-            outputs={"output": outputs},
+            outputs={"output": flattened_outputs},
             steps=len(iterator_list_value),
             metadata={
                 WorkflowNodeExecutionMetadataKey.TOTAL_TOKENS: self.graph_runtime_state.total_tokens,
@@ -357,13 +360,39 @@
         yield StreamCompletedEvent(
             node_run_result=NodeRunResult(
                 status=WorkflowNodeExecutionStatus.SUCCEEDED,
-                outputs={"output": outputs},
+                outputs={"output": flattened_outputs},
                 metadata={
                     WorkflowNodeExecutionMetadataKey.TOTAL_TOKENS: self.graph_runtime_state.total_tokens,
                 },
             )
         )
 
+    def _flatten_outputs_if_needed(self, outputs: list[object]) -> list[object]:
+        """
+        Flatten the outputs list if all elements are lists.
+        This maintains backward compatibility with version 1.8.1 behavior.
+        """
+        if not outputs:
+            return outputs
+
+        # Check if all non-None outputs are lists
+        non_none_outputs = [output for output in outputs if output is not None]
+        if not non_none_outputs:
+            return outputs
+
+        if all(isinstance(output, list) for output in non_none_outputs):
+            # Flatten the list of lists
+            flattened: list[Any] = []
+            for output in outputs:
+                if isinstance(output, list):
+                    flattened.extend(output)
+                elif output is not None:
+                    # This shouldn't happen based on our check, but handle it gracefully
+                    flattened.append(output)
+            return flattened
+
+        return outputs
+
     def _handle_iteration_failure(
         self,
         started_at: datetime,
@@ -373,10 +402,13 @@
         iter_run_map: dict[str, float],
         error: IterationNodeError,
     ) -> Generator[NodeEventBase, None, None]:
+        # Flatten the list of lists if all outputs are lists (even in failure case)
+        flattened_outputs = self._flatten_outputs_if_needed(outputs)
+
         yield IterationFailedEvent(
             start_at=started_at,
             inputs=inputs,
-            outputs={"output": outputs},
+            outputs={"output": flattened_outputs},
             steps=len(iterator_list_value),
             metadata={
                 WorkflowNodeExecutionMetadataKey.TOTAL_TOKENS: self.graph_runtime_state.total_tokens,
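
To make the backward-compatibility behavior concrete, here is the helper's decision rule re-run on sample data (a standalone illustration, not the node code itself):

outputs: list[object] = [["a", "b"], None, ["c"]]

# _flatten_outputs_if_needed flattens only when every non-None element is a list.
non_none = [o for o in outputs if o is not None]
if non_none and all(isinstance(o, list) for o in non_none):
    flattened = [item for o in outputs if isinstance(o, list) for item in o]
else:
    flattened = outputs

print(flattened)  # ['a', 'b', 'c'], the flat {"output": [...]} shape from 1.8.1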

View File

@@ -2,7 +2,7 @@ import datetime
 import logging
 import time
 from collections.abc import Mapping
-from typing import Any, cast
+from typing import Any
 
 from sqlalchemy import func, select
 
@@ -62,7 +62,7 @@ class KnowledgeIndexNode(Node):
         return self._node_data
 
     def _run(self) -> NodeRunResult:  # type: ignore
-        node_data = cast(KnowledgeIndexNodeData, self._node_data)
+        node_data = self._node_data
         variable_pool = self.graph_runtime_state.variable_pool
         dataset_id = variable_pool.get(["sys", SystemVariableKey.DATASET_ID])
         if not dataset_id:

View File

@@ -136,6 +136,7 @@ def init_app(app: DifyApp):
     from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HTTPSpanExporter
     from opentelemetry.instrumentation.celery import CeleryInstrumentor
     from opentelemetry.instrumentation.flask import FlaskInstrumentor
+    from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
     from opentelemetry.instrumentation.redis import RedisInstrumentor
     from opentelemetry.instrumentation.requests import RequestsInstrumentor
     from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
@@ -238,6 +239,7 @@ def init_app(app: DifyApp):
     init_sqlalchemy_instrumentor(app)
     RedisInstrumentor().instrument()
     RequestsInstrumentor().instrument()
+    HTTPXClientInstrumentor().instrument()
 
     atexit.register(shutdown_tracer)
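
Once the instrumentor is registered, every outbound httpx call in the process is traced automatically. A minimal sketch of the effect in isolation:

import httpx
from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor

# Patches httpx.Client and httpx.AsyncClient globally, as the init_app hook above does.
HTTPXClientInstrumentor().instrument()

response = httpx.get("https://example.com")  # now emits a CLIENT span per request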

View File

@@ -0,0 +1,14 @@
+def convert_to_lower_and_upper_set(inputs: list[str] | set[str]) -> set[str]:
+    """
+    Convert a list or set of strings to a set containing both lower and upper case versions of each string.
+
+    Args:
+        inputs (list[str] | set[str]): A list or set of strings to be converted.
+
+    Returns:
+        set[str]: A set containing both lower and upper case versions of each string.
+    """
+    if not inputs:
+        return set()
+    else:
+        return {case for s in inputs if s for case in (s.lower(), s.upper())}
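
A quick usage sketch of the new helper (the import path matches the constants diff above):

from libs.collection_utils import convert_to_lower_and_upper_set

exts = convert_to_lower_and_upper_set(["jpg", "PDF"])
print(sorted(exts))   # ['JPG', 'PDF', 'jpg', 'pdf']
print("Jpg" in exts)  # False: only the all-lower and all-upper forms are generated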

View File

@@ -46,6 +46,7 @@ dependencies = [
     "opentelemetry-instrumentation==0.48b0",
     "opentelemetry-instrumentation-celery==0.48b0",
     "opentelemetry-instrumentation-flask==0.48b0",
+    "opentelemetry-instrumentation-httpx==0.48b0",
     "opentelemetry-instrumentation-redis==0.48b0",
     "opentelemetry-instrumentation-requests==0.48b0",
     "opentelemetry-instrumentation-sqlalchemy==0.48b0",

View File

@@ -12,6 +12,7 @@
     "flask_login",
     "opentelemetry.instrumentation.celery",
     "opentelemetry.instrumentation.flask",
+    "opentelemetry.instrumentation.httpx",
     "opentelemetry.instrumentation.requests",
     "opentelemetry.instrumentation.sqlalchemy",
     "opentelemetry.instrumentation.redis"
@@ -23,9 +24,7 @@
     "reportUnknownLambdaType": "hint",
     "reportMissingParameterType": "hint",
     "reportMissingTypeArgument": "hint",
-    "reportUnnecessaryContains": "hint",
-    "reportUnnecessaryComparison": "hint",
     "reportUnnecessaryCast": "hint",
     "reportUnnecessaryIsInstance": "hint",
     "reportUntypedFunctionDecorator": "hint",

View File

@@ -149,8 +149,7 @@ class RagPipelineTransformService:
         file_extensions = node.get("data", {}).get("fileExtensions", [])
         if not file_extensions:
             return node
-        file_extensions = [file_extension.lower() for file_extension in file_extensions]
-        node["data"]["fileExtensions"] = DOCUMENT_EXTENSIONS
+        node["data"]["fileExtensions"] = [ext.lower() for ext in file_extensions if ext in DOCUMENT_EXTENSIONS]
         return node
 
     def _deal_knowledge_index(
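
The behavioral difference, shown on hypothetical values: the old code threw away the user's selection and substituted the full DOCUMENT_EXTENSIONS set, while the new code keeps only the selected extensions that are actually supported.

DOCUMENT_EXTENSIONS = {"pdf", "PDF", "txt", "TXT"}  # hypothetical subset for illustration
file_extensions = ["PDF", "exe"]

# Old: node["data"]["fileExtensions"] = DOCUMENT_EXTENSIONS (selection discarded).
# New: filter the selection against the supported set.
result = [ext.lower() for ext in file_extensions if ext in DOCUMENT_EXTENSIONS]
print(result)  # ['pdf'], and the unsupported "exe" is dropped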

View File

@@ -1,7 +1,7 @@
 import hashlib
 import json
 from datetime import datetime
-from typing import Any, cast
+from typing import Any
 
 from sqlalchemy import or_
 from sqlalchemy.exc import IntegrityError
@@ -55,7 +55,7 @@ class MCPToolManageService:
             cache=NoOpProviderCredentialCache(),
         )
-        return cast(dict[str, str], encrypter_instance.encrypt(headers))
+        return encrypter_instance.encrypt(headers)
 
     @staticmethod
     def get_mcp_provider_by_provider_id(provider_id: str, tenant_id: str) -> MCPToolProvider:

View File

@@ -5,15 +5,10 @@ These tasks provide asynchronous storage capabilities for workflow execution dat
 improving performance by offloading storage operations to background workers.
 """
 
 import logging
 
-from celery import shared_task  # type: ignore[import-untyped]
-from sqlalchemy.orm import Session
-
-from extensions.ext_database import db
-
-_logger = logging.getLogger(__name__)
+from services.workflow_draft_variable_service import DraftVarFileDeletion, WorkflowDraftVariableService

View File

@@ -15,13 +15,13 @@ def test_dify_config(monkeypatch: pytest.MonkeyPatch):
     # Set environment variables using monkeypatch
     monkeypatch.setenv("CONSOLE_API_URL", "https://example.com")
     monkeypatch.setenv("CONSOLE_WEB_URL", "https://example.com")
-    monkeypatch.setenv("HTTP_REQUEST_MAX_WRITE_TIMEOUT", "30")
+    monkeypatch.setenv("HTTP_REQUEST_MAX_WRITE_TIMEOUT", "30")  # Custom value for testing
     monkeypatch.setenv("DB_USERNAME", "postgres")
     monkeypatch.setenv("DB_PASSWORD", "postgres")
    monkeypatch.setenv("DB_HOST", "localhost")
     monkeypatch.setenv("DB_PORT", "5432")
     monkeypatch.setenv("DB_DATABASE", "dify")
-    monkeypatch.setenv("HTTP_REQUEST_MAX_READ_TIMEOUT", "600")
+    monkeypatch.setenv("HTTP_REQUEST_MAX_READ_TIMEOUT", "300")  # Custom value for testing
 
     # load dotenv file with pydantic-settings
     config = DifyConfig()
@@ -35,16 +35,36 @@ def test_dify_config(monkeypatch: pytest.MonkeyPatch):
     assert config.SENTRY_TRACES_SAMPLE_RATE == 1.0
     assert config.TEMPLATE_TRANSFORM_MAX_LENGTH == 400_000
 
-    # annotated field with default value
-    assert config.HTTP_REQUEST_MAX_READ_TIMEOUT == 600
+    # annotated field with custom configured value
+    assert config.HTTP_REQUEST_MAX_READ_TIMEOUT == 300
 
-    # annotated field with configured value
+    # annotated field with custom configured value
     assert config.HTTP_REQUEST_MAX_WRITE_TIMEOUT == 30
 
     # values from pyproject.toml
     assert Version(config.project.version) >= Version("1.0.0")
 
 
+def test_http_timeout_defaults(monkeypatch: pytest.MonkeyPatch):
+    """Test that HTTP timeout defaults are correctly set"""
+    # clear system environment variables
+    os.environ.clear()
+
+    # Set minimal required env vars
+    monkeypatch.setenv("DB_USERNAME", "postgres")
+    monkeypatch.setenv("DB_PASSWORD", "postgres")
+    monkeypatch.setenv("DB_HOST", "localhost")
+    monkeypatch.setenv("DB_PORT", "5432")
+    monkeypatch.setenv("DB_DATABASE", "dify")
+
+    config = DifyConfig()
+
+    # Verify default timeout values
+    assert config.HTTP_REQUEST_MAX_CONNECT_TIMEOUT == 10
+    assert config.HTTP_REQUEST_MAX_READ_TIMEOUT == 600
+    assert config.HTTP_REQUEST_MAX_WRITE_TIMEOUT == 600
+
+
 # NOTE: If there is a `.env` file in your Workspace, this test might not succeed as expected.
 # This is due to `pymilvus` loading all the variables from the `.env` file into `os.environ`.
 def test_flask_configs(monkeypatch: pytest.MonkeyPatch):
@@ -55,7 +75,6 @@ def test_flask_configs(monkeypatch: pytest.MonkeyPatch):
     # Set environment variables using monkeypatch
     monkeypatch.setenv("CONSOLE_API_URL", "https://example.com")
     monkeypatch.setenv("CONSOLE_WEB_URL", "https://example.com")
-    monkeypatch.setenv("HTTP_REQUEST_MAX_WRITE_TIMEOUT", "30")
     monkeypatch.setenv("DB_USERNAME", "postgres")
     monkeypatch.setenv("DB_PASSWORD", "postgres")
     monkeypatch.setenv("DB_HOST", "localhost")
@@ -105,7 +124,6 @@ def test_inner_api_config_exist(monkeypatch: pytest.MonkeyPatch):
     # Set environment variables using monkeypatch
     monkeypatch.setenv("CONSOLE_API_URL", "https://example.com")
     monkeypatch.setenv("CONSOLE_WEB_URL", "https://example.com")
-    monkeypatch.setenv("HTTP_REQUEST_MAX_WRITE_TIMEOUT", "30")
     monkeypatch.setenv("DB_USERNAME", "postgres")
     monkeypatch.setenv("DB_PASSWORD", "postgres")
     monkeypatch.setenv("DB_HOST", "localhost")

api/uv.lock generated
View File

@@ -1337,6 +1337,7 @@ dependencies = [
     { name = "opentelemetry-instrumentation" },
     { name = "opentelemetry-instrumentation-celery" },
     { name = "opentelemetry-instrumentation-flask" },
+    { name = "opentelemetry-instrumentation-httpx" },
     { name = "opentelemetry-instrumentation-redis" },
     { name = "opentelemetry-instrumentation-requests" },
     { name = "opentelemetry-instrumentation-sqlalchemy" },
@@ -1528,6 +1529,7 @@ requires-dist = [
     { name = "opentelemetry-instrumentation", specifier = "==0.48b0" },
     { name = "opentelemetry-instrumentation-celery", specifier = "==0.48b0" },
     { name = "opentelemetry-instrumentation-flask", specifier = "==0.48b0" },
+    { name = "opentelemetry-instrumentation-httpx", specifier = "==0.48b0" },
     { name = "opentelemetry-instrumentation-redis", specifier = "==0.48b0" },
     { name = "opentelemetry-instrumentation-requests", specifier = "==0.48b0" },
     { name = "opentelemetry-instrumentation-sqlalchemy", specifier = "==0.48b0" },
@@ -3893,6 +3895,21 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/78/3d/fcde4f8f0bf9fa1ee73a12304fa538076fb83fe0a2ae966ab0f0b7da5109/opentelemetry_instrumentation_flask-0.48b0-py3-none-any.whl", hash = "sha256:26b045420b9d76e85493b1c23fcf27517972423480dc6cf78fd6924248ba5808", size = 14588, upload-time = "2024-08-28T21:26:58.504Z" },
 ]
 
+[[package]]
+name = "opentelemetry-instrumentation-httpx"
+version = "0.48b0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "opentelemetry-api" },
+    { name = "opentelemetry-instrumentation" },
+    { name = "opentelemetry-semantic-conventions" },
+    { name = "opentelemetry-util-http" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/d3/d9/c65d818607c16d1b7ea8d2de6111c6cecadf8d2fd38c1885a72733a7c6d3/opentelemetry_instrumentation_httpx-0.48b0.tar.gz", hash = "sha256:ee977479e10398931921fb995ac27ccdeea2e14e392cb27ef012fc549089b60a", size = 16931, upload-time = "2024-08-28T21:28:03.794Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c2/fe/f2daa9d6d988c093b8c7b1d35df675761a8ece0b600b035dc04982746c9d/opentelemetry_instrumentation_httpx-0.48b0-py3-none-any.whl", hash = "sha256:d94f9d612c82d09fe22944d1904a30a464c19bea2ba76be656c99a28ad8be8e5", size = 13900, upload-time = "2024-08-28T21:27:01.566Z" },
+]
+
 [[package]]
 name = "opentelemetry-instrumentation-redis"
 version = "0.48b0"