mirror of
https://github.com/langgenius/dify.git
synced 2026-05-05 01:48:04 +08:00
Merge branch 'main' into feat/trigger
This commit is contained in:
@ -373,11 +373,11 @@ class HttpConfig(BaseSettings):
|
||||
)
|
||||
|
||||
HTTP_REQUEST_MAX_READ_TIMEOUT: int = Field(
|
||||
ge=1, description="Maximum read timeout in seconds for HTTP requests", default=60
|
||||
ge=1, description="Maximum read timeout in seconds for HTTP requests", default=600
|
||||
)
|
||||
|
||||
HTTP_REQUEST_MAX_WRITE_TIMEOUT: int = Field(
|
||||
ge=1, description="Maximum write timeout in seconds for HTTP requests", default=20
|
||||
ge=1, description="Maximum write timeout in seconds for HTTP requests", default=600
|
||||
)
|
||||
|
||||
HTTP_REQUEST_NODE_MAX_BINARY_SIZE: PositiveInt = Field(
|
||||
@ -782,7 +782,7 @@ class MailConfig(BaseSettings):
|
||||
|
||||
MAIL_TEMPLATING_TIMEOUT: int = Field(
|
||||
description="""
|
||||
Timeout for email templating in seconds. Used to prevent infinite loops in malicious templates.
|
||||
Timeout for email templating in seconds. Used to prevent infinite loops in malicious templates.
|
||||
Only available in sandbox mode.""",
|
||||
default=3,
|
||||
)
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
from configs import dify_config
|
||||
from libs.collection_utils import convert_to_lower_and_upper_set
|
||||
|
||||
HIDDEN_VALUE = "[__HIDDEN__]"
|
||||
UNKNOWN_VALUE = "[__UNKNOWN__]"
|
||||
@ -6,24 +7,39 @@ UUID_NIL = "00000000-0000-0000-0000-000000000000"
|
||||
|
||||
DEFAULT_FILE_NUMBER_LIMITS = 3
|
||||
|
||||
IMAGE_EXTENSIONS = ["jpg", "jpeg", "png", "webp", "gif", "svg"]
|
||||
IMAGE_EXTENSIONS.extend([ext.upper() for ext in IMAGE_EXTENSIONS])
|
||||
IMAGE_EXTENSIONS = convert_to_lower_and_upper_set({"jpg", "jpeg", "png", "webp", "gif", "svg"})
|
||||
|
||||
VIDEO_EXTENSIONS = ["mp4", "mov", "mpeg", "webm"]
|
||||
VIDEO_EXTENSIONS.extend([ext.upper() for ext in VIDEO_EXTENSIONS])
|
||||
VIDEO_EXTENSIONS = convert_to_lower_and_upper_set({"mp4", "mov", "mpeg", "webm"})
|
||||
|
||||
AUDIO_EXTENSIONS = ["mp3", "m4a", "wav", "amr", "mpga"]
|
||||
AUDIO_EXTENSIONS.extend([ext.upper() for ext in AUDIO_EXTENSIONS])
|
||||
AUDIO_EXTENSIONS = convert_to_lower_and_upper_set({"mp3", "m4a", "wav", "amr", "mpga"})
|
||||
|
||||
|
||||
_doc_extensions: list[str]
|
||||
_doc_extensions: set[str]
|
||||
if dify_config.ETL_TYPE == "Unstructured":
|
||||
_doc_extensions = ["txt", "markdown", "md", "mdx", "pdf", "html", "htm", "xlsx", "xls", "vtt", "properties"]
|
||||
_doc_extensions.extend(("doc", "docx", "csv", "eml", "msg", "pptx", "xml", "epub"))
|
||||
_doc_extensions = {
|
||||
"txt",
|
||||
"markdown",
|
||||
"md",
|
||||
"mdx",
|
||||
"pdf",
|
||||
"html",
|
||||
"htm",
|
||||
"xlsx",
|
||||
"xls",
|
||||
"vtt",
|
||||
"properties",
|
||||
"doc",
|
||||
"docx",
|
||||
"csv",
|
||||
"eml",
|
||||
"msg",
|
||||
"pptx",
|
||||
"xml",
|
||||
"epub",
|
||||
}
|
||||
if dify_config.UNSTRUCTURED_API_URL:
|
||||
_doc_extensions.append("ppt")
|
||||
_doc_extensions.add("ppt")
|
||||
else:
|
||||
_doc_extensions = [
|
||||
_doc_extensions = {
|
||||
"txt",
|
||||
"markdown",
|
||||
"md",
|
||||
@ -37,5 +53,5 @@ else:
|
||||
"csv",
|
||||
"vtt",
|
||||
"properties",
|
||||
]
|
||||
DOCUMENT_EXTENSIONS = _doc_extensions + [ext.upper() for ext in _doc_extensions]
|
||||
}
|
||||
DOCUMENT_EXTENSIONS: set[str] = convert_to_lower_and_upper_set(_doc_extensions)
|
||||
|
||||
@ -61,9 +61,6 @@ class AppRunner:
|
||||
if model_context_tokens is None:
|
||||
return -1
|
||||
|
||||
if max_tokens is None:
|
||||
max_tokens = 0
|
||||
|
||||
prompt_tokens = model_instance.get_llm_num_tokens(prompt_messages)
|
||||
|
||||
if prompt_tokens + max_tokens > model_context_tokens:
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
from collections.abc import Generator
|
||||
from dataclasses import dataclass, field
|
||||
from typing import TypeVar, Union, cast
|
||||
from typing import TypeVar, Union
|
||||
|
||||
from core.agent.entities import AgentInvokeMessage
|
||||
from core.tools.entities.tool_entities import ToolInvokeMessage
|
||||
@ -87,7 +87,8 @@ def merge_blob_chunks(
|
||||
),
|
||||
meta=resp.meta,
|
||||
)
|
||||
yield cast(MessageType, merged_message)
|
||||
assert isinstance(merged_message, (ToolInvokeMessage, AgentInvokeMessage))
|
||||
yield merged_message # type: ignore
|
||||
# Clean up the buffer
|
||||
del files[chunk_id]
|
||||
else:
|
||||
|
||||
@ -342,10 +342,13 @@ class IterationNode(Node):
|
||||
iterator_list_value: Sequence[object],
|
||||
iter_run_map: dict[str, float],
|
||||
) -> Generator[NodeEventBase, None, None]:
|
||||
# Flatten the list of lists if all outputs are lists
|
||||
flattened_outputs = self._flatten_outputs_if_needed(outputs)
|
||||
|
||||
yield IterationSucceededEvent(
|
||||
start_at=started_at,
|
||||
inputs=inputs,
|
||||
outputs={"output": outputs},
|
||||
outputs={"output": flattened_outputs},
|
||||
steps=len(iterator_list_value),
|
||||
metadata={
|
||||
WorkflowNodeExecutionMetadataKey.TOTAL_TOKENS: self.graph_runtime_state.total_tokens,
|
||||
@ -357,13 +360,39 @@ class IterationNode(Node):
|
||||
yield StreamCompletedEvent(
|
||||
node_run_result=NodeRunResult(
|
||||
status=WorkflowNodeExecutionStatus.SUCCEEDED,
|
||||
outputs={"output": outputs},
|
||||
outputs={"output": flattened_outputs},
|
||||
metadata={
|
||||
WorkflowNodeExecutionMetadataKey.TOTAL_TOKENS: self.graph_runtime_state.total_tokens,
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
def _flatten_outputs_if_needed(self, outputs: list[object]) -> list[object]:
|
||||
"""
|
||||
Flatten the outputs list if all elements are lists.
|
||||
This maintains backward compatibility with version 1.8.1 behavior.
|
||||
"""
|
||||
if not outputs:
|
||||
return outputs
|
||||
|
||||
# Check if all non-None outputs are lists
|
||||
non_none_outputs = [output for output in outputs if output is not None]
|
||||
if not non_none_outputs:
|
||||
return outputs
|
||||
|
||||
if all(isinstance(output, list) for output in non_none_outputs):
|
||||
# Flatten the list of lists
|
||||
flattened: list[Any] = []
|
||||
for output in outputs:
|
||||
if isinstance(output, list):
|
||||
flattened.extend(output)
|
||||
elif output is not None:
|
||||
# This shouldn't happen based on our check, but handle it gracefully
|
||||
flattened.append(output)
|
||||
return flattened
|
||||
|
||||
return outputs
|
||||
|
||||
def _handle_iteration_failure(
|
||||
self,
|
||||
started_at: datetime,
|
||||
@ -373,10 +402,13 @@ class IterationNode(Node):
|
||||
iter_run_map: dict[str, float],
|
||||
error: IterationNodeError,
|
||||
) -> Generator[NodeEventBase, None, None]:
|
||||
# Flatten the list of lists if all outputs are lists (even in failure case)
|
||||
flattened_outputs = self._flatten_outputs_if_needed(outputs)
|
||||
|
||||
yield IterationFailedEvent(
|
||||
start_at=started_at,
|
||||
inputs=inputs,
|
||||
outputs={"output": outputs},
|
||||
outputs={"output": flattened_outputs},
|
||||
steps=len(iterator_list_value),
|
||||
metadata={
|
||||
WorkflowNodeExecutionMetadataKey.TOTAL_TOKENS: self.graph_runtime_state.total_tokens,
|
||||
|
||||
@ -2,7 +2,7 @@ import datetime
|
||||
import logging
|
||||
import time
|
||||
from collections.abc import Mapping
|
||||
from typing import Any, cast
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import func, select
|
||||
|
||||
@ -62,7 +62,7 @@ class KnowledgeIndexNode(Node):
|
||||
return self._node_data
|
||||
|
||||
def _run(self) -> NodeRunResult: # type: ignore
|
||||
node_data = cast(KnowledgeIndexNodeData, self._node_data)
|
||||
node_data = self._node_data
|
||||
variable_pool = self.graph_runtime_state.variable_pool
|
||||
dataset_id = variable_pool.get(["sys", SystemVariableKey.DATASET_ID])
|
||||
if not dataset_id:
|
||||
|
||||
@ -136,6 +136,7 @@ def init_app(app: DifyApp):
|
||||
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HTTPSpanExporter
|
||||
from opentelemetry.instrumentation.celery import CeleryInstrumentor
|
||||
from opentelemetry.instrumentation.flask import FlaskInstrumentor
|
||||
from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
|
||||
from opentelemetry.instrumentation.redis import RedisInstrumentor
|
||||
from opentelemetry.instrumentation.requests import RequestsInstrumentor
|
||||
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
|
||||
@ -238,6 +239,7 @@ def init_app(app: DifyApp):
|
||||
init_sqlalchemy_instrumentor(app)
|
||||
RedisInstrumentor().instrument()
|
||||
RequestsInstrumentor().instrument()
|
||||
HTTPXClientInstrumentor().instrument()
|
||||
atexit.register(shutdown_tracer)
|
||||
|
||||
|
||||
|
||||
14
api/libs/collection_utils.py
Normal file
14
api/libs/collection_utils.py
Normal file
@ -0,0 +1,14 @@
|
||||
def convert_to_lower_and_upper_set(inputs: list[str] | set[str]) -> set[str]:
|
||||
"""
|
||||
Convert a list or set of strings to a set containing both lower and upper case versions of each string.
|
||||
|
||||
Args:
|
||||
inputs (list[str] | set[str]): A list or set of strings to be converted.
|
||||
|
||||
Returns:
|
||||
set[str]: A set containing both lower and upper case versions of each string.
|
||||
"""
|
||||
if not inputs:
|
||||
return set()
|
||||
else:
|
||||
return {case for s in inputs if s for case in (s.lower(), s.upper())}
|
||||
@ -46,6 +46,7 @@ dependencies = [
|
||||
"opentelemetry-instrumentation==0.48b0",
|
||||
"opentelemetry-instrumentation-celery==0.48b0",
|
||||
"opentelemetry-instrumentation-flask==0.48b0",
|
||||
"opentelemetry-instrumentation-httpx==0.48b0",
|
||||
"opentelemetry-instrumentation-redis==0.48b0",
|
||||
"opentelemetry-instrumentation-requests==0.48b0",
|
||||
"opentelemetry-instrumentation-sqlalchemy==0.48b0",
|
||||
|
||||
@ -12,6 +12,7 @@
|
||||
"flask_login",
|
||||
"opentelemetry.instrumentation.celery",
|
||||
"opentelemetry.instrumentation.flask",
|
||||
"opentelemetry.instrumentation.httpx",
|
||||
"opentelemetry.instrumentation.requests",
|
||||
"opentelemetry.instrumentation.sqlalchemy",
|
||||
"opentelemetry.instrumentation.redis"
|
||||
@ -23,9 +24,7 @@
|
||||
"reportUnknownLambdaType": "hint",
|
||||
"reportMissingParameterType": "hint",
|
||||
"reportMissingTypeArgument": "hint",
|
||||
"reportUnnecessaryContains": "hint",
|
||||
"reportUnnecessaryComparison": "hint",
|
||||
"reportUnnecessaryCast": "hint",
|
||||
"reportUnnecessaryIsInstance": "hint",
|
||||
"reportUntypedFunctionDecorator": "hint",
|
||||
|
||||
|
||||
@ -149,8 +149,7 @@ class RagPipelineTransformService:
|
||||
file_extensions = node.get("data", {}).get("fileExtensions", [])
|
||||
if not file_extensions:
|
||||
return node
|
||||
file_extensions = [file_extension.lower() for file_extension in file_extensions]
|
||||
node["data"]["fileExtensions"] = DOCUMENT_EXTENSIONS
|
||||
node["data"]["fileExtensions"] = [ext.lower() for ext in file_extensions if ext in DOCUMENT_EXTENSIONS]
|
||||
return node
|
||||
|
||||
def _deal_knowledge_index(
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
import hashlib
|
||||
import json
|
||||
from datetime import datetime
|
||||
from typing import Any, cast
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import or_
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
@ -55,7 +55,7 @@ class MCPToolManageService:
|
||||
cache=NoOpProviderCredentialCache(),
|
||||
)
|
||||
|
||||
return cast(dict[str, str], encrypter_instance.encrypt(headers))
|
||||
return encrypter_instance.encrypt(headers)
|
||||
|
||||
@staticmethod
|
||||
def get_mcp_provider_by_provider_id(provider_id: str, tenant_id: str) -> MCPToolProvider:
|
||||
|
||||
@ -5,15 +5,10 @@ These tasks provide asynchronous storage capabilities for workflow execution dat
|
||||
improving performance by offloading storage operations to background workers.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from celery import shared_task # type: ignore[import-untyped]
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from extensions.ext_database import db
|
||||
|
||||
_logger = logging.getLogger(__name__)
|
||||
|
||||
from services.workflow_draft_variable_service import DraftVarFileDeletion, WorkflowDraftVariableService
|
||||
|
||||
|
||||
|
||||
@ -15,13 +15,13 @@ def test_dify_config(monkeypatch: pytest.MonkeyPatch):
|
||||
# Set environment variables using monkeypatch
|
||||
monkeypatch.setenv("CONSOLE_API_URL", "https://example.com")
|
||||
monkeypatch.setenv("CONSOLE_WEB_URL", "https://example.com")
|
||||
monkeypatch.setenv("HTTP_REQUEST_MAX_WRITE_TIMEOUT", "30")
|
||||
monkeypatch.setenv("HTTP_REQUEST_MAX_WRITE_TIMEOUT", "30") # Custom value for testing
|
||||
monkeypatch.setenv("DB_USERNAME", "postgres")
|
||||
monkeypatch.setenv("DB_PASSWORD", "postgres")
|
||||
monkeypatch.setenv("DB_HOST", "localhost")
|
||||
monkeypatch.setenv("DB_PORT", "5432")
|
||||
monkeypatch.setenv("DB_DATABASE", "dify")
|
||||
monkeypatch.setenv("HTTP_REQUEST_MAX_READ_TIMEOUT", "600")
|
||||
monkeypatch.setenv("HTTP_REQUEST_MAX_READ_TIMEOUT", "300") # Custom value for testing
|
||||
|
||||
# load dotenv file with pydantic-settings
|
||||
config = DifyConfig()
|
||||
@ -35,16 +35,36 @@ def test_dify_config(monkeypatch: pytest.MonkeyPatch):
|
||||
assert config.SENTRY_TRACES_SAMPLE_RATE == 1.0
|
||||
assert config.TEMPLATE_TRANSFORM_MAX_LENGTH == 400_000
|
||||
|
||||
# annotated field with default value
|
||||
assert config.HTTP_REQUEST_MAX_READ_TIMEOUT == 600
|
||||
# annotated field with custom configured value
|
||||
assert config.HTTP_REQUEST_MAX_READ_TIMEOUT == 300
|
||||
|
||||
# annotated field with configured value
|
||||
# annotated field with custom configured value
|
||||
assert config.HTTP_REQUEST_MAX_WRITE_TIMEOUT == 30
|
||||
|
||||
# values from pyproject.toml
|
||||
assert Version(config.project.version) >= Version("1.0.0")
|
||||
|
||||
|
||||
def test_http_timeout_defaults(monkeypatch: pytest.MonkeyPatch):
|
||||
"""Test that HTTP timeout defaults are correctly set"""
|
||||
# clear system environment variables
|
||||
os.environ.clear()
|
||||
|
||||
# Set minimal required env vars
|
||||
monkeypatch.setenv("DB_USERNAME", "postgres")
|
||||
monkeypatch.setenv("DB_PASSWORD", "postgres")
|
||||
monkeypatch.setenv("DB_HOST", "localhost")
|
||||
monkeypatch.setenv("DB_PORT", "5432")
|
||||
monkeypatch.setenv("DB_DATABASE", "dify")
|
||||
|
||||
config = DifyConfig()
|
||||
|
||||
# Verify default timeout values
|
||||
assert config.HTTP_REQUEST_MAX_CONNECT_TIMEOUT == 10
|
||||
assert config.HTTP_REQUEST_MAX_READ_TIMEOUT == 600
|
||||
assert config.HTTP_REQUEST_MAX_WRITE_TIMEOUT == 600
|
||||
|
||||
|
||||
# NOTE: If there is a `.env` file in your Workspace, this test might not succeed as expected.
|
||||
# This is due to `pymilvus` loading all the variables from the `.env` file into `os.environ`.
|
||||
def test_flask_configs(monkeypatch: pytest.MonkeyPatch):
|
||||
@ -55,7 +75,6 @@ def test_flask_configs(monkeypatch: pytest.MonkeyPatch):
|
||||
# Set environment variables using monkeypatch
|
||||
monkeypatch.setenv("CONSOLE_API_URL", "https://example.com")
|
||||
monkeypatch.setenv("CONSOLE_WEB_URL", "https://example.com")
|
||||
monkeypatch.setenv("HTTP_REQUEST_MAX_WRITE_TIMEOUT", "30")
|
||||
monkeypatch.setenv("DB_USERNAME", "postgres")
|
||||
monkeypatch.setenv("DB_PASSWORD", "postgres")
|
||||
monkeypatch.setenv("DB_HOST", "localhost")
|
||||
@ -105,7 +124,6 @@ def test_inner_api_config_exist(monkeypatch: pytest.MonkeyPatch):
|
||||
# Set environment variables using monkeypatch
|
||||
monkeypatch.setenv("CONSOLE_API_URL", "https://example.com")
|
||||
monkeypatch.setenv("CONSOLE_WEB_URL", "https://example.com")
|
||||
monkeypatch.setenv("HTTP_REQUEST_MAX_WRITE_TIMEOUT", "30")
|
||||
monkeypatch.setenv("DB_USERNAME", "postgres")
|
||||
monkeypatch.setenv("DB_PASSWORD", "postgres")
|
||||
monkeypatch.setenv("DB_HOST", "localhost")
|
||||
|
||||
17
api/uv.lock
generated
17
api/uv.lock
generated
@ -1337,6 +1337,7 @@ dependencies = [
|
||||
{ name = "opentelemetry-instrumentation" },
|
||||
{ name = "opentelemetry-instrumentation-celery" },
|
||||
{ name = "opentelemetry-instrumentation-flask" },
|
||||
{ name = "opentelemetry-instrumentation-httpx" },
|
||||
{ name = "opentelemetry-instrumentation-redis" },
|
||||
{ name = "opentelemetry-instrumentation-requests" },
|
||||
{ name = "opentelemetry-instrumentation-sqlalchemy" },
|
||||
@ -1528,6 +1529,7 @@ requires-dist = [
|
||||
{ name = "opentelemetry-instrumentation", specifier = "==0.48b0" },
|
||||
{ name = "opentelemetry-instrumentation-celery", specifier = "==0.48b0" },
|
||||
{ name = "opentelemetry-instrumentation-flask", specifier = "==0.48b0" },
|
||||
{ name = "opentelemetry-instrumentation-httpx", specifier = "==0.48b0" },
|
||||
{ name = "opentelemetry-instrumentation-redis", specifier = "==0.48b0" },
|
||||
{ name = "opentelemetry-instrumentation-requests", specifier = "==0.48b0" },
|
||||
{ name = "opentelemetry-instrumentation-sqlalchemy", specifier = "==0.48b0" },
|
||||
@ -3893,6 +3895,21 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/78/3d/fcde4f8f0bf9fa1ee73a12304fa538076fb83fe0a2ae966ab0f0b7da5109/opentelemetry_instrumentation_flask-0.48b0-py3-none-any.whl", hash = "sha256:26b045420b9d76e85493b1c23fcf27517972423480dc6cf78fd6924248ba5808", size = 14588, upload-time = "2024-08-28T21:26:58.504Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-instrumentation-httpx"
|
||||
version = "0.48b0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "opentelemetry-api" },
|
||||
{ name = "opentelemetry-instrumentation" },
|
||||
{ name = "opentelemetry-semantic-conventions" },
|
||||
{ name = "opentelemetry-util-http" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/d3/d9/c65d818607c16d1b7ea8d2de6111c6cecadf8d2fd38c1885a72733a7c6d3/opentelemetry_instrumentation_httpx-0.48b0.tar.gz", hash = "sha256:ee977479e10398931921fb995ac27ccdeea2e14e392cb27ef012fc549089b60a", size = 16931, upload-time = "2024-08-28T21:28:03.794Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/c2/fe/f2daa9d6d988c093b8c7b1d35df675761a8ece0b600b035dc04982746c9d/opentelemetry_instrumentation_httpx-0.48b0-py3-none-any.whl", hash = "sha256:d94f9d612c82d09fe22944d1904a30a464c19bea2ba76be656c99a28ad8be8e5", size = 13900, upload-time = "2024-08-28T21:27:01.566Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-instrumentation-redis"
|
||||
version = "0.48b0"
|
||||
|
||||
Reference in New Issue
Block a user