Merge branch 'main' into feat/webapp-verified-sso-main

2026-04-24 04:45:51 +08:00 · 2025-06-09 16:55:31 +09:00
parent 777cb06c38 d6a8af03b4
commit eabd34b2ae
247 changed files with 4124 additions and 1995 deletions
--- a/api/.env.example
+++ b/api/.env.example
@ -491,3 +491,10 @@ OTEL_METRIC_EXPORT_TIMEOUT=30000

 # Prevent Clickjacking
 ALLOW_EMBED=false
+
+# Dataset queue monitor configuration
+QUEUE_MONITOR_THRESHOLD=200
+# Multiple recipient addresses may be configured, separated by commas, e.g. test1@dify.ai,test2@dify.ai
+QUEUE_MONITOR_ALERT_EMAILS=
+# Monitor interval in minutes (default: 30)
+QUEUE_MONITOR_INTERVAL=30
--- a/api/.ruff.toml
+++ b/api/.ruff.toml
@ -43,6 +43,7 @@ select = [
    "S307", # suspicious-eval-usage, disallow use of `eval` and `ast.literal_eval`
    "S301", # suspicious-pickle-usage, disallow use of `pickle` and its wrappers.
    "S302", # suspicious-marshal-usage, disallow use of `marshal` module
+    "S311", # suspicious-non-cryptographic-random-usage
 ]

 ignore = [
--- a/api/Dockerfile
+++ b/api/Dockerfile
@ -4,7 +4,7 @@ FROM python:3.12-slim-bookworm AS base
 WORKDIR /app/api

 # Install uv
-ENV UV_VERSION=0.6.14
+ENV UV_VERSION=0.7.11

 RUN pip install --no-cache-dir uv==${UV_VERSION}

--- a/api/configs/middleware/init.py
+++ b/api/configs/middleware/init.py
@ -2,7 +2,7 @@ import os
 from typing import Any, Literal, Optional
 from urllib.parse import parse_qsl, quote_plus

-from pydantic import Field, NonNegativeInt, PositiveFloat, PositiveInt, computed_field
+from pydantic import Field, NonNegativeFloat, NonNegativeInt, PositiveFloat, PositiveInt, computed_field
 from pydantic_settings import BaseSettings

 from .cache.redis_config import RedisConfig
@ -256,6 +256,25 @@ class InternalTestConfig(BaseSettings):
    )


+class DatasetQueueMonitorConfig(BaseSettings):
+    """
+    Configuration settings for Dataset Queue Monitor
+    """
+
+    QUEUE_MONITOR_THRESHOLD: Optional[NonNegativeInt] = Field(
+        description="Threshold for dataset queue monitor",
+        default=200,
+    )
+    QUEUE_MONITOR_ALERT_EMAILS: Optional[str] = Field(
+        description="Emails for dataset queue monitor alert, separated by commas",
+        default=None,
+    )
+    QUEUE_MONITOR_INTERVAL: Optional[NonNegativeFloat] = Field(
+        description="Interval for dataset queue monitor in minutes",
+        default=30,
+    )
+
+
 class MiddlewareConfig(
    # place the configs in alphabet order
    CeleryConfig,
@ -303,5 +322,6 @@ class MiddlewareConfig(
    BaiduVectorDBConfig,
    OpenGaussConfig,
    TableStoreConfig,
+    DatasetQueueMonitorConfig,
 ):
    pass
--- a/api/controllers/inner_api/plugin/wraps.py
+++ b/api/controllers/inner_api/plugin/wraps.py
@ -32,6 +32,7 @@ def get_user(tenant_id: str, user_id: str | None) -> Account | EndUser:
                    )
                    session.add(user_model)
                    session.commit()
+                    session.refresh(user_model)
            else:
                user_model = AccountService.load_user(user_id)
                if not user_model:
--- a/api/controllers/service_api/dataset/dataset.py
+++ b/api/controllers/service_api/dataset/dataset.py
@ -369,6 +369,7 @@ class DatasetTagsApi(DatasetApiResource):
        )
        parser.add_argument("tag_id", nullable=False, required=True, help="Id of a tag.", type=str)
        args = parser.parse_args()
+        args["type"] = "knowledge"
        tag = TagService.update_tags(args, args.get("tag_id"))

        binding_count = TagService.get_tag_binding_count(args.get("tag_id"))
--- a/api/controllers/service_api/dataset/document.py
+++ b/api/controllers/service_api/dataset/document.py
@ -175,8 +175,11 @@ class DocumentAddByFileApi(DatasetApiResource):

        if not dataset:
            raise ValueError("Dataset does not exist.")
-        if not dataset.indexing_technique and not args.get("indexing_technique"):
+
+        indexing_technique = args.get("indexing_technique") or dataset.indexing_technique
+        if not indexing_technique:
            raise ValueError("indexing_technique is required.")
+        args["indexing_technique"] = indexing_technique

        # save file info
        file = request.files["file"]
@ -206,12 +209,16 @@ class DocumentAddByFileApi(DatasetApiResource):
        knowledge_config = KnowledgeConfig(**args)
        DocumentService.document_create_args_validate(knowledge_config)

+        dataset_process_rule = dataset.latest_process_rule if "process_rule" not in args else None
+        if not knowledge_config.original_document_id and not dataset_process_rule and not knowledge_config.process_rule:
+            raise ValueError("process_rule is required.")
+
        try:
            documents, batch = DocumentService.save_document_with_dataset_id(
                dataset=dataset,
                knowledge_config=knowledge_config,
                account=dataset.created_by_account,
-                dataset_process_rule=dataset.latest_process_rule if "process_rule" not in args else None,
+                dataset_process_rule=dataset_process_rule,
                created_from="api",
            )
        except ProviderTokenNotInitError as ex:
--- a/api/core/entities/model_entities.py
+++ b/api/core/entities/model_entities.py
@ -55,6 +55,25 @@ class ProviderModelWithStatusEntity(ProviderModel):
    status: ModelStatus
    load_balancing_enabled: bool = False

+    def raise_for_status(self) -> None:
+        """
+        Check model status and raise ValueError if not active.
+
+        :raises ValueError: When model status is not active, with a descriptive message
+        """
+        if self.status == ModelStatus.ACTIVE:
+            return
+
+        error_messages = {
+            ModelStatus.NO_CONFIGURE: "Model is not configured",
+            ModelStatus.QUOTA_EXCEEDED: "Model quota has been exceeded",
+            ModelStatus.NO_PERMISSION: "No permission to use this model",
+            ModelStatus.DISABLED: "Model is disabled",
+        }
+
+        if self.status in error_messages:
+            raise ValueError(error_messages[self.status])
+

 class ModelWithProviderEntity(ProviderModelWithStatusEntity):
    """
--- a/api/core/extension/extensible.py
+++ b/api/core/extension/extensible.py
@ -41,45 +41,53 @@ class Extensible:
        extensions = []
        position_map: dict[str, int] = {}

-        # get the path of the current class
-        current_path = os.path.abspath(cls.__module__.replace(".", os.path.sep) + ".py")
-        current_dir_path = os.path.dirname(current_path)
+        # Get the package name from the module path
+        package_name = ".".join(cls.__module__.split(".")[:-1])

-        # traverse subdirectories
-        for subdir_name in os.listdir(current_dir_path):
-            if subdir_name.startswith("__"):
-                continue
+        try:
+            # Get package directory path
+            package_spec = importlib.util.find_spec(package_name)
+            if not package_spec or not package_spec.origin:
+                raise ImportError(f"Could not find package {package_name}")

-            subdir_path = os.path.join(current_dir_path, subdir_name)
-            extension_name = subdir_name
-            if os.path.isdir(subdir_path):
+            package_dir = os.path.dirname(package_spec.origin)
+
+            # Traverse subdirectories
+            for subdir_name in os.listdir(package_dir):
+                if subdir_name.startswith("__"):
+                    continue
+
+                subdir_path = os.path.join(package_dir, subdir_name)
+                if not os.path.isdir(subdir_path):
+                    continue
+
+                extension_name = subdir_name
                file_names = os.listdir(subdir_path)

-                # is builtin extension, builtin extension
-                # in the front-end page and business logic, there are special treatments.
+                # Check for extension module file
+                if (extension_name + ".py") not in file_names:
+                    logging.warning(f"Missing {extension_name}.py file in {subdir_path}, Skip.")
+                    continue
+
+                # Check for builtin flag and position
                builtin = False
-                # default position is 0 can not be None for sort_to_dict_by_position_map
                position = 0
                if "__builtin__" in file_names:
                    builtin = True
-
                    builtin_file_path = os.path.join(subdir_path, "__builtin__")
                    if os.path.exists(builtin_file_path):
                        position = int(Path(builtin_file_path).read_text(encoding="utf-8").strip())
                    position_map[extension_name] = position

-                if (extension_name + ".py") not in file_names:
-                    logging.warning(f"Missing {extension_name}.py file in {subdir_path}, Skip.")
-                    continue
-
-                # Dynamic loading {subdir_name}.py file and find the subclass of Extensible
-                py_path = os.path.join(subdir_path, extension_name + ".py")
-                spec = importlib.util.spec_from_file_location(extension_name, py_path)
+                # Import the extension module
+                module_name = f"{package_name}.{extension_name}.{extension_name}"
+                spec = importlib.util.find_spec(module_name)
                if not spec or not spec.loader:
-                    raise Exception(f"Failed to load module {extension_name} from {py_path}")
+                    raise ImportError(f"Failed to load module {module_name}")
                mod = importlib.util.module_from_spec(spec)
                spec.loader.exec_module(mod)

+                # Find extension class
                extension_class = None
                for name, obj in vars(mod).items():
                    if isinstance(obj, type) and issubclass(obj, cls) and obj != cls:
@ -87,21 +95,21 @@ class Extensible:
                        break

                if not extension_class:
-                    logging.warning(f"Missing subclass of {cls.__name__} in {py_path}, Skip.")
+                    logging.warning(f"Missing subclass of {cls.__name__} in {module_name}, Skip.")
                    continue

+                # Load schema if not builtin
                json_data: dict[str, Any] = {}
                if not builtin:
-                    if "schema.json" not in file_names:
+                    json_path = os.path.join(subdir_path, "schema.json")
+                    if not os.path.exists(json_path):
                        logging.warning(f"Missing schema.json file in {subdir_path}, Skip.")
                        continue

-                    json_path = os.path.join(subdir_path, "schema.json")
-                    json_data = {}
-                    if os.path.exists(json_path):
-                        with open(json_path, encoding="utf-8") as f:
-                            json_data = json.load(f)
+                    with open(json_path, encoding="utf-8") as f:
+                        json_data = json.load(f)

+                # Create extension
                extensions.append(
                    ModuleExtension(
                        extension_class=extension_class,
@ -113,6 +121,11 @@ class Extensible:
                    )
                )

+        except Exception as e:
+            logging.exception("Error scanning extensions")
+            raise
+
+        # Sort extensions by position
        sorted_extensions = sort_to_dict_by_position_map(
            position_map=position_map, data=extensions, name_func=lambda x: x.name
        )
--- a/api/core/helper/moderation.py
+++ b/api/core/helper/moderation.py
@ -1,5 +1,5 @@
 import logging
-import random
+import secrets
 from typing import cast

 from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity
@ -38,7 +38,7 @@ def check_moderation(tenant_id: str, model_config: ModelConfigWithCredentialsEnt
            if len(text_chunks) == 0:
                return True

-            text_chunk = random.choice(text_chunks)
+            text_chunk = secrets.choice(text_chunks)

            try:
                model_provider_factory = ModelProviderFactory(tenant_id)
--- a/api/core/model_runtime/entities/model_entities.py
+++ b/api/core/model_runtime/entities/model_entities.py
@ -160,6 +160,10 @@ class ProviderModel(BaseModel):
    deprecated: bool = False
    model_config = ConfigDict(protected_namespaces=())

+    @property
+    def support_structure_output(self) -> bool:
+        return self.features is not None and ModelFeature.STRUCTURED_OUTPUT in self.features
+

 class ParameterRule(BaseModel):
    """
--- a/api/core/ops/entities/config_entity.py
+++ b/api/core/ops/entities/config_entity.py
@ -98,6 +98,7 @@ class WeaveConfig(BaseTracingConfig):
    entity: str | None = None
    project: str
    endpoint: str = "https://trace.wandb.ai"
+    host: str | None = None

    @field_validator("endpoint")
    @classmethod
@ -109,6 +110,14 @@ class WeaveConfig(BaseTracingConfig):

        return v

+    @field_validator("host")
+    @classmethod
+    def validate_host(cls, v, info: ValidationInfo):
+        if v is not None and v != "":
+            if not v.startswith(("https://", "http://")):
+                raise ValueError("host must start with https:// or http://")
+        return v
+

 OPS_FILE_PATH = "ops_trace/"
 OPS_TRACE_FAILED_KEY = "FAILED_OPS_TRACE"
--- a/api/core/ops/ops_trace_manager.py
+++ b/api/core/ops/ops_trace_manager.py
@ -81,7 +81,7 @@ class OpsTraceProviderConfigMap(dict[str, dict[str, Any]]):
                return {
                    "config_class": WeaveConfig,
                    "secret_keys": ["api_key"],
-                    "other_keys": ["project", "entity", "endpoint"],
+                    "other_keys": ["project", "entity", "endpoint", "host"],
                    "trace_instance": WeaveDataTrace,
                }

--- a/api/core/ops/weave_trace/weave_trace.py
+++ b/api/core/ops/weave_trace/weave_trace.py
@ -40,9 +40,14 @@ class WeaveDataTrace(BaseTraceInstance):
        self.weave_api_key = weave_config.api_key
        self.project_name = weave_config.project
        self.entity = weave_config.entity
+        self.host = weave_config.host
+
+        # Login with API key first, including host if provided
+        if self.host:
+            login_status = wandb.login(key=self.weave_api_key, verify=True, relogin=True, host=self.host)
+        else:
+            login_status = wandb.login(key=self.weave_api_key, verify=True, relogin=True)

-        # Login with API key first
-        login_status = wandb.login(key=self.weave_api_key, verify=True, relogin=True)
        if not login_status:
            logger.error("Failed to login to Weights & Biases with the provided API key")
            raise ValueError("Weave login failed")
@ -386,7 +391,11 @@ class WeaveDataTrace(BaseTraceInstance):

    def api_check(self):
        try:
-            login_status = wandb.login(key=self.weave_api_key, verify=True, relogin=True)
+            if self.host:
+                login_status = wandb.login(key=self.weave_api_key, verify=True, relogin=True, host=self.host)
+            else:
+                login_status = wandb.login(key=self.weave_api_key, verify=True, relogin=True)
+
            if not login_status:
                raise ValueError("Weave login failed")
            else:
--- a/api/core/provider_manager.py
+++ b/api/core/provider_manager.py
@ -3,7 +3,9 @@ from collections import defaultdict
 from json import JSONDecodeError
 from typing import Any, Optional, cast

+from sqlalchemy import select
 from sqlalchemy.exc import IntegrityError
+from sqlalchemy.orm import Session

 from configs import dify_config
 from core.entities.model_entities import DefaultModelEntity, DefaultModelProviderEntity
@ -393,19 +395,13 @@ class ProviderManager:

    @staticmethod
    def _get_all_providers(tenant_id: str) -> dict[str, list[Provider]]:
-        """
-        Get all provider records of the workspace.
-
-        :param tenant_id: workspace id
-        :return:
-        """
-        providers = db.session.query(Provider).filter(Provider.tenant_id == tenant_id, Provider.is_valid == True).all()
-
        provider_name_to_provider_records_dict = defaultdict(list)
-        for provider in providers:
-            # TODO: Use provider name with prefix after the data migration
-            provider_name_to_provider_records_dict[str(ModelProviderID(provider.provider_name))].append(provider)
-
+        with Session(db.engine, expire_on_commit=False) as session:
+            stmt = select(Provider).where(Provider.tenant_id == tenant_id, Provider.is_valid == True)
+            providers = session.scalars(stmt)
+            for provider in providers:
+                # Use provider name with prefix after the data migration
+                provider_name_to_provider_records_dict[str(ModelProviderID(provider.provider_name))].append(provider)
        return provider_name_to_provider_records_dict

    @staticmethod
@ -416,17 +412,12 @@ class ProviderManager:
        :param tenant_id: workspace id
        :return:
        """
-        # Get all provider model records of the workspace
-        provider_models = (
-            db.session.query(ProviderModel)
-            .filter(ProviderModel.tenant_id == tenant_id, ProviderModel.is_valid == True)
-            .all()
-        )
-
        provider_name_to_provider_model_records_dict = defaultdict(list)
-        for provider_model in provider_models:
-            provider_name_to_provider_model_records_dict[provider_model.provider_name].append(provider_model)
-
+        with Session(db.engine, expire_on_commit=False) as session:
+            stmt = select(ProviderModel).where(ProviderModel.tenant_id == tenant_id, ProviderModel.is_valid == True)
+            provider_models = session.scalars(stmt)
+            for provider_model in provider_models:
+                provider_name_to_provider_model_records_dict[provider_model.provider_name].append(provider_model)
        return provider_name_to_provider_model_records_dict

    @staticmethod
@ -437,17 +428,14 @@ class ProviderManager:
        :param tenant_id: workspace id
        :return:
        """
-        preferred_provider_types = (
-            db.session.query(TenantPreferredModelProvider)
-            .filter(TenantPreferredModelProvider.tenant_id == tenant_id)
-            .all()
-        )
-
-        provider_name_to_preferred_provider_type_records_dict = {
-            preferred_provider_type.provider_name: preferred_provider_type
-            for preferred_provider_type in preferred_provider_types
-        }
-
+        provider_name_to_preferred_provider_type_records_dict = {}
+        with Session(db.engine, expire_on_commit=False) as session:
+            stmt = select(TenantPreferredModelProvider).where(TenantPreferredModelProvider.tenant_id == tenant_id)
+            preferred_provider_types = session.scalars(stmt)
+            provider_name_to_preferred_provider_type_records_dict = {
+                preferred_provider_type.provider_name: preferred_provider_type
+                for preferred_provider_type in preferred_provider_types
+            }
        return provider_name_to_preferred_provider_type_records_dict

    @staticmethod
@ -458,18 +446,14 @@ class ProviderManager:
        :param tenant_id: workspace id
        :return:
        """
-        provider_model_settings = (
-            db.session.query(ProviderModelSetting).filter(ProviderModelSetting.tenant_id == tenant_id).all()
-        )
-
        provider_name_to_provider_model_settings_dict = defaultdict(list)
-        for provider_model_setting in provider_model_settings:
-            (
+        with Session(db.engine, expire_on_commit=False) as session:
+            stmt = select(ProviderModelSetting).where(ProviderModelSetting.tenant_id == tenant_id)
+            provider_model_settings = session.scalars(stmt)
+            for provider_model_setting in provider_model_settings:
                provider_name_to_provider_model_settings_dict[provider_model_setting.provider_name].append(
                    provider_model_setting
                )
-            )
-
        return provider_name_to_provider_model_settings_dict

    @staticmethod
@ -492,15 +476,14 @@ class ProviderManager:
        if not model_load_balancing_enabled:
            return {}

-        provider_load_balancing_configs = (
-            db.session.query(LoadBalancingModelConfig).filter(LoadBalancingModelConfig.tenant_id == tenant_id).all()
-        )
-
        provider_name_to_provider_load_balancing_model_configs_dict = defaultdict(list)
-        for provider_load_balancing_config in provider_load_balancing_configs:
-            provider_name_to_provider_load_balancing_model_configs_dict[
-                provider_load_balancing_config.provider_name
-            ].append(provider_load_balancing_config)
+        with Session(db.engine, expire_on_commit=False) as session:
+            stmt = select(LoadBalancingModelConfig).where(LoadBalancingModelConfig.tenant_id == tenant_id)
+            provider_load_balancing_configs = session.scalars(stmt)
+            for provider_load_balancing_config in provider_load_balancing_configs:
+                provider_name_to_provider_load_balancing_model_configs_dict[
+                    provider_load_balancing_config.provider_name
+                ].append(provider_load_balancing_config)

        return provider_name_to_provider_load_balancing_model_configs_dict

@ -626,10 +609,9 @@ class ProviderManager:
            if not cached_provider_credentials:
                try:
                    # fix origin data
-                    if (
-                        custom_provider_record.encrypted_config
-                        and not custom_provider_record.encrypted_config.startswith("{")
-                    ):
+                    if custom_provider_record.encrypted_config is None:
+                        raise ValueError("No credentials found")
+                    if not custom_provider_record.encrypted_config.startswith("{"):
                        provider_credentials = {"openai_api_key": custom_provider_record.encrypted_config}
                    else:
                        provider_credentials = json.loads(custom_provider_record.encrypted_config)
@ -733,7 +715,7 @@ class ProviderManager:
            return SystemConfiguration(enabled=False)

        # Convert provider_records to dict
-        quota_type_to_provider_records_dict = {}
+        quota_type_to_provider_records_dict: dict[ProviderQuotaType, Provider] = {}
        for provider_record in provider_records:
            if provider_record.provider_type != ProviderType.SYSTEM.value:
                continue
@ -758,6 +740,11 @@ class ProviderManager:
            else:
                provider_record = quota_type_to_provider_records_dict[provider_quota.quota_type]

+                if provider_record.quota_used is None:
+                    raise ValueError("quota_used is None")
+                if provider_record.quota_limit is None:
+                    raise ValueError("quota_limit is None")
+
                quota_configuration = QuotaConfiguration(
                    quota_type=provider_quota.quota_type,
                    quota_unit=provider_hosting_configuration.quota_unit or QuotaUnit.TOKENS,
@ -791,10 +778,9 @@ class ProviderManager:
                cached_provider_credentials = provider_credentials_cache.get()

                if not cached_provider_credentials:
-                    try:
-                        provider_credentials: dict[str, Any] = json.loads(provider_record.encrypted_config)
-                    except JSONDecodeError:
-                        provider_credentials = {}
+                    provider_credentials: dict[str, Any] = {}
+                    if provider_records and provider_records[0].encrypted_config:
+                        provider_credentials = json.loads(provider_records[0].encrypted_config)

                    # Get provider credential secret variables
                    provider_credential_secret_variables = self._extract_secret_variables(
--- a/api/core/rag/datasource/keyword/jieba/stopwords.py
+++ b/api/core/rag/datasource/keyword/jieba/stopwords.py
@ -720,7 +720,7 @@ STOPWORDS = {
    "〉",
    "〈",
    "…",
-    "　",
+    " ",
    "0",
    "1",
    "2",
@ -731,16 +731,6 @@ STOPWORDS = {
    "7",
    "8",
    "9",
-    "０",
-    "１",
-    "２",
-    "３",
-    "４",
-    "５",
-    "６",
-    "７",
-    "８",
-    "９",
    "二",
    "三",
    "四",
--- a/api/core/rag/datasource/vdb/opensearch/opensearch_vector.py
+++ b/api/core/rag/datasource/vdb/opensearch/opensearch_vector.py
@ -184,7 +184,16 @@ class OpenSearchVector(BaseVector):
        }
        document_ids_filter = kwargs.get("document_ids_filter")
        if document_ids_filter:
-            query["query"] = {"terms": {"metadata.document_id": document_ids_filter}}
+            query["query"] = {
+                "script_score": {
+                    "query": {"bool": {"filter": [{"terms": {Field.DOCUMENT_ID.value: document_ids_filter}}]}},
+                    "script": {
+                        "source": "knn_score",
+                        "lang": "knn",
+                        "params": {"field": Field.VECTOR.value, "query_value": query_vector, "space_type": "l2"},
+                    },
+                }
+            }

        try:
            response = self._client.search(index=self._collection_name.lower(), body=query)
@ -209,10 +218,10 @@ class OpenSearchVector(BaseVector):
        return docs

    def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
-        full_text_query = {"query": {"match": {Field.CONTENT_KEY.value: query}}}
+        full_text_query = {"query": {"bool": {"must": [{"match": {Field.CONTENT_KEY.value: query}}]}}}
        document_ids_filter = kwargs.get("document_ids_filter")
        if document_ids_filter:
-            full_text_query["query"]["terms"] = {"metadata.document_id": document_ids_filter}
+            full_text_query["query"]["bool"]["filter"] = [{"terms": {"metadata.document_id": document_ids_filter}}]

        response = self._client.search(index=self._collection_name.lower(), body=full_text_query)

@ -255,7 +264,8 @@ class OpenSearchVector(BaseVector):
                            Field.METADATA_KEY.value: {
                                "type": "object",
                                "properties": {
-                                    "doc_id": {"type": "keyword"}  # Map doc_id to keyword type
+                                    "doc_id": {"type": "keyword"},  # Map doc_id to keyword type
+                                    "document_id": {"type": "keyword"},
                                },
                            },
                        }
--- a/api/core/rag/datasource/vdb/oracle/oraclevector.py
+++ b/api/core/rag/datasource/vdb/oracle/oraclevector.py
@ -261,7 +261,7 @@ class OracleVector(BaseVector):
                words = pseg.cut(query)
                current_entity = ""
                for word, pos in words:
-                    if pos in {"nr", "Ng", "eng", "nz", "n", "ORG", "v"}:  # nr: 人名, ns: 地名, nt: 机构名
+                    if pos in {"nr", "Ng", "eng", "nz", "n", "ORG", "v"}:  # nr: 人名，ns: 地名，nt: 机构名
                        current_entity += word
                    else:
                        if current_entity:
@ -303,7 +303,6 @@ class OracleVector(BaseVector):
            return docs
        else:
            return [Document(page_content="", metadata={})]
-        return []

    def delete(self) -> None:
        with self._get_connection() as conn:
--- a/api/core/tools/builtin_tool/_position.yaml
+++ b/api/core/tools/builtin_tool/_position.yaml
@ -1,3 +1,4 @@
+- audio
 - code
 - time
- qrcode
+- webscraper
--- a/api/core/tools/utils/dataset_retriever/dataset_multi_retriever_tool.py
+++ b/api/core/tools/utils/dataset_retriever/dataset_multi_retriever_tool.py
@ -153,8 +153,6 @@ class DatasetMultiRetrieverTool(DatasetRetrieverBaseTool):
            return str("\n".join(document_context_list))
        return ""

-        raise RuntimeError("not segments found")
-
    def _retriever(
        self,
        flask_app: Flask,
--- a/api/core/workflow/nodes/document_extractor/node.py
+++ b/api/core/workflow/nodes/document_extractor/node.py
@ -397,19 +397,44 @@ def _extract_text_from_csv(file_content: bytes) -> str:
        if not rows:
            return ""

-        # Create Markdown table
-        markdown_table = "| " + " | ".join(rows[0]) + " |\n"
-        markdown_table += "| " + " | ".join(["---"] * len(rows[0])) + " |\n"
-        for row in rows[1:]:
-            markdown_table += "| " + " | ".join(row) + " |\n"
+        # Combine multi-line text in the header row
+        header_row = [cell.replace("\n", " ").replace("\r", "") for cell in rows[0]]

-        return markdown_table.strip()
+        # Create Markdown table
+        markdown_table = "| " + " | ".join(header_row) + " |\n"
+        markdown_table += "| " + " | ".join(["-" * len(col) for col in rows[0]]) + " |\n"
+
+        # Process each data row and combine multi-line text in each cell
+        for row in rows[1:]:
+            processed_row = [cell.replace("\n", " ").replace("\r", "") for cell in row]
+            markdown_table += "| " + " | ".join(processed_row) + " |\n"
+
+        return markdown_table
    except Exception as e:
        raise TextExtractionError(f"Failed to extract text from CSV: {str(e)}") from e


 def _extract_text_from_excel(file_content: bytes) -> str:
    """Extract text from an Excel file using pandas."""
+
+    def _construct_markdown_table(df: pd.DataFrame) -> str:
+        """Manually construct a Markdown table from a DataFrame."""
+        # Construct the header row
+        header_row = "| " + " | ".join(df.columns) + " |"
+
+        # Construct the separator row
+        separator_row = "| " + " | ".join(["-" * len(col) for col in df.columns]) + " |"
+
+        # Construct the data rows
+        data_rows = []
+        for _, row in df.iterrows():
+            data_row = "| " + " | ".join(map(str, row)) + " |"
+            data_rows.append(data_row)
+
+        # Combine all rows into a single string
+        markdown_table = "\n".join([header_row, separator_row] + data_rows)
+        return markdown_table
+
    try:
        excel_file = pd.ExcelFile(io.BytesIO(file_content))
        markdown_table = ""
@ -417,8 +442,15 @@ def _extract_text_from_excel(file_content: bytes) -> str:
            try:
                df = excel_file.parse(sheet_name=sheet_name)
                df.dropna(how="all", inplace=True)
-                # Create Markdown table two times to separate tables with a newline
-                markdown_table += df.to_markdown(index=False, floatfmt="") + "\n\n"
+
+                # Combine multi-line text in each cell into a single line
+                df = df.applymap(lambda x: " ".join(str(x).splitlines()) if isinstance(x, str) else x)  # type: ignore
+
+                # Combine multi-line text in column names into a single line
+                df.columns = pd.Index([" ".join(col.splitlines()) for col in df.columns])
+
+                # Manually construct the Markdown table
+                markdown_table += _construct_markdown_table(df) + "\n\n"
            except Exception as e:
                continue
        return markdown_table
--- a/api/core/workflow/nodes/http_request/executor.py
+++ b/api/core/workflow/nodes/http_request/executor.py
@ -1,8 +1,9 @@
 import base64
 import json
+import secrets
+import string
 from collections.abc import Mapping
 from copy import deepcopy
-from random import randint
 from typing import Any, Literal
 from urllib.parse import urlencode, urlparse

@ -434,4 +435,4 @@ def _generate_random_string(n: int) -> str:
        >>> _generate_random_string(5)
        'abcde'
    """
-    return "".join([chr(randint(97, 122)) for _ in range(n)])
+    return "".join(secrets.choice(string.ascii_lowercase) for _ in range(n))
--- a/api/core/workflow/nodes/llm/entities.py
+++ b/api/core/workflow/nodes/llm/entities.py
@ -66,7 +66,8 @@ class LLMNodeData(BaseNodeData):
    context: ContextConfig
    vision: VisionConfig = Field(default_factory=VisionConfig)
    structured_output: dict | None = None
-    structured_output_enabled: bool = False
+    # We used 'structured_output_enabled' in the past, but it's not a good name.
+    structured_output_switch_on: bool = Field(False, alias="structured_output_enabled")

    @field_validator("prompt_config", mode="before")
    @classmethod
@ -74,3 +75,7 @@ class LLMNodeData(BaseNodeData):
        if v is None:
            return PromptConfig()
        return v
+
+    @property
+    def structured_output_enabled(self) -> bool:
+        return self.structured_output_switch_on and self.structured_output is not None
--- a/api/core/workflow/nodes/llm/node.py
+++ b/api/core/workflow/nodes/llm/node.py
@ -12,9 +12,7 @@ from sqlalchemy.orm import Session

 from configs import dify_config
 from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity
-from core.entities.model_entities import ModelStatus
 from core.entities.provider_entities import QuotaUnit
-from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError
 from core.file import FileType, file_manager
 from core.helper.code_executor import CodeExecutor, CodeLanguage
 from core.memory.token_buffer_memory import TokenBufferMemory
@ -74,7 +72,6 @@ from core.workflow.nodes.event import (
 from core.workflow.utils.structured_output.entities import (
    ResponseFormat,
    SpecialModelType,
-    SupportStructuredOutputStatus,
 )
 from core.workflow.utils.structured_output.prompt import STRUCTURED_OUTPUT_PROMPT
 from core.workflow.utils.variable_template_parser import VariableTemplateParser
@ -277,7 +274,7 @@ class LLMNode(BaseNode[LLMNodeData]):
                    llm_usage=usage,
                )
            )
-        except LLMNodeError as e:
+        except ValueError as e:
            yield RunCompletedEvent(
                run_result=NodeRunResult(
                    status=WorkflowNodeExecutionStatus.FAILED,
@ -527,65 +524,53 @@ class LLMNode(BaseNode[LLMNodeData]):
    def _fetch_model_config(
        self, node_data_model: ModelConfig
    ) -> tuple[ModelInstance, ModelConfigWithCredentialsEntity]:
-        model_name = node_data_model.name
-        provider_name = node_data_model.provider
+        if not node_data_model.mode:
+            raise LLMModeRequiredError("LLM mode is required.")

-        model_manager = ModelManager()
-        model_instance = model_manager.get_model_instance(
-            tenant_id=self.tenant_id, model_type=ModelType.LLM, provider=provider_name, model=model_name
+        model = ModelManager().get_model_instance(
+            tenant_id=self.tenant_id,
+            model_type=ModelType.LLM,
+            provider=node_data_model.provider,
+            model=node_data_model.name,
        )

-        provider_model_bundle = model_instance.provider_model_bundle
-        model_type_instance = model_instance.model_type_instance
-        model_type_instance = cast(LargeLanguageModel, model_type_instance)
-
-        model_credentials = model_instance.credentials
+        model.model_type_instance = cast(LargeLanguageModel, model.model_type_instance)

        # check model
-        provider_model = provider_model_bundle.configuration.get_provider_model(
-            model=model_name, model_type=ModelType.LLM
+        provider_model = model.provider_model_bundle.configuration.get_provider_model(
+            model=node_data_model.name, model_type=ModelType.LLM
        )

        if provider_model is None:
-            raise ModelNotExistError(f"Model {model_name} not exist.")
-
-        if provider_model.status == ModelStatus.NO_CONFIGURE:
-            raise ProviderTokenNotInitError(f"Model {model_name} credentials is not initialized.")
-        elif provider_model.status == ModelStatus.NO_PERMISSION:
-            raise ModelCurrentlyNotSupportError(f"Dify Hosted OpenAI {model_name} currently not support.")
-        elif provider_model.status == ModelStatus.QUOTA_EXCEEDED:
-            raise QuotaExceededError(f"Model provider {provider_name} quota exceeded.")
+            raise ModelNotExistError(f"Model {node_data_model.name} not exist.")
+        provider_model.raise_for_status()

        # model config
-        completion_params = node_data_model.completion_params
-        stop = []
-        if "stop" in completion_params:
-            stop = completion_params["stop"]
-            del completion_params["stop"]
-
-        # get model mode
-        model_mode = node_data_model.mode
-        if not model_mode:
-            raise LLMModeRequiredError("LLM mode is required.")
-
-        model_schema = model_type_instance.get_model_schema(model_name, model_credentials)
+        stop: list[str] = []
+        if "stop" in node_data_model.completion_params:
+            stop = node_data_model.completion_params.pop("stop")

+        model_schema = model.model_type_instance.get_model_schema(node_data_model.name, model.credentials)
        if not model_schema:
-            raise ModelNotExistError(f"Model {model_name} not exist.")
-        support_structured_output = self._check_model_structured_output_support()
-        if support_structured_output == SupportStructuredOutputStatus.SUPPORTED:
-            completion_params = self._handle_native_json_schema(completion_params, model_schema.parameter_rules)
-        elif support_structured_output == SupportStructuredOutputStatus.UNSUPPORTED:
-            # Set appropriate response format based on model capabilities
-            self._set_response_format(completion_params, model_schema.parameter_rules)
-        return model_instance, ModelConfigWithCredentialsEntity(
-            provider=provider_name,
-            model=model_name,
+            raise ModelNotExistError(f"Model {node_data_model.name} not exist.")
+
+        if self.node_data.structured_output_enabled:
+            if model_schema.support_structure_output:
+                node_data_model.completion_params = self._handle_native_json_schema(
+                    node_data_model.completion_params, model_schema.parameter_rules
+                )
+            else:
+                # Set appropriate response format based on model capabilities
+                self._set_response_format(node_data_model.completion_params, model_schema.parameter_rules)
+
+        return model, ModelConfigWithCredentialsEntity(
+            provider=node_data_model.provider,
+            model=node_data_model.name,
            model_schema=model_schema,
-            mode=model_mode,
-            provider_model_bundle=provider_model_bundle,
-            credentials=model_credentials,
-            parameters=completion_params,
+            mode=node_data_model.mode,
+            provider_model_bundle=model.provider_model_bundle,
+            credentials=model.credentials,
+            parameters=node_data_model.completion_params,
            stop=stop,
        )

@ -786,13 +771,25 @@ class LLMNode(BaseNode[LLMNodeData]):
                "No prompt found in the LLM configuration. "
                "Please ensure a prompt is properly configured before proceeding."
            )
-        support_structured_output = self._check_model_structured_output_support()
-        if support_structured_output == SupportStructuredOutputStatus.UNSUPPORTED:
-            filtered_prompt_messages = self._handle_prompt_based_schema(
-                prompt_messages=filtered_prompt_messages,
-            )
-        stop = model_config.stop
-        return filtered_prompt_messages, stop
+
+        model = ModelManager().get_model_instance(
+            tenant_id=self.tenant_id,
+            model_type=ModelType.LLM,
+            provider=self.node_data.model.provider,
+            model=self.node_data.model.name,
+        )
+        model_schema = model.model_type_instance.get_model_schema(
+            model=self.node_data.model.name,
+            credentials=model.credentials,
+        )
+        if not model_schema:
+            raise ModelNotExistError(f"Model {self.node_data.model.name} not exist.")
+        if self.node_data.structured_output_enabled:
+            if not model_schema.support_structure_output:
+                filtered_prompt_messages = self._handle_prompt_based_schema(
+                    prompt_messages=filtered_prompt_messages,
+                )
+        return filtered_prompt_messages, model_config.stop

    def _parse_structured_output(self, result_text: str) -> dict[str, Any]:
        structured_output: dict[str, Any] = {}
@ -903,7 +900,7 @@ class LLMNode(BaseNode[LLMNodeData]):
            variable_mapping["#context#"] = node_data.context.variable_selector

        if node_data.vision.enabled:
-            variable_mapping["#files#"] = ["sys", SystemVariableKey.FILES.value]
+            variable_mapping["#files#"] = node_data.vision.configs.variable_selector

        if node_data.memory:
            variable_mapping["#sys.query#"] = ["sys", SystemVariableKey.QUERY.value]
@ -1185,32 +1182,6 @@ class LLMNode(BaseNode[LLMNodeData]):
        except json.JSONDecodeError:
            raise LLMNodeError("structured_output_schema is not valid JSON format")

-    def _check_model_structured_output_support(self) -> SupportStructuredOutputStatus:
-        """
-        Check if the current model supports structured output.
-
-        Returns:
-            SupportStructuredOutput: The support status of structured output
-        """
-        # Early return if structured output is disabled
-        if (
-            not isinstance(self.node_data, LLMNodeData)
-            or not self.node_data.structured_output_enabled
-            or not self.node_data.structured_output
-        ):
-            return SupportStructuredOutputStatus.DISABLED
-        # Get model schema and check if it exists
-        model_schema = self._fetch_model_schema(self.node_data.model.provider)
-        if not model_schema:
-            return SupportStructuredOutputStatus.DISABLED
-
-        # Check if model supports structured output feature
-        return (
-            SupportStructuredOutputStatus.SUPPORTED
-            if bool(model_schema.features and ModelFeature.STRUCTURED_OUTPUT in model_schema.features)
-            else SupportStructuredOutputStatus.UNSUPPORTED
-        )
-
    def _save_multimodal_output_and_convert_result_to_markdown(
        self,
        contents: str | list[PromptMessageContentUnionTypes] | None,
--- a/api/core/workflow/utils/structured_output/entities.py
+++ b/api/core/workflow/utils/structured_output/entities.py
@ -14,11 +14,3 @@ class SpecialModelType(StrEnum):

    GEMINI = "gemini"
    OLLAMA = "ollama"
-
-
-class SupportStructuredOutputStatus(StrEnum):
-    """Constants for structured output support status"""
-
-    SUPPORTED = "supported"
-    UNSUPPORTED = "unsupported"
-    DISABLED = "disabled"
--- a/api/extensions/ext_celery.py
+++ b/api/extensions/ext_celery.py
@ -70,6 +70,7 @@ def init_app(app: DifyApp) -> Celery:
        "schedule.update_tidb_serverless_status_task",
        "schedule.clean_messages",
        "schedule.mail_clean_document_notify_task",
+        "schedule.queue_monitor_task",
    ]
    day = dify_config.CELERY_BEAT_SCHEDULER_TIME
    beat_schedule = {
@ -98,6 +99,12 @@ def init_app(app: DifyApp) -> Celery:
            "task": "schedule.mail_clean_document_notify_task.mail_clean_document_notify_task",
            "schedule": crontab(minute="0", hour="10", day_of_week="1"),
        },
+        "datasets-queue-monitor": {
+            "task": "schedule.queue_monitor_task.queue_monitor_task",
+            "schedule": timedelta(
+                minutes=dify_config.QUEUE_MONITOR_INTERVAL if dify_config.QUEUE_MONITOR_INTERVAL else 30
+            ),
+        },
    }
    celery_app.conf.update(beat_schedule=beat_schedule, imports=imports)

--- a/api/libs/helper.py
+++ b/api/libs/helper.py
@ -1,7 +1,7 @@
 import json
 import logging
-import random
 import re
+import secrets
 import string
 import subprocess
 import time
@ -18,6 +18,7 @@ from flask_restful import fields
 from configs import dify_config
 from core.app.features.rate_limiting.rate_limit import RateLimitGenerator
 from core.file import helpers as file_helpers
+from core.model_runtime.utils.encoders import jsonable_encoder
 from extensions.ext_redis import redis_client

 if TYPE_CHECKING:
@ -175,7 +176,7 @@ def generate_string(n):
    letters_digits = string.ascii_letters + string.digits
    result = ""
    for i in range(n):
-        result += random.choice(letters_digits)
+        result += secrets.choice(letters_digits)

    return result

@ -196,7 +197,7 @@ def generate_text_hash(text: str) -> str:

 def compact_generate_response(response: Union[Mapping, Generator, RateLimitGenerator]) -> Response:
    if isinstance(response, dict):
-        return Response(response=json.dumps(response), status=200, mimetype="application/json")
+        return Response(response=json.dumps(jsonable_encoder(response)), status=200, mimetype="application/json")
    else:

        def generate() -> Generator:
--- a/api/migrations/versions/2025_06_06_1424-4474872b0ee6_workflow_draft_varaibles_add_node_execution_id.py
+++ b/api/migrations/versions/2025_06_06_1424-4474872b0ee6_workflow_draft_varaibles_add_node_execution_id.py
@ -0,0 +1,60 @@
+"""`workflow_draft_variables` add `node_execution_id` column, add an index for `workflow_node_executions`.
+
+Revision ID: 4474872b0ee6
+Revises: 2adcbe1f5dfb
+Create Date: 2025-06-06 14:24:44.213018
+
+"""
+from alembic import op
+import models as models
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '4474872b0ee6'
+down_revision = '2adcbe1f5dfb'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # `CREATE INDEX CONCURRENTLY` cannot run within a transaction, so use the `autocommit_block`
+    # context manager to wrap the index creation statement.
+    # Reference:
+    #
+    # - https://www.postgresql.org/docs/current/sql-createindex.html#:~:text=Another%20difference%20is,CREATE%20INDEX%20CONCURRENTLY%20cannot.
+    # - https://alembic.sqlalchemy.org/en/latest/api/runtime.html#alembic.runtime.migration.MigrationContext.autocommit_block
+    with op.get_context().autocommit_block():
+        op.create_index(
+            op.f('workflow_node_executions_tenant_id_idx'),
+            "workflow_node_executions",
+            ['tenant_id', 'workflow_id', 'node_id', sa.literal_column('created_at DESC')],
+            unique=False,
+            postgresql_concurrently=True,
+        )
+
+    with op.batch_alter_table('workflow_draft_variables', schema=None) as batch_op:
+        batch_op.add_column(sa.Column('node_execution_id', models.types.StringUUID(), nullable=True))
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+
+    # `DROP INDEX CONCURRENTLY` cannot run within a transaction, so use the `autocommit_block`
+    # context manager to wrap the index drop statement.
+    # Reference:
+    #
+    # - https://www.postgresql.org/docs/current/sql-createindex.html#:~:text=Another%20difference%20is,CREATE%20INDEX%20CONCURRENTLY%20cannot.
+    # - https://alembic.sqlalchemy.org/en/latest/api/runtime.html#alembic.runtime.migration.MigrationContext.autocommit_block
+    # `DROP INDEX CONCURRENTLY` cannot run within a transaction, so commit existing transactions first.
+    # Reference:
+    #
+    # https://www.postgresql.org/docs/current/sql-createindex.html#:~:text=Another%20difference%20is,CREATE%20INDEX%20CONCURRENTLY%20cannot.
+    with op.get_context().autocommit_block():
+        op.drop_index(op.f('workflow_node_executions_tenant_id_idx'), postgresql_concurrently=True)
+
+    with op.batch_alter_table('workflow_draft_variables', schema=None) as batch_op:
+        batch_op.drop_column('node_execution_id')
+
+    # ### end Alembic commands ###
--- a/api/models/provider.py
+++ b/api/models/provider.py
@ -1,6 +1,9 @@
+from datetime import datetime
 from enum import Enum
+from typing import Optional

-from sqlalchemy import func
+from sqlalchemy import func, text
+from sqlalchemy.orm import Mapped, mapped_column

 from .base import Base
 from .engine import db
@ -51,20 +54,24 @@ class Provider(Base):
        ),
    )

-    id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()"))
-    tenant_id = db.Column(StringUUID, nullable=False)
-    provider_name = db.Column(db.String(255), nullable=False)
-    provider_type = db.Column(db.String(40), nullable=False, server_default=db.text("'custom'::character varying"))
-    encrypted_config = db.Column(db.Text, nullable=True)
-    is_valid = db.Column(db.Boolean, nullable=False, server_default=db.text("false"))
-    last_used = db.Column(db.DateTime, nullable=True)
+    id: Mapped[str] = mapped_column(StringUUID, server_default=text("uuid_generate_v4()"))
+    tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
+    provider_name: Mapped[str] = mapped_column(db.String(255), nullable=False)
+    provider_type: Mapped[str] = mapped_column(
+        db.String(40), nullable=False, server_default=text("'custom'::character varying")
+    )
+    encrypted_config: Mapped[Optional[str]] = mapped_column(db.Text, nullable=True)
+    is_valid: Mapped[bool] = mapped_column(db.Boolean, nullable=False, server_default=text("false"))
+    last_used: Mapped[Optional[datetime]] = mapped_column(db.DateTime, nullable=True)

-    quota_type = db.Column(db.String(40), nullable=True, server_default=db.text("''::character varying"))
-    quota_limit = db.Column(db.BigInteger, nullable=True)
-    quota_used = db.Column(db.BigInteger, default=0)
+    quota_type: Mapped[Optional[str]] = mapped_column(
+        db.String(40), nullable=True, server_default=text("''::character varying")
+    )
+    quota_limit: Mapped[Optional[int]] = mapped_column(db.BigInteger, nullable=True)
+    quota_used: Mapped[Optional[int]] = mapped_column(db.BigInteger, default=0)

-    created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp())
-    updated_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp())
+    created_at: Mapped[datetime] = mapped_column(db.DateTime, nullable=False, server_default=func.current_timestamp())
+    updated_at: Mapped[datetime] = mapped_column(db.DateTime, nullable=False, server_default=func.current_timestamp())

    def __repr__(self):
        return (
@ -104,15 +111,15 @@ class ProviderModel(Base):
        ),
    )

-    id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()"))
-    tenant_id = db.Column(StringUUID, nullable=False)
-    provider_name = db.Column(db.String(255), nullable=False)
-    model_name = db.Column(db.String(255), nullable=False)
-    model_type = db.Column(db.String(40), nullable=False)
-    encrypted_config = db.Column(db.Text, nullable=True)
-    is_valid = db.Column(db.Boolean, nullable=False, server_default=db.text("false"))
-    created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp())
-    updated_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp())
+    id: Mapped[str] = mapped_column(StringUUID, server_default=text("uuid_generate_v4()"))
+    tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
+    provider_name: Mapped[str] = mapped_column(db.String(255), nullable=False)
+    model_name: Mapped[str] = mapped_column(db.String(255), nullable=False)
+    model_type: Mapped[str] = mapped_column(db.String(40), nullable=False)
+    encrypted_config: Mapped[Optional[str]] = mapped_column(db.Text, nullable=True)
+    is_valid: Mapped[bool] = mapped_column(db.Boolean, nullable=False, server_default=text("false"))
+    created_at: Mapped[datetime] = mapped_column(db.DateTime, nullable=False, server_default=func.current_timestamp())
+    updated_at: Mapped[datetime] = mapped_column(db.DateTime, nullable=False, server_default=func.current_timestamp())


 class TenantDefaultModel(Base):
@ -122,13 +129,13 @@ class TenantDefaultModel(Base):
        db.Index("tenant_default_model_tenant_id_provider_type_idx", "tenant_id", "provider_name", "model_type"),
    )

-    id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()"))
-    tenant_id = db.Column(StringUUID, nullable=False)
-    provider_name = db.Column(db.String(255), nullable=False)
-    model_name = db.Column(db.String(255), nullable=False)
-    model_type = db.Column(db.String(40), nullable=False)
-    created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp())
-    updated_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp())
+    id: Mapped[str] = mapped_column(StringUUID, server_default=text("uuid_generate_v4()"))
+    tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
+    provider_name: Mapped[str] = mapped_column(db.String(255), nullable=False)
+    model_name: Mapped[str] = mapped_column(db.String(255), nullable=False)
+    model_type: Mapped[str] = mapped_column(db.String(40), nullable=False)
+    created_at: Mapped[datetime] = mapped_column(db.DateTime, nullable=False, server_default=func.current_timestamp())
+    updated_at: Mapped[datetime] = mapped_column(db.DateTime, nullable=False, server_default=func.current_timestamp())


 class TenantPreferredModelProvider(Base):
@ -138,12 +145,12 @@ class TenantPreferredModelProvider(Base):
        db.Index("tenant_preferred_model_provider_tenant_provider_idx", "tenant_id", "provider_name"),
    )

-    id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()"))
-    tenant_id = db.Column(StringUUID, nullable=False)
-    provider_name = db.Column(db.String(255), nullable=False)
-    preferred_provider_type = db.Column(db.String(40), nullable=False)
-    created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp())
-    updated_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp())
+    id: Mapped[str] = mapped_column(StringUUID, server_default=text("uuid_generate_v4()"))
+    tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
+    provider_name: Mapped[str] = mapped_column(db.String(255), nullable=False)
+    preferred_provider_type: Mapped[str] = mapped_column(db.String(40), nullable=False)
+    created_at: Mapped[datetime] = mapped_column(db.DateTime, nullable=False, server_default=func.current_timestamp())
+    updated_at: Mapped[datetime] = mapped_column(db.DateTime, nullable=False, server_default=func.current_timestamp())


 class ProviderOrder(Base):
@ -153,22 +160,24 @@ class ProviderOrder(Base):
        db.Index("provider_order_tenant_provider_idx", "tenant_id", "provider_name"),
    )

-    id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()"))
-    tenant_id = db.Column(StringUUID, nullable=False)
-    provider_name = db.Column(db.String(255), nullable=False)
-    account_id = db.Column(StringUUID, nullable=False)
-    payment_product_id = db.Column(db.String(191), nullable=False)
-    payment_id = db.Column(db.String(191))
-    transaction_id = db.Column(db.String(191))
-    quantity = db.Column(db.Integer, nullable=False, server_default=db.text("1"))
-    currency = db.Column(db.String(40))
-    total_amount = db.Column(db.Integer)
-    payment_status = db.Column(db.String(40), nullable=False, server_default=db.text("'wait_pay'::character varying"))
-    paid_at = db.Column(db.DateTime)
-    pay_failed_at = db.Column(db.DateTime)
-    refunded_at = db.Column(db.DateTime)
-    created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp())
-    updated_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp())
+    id: Mapped[str] = mapped_column(StringUUID, server_default=text("uuid_generate_v4()"))
+    tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
+    provider_name: Mapped[str] = mapped_column(db.String(255), nullable=False)
+    account_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
+    payment_product_id: Mapped[str] = mapped_column(db.String(191), nullable=False)
+    payment_id: Mapped[Optional[str]] = mapped_column(db.String(191))
+    transaction_id: Mapped[Optional[str]] = mapped_column(db.String(191))
+    quantity: Mapped[int] = mapped_column(db.Integer, nullable=False, server_default=text("1"))
+    currency: Mapped[Optional[str]] = mapped_column(db.String(40))
+    total_amount: Mapped[Optional[int]] = mapped_column(db.Integer)
+    payment_status: Mapped[str] = mapped_column(
+        db.String(40), nullable=False, server_default=text("'wait_pay'::character varying")
+    )
+    paid_at: Mapped[Optional[datetime]] = mapped_column(db.DateTime)
+    pay_failed_at: Mapped[Optional[datetime]] = mapped_column(db.DateTime)
+    refunded_at: Mapped[Optional[datetime]] = mapped_column(db.DateTime)
+    created_at: Mapped[datetime] = mapped_column(db.DateTime, nullable=False, server_default=func.current_timestamp())
+    updated_at: Mapped[datetime] = mapped_column(db.DateTime, nullable=False, server_default=func.current_timestamp())


 class ProviderModelSetting(Base):
@ -182,15 +191,15 @@ class ProviderModelSetting(Base):
        db.Index("provider_model_setting_tenant_provider_model_idx", "tenant_id", "provider_name", "model_type"),
    )

-    id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()"))
-    tenant_id = db.Column(StringUUID, nullable=False)
-    provider_name = db.Column(db.String(255), nullable=False)
-    model_name = db.Column(db.String(255), nullable=False)
-    model_type = db.Column(db.String(40), nullable=False)
-    enabled = db.Column(db.Boolean, nullable=False, server_default=db.text("true"))
-    load_balancing_enabled = db.Column(db.Boolean, nullable=False, server_default=db.text("false"))
-    created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp())
-    updated_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp())
+    id: Mapped[str] = mapped_column(StringUUID, server_default=text("uuid_generate_v4()"))
+    tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
+    provider_name: Mapped[str] = mapped_column(db.String(255), nullable=False)
+    model_name: Mapped[str] = mapped_column(db.String(255), nullable=False)
+    model_type: Mapped[str] = mapped_column(db.String(40), nullable=False)
+    enabled: Mapped[bool] = mapped_column(db.Boolean, nullable=False, server_default=text("true"))
+    load_balancing_enabled: Mapped[bool] = mapped_column(db.Boolean, nullable=False, server_default=text("false"))
+    created_at: Mapped[datetime] = mapped_column(db.DateTime, nullable=False, server_default=func.current_timestamp())
+    updated_at: Mapped[datetime] = mapped_column(db.DateTime, nullable=False, server_default=func.current_timestamp())


 class LoadBalancingModelConfig(Base):
@ -204,13 +213,13 @@ class LoadBalancingModelConfig(Base):
        db.Index("load_balancing_model_config_tenant_provider_model_idx", "tenant_id", "provider_name", "model_type"),
    )

-    id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()"))
-    tenant_id = db.Column(StringUUID, nullable=False)
-    provider_name = db.Column(db.String(255), nullable=False)
-    model_name = db.Column(db.String(255), nullable=False)
-    model_type = db.Column(db.String(40), nullable=False)
-    name = db.Column(db.String(255), nullable=False)
-    encrypted_config = db.Column(db.Text, nullable=True)
-    enabled = db.Column(db.Boolean, nullable=False, server_default=db.text("true"))
-    created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp())
-    updated_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp())
+    id: Mapped[str] = mapped_column(StringUUID, server_default=text("uuid_generate_v4()"))
+    tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
+    provider_name: Mapped[str] = mapped_column(db.String(255), nullable=False)
+    model_name: Mapped[str] = mapped_column(db.String(255), nullable=False)
+    model_type: Mapped[str] = mapped_column(db.String(40), nullable=False)
+    name: Mapped[str] = mapped_column(db.String(255), nullable=False)
+    encrypted_config: Mapped[Optional[str]] = mapped_column(db.Text, nullable=True)
+    enabled: Mapped[bool] = mapped_column(db.Boolean, nullable=False, server_default=text("true"))
+    created_at: Mapped[datetime] = mapped_column(db.DateTime, nullable=False, server_default=func.current_timestamp())
+    updated_at: Mapped[datetime] = mapped_column(db.DateTime, nullable=False, server_default=func.current_timestamp())
--- a/api/models/workflow.py
+++ b/api/models/workflow.py
@ -16,8 +16,8 @@ if TYPE_CHECKING:
    from models.model import AppMode

 import sqlalchemy as sa
-from sqlalchemy import UniqueConstraint, func
-from sqlalchemy.orm import Mapped, mapped_column
+from sqlalchemy import Index, PrimaryKeyConstraint, UniqueConstraint, func
+from sqlalchemy.orm import Mapped, declared_attr, mapped_column

 from constants import DEFAULT_FILE_NUMBER_LIMITS, HIDDEN_VALUE
 from core.helper import encrypter
@ -590,28 +590,48 @@ class WorkflowNodeExecutionModel(Base):
    """

    __tablename__ = "workflow_node_executions"
-    __table_args__ = (
-        db.PrimaryKeyConstraint("id", name="workflow_node_execution_pkey"),
-        db.Index(
-            "workflow_node_execution_workflow_run_idx",
-            "tenant_id",
-            "app_id",
-            "workflow_id",
-            "triggered_from",
-            "workflow_run_id",
-        ),
-        db.Index(
-            "workflow_node_execution_node_run_idx", "tenant_id", "app_id", "workflow_id", "triggered_from", "node_id"
-        ),
-        db.Index(
-            "workflow_node_execution_id_idx",
-            "tenant_id",
-            "app_id",
-            "workflow_id",
-            "triggered_from",
-            "node_execution_id",
-        ),
-    )
+
+    @declared_attr
+    def __table_args__(cls):  # noqa
+        return (
+            PrimaryKeyConstraint("id", name="workflow_node_execution_pkey"),
+            Index(
+                "workflow_node_execution_workflow_run_idx",
+                "tenant_id",
+                "app_id",
+                "workflow_id",
+                "triggered_from",
+                "workflow_run_id",
+            ),
+            Index(
+                "workflow_node_execution_node_run_idx",
+                "tenant_id",
+                "app_id",
+                "workflow_id",
+                "triggered_from",
+                "node_id",
+            ),
+            Index(
+                "workflow_node_execution_id_idx",
+                "tenant_id",
+                "app_id",
+                "workflow_id",
+                "triggered_from",
+                "node_execution_id",
+            ),
+            Index(
+                # The first argument is the index name,
+                # which we leave as `None` to allow auto-generation by the ORM.
+                None,
+                cls.tenant_id,
+                cls.workflow_id,
+                cls.node_id,
+                # MyPy may flag the following line because it doesn't recognize that
+                # the `declared_attr` decorator passes the receiving class as the first
+                # argument to this method, allowing us to reference class attributes.
+                cls.created_at.desc(),  # type: ignore
+            ),
+        )

    id: Mapped[str] = mapped_column(StringUUID, server_default=db.text("uuid_generate_v4()"))
    tenant_id: Mapped[str] = mapped_column(StringUUID)
@ -885,14 +905,29 @@ class WorkflowDraftVariable(Base):

    selector: Mapped[str] = mapped_column(sa.String(255), nullable=False, name="selector")

+    # The data type of this variable's value
    value_type: Mapped[SegmentType] = mapped_column(EnumText(SegmentType, length=20))
-    # JSON string
+
+    # The variable's value serialized as a JSON string
    value: Mapped[str] = mapped_column(sa.Text, nullable=False, name="value")

-    # visible
+    # Controls whether the variable should be displayed in the variable inspection panel
    visible: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, default=True)
+
+    # Determines whether this variable can be modified by users
    editable: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, default=False)

+    # The `node_execution_id` field identifies the workflow node execution that created this variable.
+    # It corresponds to the `id` field in the `WorkflowNodeExecutionModel` model.
+    #
+    # This field is not `None` for system variables and node variables, and is `None`
+    # for conversation variables.
+    node_execution_id: Mapped[str | None] = mapped_column(
+        StringUUID,
+        nullable=True,
+        default=None,
+    )
+
    def get_selector(self) -> list[str]:
        selector = json.loads(self.selector)
        if not isinstance(selector, list):
--- a/api/schedule/queue_monitor_task.py
+++ b/api/schedule/queue_monitor_task.py
@ -0,0 +1,62 @@
+import logging
+from datetime import datetime
+from urllib.parse import urlparse
+
+import click
+from flask import render_template
+from redis import Redis
+
+import app
+from configs import dify_config
+from extensions.ext_database import db
+from extensions.ext_mail import mail
+
+# Create a dedicated Redis connection (using the same configuration as Celery)
+celery_broker_url = dify_config.CELERY_BROKER_URL
+
+# Only the host, port, password and path of the broker URL are used; each falls back to a default when absent.
+parsed = urlparse(celery_broker_url)
+host = parsed.hostname or "localhost"
+port = parsed.port or 6379
+password = parsed.password or None
+# The URL path, stripped of slashes, selects the database; "1" is used when the path is empty.
+redis_db = parsed.path.strip("/") or "1"  # type: ignore
+
+# NOTE(review): this assumes the Celery broker is a plain Redis URL - confirm for other broker schemes.
+celery_redis = Redis(host=host, port=port, password=password, db=redis_db)
+
+
+@app.celery.task(queue="monitor")
+def queue_monitor_task():
+    """Check the backlog of the dataset queue and mail an alert when it is too long.
+
+    The length of the ``dataset`` list is read through ``celery_redis`` and compared with
+    ``dify_config.QUEUE_MONITOR_THRESHOLD``. Once the length reaches the threshold a warning is
+    logged and the alert template is mailed to every address listed, comma separated, in
+    ``dify_config.QUEUE_MONITOR_ALERT_EMAILS``.
+
+    Errors are logged instead of raised, and an active database session is closed on exit.
+    """
+    queue_name = "dataset"
+    threshold = dify_config.QUEUE_MONITOR_THRESHOLD
+
+    try:
+        queue_length = celery_redis.llen(queue_name)
+        logging.info(click.style(f"Start monitor {queue_name}", fg="green"))
+        logging.info(click.style(f"Queue length: {queue_length}", fg="green"))
+
+        # The threshold setting is optional; comparing an int with None would raise TypeError.
+        if threshold is None or queue_length < threshold:
+            return
+
+        warning_msg = f"Queue {queue_name} task count exceeded the limit.: {queue_length}/{threshold}"
+        logging.warning(click.style(warning_msg, fg="red"))
+
+        alert_emails = dify_config.QUEUE_MONITOR_ALERT_EMAILS
+        if not alert_emails:
+            return
+
+        # Tolerate spaces around the commas and skip empty entries such as a trailing comma.
+        recipients = [address.strip() for address in alert_emails.split(",") if address.strip()]
+        # One timestamp per run so every recipient sees the same alert time.
+        alert_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        for to in recipients:
+            # Keep the try per recipient: one failed delivery must not block the remaining ones.
+            try:
+                html_content = render_template(
+                    "queue_monitor_alert_email_template_en-US.html",
+                    queue_name=queue_name,
+                    queue_length=queue_length,
+                    threshold=threshold,
+                    alert_time=alert_time,
+                )
+                mail.send(to=to, subject="Alert: Dataset Queue pending tasks exceeded the limit", html=html_content)
+            except Exception:
+                logging.exception(click.style("Exception occurred during sending email", fg="red"))
+
+    except Exception:
+        logging.exception(click.style("Exception occurred during queue monitoring", fg="red"))
+    finally:
+        # Runs on the early returns above as well.
+        if db.session.is_active:
+            db.session.close()
--- a/api/services/account_service.py
+++ b/api/services/account_service.py
@ -1,7 +1,6 @@
 import base64
 import json
 import logging
-import random
 import secrets
 import uuid
 from datetime import UTC, datetime, timedelta
@ -261,7 +260,7 @@ class AccountService:

    @staticmethod
    def generate_account_deletion_verification_code(account: Account) -> tuple[str, str]:
-        code = "".join([str(random.randint(0, 9)) for _ in range(6)])
+        code = "".join([str(secrets.randbelow(exclusive_upper_bound=10)) for _ in range(6)])
        token = TokenManager.generate_token(
            account=account, token_type="account_deletion", additional_data={"code": code}
        )
@ -429,7 +428,7 @@ class AccountService:
        additional_data: dict[str, Any] = {},
    ):
        if not code:
-            code = "".join([str(random.randint(0, 9)) for _ in range(6)])
+            code = "".join([str(secrets.randbelow(exclusive_upper_bound=10)) for _ in range(6)])
        additional_data["code"] = code
        token = TokenManager.generate_token(
            account=account, email=email, token_type="reset_password", additional_data=additional_data
@ -456,7 +455,7 @@ class AccountService:

            raise EmailCodeLoginRateLimitExceededError()

-        code = "".join([str(random.randint(0, 9)) for _ in range(6)])
+        code = "".join([str(secrets.randbelow(exclusive_upper_bound=10)) for _ in range(6)])
        token = TokenManager.generate_token(
            account=account, email=email, token_type="email_code_login", additional_data={"code": code}
        )
--- a/api/services/dataset_service.py
+++ b/api/services/dataset_service.py
@ -2,7 +2,7 @@ import copy
 import datetime
 import json
 import logging
-import random
+import secrets
 import time
 import uuid
 from collections import Counter
@ -970,7 +970,7 @@ class DocumentService:
            documents.append(document)
            batch = document.batch
        else:
-            batch = time.strftime("%Y%m%d%H%M%S") + str(random.randint(100000, 999999))
+            batch = time.strftime("%Y%m%d%H%M%S") + str(100000 + secrets.randbelow(exclusive_upper_bound=900000))
            # save process rule
            if not dataset_process_rule:
                process_rule = knowledge_config.process_rule
--- a/api/services/tag_service.py
+++ b/api/services/tag_service.py
@ -46,6 +46,8 @@ class TagService:

    @staticmethod
    def get_tag_by_tag_name(tag_type: str, current_tenant_id: str, tag_name: str) -> list:
+        if not tag_type or not tag_name:
+            return []
        tags = (
            db.session.query(Tag)
            .filter(Tag.name == tag_name, Tag.tenant_id == current_tenant_id, Tag.type == tag_type)
@ -88,7 +90,7 @@ class TagService:

    @staticmethod
    def update_tags(args: dict, tag_id: str) -> Tag:
-        if TagService.get_tag_by_tag_name(args["type"], current_user.current_tenant_id, args["name"]):
+        if TagService.get_tag_by_tag_name(args.get("type", ""), current_user.current_tenant_id, args.get("name", "")):
            raise ValueError("Tag name already exists")
        tag = db.session.query(Tag).filter(Tag.id == tag_id).first()
        if not tag:
--- a/api/services/webapp_auth_service.py
+++ b/api/services/webapp_auth_service.py
@ -1,5 +1,5 @@
 import enum
-import random
+import secrets
 from datetime import UTC, datetime, timedelta
 from typing import Any, Optional, cast

@ -69,7 +69,7 @@ class WebAppAuthService:
        if email is None:
            raise ValueError("Email must be provided.")

-        code = "".join([str(random.randint(0, 9)) for _ in range(6)])
+        code = "".join([str(secrets.randbelow(exclusive_upper_bound=10)) for _ in range(6)])
        token = TokenManager.generate_token(
            account=account, email=email, token_type="email_code_login", additional_data={"code": code}
        )
--- a/api/tasks/batch_create_segment_to_index_task.py
+++ b/api/tasks/batch_create_segment_to_index_task.py
@ -5,7 +5,7 @@ import uuid

 import click
 from celery import shared_task  # type: ignore
-from sqlalchemy import func, select
+from sqlalchemy import func
 from sqlalchemy.orm import Session

 from core.model_manager import ModelManager
@ -68,11 +68,6 @@ def batch_create_segment_to_index_task(
                    model_type=ModelType.TEXT_EMBEDDING,
                    model=dataset.embedding_model,
                )
-            word_count_change = 0
-            segments_to_insert: list[str] = []
-            max_position_stmt = select(func.max(DocumentSegment.position)).where(
-                DocumentSegment.document_id == dataset_document.id
-            )
        word_count_change = 0
        if embedding_model:
            tokens_list = embedding_model.get_text_embedding_num_tokens(
--- a/api/templates/queue_monitor_alert_email_template_en-US.html
+++ b/api/templates/queue_monitor_alert_email_template_en-US.html
@ -0,0 +1,129 @@
+<!DOCTYPE html>
+<html>
+
+<head>
+  <style>
+    body {
+      font-family: 'Arial', sans-serif;
+      line-height: 16pt;
+      color: #101828;
+      background-color: #e9ebf0;
+      margin: 0;
+      padding: 0;
+    }
+
+    .container {
+      width: 600px;
+      min-height: 605px;
+      margin: 40px auto;
+      padding: 36px 48px;
+      background-color: #fcfcfd;
+      border-radius: 16px;
+      border: 1px solid #ffffff;
+      box-shadow: 0 2px 4px -2px rgba(9, 9, 11, 0.08);
+    }
+
+    .header {
+      margin-bottom: 24px;
+    }
+
+    .header img {
+      max-width: 100px;
+      height: auto;
+    }
+
+    .title {
+      font-weight: 600;
+      font-size: 24px;
+      line-height: 28.8px;
+    }
+
+    .description {
+      font-size: 13px;
+      line-height: 16px;
+      color: #676f83;
+      margin-top: 12px;
+    }
+
+    .alert-content {
+      padding: 16px 32px;
+      text-align: center;
+      border-radius: 16px;
+      background-color: #fef0f0;
+      margin: 16px auto;
+      border: 1px solid #fda29b;
+    }
+
+    .alert-title {
+      line-height: 24px;
+      font-weight: 700;
+      font-size: 18px;
+      color: #d92d20;
+    }
+
+    .alert-detail {
+      line-height: 20px;
+      font-size: 14px;
+      margin-top: 8px;
+    }
+
+    .typography {
+      letter-spacing: -0.07px;
+      font-weight: 400;
+      font-style: normal;
+      font-size: 14px;
+      line-height: 20px;
+      color: #354052;
+      margin-top: 12px;
+      margin-bottom: 12px;
+    }
+    .typography p{
+      margin: 0 auto;
+    }
+
+    .typography-title {
+      color: #101828;
+      font-size: 14px;
+      font-style: normal;
+      font-weight: 600;
+      line-height: 20px;
+      margin-top: 12px;
+      margin-bottom: 4px;
+    }
+    .tip-list{
+      margin: 0;
+      padding-left: 10px;
+    }
+  </style>
+</head>
+
+<body>
+  <div class="container">
+    <div class="header">
+      <img src="https://assets.dify.ai/images/logo.png" alt="Dify Logo" />
+    </div>
+    <p class="title">Queue Monitoring Alert</p>
+    <p class="typography">Our system has detected an abnormal queue status that requires your attention:</p>
+
+    <div class="alert-content">
+      <div class="alert-title">Queue Task Alert</div>
+      <div class="alert-detail">
+        Queue "{{queue_name}}" has {{queue_length}} pending tasks (Threshold: {{threshold}})
+      </div>
+    </div>
+
+    <div class="typography">
+      <p style="margin-bottom:4px">Recommended actions:</p>
+      <p>1. Check the queue processing status in the system dashboard</p>
+      <p>2. Verify if there are any processing bottlenecks</p>
+      <p>3. Consider scaling up workers if needed</p>
+    </div>
+
+    <p class="typography-title">Additional Information:</p>
+    <ul class="typography tip-list">
+      <li>Alert triggered at: {{alert_time}}</li>
+    </ul>
+  </div>
+</body>
+
+</html>
--- a/api/tests/integration_tests/workflow/nodes/test_llm.py
+++ b/api/tests/integration_tests/workflow/nodes/test_llm.py
@ -3,11 +3,16 @@ import os
 import time
 import uuid
 from collections.abc import Generator
-from unittest.mock import MagicMock
+from decimal import Decimal
+from unittest.mock import MagicMock, patch

 import pytest

+from app_factory import create_app
+from configs import dify_config
 from core.app.entities.app_invoke_entities import InvokeFrom
+from core.model_runtime.entities.llm_entities import LLMResult, LLMUsage
+from core.model_runtime.entities.message_entities import AssistantPromptMessage
 from core.workflow.entities.variable_pool import VariablePool
 from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionStatus
 from core.workflow.enums import SystemVariableKey
@ -19,13 +24,27 @@ from core.workflow.nodes.llm.node import LLMNode
 from extensions.ext_database import db
 from models.enums import UserFrom
 from models.workflow import WorkflowType
-from tests.integration_tests.workflow.nodes.__mock.model import get_mocked_fetch_model_config

 """FOR MOCK FIXTURES, DO NOT REMOVE"""
 from tests.integration_tests.model_runtime.__mock.plugin_daemon import setup_model_mock
 from tests.integration_tests.workflow.nodes.__mock.code_executor import setup_code_executor_mock


+@pytest.fixture(scope="session")
+def app():
+    """Build one application object that is shared by the whole test session.
+
+    Storage is pointed at a local `storage` directory through environment variables
+    before the application is created, and `dify_config.LOGIN_DISABLED` is switched
+    on afterwards.
+    """
+    # Set up storage configuration
+    os.environ["STORAGE_TYPE"] = "opendal"
+    os.environ["OPENDAL_SCHEME"] = "fs"
+    os.environ["OPENDAL_FS_ROOT"] = "storage"
+
+    # Ensure storage directory exists
+    os.makedirs("storage", exist_ok=True)
+
+    app = create_app()
+    dify_config.LOGIN_DISABLED = True
+    return app
+
+
 def init_llm_node(config: dict) -> LLMNode:
    graph_config = {
        "edges": [
@ -40,13 +59,19 @@ def init_llm_node(config: dict) -> LLMNode:

    graph = Graph.init(graph_config=graph_config)

+    # Use proper UUIDs for database compatibility
+    tenant_id = "9d2074fc-6f86-45a9-b09d-6ecc63b9056b"
+    app_id = "9d2074fc-6f86-45a9-b09d-6ecc63b9056c"
+    workflow_id = "9d2074fc-6f86-45a9-b09d-6ecc63b9056d"
+    user_id = "9d2074fc-6f86-45a9-b09d-6ecc63b9056e"
+
    init_params = GraphInitParams(
-        tenant_id="1",
-        app_id="1",
+        tenant_id=tenant_id,
+        app_id=app_id,
        workflow_type=WorkflowType.WORKFLOW,
-        workflow_id="1",
+        workflow_id=workflow_id,
        graph_config=graph_config,
-        user_id="1",
+        user_id=user_id,
        user_from=UserFrom.ACCOUNT,
        invoke_from=InvokeFrom.DEBUGGER,
        call_depth=0,
@ -77,115 +102,197 @@ def init_llm_node(config: dict) -> LLMNode:
    return node


-def test_execute_llm(setup_model_mock):
-    node = init_llm_node(
-        config={
-            "id": "llm",
-            "data": {
-                "title": "123",
-                "type": "llm",
-                "model": {
-                    "provider": "langgenius/openai/openai",
-                    "name": "gpt-3.5-turbo",
-                    "mode": "chat",
-                    "completion_params": {},
+def test_execute_llm(app):
+    with app.app_context():
+        node = init_llm_node(
+            config={
+                "id": "llm",
+                "data": {
+                    "title": "123",
+                    "type": "llm",
+                    "model": {
+                        "provider": "langgenius/openai/openai",
+                        "name": "gpt-3.5-turbo",
+                        "mode": "chat",
+                        "completion_params": {},
+                    },
+                    "prompt_template": [
+                        {
+                            "role": "system",
+                            "text": "you are a helpful assistant.\ntoday's weather is {{#abc.output#}}.",
+                        },
+                        {"role": "user", "text": "{{#sys.query#}}"},
+                    ],
+                    "memory": None,
+                    "context": {"enabled": False},
+                    "vision": {"enabled": False},
                },
-                "prompt_template": [
-                    {"role": "system", "text": "you are a helpful assistant.\ntoday's weather is {{#abc.output#}}."},
-                    {"role": "user", "text": "{{#sys.query#}}"},
-                ],
-                "memory": None,
-                "context": {"enabled": False},
-                "vision": {"enabled": False},
            },
-        },
-    )
+        )

-    credentials = {"openai_api_key": os.environ.get("OPENAI_API_KEY")}
+        credentials = {"openai_api_key": os.environ.get("OPENAI_API_KEY")}

-    # Mock db.session.close()
-    db.session.close = MagicMock()
+        # Create a proper LLM result with real entities
+        mock_usage = LLMUsage(
+            prompt_tokens=30,
+            prompt_unit_price=Decimal("0.001"),
+            prompt_price_unit=Decimal("1000"),
+            prompt_price=Decimal("0.00003"),
+            completion_tokens=20,
+            completion_unit_price=Decimal("0.002"),
+            completion_price_unit=Decimal("1000"),
+            completion_price=Decimal("0.00004"),
+            total_tokens=50,
+            total_price=Decimal("0.00007"),
+            currency="USD",
+            latency=0.5,
+        )

-    node._fetch_model_config = get_mocked_fetch_model_config(
-        provider="langgenius/openai/openai",
-        model="gpt-3.5-turbo",
-        mode="chat",
-        credentials=credentials,
-    )
+        mock_message = AssistantPromptMessage(content="This is a test response from the mocked LLM.")

-    # execute node
-    result = node._run()
-    assert isinstance(result, Generator)
+        mock_llm_result = LLMResult(
+            model="gpt-3.5-turbo",
+            prompt_messages=[],
+            message=mock_message,
+            usage=mock_usage,
+        )

-    for item in result:
-        if isinstance(item, RunCompletedEvent):
-            assert item.run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED
-            assert item.run_result.process_data is not None
-            assert item.run_result.outputs is not None
-            assert item.run_result.outputs.get("text") is not None
-            assert item.run_result.outputs.get("usage", {})["total_tokens"] > 0
+        # Create a simple mock model instance that doesn't call real providers
+        mock_model_instance = MagicMock()
+        mock_model_instance.invoke_llm.return_value = mock_llm_result
+
+        # Create a simple mock model config with required attributes
+        mock_model_config = MagicMock()
+        mock_model_config.mode = "chat"
+        mock_model_config.provider = "langgenius/openai/openai"
+        mock_model_config.model = "gpt-3.5-turbo"
+        mock_model_config.provider_model_bundle.configuration.tenant_id = "9d2074fc-6f86-45a9-b09d-6ecc63b9056b"
+
+        # Mock the _fetch_model_config method
+        def mock_fetch_model_config_func(_node_data_model):
+            return mock_model_instance, mock_model_config
+
+        # Also mock ModelManager.get_model_instance to avoid database calls
+        def mock_get_model_instance(_self, **kwargs):
+            return mock_model_instance
+
+        with (
+            patch.object(node, "_fetch_model_config", mock_fetch_model_config_func),
+            patch("core.model_manager.ModelManager.get_model_instance", mock_get_model_instance),
+        ):
+            # execute node
+            result = node._run()
+            assert isinstance(result, Generator)
+
+            for item in result:
+                if isinstance(item, RunCompletedEvent):
+                    assert item.run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED
+                    assert item.run_result.process_data is not None
+                    assert item.run_result.outputs is not None
+                    assert item.run_result.outputs.get("text") is not None
+                    assert item.run_result.outputs.get("usage", {})["total_tokens"] > 0


@pytest.mark.parametrize("setup_code_executor_mock", [["none"]], indirect=True)
-def test_execute_llm_with_jinja2(setup_code_executor_mock, setup_model_mock):
+def test_execute_llm_with_jinja2(app, setup_code_executor_mock):
    """
    Test execute LLM node with jinja2
    """
-    node = init_llm_node(
-        config={
-            "id": "llm",
-            "data": {
-                "title": "123",
-                "type": "llm",
-                "model": {"provider": "openai", "name": "gpt-3.5-turbo", "mode": "chat", "completion_params": {}},
-                "prompt_config": {
-                    "jinja2_variables": [
-                        {"variable": "sys_query", "value_selector": ["sys", "query"]},
-                        {"variable": "output", "value_selector": ["abc", "output"]},
-                    ]
+    with app.app_context():
+        node = init_llm_node(
+            config={
+                "id": "llm",
+                "data": {
+                    "title": "123",
+                    "type": "llm",
+                    "model": {"provider": "openai", "name": "gpt-3.5-turbo", "mode": "chat", "completion_params": {}},
+                    "prompt_config": {
+                        "jinja2_variables": [
+                            {"variable": "sys_query", "value_selector": ["sys", "query"]},
+                            {"variable": "output", "value_selector": ["abc", "output"]},
+                        ]
+                    },
+                    "prompt_template": [
+                        {
+                            "role": "system",
+                            "text": "you are a helpful assistant.\ntoday's weather is {{#abc.output#}}",
+                            "jinja2_text": "you are a helpful assistant.\ntoday's weather is {{output}}.",
+                            "edition_type": "jinja2",
+                        },
+                        {
+                            "role": "user",
+                            "text": "{{#sys.query#}}",
+                            "jinja2_text": "{{sys_query}}",
+                            "edition_type": "basic",
+                        },
+                    ],
+                    "memory": None,
+                    "context": {"enabled": False},
+                    "vision": {"enabled": False},
                },
-                "prompt_template": [
-                    {
-                        "role": "system",
-                        "text": "you are a helpful assistant.\ntoday's weather is {{#abc.output#}}",
-                        "jinja2_text": "you are a helpful assistant.\ntoday's weather is {{output}}.",
-                        "edition_type": "jinja2",
-                    },
-                    {
-                        "role": "user",
-                        "text": "{{#sys.query#}}",
-                        "jinja2_text": "{{sys_query}}",
-                        "edition_type": "basic",
-                    },
-                ],
-                "memory": None,
-                "context": {"enabled": False},
-                "vision": {"enabled": False},
            },
-        },
-    )
+        )

-    credentials = {"openai_api_key": os.environ.get("OPENAI_API_KEY")}
+        # Mock db.session.close()
+        db.session.close = MagicMock()

-    # Mock db.session.close()
-    db.session.close = MagicMock()
+        # Create a proper LLM result with real entities
+        mock_usage = LLMUsage(
+            prompt_tokens=30,
+            prompt_unit_price=Decimal("0.001"),
+            prompt_price_unit=Decimal("1000"),
+            prompt_price=Decimal("0.00003"),
+            completion_tokens=20,
+            completion_unit_price=Decimal("0.002"),
+            completion_price_unit=Decimal("1000"),
+            completion_price=Decimal("0.00004"),
+            total_tokens=50,
+            total_price=Decimal("0.00007"),
+            currency="USD",
+            latency=0.5,
+        )

-    node._fetch_model_config = get_mocked_fetch_model_config(
-        provider="langgenius/openai/openai",
-        model="gpt-3.5-turbo",
-        mode="chat",
-        credentials=credentials,
-    )
+        mock_message = AssistantPromptMessage(content="Test response: sunny weather and what's the weather today?")

-    # execute node
-    result = node._run()
+        mock_llm_result = LLMResult(
+            model="gpt-3.5-turbo",
+            prompt_messages=[],
+            message=mock_message,
+            usage=mock_usage,
+        )

-    for item in result:
-        if isinstance(item, RunCompletedEvent):
-            assert item.run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED
-            assert item.run_result.process_data is not None
-            assert "sunny" in json.dumps(item.run_result.process_data)
-            assert "what's the weather today?" in json.dumps(item.run_result.process_data)
+        # Create a simple mock model instance that doesn't call real providers
+        mock_model_instance = MagicMock()
+        mock_model_instance.invoke_llm.return_value = mock_llm_result
+
+        # Create a simple mock model config with required attributes
+        mock_model_config = MagicMock()
+        mock_model_config.mode = "chat"
+        mock_model_config.provider = "openai"
+        mock_model_config.model = "gpt-3.5-turbo"
+        mock_model_config.provider_model_bundle.configuration.tenant_id = "9d2074fc-6f86-45a9-b09d-6ecc63b9056b"
+
+        # Mock the _fetch_model_config method
+        def mock_fetch_model_config_func(_node_data_model):
+            return mock_model_instance, mock_model_config
+
+        # Also mock ModelManager.get_model_instance to avoid database calls
+        def mock_get_model_instance(_self, **kwargs):
+            return mock_model_instance
+
+        with (
+            patch.object(node, "_fetch_model_config", mock_fetch_model_config_func),
+            patch("core.model_manager.ModelManager.get_model_instance", mock_get_model_instance),
+        ):
+            # execute node
+            result = node._run()
+
+            for item in result:
+                if isinstance(item, RunCompletedEvent):
+                    assert item.run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED
+                    assert item.run_result.process_data is not None
+                    assert "sunny" in json.dumps(item.run_result.process_data)
+                    assert "what's the weather today?" in json.dumps(item.run_result.process_data)


 def test_extract_json():
--- a/api/tests/unit_tests/core/helper/test_ssrf_proxy.py
+++ b/api/tests/unit_tests/core/helper/test_ssrf_proxy.py
@ -1,4 +1,4 @@
-import random
+import secrets
 from unittest.mock import MagicMock, patch

 import pytest
@ -34,7 +34,7 @@ def test_retry_logic_success(mock_request):
    side_effects = []

    for _ in range(SSRF_DEFAULT_MAX_RETRIES):
-        status_code = random.choice(STATUS_FORCELIST)
+        status_code = secrets.choice(STATUS_FORCELIST)
        mock_response = MagicMock()
        mock_response.status_code = status_code
        side_effects.append(mock_response)
--- a/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py
+++ b/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py
@ -1,5 +1,7 @@
+import io
 from unittest.mock import Mock, patch

+import pandas as pd
 import pytest
 from docx.oxml.text.paragraph import CT_P

@ -187,145 +189,134 @@ def test_node_type(document_extractor_node):

@patch("pandas.ExcelFile")
 def test_extract_text_from_excel_single_sheet(mock_excel_file):
-    """Test extracting text from Excel file with single sheet."""
-    # Mock DataFrame
-    mock_df = Mock()
-    mock_df.dropna = Mock()
-    mock_df.to_markdown.return_value = "| Name | Age |\n|------|-----|\n| John | 25  |"
+    """Test extracting text from Excel file with single sheet and multiline content."""
+
+    # Test multi-line cell
+    data = {"Name\nwith\nnewline": ["John\nDoe", "Jane\nSmith"], "Age": [25, 30]}
+
+    df = pd.DataFrame(data)

    # Mock ExcelFile
    mock_excel_instance = Mock()
    mock_excel_instance.sheet_names = ["Sheet1"]
-    mock_excel_instance.parse.return_value = mock_df
+    mock_excel_instance.parse.return_value = df
    mock_excel_file.return_value = mock_excel_instance

    file_content = b"fake_excel_content"
    result = _extract_text_from_excel(file_content)
+    expected_manual = "| Name with newline | Age |\n| ----------------- | --- |\n\
+| John Doe | 25 |\n| Jane Smith | 30 |\n\n"

-    expected = "| Name | Age |\n|------|-----|\n| John | 25  |\n\n"
-    assert result == expected
-    mock_excel_file.assert_called_once()
-    mock_df.dropna.assert_called_once_with(how="all", inplace=True)
-    mock_df.to_markdown.assert_called_once_with(index=False, floatfmt="")
+    assert expected_manual == result
+    mock_excel_instance.parse.assert_called_once_with(sheet_name="Sheet1")


@patch("pandas.ExcelFile")
 def test_extract_text_from_excel_multiple_sheets(mock_excel_file):
-    """Test extracting text from Excel file with multiple sheets."""
-    # Mock DataFrames for different sheets
-    mock_df1 = Mock()
-    mock_df1.dropna = Mock()
-    mock_df1.to_markdown.return_value = "| Product | Price |\n|---------|-------|\n| Apple   | 1.50  |"
+    """Test extracting text from Excel file with multiple sheets and multiline content."""

-    mock_df2 = Mock()
-    mock_df2.dropna = Mock()
-    mock_df2.to_markdown.return_value = "| City | Population |\n|------|------------|\n| NYC  | 8000000    |"
+    # Test multi-line cell
+    data1 = {"Product\nName": ["Apple\nRed", "Banana\nYellow"], "Price": [1.50, 0.99]}
+    df1 = pd.DataFrame(data1)
+
+    data2 = {"City\nName": ["New\nYork", "Los\nAngeles"], "Population": [8000000, 3900000]}
+    df2 = pd.DataFrame(data2)

    # Mock ExcelFile
    mock_excel_instance = Mock()
    mock_excel_instance.sheet_names = ["Products", "Cities"]
-    mock_excel_instance.parse.side_effect = [mock_df1, mock_df2]
+    mock_excel_instance.parse.side_effect = [df1, df2]
    mock_excel_file.return_value = mock_excel_instance

    file_content = b"fake_excel_content_multiple_sheets"
    result = _extract_text_from_excel(file_content)

-    expected = (
-        "| Product | Price |\n|---------|-------|\n| Apple   | 1.50  |\n\n"
-        "| City | Population |\n|------|------------|\n| NYC  | 8000000    |\n\n"
-    )
-    assert result == expected
+    expected_manual1 = "| Product Name | Price |\n| ------------ | ----- |\n\
+| Apple Red | 1.5 |\n| Banana Yellow | 0.99 |\n\n"
+    expected_manual2 = "| City Name | Population |\n| --------- | ---------- |\n\
+| New York | 8000000 |\n| Los Angeles | 3900000 |\n\n"
+
+    assert expected_manual1 in result
+    assert expected_manual2 in result
+
    assert mock_excel_instance.parse.call_count == 2


@patch("pandas.ExcelFile")
 def test_extract_text_from_excel_empty_sheets(mock_excel_file):
    """Test extracting text from Excel file with empty sheets."""
-    # Mock empty DataFrame
-    mock_df = Mock()
-    mock_df.dropna = Mock()
-    mock_df.to_markdown.return_value = ""
+
+    # Empty excel
+    df = pd.DataFrame()

    # Mock ExcelFile
    mock_excel_instance = Mock()
    mock_excel_instance.sheet_names = ["EmptySheet"]
-    mock_excel_instance.parse.return_value = mock_df
+    mock_excel_instance.parse.return_value = df
    mock_excel_file.return_value = mock_excel_instance

    file_content = b"fake_excel_empty_content"
    result = _extract_text_from_excel(file_content)

-    expected = "\n\n"
+    expected = "|  |\n|  |\n\n"
    assert result == expected

+    mock_excel_instance.parse.assert_called_once_with(sheet_name="EmptySheet")
+

@patch("pandas.ExcelFile")
 def test_extract_text_from_excel_sheet_parse_error(mock_excel_file):
    """Test handling of sheet parsing errors - should continue with other sheets."""
-    # Mock DataFrames - one successful, one that raises exception
-    mock_df_success = Mock()
-    mock_df_success.dropna = Mock()
-    mock_df_success.to_markdown.return_value = "| Data | Value |\n|------|-------|\n| Test | 123   |"
+
+    # DataFrame for the sheet that parses successfully; the second sheet's parse raises
+    data = {"Data": ["Test"], "Value": [123]}
+    df = pd.DataFrame(data)

    # Mock ExcelFile
    mock_excel_instance = Mock()
    mock_excel_instance.sheet_names = ["GoodSheet", "BadSheet"]
-    mock_excel_instance.parse.side_effect = [mock_df_success, Exception("Parse error")]
+    mock_excel_instance.parse.side_effect = [df, Exception("Parse error")]
    mock_excel_file.return_value = mock_excel_instance

    file_content = b"fake_excel_mixed_content"
    result = _extract_text_from_excel(file_content)

-    expected = "| Data | Value |\n|------|-------|\n| Test | 123   |\n\n"
-    assert result == expected
+    expected_manual = "| Data | Value |\n| ---- | ----- |\n| Test | 123 |\n\n"

+    assert expected_manual == result

-@patch("pandas.ExcelFile")
-def test_extract_text_from_excel_file_error(mock_excel_file):
-    """Test handling of Excel file reading errors."""
-    mock_excel_file.side_effect = Exception("Invalid Excel file")
-
-    file_content = b"invalid_excel_content"
-
-    with pytest.raises(Exception) as exc_info:
-        _extract_text_from_excel(file_content)
-
-    # Note: The function should raise TextExtractionError, but since it's not imported in the test,
-    # we check for the general Exception pattern
-    assert "Failed to extract text from Excel file" in str(exc_info.value)
+    assert mock_excel_instance.parse.call_count == 2


@patch("pandas.ExcelFile")
 def test_extract_text_from_excel_io_bytesio_usage(mock_excel_file):
    """Test that BytesIO is properly used with the file content."""
-    import io

-    # Mock DataFrame
-    mock_df = Mock()
-    mock_df.dropna = Mock()
-    mock_df.to_markdown.return_value = "| Test | Data |\n|------|------|\n| 1    | A    |"
+    # Single-row DataFrame returned by the mocked sheet parse
+    data = {"Test": [1], "Data": ["A"]}
+    df = pd.DataFrame(data)

    # Mock ExcelFile
    mock_excel_instance = Mock()
    mock_excel_instance.sheet_names = ["TestSheet"]
-    mock_excel_instance.parse.return_value = mock_df
+    mock_excel_instance.parse.return_value = df
    mock_excel_file.return_value = mock_excel_instance

    file_content = b"test_excel_bytes"
    result = _extract_text_from_excel(file_content)

-    # Verify that ExcelFile was called with a BytesIO object
    mock_excel_file.assert_called_once()
-    call_args = mock_excel_file.call_args[0][0]
-    assert isinstance(call_args, io.BytesIO)
+    call_arg = mock_excel_file.call_args[0][0]
+    assert isinstance(call_arg, io.BytesIO)

-    expected = "| Test | Data |\n|------|------|\n| 1    | A    |\n\n"
-    assert result == expected
+    expected_manual = "| Test | Data |\n| ---- | ---- |\n| 1 | A |\n\n"
+    assert expected_manual == result


@patch("pandas.ExcelFile")
 def test_extract_text_from_excel_all_sheets_fail(mock_excel_file):
    """Test when all sheets fail to parse - should return empty string."""
+
    # Mock ExcelFile
    mock_excel_instance = Mock()
    mock_excel_instance.sheet_names = ["BadSheet1", "BadSheet2"]
@ -335,29 +326,6 @@ def test_extract_text_from_excel_all_sheets_fail(mock_excel_file):
    file_content = b"fake_excel_all_bad_sheets"
    result = _extract_text_from_excel(file_content)

-    # Should return empty string when all sheets fail
    assert result == ""

-
-@patch("pandas.ExcelFile")
-def test_extract_text_from_excel_markdown_formatting(mock_excel_file):
-    """Test that markdown formatting parameters are correctly applied."""
-    # Mock DataFrame
-    mock_df = Mock()
-    mock_df.dropna = Mock()
-    mock_df.to_markdown.return_value = "| Float | Int |\n|-------|-----|\n| 123456.78 | 42  |"
-
-    # Mock ExcelFile
-    mock_excel_instance = Mock()
-    mock_excel_instance.sheet_names = ["NumberSheet"]
-    mock_excel_instance.parse.return_value = mock_df
-    mock_excel_file.return_value = mock_excel_instance
-
-    file_content = b"fake_excel_numbers"
-    result = _extract_text_from_excel(file_content)
-
-    # Verify to_markdown was called with correct parameters
-    mock_df.to_markdown.assert_called_once_with(index=False, floatfmt="")
-
-    expected = "| Float | Int |\n|-------|-----|\n| 123456.78 | 42  |\n\n"
-    assert result == expected
+    assert mock_excel_instance.parse.call_count == 2