Compare commits


124 Commits

Author SHA1 Message Date
967eb81112 chore: bump version to 0.14.0 (#11679)
Signed-off-by: -LAN- <laipz8200@outlook.com>
2024-12-16 15:49:17 +08:00
9f602f73eb fix: workflow continue on error edge color (#11689) 2024-12-16 15:39:53 +08:00
41de7e76ec fix: iteration output array type causes always outputting string array (#11686) 2024-12-16 15:06:03 +08:00
607a22ad12 fix: tool constant params change cause page crashed (#11682) 2024-12-16 14:33:00 +08:00
4b402c4041 fix: enhance workflow.tool_published performance (#11640)
Co-authored-by: wangbin <wangbin35@xiaomi.com>
2024-12-16 13:05:38 +08:00
daccb10d8c fix: volcengine_maas and baichuan message error (#11625)
Co-authored-by: zhongliliu <liuzlx@digitalchina.com>
2024-12-16 13:05:27 +08:00
63f1dd7877 Make max_submit_count configurable via Config (#11673) 2024-12-16 12:59:37 +08:00
79801f5c30 fix: deepseek reports an error when using Response Format #11677 (#11678)
Co-authored-by: zhaobs <zhaobs@cailian.net>
2024-12-16 12:58:03 +08:00
9c7a1bc067 fix: change http node params from dict to list tuple (#11665) 2024-12-15 21:27:39 +08:00
cf0ff88120 feat: add grok-2-1212 and grok-2-vision-1212 (#11672) 2024-12-15 21:18:24 +08:00
e0b67536e0 fix: remove the unused QueueWorkflowPartialSuccessEvent handle in workflow (#11669)
Co-authored-by: Novice Lee <novicelee@NoviPro.local>
2024-12-15 21:18:14 +08:00
94c7dcc7f1 chore: translate i18n files (#11639)
Co-authored-by: douxc <7553076+douxc@users.noreply.github.com>
2024-12-15 17:22:45 +08:00
38e155d819 feat: log add trace id (#11599)
Co-authored-by: hobo.l <hobo.l@binance.com>
2024-12-15 17:22:25 +08:00
efd5575683 fix: _handle_workflow_run_partial_success args is wrong (#11562)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2024-12-15 17:22:13 +08:00
1a7c213405 fix: ExternalDatasetService.process_external_api wrong args (#11586)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2024-12-15 17:22:03 +08:00
8e3d60c359 fix: account.id should account_id (#11628)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2024-12-15 17:18:17 +08:00
924b4fe742 test: run vdb tests on TiDB Vector with docker in CI tests (#11645) 2024-12-15 17:16:40 +08:00
7e154a467b fix: better error message for stream (#11635)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2024-12-15 17:16:04 +08:00
b90f1581be Update translate to Japanese: natural Japanese expression (#11647)
Co-authored-by: IWAI, Masaharu <iwai_masaharu@funkit.co.jp>
2024-12-15 17:15:24 +08:00
821992e21f fix: langfuse do not have created_at args and fix the typing in the file (#11648)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2024-12-15 17:13:46 +08:00
f0c0ce9db1 fix: rename README filename: Japanese language code is 'JA' (#11651) 2024-12-15 17:13:34 +08:00
8ecb9aaa91 fix: remove unnecessary curly braces in wf api doc (#11658) 2024-12-15 17:12:26 +08:00
22258fb0bf fix: filter bug for keywork cause code can not reach (#11666)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2024-12-15 17:12:06 +08:00
a725b8bb6e Feat: new entry point for app creation (#10847) 2024-12-13 17:29:09 +08:00
bdfdccd511 fix: app log filter value error (#11624) 2024-12-13 16:40:34 +08:00
194bc60429 fix: split dir for opendal tests (#11627)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2024-12-13 16:31:00 +08:00
430ca3322b chore(dependency): bump gunicorn to 23.0 (#11560)
Signed-off-by: -LAN- <laipz8200@outlook.com>
2024-12-13 16:16:58 +08:00
3d803c2e80 Fix/pdf preview in build (#11621) 2024-12-13 11:01:53 +08:00
fa3dcbb3bc feat(devcontainer): add alias to stop Docker containers (#11616) 2024-12-13 10:03:58 +08:00
ee342063d8 ci: better print version for ruff to check the change (#11587)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2024-12-12 21:44:00 +08:00
bb3bc60f83 feat(model): add vertex_ai Gemini 2.0 Flash Exp (#11604) 2024-12-12 20:20:49 +08:00
e7a4cfac4d fix: name of llama-3.3-70b-specdec (#11596) 2024-12-12 16:33:49 +08:00
6478aa1c9d Added new models and Removed the deleted ones for Groq #11455 (#11456)
Co-authored-by: crazywoola <427733928@qq.com>
Co-authored-by: Alok Shrivastwa <Alok.Shrivastwa@microland.com>
2024-12-12 14:11:30 +08:00
7b5839335a [ref] use one method to get boto client for aws bedrock (#11506) 2024-12-12 13:56:52 +08:00
a360af8687 chore: translate i18n files (#11577)
Co-authored-by: JzoNgKVO <27049666+JzoNgKVO@users.noreply.github.com>
2024-12-12 13:47:39 +08:00
36cb25b341 fix: support mdx files close #11557 (#11565)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2024-12-12 13:37:56 +08:00
Joe e565ecdaef fix: change workflow trace id (#11585) 2024-12-12 13:37:29 +08:00
f96fdc2970 Feat: dark mode for logs and annotations (#11575) 2024-12-12 10:09:48 +08:00
0d04cdc323 Lindorm vdb (#11574)
Co-authored-by: jiangzhijie <jiangzhijie.jzj@alibaba-inc.com>
2024-12-12 09:43:27 +08:00
926f604f09 feat: add gemini-2.0-flash-exp (#11570) 2024-12-12 09:33:39 +08:00
180743612c fix: better opendal tests (#11569)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2024-12-12 09:33:30 +08:00
d05f189049 Fix: RateLimit requests were not released when a streaming generation exception occurred (#11540) 2024-12-11 19:16:35 +08:00
ceaa9f1101 chore: translate i18n files (#11545)
Co-authored-by: zxhlyh <16177003+zxhlyh@users.noreply.github.com>
2024-12-11 18:04:14 +08:00
6f4cbe0bde fix: workflow continue on error doc link (#11554) 2024-12-11 18:03:41 +08:00
8d4bb9b40d feat: integrate opendal storage (#11508)
Signed-off-by: -LAN- <laipz8200@outlook.com>
2024-12-11 14:50:54 +08:00
1765fe2a29 fix: iteration node in parallel mode token count error (#11539)
Co-authored-by: Novice Lee <novicelee@NoviPro.local>
2024-12-11 14:23:01 +08:00
79a710ce98 Feat: continue on error (#11458)
Co-authored-by: Novice Lee <novicelee@NovicedeMacBook-Pro.local>
Co-authored-by: Novice Lee <novicelee@NoviPro.local>
2024-12-11 14:22:42 +08:00
bec5451f12 feat: workflow continue on error (#11474) 2024-12-11 14:21:38 +08:00
86dfdcb8ec chore: update thai lang in app page (#11541) 2024-12-11 12:08:09 +08:00
42d986b96d [Pixtral] Add new model ; add vision (#11231) 2024-12-11 10:14:16 +08:00
fbc4ca980c fix: Remove duplicate 'response_format' parameter from model YAML files (#11531)
Co-authored-by: zhangkunyuan <zhangkunyuan@cmhi.chinamobile.com>
2024-12-11 10:10:53 +08:00
80c52e0ea4 feat: Add llama-3.3 models for Groq (#11533) 2024-12-11 09:59:46 +08:00
50b76dd5a2 fix: better error message for url add external knowledge (#11537)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2024-12-11 09:55:48 +08:00
225fcd5e41 Revert "fix: total tokens is wrong which is zero in inter way, close … (#11536) 2024-12-11 09:54:46 +08:00
afffd345bc fix: can not start local by REMOTE_SETTINGS_SOURCE_NAME change it to … (#11535)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2024-12-11 09:35:25 +08:00
716576043d fix: issue 11247 that Completion mode content maybe list or str (#11504)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2024-12-10 23:22:14 +08:00
28231d39a4 Remove the processing of single quote when testing API tools. (#11390) 2024-12-10 19:53:38 +08:00
9e23c3d625 chore: LOCAL_FILE also try to use remote_url as Prompt message (#11443) 2024-12-10 10:56:49 +08:00
bdd5869244 Msg file preview (#11466)
Co-authored-by: crazywoola <427733928@qq.com>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
2024-12-10 10:53:37 +08:00
fc1415d705 chore: fix typo in Japanese localization (#11502) 2024-12-10 09:29:16 +08:00
8218f62478 chore : fix translation Typo in ko-KR localization (#11509) 2024-12-10 09:09:26 +08:00
fd354d999d fix(app_generator_service): overload type hints (#11507)
Signed-off-by: -LAN- <laipz8200@outlook.com>
2024-12-10 09:06:34 +08:00
ec00b25793 feat: add siliconflow qwq and llama3.3 model (#11492) 2024-12-10 08:49:45 +08:00
967b7d89e3 feat:add apollo configuration to load env file (#11210)
Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: huanshare <liuhuan101@longfor.com>
Co-authored-by: -LAN- <laipz8200@outlook.com>
2024-12-10 02:51:20 +08:00
32f8439143 fix: add the missing abab6.5t-chat model of Minimax (#11484) 2024-12-09 17:59:20 +08:00
0ff8bd2aa9 chore: bump version to 0.13.2 (#11489)
Signed-off-by: -LAN- <laipz8200@outlook.com>
2024-12-09 17:57:23 +08:00
2866383228 fix: cannot close notification manually (#11490) 2024-12-09 17:55:06 +08:00
00ac7edeb3 improve message clean logic (#11487) 2024-12-09 16:12:30 +08:00
537068cfde refactor(iteration_node): use Sequence and Mapping in parameters (#11483)
Signed-off-by: -LAN- <laipz8200@outlook.com>
2024-12-09 15:41:20 +08:00
c3c6a48059 Fix the token count at the iteration node (#11235)
Co-authored-by: -LAN- <laipz8200@outlook.com>
2024-12-09 15:02:04 +08:00
5c166b3f40 fix: tags could not be saved when the Workflow Tool was created (#11481)
Co-authored-by: zhaobs <zhaobs@cailian.net>
2024-12-09 14:38:02 +08:00
230fa3286b feat: add 'Open in Explore' link for each apps on studio (#11402) 2024-12-09 12:04:03 +08:00
061c0b10fd Fix the Japanese translation for 'Detail' (#11476) 2024-12-09 11:18:28 +08:00
32f8a98cf8 feat: ifelse condition variable editable after selection (#11431) 2024-12-09 11:06:47 +08:00
6c60ecb237 Refactor: Remove redundant style and simplify Mermaid component (#11472) 2024-12-09 09:47:58 +08:00
c3fae5e801 Update ext_redis.py (#11214) 2024-12-09 09:35:52 +08:00
a594e256ae remove mermail render cache (#11470)
Co-authored-by: Gimling <huangjl@ruyi.ai>
2024-12-09 09:33:18 +08:00
41d90c2408 fix(api): throw error when notion block can not find (#11433) 2024-12-09 09:10:59 +08:00
7ff42b1b7a fix: unit tests env will need clear too (#11445)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2024-12-09 09:04:11 +08:00
4d7cfd0de5 Fix model provider of vertex ai (#11437) 2024-12-08 08:44:49 +08:00
266d32bd77 fix: cannot upload animated webp image as app icon (#11453) 2024-12-08 08:37:21 +08:00
7e1184c071 feat: support json_schema for ollama models (#11449) 2024-12-08 08:36:12 +08:00
1ce51e57ab feat: add gemini exp 1206 (#11444) 2024-12-07 22:28:10 +08:00
142b4fd699 feat: add zhipu glm_4v_flash (#11440) 2024-12-07 22:27:57 +08:00
cc8feaa483 style: EmojiPicker component top padding (#11452) 2024-12-07 22:26:28 +08:00
d9d5d35a77 fix: issue #10596 by making the iteration node outputs right (#11394)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: -LAN- <laipz8200@outlook.com>
2024-12-07 16:28:15 +08:00
9277156b6c fix(document_extractor): pptx file type and missing metadata_filename UnstructuredIO (#11364)
Co-authored-by: Julian Huynh <julian.huynh@immersio.io>
2024-12-06 18:55:59 +08:00
1490a19fa1 Fix: compatible with outputs data structure (#11432) 2024-12-06 17:35:35 +08:00
9b7adcd4d9 update tidb batch get endpoint to basic mode (#11426) 2024-12-06 17:06:46 +08:00
a8d32f9964 fix external retrieval without segment id (#11423) 2024-12-06 14:45:15 +08:00
5093337de1 FEAT: cohere rerank 3.5 model added (#11289) 2024-12-06 09:58:55 +08:00
f54225568c fix(model_runtime): add vision to Amazon Nova Lite and Pro (#11398) 2024-12-06 09:15:32 +08:00
255ff446ba use md table systax in pr template (#11412) 2024-12-06 09:14:15 +08:00
9a0dc4bfdc fix: add elkjs (#11404) 2024-12-06 09:00:48 +08:00
9d975750bc fix: update DocumentIsPausedError (#11405) 2024-12-06 08:59:23 +08:00
7c979e6490 Update mermaid (#11356)
Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM>
Co-authored-by: crazywoola <427733928@qq.com>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
2024-12-05 20:43:42 +08:00
d60ca1661c chore: translate i18n files (#11389)
Co-authored-by: JzoNgKVO <27049666+JzoNgKVO@users.noreply.github.com>
2024-12-05 17:55:44 +08:00
eux bb62391a4c fix: broken link to knowledge base guide (#11387) 2024-12-05 17:47:11 +08:00
0b25c0b677 Fix: support file download in workflow result (#11338) 2024-12-05 16:58:39 +08:00
a5d6082418 chore: bump version to 0.13.1 (#11382)
Signed-off-by: -LAN- <laipz8200@outlook.com>
2024-12-05 15:11:55 +08:00
631cbcd781 [fix] rename yaml files to fit windows (#11379) 2024-12-05 14:38:12 +08:00
20c4633d2a fix: empty object (conversation variable) editable (#11352) 2024-12-05 13:59:59 +08:00
5669cac16d fix: some typos using typos (#11374)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2024-12-05 13:24:06 +08:00
6180762160 fix: bg typo in variable aggregator node (#11376) 2024-12-05 11:46:12 +08:00
376726cf90 [feat] Add AWS Bedrock rerank (#11349)
Co-authored-by: crazywoola <427733928@qq.com>
2024-12-05 11:31:43 +08:00
284bb7ac71 fix: ref attribute in markdown causes page crash (#11369)
Co-authored-by: crazywoola <427733928@qq.com>
2024-12-05 10:15:21 +08:00
eca466bdaa chore: fix typo (#11359) 2024-12-05 09:04:30 +08:00
d56abec195 Revert "Fix: iteration not in main thread pool" (#11358) 2024-12-04 21:22:22 +08:00
961e25f608 fix: better bedrock message handler close #10976 (#11317)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2024-12-04 19:46:40 +08:00
138bf698b0 chore: translate i18n files (#11353)
Co-authored-by: douxc <7553076+douxc@users.noreply.github.com>
2024-12-04 19:24:03 +08:00
e5bb4cca12 fix: Correct category of 'Workflow' used in Explore Apps. (#11351) 2024-12-04 18:19:12 +08:00
5e2cb0e3a8 feat: add base skeleton component (#11339) 2024-12-04 17:34:55 +08:00
16a65cb367 fix: cannot send message when debug with multiple model with conversa… (#11333) 2024-12-04 16:17:11 +08:00
1bae9b8ff7 update pricing for bedrock nova LLM models (#11336)
Co-authored-by: Yuanbo Li <ybalbert@amazon.com>
2024-12-04 16:16:41 +08:00
d7c1f43b49 fix tidb full-text-search vector missed (#11337) 2024-12-04 16:13:23 +08:00
f933af9f57 fix: check valid for number variable (#11334) 2024-12-04 15:46:54 +08:00
91e1ff5e30 chore: improve zhipu LLM (#11321) 2024-12-04 15:14:30 +08:00
5908e10549 integrate amazon nove llms to dify (#11324)
Co-authored-by: Yuanbo Li <ybalbert@amazon.com>
2024-12-04 15:13:08 +08:00
464e6354c5 feat: correct the prompt grammar. (#11328)
Signed-off-by: -LAN- <laipz8200@outlook.com>
2024-12-04 15:12:47 +08:00
d470e55f8c fix: http node download file always image type (#11319) 2024-12-04 12:15:26 +08:00
98a1b01b0c fix: file download in chat (#11322) 2024-12-04 11:10:56 +08:00
e240424be5 fix: number variable can not input constant type value in tool config form (#11320) 2024-12-04 10:46:03 +08:00
1cb5a12abb fix: resolve scrolling issue in workflow-log table (#11302) 2024-12-03 21:29:42 +08:00
ff2a4a6fcd Fix: model params in logs (#11298) 2024-12-03 21:17:55 +08:00
500 changed files with 14013 additions and 7566 deletions

View File

@ -7,5 +7,6 @@ echo 'alias start-api="cd /workspaces/dify/api && poetry run python -m flask run
echo 'alias start-worker="cd /workspaces/dify/api && poetry run python -m celery -A app.celery worker -P gevent -c 1 --loglevel INFO -Q dataset,generation,mail,ops_trace,app_deletion"' >> ~/.bashrc
echo 'alias start-web="cd /workspaces/dify/web && npm run dev"' >> ~/.bashrc
echo 'alias start-containers="cd /workspaces/dify/docker && docker-compose -f docker-compose.middleware.yaml -p dify up -d"' >> ~/.bashrc
echo 'alias stop-containers="cd /workspaces/dify/docker && docker-compose -f docker-compose.middleware.yaml -p dify down"' >> ~/.bashrc
source /home/vscode/.bashrc
source /home/vscode/.bashrc

View File

@ -8,16 +8,9 @@ Please include a summary of the change and which issue is fixed. Please also inc
# Screenshots
<table>
<tr>
<td>Before: </td>
<td>After: </td>
</tr>
<tr>
<td>...</td>
<td>...</td>
</tr>
</table>
| Before | After |
|--------|-------|
| ... | ... |
# Checklist

View File

@ -37,6 +37,7 @@ jobs:
- name: Ruff check
if: steps.changed-files.outputs.any_changed == 'true'
run: |
poetry run -C api ruff --version
poetry run -C api ruff check ./api
poetry run -C api ruff format --check ./api

View File

@ -51,7 +51,7 @@ jobs:
- name: Expose Service Ports
run: sh .github/workflows/expose_service_ports.sh
- name: Set up Vector Stores (Weaviate, Qdrant, PGVector, Milvus, PgVecto-RS, Chroma, MyScale, ElasticSearch, Couchbase)
- name: Set up Vector Stores (TiDB, Weaviate, Qdrant, PGVector, Milvus, PgVecto-RS, Chroma, MyScale, ElasticSearch, Couchbase)
uses: hoverkraft-tech/compose-action@v2.0.2
with:
compose-file: |
@ -67,6 +67,7 @@ jobs:
pgvector
chroma
elasticsearch
tidb
- name: Test Vector Stores
run: poetry run -C api bash dev/pytest/pytest_vdb.sh

View File

@ -56,20 +56,36 @@ DB_DATABASE=dify
# Storage configuration
# use for store upload files, private keys...
# storage type: local, s3, aliyun-oss, azure-blob, baidu-obs, google-storage, huawei-obs, oci-storage, tencent-cos, volcengine-tos, supabase
STORAGE_TYPE=local
STORAGE_LOCAL_PATH=storage
# storage type: opendal, s3, aliyun-oss, azure-blob, baidu-obs, google-storage, huawei-obs, oci-storage, tencent-cos, volcengine-tos, supabase
STORAGE_TYPE=opendal
# Apache OpenDAL storage configuration, refer to https://github.com/apache/opendal
STORAGE_OPENDAL_SCHEME=fs
# OpenDAL FS
OPENDAL_FS_ROOT=storage
# OpenDAL S3
OPENDAL_S3_ROOT=/
OPENDAL_S3_BUCKET=your-bucket-name
OPENDAL_S3_ENDPOINT=https://s3.amazonaws.com
OPENDAL_S3_ACCESS_KEY_ID=your-access-key
OPENDAL_S3_SECRET_ACCESS_KEY=your-secret-key
OPENDAL_S3_REGION=your-region
OPENDAL_S3_SERVER_SIDE_ENCRYPTION=
# S3 Storage configuration
S3_USE_AWS_MANAGED_IAM=false
S3_ENDPOINT=https://your-bucket-name.storage.s3.clooudflare.com
S3_BUCKET_NAME=your-bucket-name
S3_ACCESS_KEY=your-access-key
S3_SECRET_KEY=your-secret-key
S3_REGION=your-region
# Azure Blob Storage configuration
AZURE_BLOB_ACCOUNT_NAME=your-account-name
AZURE_BLOB_ACCOUNT_KEY=your-account-key
AZURE_BLOB_CONTAINER_NAME=yout-container-name
AZURE_BLOB_ACCOUNT_URL=https://<your_account_name>.blob.core.windows.net
# Aliyun oss Storage configuration
ALIYUN_OSS_BUCKET_NAME=your-bucket-name
ALIYUN_OSS_ACCESS_KEY=your-access-key
@ -79,6 +95,7 @@ ALIYUN_OSS_AUTH_VERSION=v1
ALIYUN_OSS_REGION=your-region
# Don't start with '/'. OSS doesn't support leading slash in object names.
ALIYUN_OSS_PATH=your-path
# Google Storage configuration
GOOGLE_STORAGE_BUCKET_NAME=yout-bucket-name
GOOGLE_STORAGE_SERVICE_ACCOUNT_JSON_BASE64=your-google-service-account-json-base64-string
@ -125,8 +142,8 @@ SUPABASE_URL=your-server-url
WEB_API_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
CONSOLE_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
# Vector database configuration, support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, pgvector, chroma, opensearch, tidb_vector, couchbase, vikingdb, upstash, lindorm, oceanbase
# Vector database configuration
# support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, pgvector, chroma, opensearch, tidb_vector, couchbase, vikingdb, upstash, lindorm, oceanbase
VECTOR_STORE=weaviate
# Weaviate configuration
@ -277,6 +294,7 @@ VIKINGDB_SOCKET_TIMEOUT=30
LINDORM_URL=http://ld-*******************-proxy-search-pub.lindorm.aliyuncs.com:30070
LINDORM_USERNAME=admin
LINDORM_PASSWORD=admin
USING_UGC_INDEX=False
# OceanBase Vector configuration
OCEANBASE_VECTOR_HOST=127.0.0.1
@ -381,6 +399,8 @@ LOG_FILE_BACKUP_COUNT=5
LOG_DATEFORMAT=%Y-%m-%d %H:%M:%S
# Log Timezone
LOG_TZ=UTC
# Log format
LOG_FORMAT=%(asctime)s,%(msecs)d %(levelname)-2s [%(filename)s:%(lineno)d] %(req_id)s %(message)s
# Indexing configuration
INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH=4000
@ -413,3 +433,5 @@ RESET_PASSWORD_TOKEN_EXPIRY_MINUTES=5
CREATE_TIDB_SERVICE_JOB_ENABLED=false
# Maximum number of submitted thread count in a ThreadPool for parallel node execution
MAX_SUBMIT_COUNT=100

View File

@ -259,7 +259,7 @@ def migrate_knowledge_vector_database():
skipped_count = 0
total_count = 0
vector_type = dify_config.VECTOR_STORE
upper_colletion_vector_types = {
upper_collection_vector_types = {
VectorType.MILVUS,
VectorType.PGVECTOR,
VectorType.RELYT,
@ -267,7 +267,7 @@ def migrate_knowledge_vector_database():
VectorType.ORACLE,
VectorType.ELASTICSEARCH,
}
lower_colletion_vector_types = {
lower_collection_vector_types = {
VectorType.ANALYTICDB,
VectorType.CHROMA,
VectorType.MYSCALE,
@ -307,7 +307,7 @@ def migrate_knowledge_vector_database():
continue
collection_name = ""
dataset_id = dataset.id
if vector_type in upper_colletion_vector_types:
if vector_type in upper_collection_vector_types:
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
elif vector_type == VectorType.QDRANT:
if dataset.collection_binding_id:
@ -323,7 +323,7 @@ def migrate_knowledge_vector_database():
else:
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
elif vector_type in lower_colletion_vector_types:
elif vector_type in lower_collection_vector_types:
collection_name = Dataset.gen_collection_name_by_id(dataset_id).lower()
else:
raise ValueError(f"Vector store {vector_type} is not supported.")

View File

@ -1,11 +1,51 @@
from pydantic_settings import SettingsConfigDict
import logging
from typing import Any
from configs.deploy import DeploymentConfig
from configs.enterprise import EnterpriseFeatureConfig
from configs.extra import ExtraServiceConfig
from configs.feature import FeatureConfig
from configs.middleware import MiddlewareConfig
from configs.packaging import PackagingInfo
from pydantic.fields import FieldInfo
from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict
from .deploy import DeploymentConfig
from .enterprise import EnterpriseFeatureConfig
from .extra import ExtraServiceConfig
from .feature import FeatureConfig
from .middleware import MiddlewareConfig
from .packaging import PackagingInfo
from .remote_settings_sources import RemoteSettingsSource, RemoteSettingsSourceConfig, RemoteSettingsSourceName
from .remote_settings_sources.apollo import ApolloSettingsSource
logger = logging.getLogger(__name__)
class RemoteSettingsSourceFactory(PydanticBaseSettingsSource):
def __init__(self, settings_cls: type[BaseSettings]):
super().__init__(settings_cls)
def get_field_value(self, field: FieldInfo, field_name: str) -> tuple[Any, str, bool]:
raise NotImplementedError
def __call__(self) -> dict[str, Any]:
current_state = self.current_state
remote_source_name = current_state.get("REMOTE_SETTINGS_SOURCE_NAME")
if not remote_source_name:
return {}
remote_source: RemoteSettingsSource | None = None
match remote_source_name:
case RemoteSettingsSourceName.APOLLO:
remote_source = ApolloSettingsSource(current_state)
case _:
logger.warning(f"Unsupported remote source: {remote_source_name}")
return {}
d: dict[str, Any] = {}
for field_name, field in self.settings_cls.model_fields.items():
field_value, field_key, value_is_complex = remote_source.get_field_value(field, field_name)
field_value = remote_source.prepare_field_value(field_name, field, field_value, value_is_complex)
if field_value is not None:
d[field_key] = field_value
return d
class DifyConfig(
@ -19,6 +59,8 @@ class DifyConfig(
MiddlewareConfig,
# Extra service configs
ExtraServiceConfig,
# Remote source configs
RemoteSettingsSourceConfig,
# Enterprise feature configs
# **Before using, please contact business@dify.ai by email to inquire about licensing matters.**
EnterpriseFeatureConfig,
@ -35,3 +77,20 @@ class DifyConfig(
# please consider to arrange it in the proper config group of existed or added
# for better readability and maintainability.
# Thanks for your concentration and consideration.
@classmethod
def settings_customise_sources(
cls,
settings_cls: type[BaseSettings],
init_settings: PydanticBaseSettingsSource,
env_settings: PydanticBaseSettingsSource,
dotenv_settings: PydanticBaseSettingsSource,
file_secret_settings: PydanticBaseSettingsSource,
) -> tuple[PydanticBaseSettingsSource, ...]:
return (
init_settings,
env_settings,
RemoteSettingsSourceFactory(settings_cls),
dotenv_settings,
file_secret_settings,
)
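The factory above resolves settings from a remote source named by REMOTE_SETTINGS_SOURCE_NAME, ordered after process environment variables but before .env, so remote values override dotenv entries only. A minimal sketch of enabling it, assuming a reachable Apollo config service at a placeholder URL; the variable names come from this changeset, the values are illustrative only.

import os

# Placeholder values; an actual Apollo config service must be reachable at the URL.
os.environ["REMOTE_SETTINGS_SOURCE_NAME"] = "apollo"           # RemoteSettingsSourceName.APOLLO
os.environ["APOLLO_CONFIG_URL"] = "http://apollo-config:8080"  # placeholder endpoint
os.environ["APOLLO_APP_ID"] = "dify"                           # placeholder app id
os.environ["APOLLO_CLUSTER"] = "default"
os.environ["APOLLO_NAMESPACE"] = "application"

# Importing the config afterwards lets RemoteSettingsSourceFactory pull values
# from Apollo; they take precedence over .env entries but not over real env vars.
from configs import dify_config

print(dify_config.CURRENT_VERSION)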

View File

@ -439,6 +439,17 @@ class WorkflowConfig(BaseSettings):
)
class WorkflowNodeExecutionConfig(BaseSettings):
"""
Configuration for workflow node execution
"""
MAX_SUBMIT_COUNT: PositiveInt = Field(
description="Maximum number of submitted thread count in a ThreadPool for parallel node execution",
default=100,
)
class AuthConfig(BaseSettings):
"""
Configuration for authentication and OAuth
@ -775,6 +786,7 @@ class FeatureConfig(
ToolConfig,
UpdateConfig,
WorkflowConfig,
WorkflowNodeExecutionConfig,
WorkspaceConfig,
LoginConfig,
# hosted services config
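WorkflowNodeExecutionConfig above only declares the MAX_SUBMIT_COUNT limit; the snippet below is a hypothetical illustration (not dify's scheduler code) of how such a cap can bound how many parallel node branches are submitted to a thread pool at once.

from concurrent.futures import ThreadPoolExecutor

MAX_SUBMIT_COUNT = 100  # mirrors the default declared in WorkflowNodeExecutionConfig

def run_parallel(nodes):
    # Refuse work beyond the configured cap instead of queueing it silently.
    if len(nodes) > MAX_SUBMIT_COUNT:
        raise ValueError(f"too many parallel branches: {len(nodes)} > {MAX_SUBMIT_COUNT}")
    if not nodes:
        return []
    with ThreadPoolExecutor(max_workers=min(len(nodes), MAX_SUBMIT_COUNT)) as pool:
        futures = [pool.submit(node) for node in nodes]
        return [future.result() for future in futures]

print(run_parallel([lambda i=i: i * i for i in range(5)]))  # [0, 1, 4, 9, 16]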

View File

@ -1,54 +1,69 @@
from typing import Any, Optional
from typing import Any, Literal, Optional
from urllib.parse import quote_plus
from pydantic import Field, NonNegativeInt, PositiveFloat, PositiveInt, computed_field
from pydantic_settings import BaseSettings
from configs.middleware.cache.redis_config import RedisConfig
from configs.middleware.storage.aliyun_oss_storage_config import AliyunOSSStorageConfig
from configs.middleware.storage.amazon_s3_storage_config import S3StorageConfig
from configs.middleware.storage.azure_blob_storage_config import AzureBlobStorageConfig
from configs.middleware.storage.baidu_obs_storage_config import BaiduOBSStorageConfig
from configs.middleware.storage.google_cloud_storage_config import GoogleCloudStorageConfig
from configs.middleware.storage.huawei_obs_storage_config import HuaweiCloudOBSStorageConfig
from configs.middleware.storage.oci_storage_config import OCIStorageConfig
from configs.middleware.storage.supabase_storage_config import SupabaseStorageConfig
from configs.middleware.storage.tencent_cos_storage_config import TencentCloudCOSStorageConfig
from configs.middleware.storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
from configs.middleware.vdb.analyticdb_config import AnalyticdbConfig
from configs.middleware.vdb.baidu_vector_config import BaiduVectorDBConfig
from configs.middleware.vdb.chroma_config import ChromaConfig
from configs.middleware.vdb.couchbase_config import CouchbaseConfig
from configs.middleware.vdb.elasticsearch_config import ElasticsearchConfig
from configs.middleware.vdb.lindorm_config import LindormConfig
from configs.middleware.vdb.milvus_config import MilvusConfig
from configs.middleware.vdb.myscale_config import MyScaleConfig
from configs.middleware.vdb.oceanbase_config import OceanBaseVectorConfig
from configs.middleware.vdb.opensearch_config import OpenSearchConfig
from configs.middleware.vdb.oracle_config import OracleConfig
from configs.middleware.vdb.pgvector_config import PGVectorConfig
from configs.middleware.vdb.pgvectors_config import PGVectoRSConfig
from configs.middleware.vdb.qdrant_config import QdrantConfig
from configs.middleware.vdb.relyt_config import RelytConfig
from configs.middleware.vdb.tencent_vector_config import TencentVectorDBConfig
from configs.middleware.vdb.tidb_on_qdrant_config import TidbOnQdrantConfig
from configs.middleware.vdb.tidb_vector_config import TiDBVectorConfig
from configs.middleware.vdb.upstash_config import UpstashConfig
from configs.middleware.vdb.vikingdb_config import VikingDBConfig
from configs.middleware.vdb.weaviate_config import WeaviateConfig
from .cache.redis_config import RedisConfig
from .storage.aliyun_oss_storage_config import AliyunOSSStorageConfig
from .storage.amazon_s3_storage_config import S3StorageConfig
from .storage.azure_blob_storage_config import AzureBlobStorageConfig
from .storage.baidu_obs_storage_config import BaiduOBSStorageConfig
from .storage.google_cloud_storage_config import GoogleCloudStorageConfig
from .storage.huawei_obs_storage_config import HuaweiCloudOBSStorageConfig
from .storage.oci_storage_config import OCIStorageConfig
from .storage.opendal_storage_config import OpenDALStorageConfig
from .storage.supabase_storage_config import SupabaseStorageConfig
from .storage.tencent_cos_storage_config import TencentCloudCOSStorageConfig
from .storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
from .vdb.analyticdb_config import AnalyticdbConfig
from .vdb.baidu_vector_config import BaiduVectorDBConfig
from .vdb.chroma_config import ChromaConfig
from .vdb.couchbase_config import CouchbaseConfig
from .vdb.elasticsearch_config import ElasticsearchConfig
from .vdb.lindorm_config import LindormConfig
from .vdb.milvus_config import MilvusConfig
from .vdb.myscale_config import MyScaleConfig
from .vdb.oceanbase_config import OceanBaseVectorConfig
from .vdb.opensearch_config import OpenSearchConfig
from .vdb.oracle_config import OracleConfig
from .vdb.pgvector_config import PGVectorConfig
from .vdb.pgvectors_config import PGVectoRSConfig
from .vdb.qdrant_config import QdrantConfig
from .vdb.relyt_config import RelytConfig
from .vdb.tencent_vector_config import TencentVectorDBConfig
from .vdb.tidb_on_qdrant_config import TidbOnQdrantConfig
from .vdb.tidb_vector_config import TiDBVectorConfig
from .vdb.upstash_config import UpstashConfig
from .vdb.vikingdb_config import VikingDBConfig
from .vdb.weaviate_config import WeaviateConfig
class StorageConfig(BaseSettings):
STORAGE_TYPE: str = Field(
STORAGE_TYPE: Literal[
"opendal",
"s3",
"aliyun-oss",
"azure-blob",
"baidu-obs",
"google-storage",
"huawei-obs",
"oci-storage",
"tencent-cos",
"volcengine-tos",
"supabase",
"local",
] = Field(
description="Type of storage to use."
" Options: 'local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', 'google-storage', 'huawei-obs', "
"'oci-storage', 'tencent-cos', 'volcengine-tos', 'supabase'. Default is 'local'.",
default="local",
" Options: 'opendal', '(deprecated) local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', 'google-storage', "
"'huawei-obs', 'oci-storage', 'tencent-cos', 'volcengine-tos', 'supabase'. Default is 'opendal'.",
default="opendal",
)
STORAGE_LOCAL_PATH: str = Field(
description="Path for local storage when STORAGE_TYPE is set to 'local'.",
default="storage",
deprecated=True,
)
@ -73,7 +88,7 @@ class KeywordStoreConfig(BaseSettings):
)
class DatabaseConfig:
class DatabaseConfig(BaseSettings):
DB_HOST: str = Field(
description="Hostname or IP address of the database server.",
default="localhost",
@ -235,6 +250,7 @@ class MiddlewareConfig(
GoogleCloudStorageConfig,
HuaweiCloudOBSStorageConfig,
OCIStorageConfig,
OpenDALStorageConfig,
S3StorageConfig,
SupabaseStorageConfig,
TencentCloudCOSStorageConfig,

View File

@ -1,9 +1,10 @@
from typing import Optional
from pydantic import BaseModel, Field
from pydantic import Field
from pydantic_settings import BaseSettings
class BaiduOBSStorageConfig(BaseModel):
class BaiduOBSStorageConfig(BaseSettings):
"""
Configuration settings for Baidu Object Storage Service (OBS)
"""

View File

@ -1,9 +1,10 @@
from typing import Optional
from pydantic import BaseModel, Field
from pydantic import Field
from pydantic_settings import BaseSettings
class HuaweiCloudOBSStorageConfig(BaseModel):
class HuaweiCloudOBSStorageConfig(BaseSettings):
"""
Configuration settings for Huawei Cloud Object Storage Service (OBS)
"""

View File

@ -0,0 +1,51 @@
from enum import StrEnum
from typing import Literal
from pydantic import Field
from pydantic_settings import BaseSettings
class OpenDALScheme(StrEnum):
FS = "fs"
S3 = "s3"
class OpenDALStorageConfig(BaseSettings):
STORAGE_OPENDAL_SCHEME: str = Field(
default=OpenDALScheme.FS.value,
description="OpenDAL scheme.",
)
# FS
OPENDAL_FS_ROOT: str = Field(
default="storage",
description="Root path for local storage.",
)
# S3
OPENDAL_S3_ROOT: str = Field(
default="/",
description="Root path for S3 storage.",
)
OPENDAL_S3_BUCKET: str = Field(
default="",
description="S3 bucket name.",
)
OPENDAL_S3_ENDPOINT: str = Field(
default="https://s3.amazonaws.com",
description="S3 endpoint URL.",
)
OPENDAL_S3_ACCESS_KEY_ID: str = Field(
default="",
description="S3 access key ID.",
)
OPENDAL_S3_SECRET_ACCESS_KEY: str = Field(
default="",
description="S3 secret access key.",
)
OPENDAL_S3_REGION: str = Field(
default="",
description="S3 region.",
)
OPENDAL_S3_SERVER_SIDE_ENCRYPTION: Literal["aws:kms", ""] = Field(
default="",
description="S3 server-side encryption.",
)
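These settings map onto Apache OpenDAL's Python binding. Below is a rough sketch, not dify's actual storage wiring, of turning the "fs" scheme settings into an operator; opendal.Operator(scheme, **kwargs) is the upstream API, and the kwarg mapping here is an assumption for illustration.

import opendal

def build_operator(scheme: str = "fs", root: str = "/tmp/dify-storage") -> opendal.Operator:
    # For the "fs" scheme OpenDAL only needs a root directory; other schemes
    # (e.g. "s3") take the corresponding credentials as keyword arguments.
    return opendal.Operator(scheme, root=root)

op = build_operator()
op.write("hello.txt", b"hello dify")   # written under /tmp/dify-storage/
print(bytes(op.read("hello.txt")))     # b'hello dify'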

View File

@ -1,9 +1,10 @@
from typing import Optional
from pydantic import BaseModel, Field
from pydantic import Field
from pydantic_settings import BaseSettings
class SupabaseStorageConfig(BaseModel):
class SupabaseStorageConfig(BaseSettings):
"""
Configuration settings for Supabase Object Storage Service
"""

View File

@ -1,9 +1,10 @@
from typing import Optional
from pydantic import BaseModel, Field
from pydantic import Field
from pydantic_settings import BaseSettings
class VolcengineTOSStorageConfig(BaseModel):
class VolcengineTOSStorageConfig(BaseSettings):
"""
Configuration settings for Volcengine Tinder Object Storage (TOS)
"""

View File

@ -1,9 +1,10 @@
from typing import Optional
from pydantic import BaseModel, Field, PositiveInt
from pydantic import Field, PositiveInt
from pydantic_settings import BaseSettings
class AnalyticdbConfig(BaseModel):
class AnalyticdbConfig(BaseSettings):
"""
Configuration for connecting to Alibaba Cloud AnalyticDB for PostgreSQL.
Refer to the following documentation for details on obtaining credentials:

View File

@ -1,9 +1,10 @@
from typing import Optional
from pydantic import BaseModel, Field
from pydantic import Field
from pydantic_settings import BaseSettings
class CouchbaseConfig(BaseModel):
class CouchbaseConfig(BaseSettings):
"""
Couchbase configs
"""

View File

@ -21,3 +21,14 @@ class LindormConfig(BaseSettings):
description="Lindorm password",
default=None,
)
DEFAULT_INDEX_TYPE: Optional[str] = Field(
description="Lindorm Vector Index Type, hnsw or flat is available in dify",
default="hnsw",
)
DEFAULT_DISTANCE_TYPE: Optional[str] = Field(
description="Vector Distance Type, support l2, cosinesimil, innerproduct", default="l2"
)
USING_UGC_INDEX: Optional[bool] = Field(
description="Using UGC index will store the same type of Index in a single index but can retrieve separately.",
default=False,
)

View File

@ -1,7 +1,8 @@
from pydantic import BaseModel, Field, PositiveInt
from pydantic import Field, PositiveInt
from pydantic_settings import BaseSettings
class MyScaleConfig(BaseModel):
class MyScaleConfig(BaseSettings):
"""
Configuration settings for MyScale vector database
"""

View File

@ -1,9 +1,10 @@
from typing import Optional
from pydantic import BaseModel, Field
from pydantic import Field
from pydantic_settings import BaseSettings
class VikingDBConfig(BaseModel):
class VikingDBConfig(BaseSettings):
"""
Configuration for connecting to Volcengine VikingDB.
Refer to the following documentation for details on obtaining credentials:

View File

@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings):
CURRENT_VERSION: str = Field(
description="Dify version",
default="0.13.0",
default="0.14.0",
)
COMMIT_SHA: str = Field(

View File

@ -0,0 +1,17 @@
from typing import Optional
from pydantic import Field
from .apollo import ApolloSettingsSourceInfo
from .base import RemoteSettingsSource
from .enums import RemoteSettingsSourceName
class RemoteSettingsSourceConfig(ApolloSettingsSourceInfo):
REMOTE_SETTINGS_SOURCE_NAME: RemoteSettingsSourceName | str = Field(
description="name of remote config source",
default="",
)
__all__ = ["RemoteSettingsSource", "RemoteSettingsSourceConfig", "RemoteSettingsSourceName"]

View File

@ -0,0 +1,55 @@
from collections.abc import Mapping
from typing import Any, Optional
from pydantic import Field
from pydantic.fields import FieldInfo
from pydantic_settings import BaseSettings
from configs.remote_settings_sources.base import RemoteSettingsSource
from .client import ApolloClient
class ApolloSettingsSourceInfo(BaseSettings):
"""
Packaging build information
"""
APOLLO_APP_ID: Optional[str] = Field(
description="apollo app_id",
default=None,
)
APOLLO_CLUSTER: Optional[str] = Field(
description="apollo cluster",
default=None,
)
APOLLO_CONFIG_URL: Optional[str] = Field(
description="apollo config url",
default=None,
)
APOLLO_NAMESPACE: Optional[str] = Field(
description="apollo namespace",
default=None,
)
class ApolloSettingsSource(RemoteSettingsSource):
def __init__(self, configs: Mapping[str, Any]):
self.client = ApolloClient(
app_id=configs["APOLLO_APP_ID"],
cluster=configs["APOLLO_CLUSTER"],
config_url=configs["APOLLO_CONFIG_URL"],
start_hot_update=False,
_notification_map={configs["APOLLO_NAMESPACE"]: -1},
)
self.namespace = configs["APOLLO_NAMESPACE"]
self.remote_configs = self.client.get_all_dicts(self.namespace)
def get_field_value(self, field: FieldInfo, field_name: str) -> tuple[Any, str, bool]:
if not isinstance(self.remote_configs, dict):
raise ValueError(f"remote configs is not dict, but {type(self.remote_configs)}")
field_value = self.remote_configs.get(field_name)
return field_value, field_name, False

View File

@ -0,0 +1,303 @@
import hashlib
import json
import logging
import os
import threading
import time
from pathlib import Path
from .python_3x import http_request, makedirs_wrapper
from .utils import (
CONFIGURATIONS,
NAMESPACE_NAME,
NOTIFICATION_ID,
get_value_from_dict,
init_ip,
no_key_cache_key,
signature,
url_encode_wrapper,
)
logger = logging.getLogger(__name__)
class ApolloClient:
def __init__(
self,
config_url,
app_id,
cluster="default",
secret="",
start_hot_update=True,
change_listener=None,
_notification_map=None,
):
# Core routing parameters
self.config_url = config_url
self.cluster = cluster
self.app_id = app_id
# Non-core parameters
self.ip = init_ip()
self.secret = secret
# Check the parameter variables
# Private control variables
self._cycle_time = 5
self._stopping = False
self._cache = {}
self._no_key = {}
self._hash = {}
self._pull_timeout = 75
self._cache_file_path = os.path.expanduser("~") + "/.dify/config/remote-settings/apollo/cache/"
self._long_poll_thread = None
self._change_listener = change_listener # "add" "delete" "update"
if _notification_map is None:
_notification_map = {"application": -1}
self._notification_map = _notification_map
self.last_release_key = None
# Private startup method
self._path_checker()
if start_hot_update:
self._start_hot_update()
# start the heartbeat thread
heartbeat = threading.Thread(target=self._heart_beat)
heartbeat.daemon = True
heartbeat.start()
def get_json_from_net(self, namespace="application"):
url = "{}/configs/{}/{}/{}?releaseKey={}&ip={}".format(
self.config_url, self.app_id, self.cluster, namespace, "", self.ip
)
try:
code, body = http_request(url, timeout=3, headers=self._sign_headers(url))
if code == 200:
if not body:
logger.error(f"get_json_from_net load configs failed, body is {body}")
return None
data = json.loads(body)
data = data["configurations"]
return_data = {CONFIGURATIONS: data}
return return_data
else:
return None
except Exception:
logger.exception("an error occurred in get_json_from_net")
return None
def get_value(self, key, default_val=None, namespace="application"):
try:
# read memory configuration
namespace_cache = self._cache.get(namespace)
val = get_value_from_dict(namespace_cache, key)
if val is not None:
return val
no_key = no_key_cache_key(namespace, key)
if no_key in self._no_key:
return default_val
# read the network configuration
namespace_data = self.get_json_from_net(namespace)
val = get_value_from_dict(namespace_data, key)
if val is not None:
self._update_cache_and_file(namespace_data, namespace)
return val
# read the file configuration
namespace_cache = self._get_local_cache(namespace)
val = get_value_from_dict(namespace_cache, key)
if val is not None:
self._update_cache_and_file(namespace_cache, namespace)
return val
# If all of them are not obtained, the default value is returned
# and the local cache is set to None
self._set_local_cache_none(namespace, key)
return default_val
except Exception:
logger.exception("get_value has error, [key is %s], [namespace is %s]", key, namespace)
return default_val
# Set the key of a namespace to none, and do not set default val
# to ensure the real-time correctness of the function call.
# If the user does not have the same default val twice
# and the default val is used here, there may be a problem.
def _set_local_cache_none(self, namespace, key):
no_key = no_key_cache_key(namespace, key)
self._no_key[no_key] = key
def _start_hot_update(self):
self._long_poll_thread = threading.Thread(target=self._listener)
# When the asynchronous thread is started, the daemon thread will automatically exit
# when the main thread is launched.
self._long_poll_thread.daemon = True
self._long_poll_thread.start()
def stop(self):
self._stopping = True
logger.info("Stopping listener...")
# Call the set callback function, and if it is abnormal, try it out
def _call_listener(self, namespace, old_kv, new_kv):
if self._change_listener is None:
return
if old_kv is None:
old_kv = {}
if new_kv is None:
new_kv = {}
try:
for key in old_kv:
new_value = new_kv.get(key)
old_value = old_kv.get(key)
if new_value is None:
# If newValue is empty, it means key, and the value is deleted.
self._change_listener("delete", namespace, key, old_value)
continue
if new_value != old_value:
self._change_listener("update", namespace, key, new_value)
continue
for key in new_kv:
new_value = new_kv.get(key)
old_value = old_kv.get(key)
if old_value is None:
self._change_listener("add", namespace, key, new_value)
except BaseException as e:
logger.warning(str(e))
def _path_checker(self):
if not os.path.isdir(self._cache_file_path):
makedirs_wrapper(self._cache_file_path)
# update the local cache and file cache
def _update_cache_and_file(self, namespace_data, namespace="application"):
# update the local cache
self._cache[namespace] = namespace_data
# update the file cache
new_string = json.dumps(namespace_data)
new_hash = hashlib.md5(new_string.encode("utf-8")).hexdigest()
if self._hash.get(namespace) == new_hash:
pass
else:
file_path = Path(self._cache_file_path) / f"{self.app_id}_configuration_{namespace}.txt"
file_path.write_text(new_string)
self._hash[namespace] = new_hash
# get the configuration from the local file
def _get_local_cache(self, namespace="application"):
cache_file_path = os.path.join(self._cache_file_path, f"{self.app_id}_configuration_{namespace}.txt")
if os.path.isfile(cache_file_path):
with open(cache_file_path) as f:
result = json.loads(f.readline())
return result
return {}
def _long_poll(self):
notifications = []
for key in self._cache:
namespace_data = self._cache[key]
notification_id = -1
if NOTIFICATION_ID in namespace_data:
notification_id = self._cache[key][NOTIFICATION_ID]
notifications.append({NAMESPACE_NAME: key, NOTIFICATION_ID: notification_id})
try:
# if the length is 0 it is returned directly
if len(notifications) == 0:
return
url = "{}/notifications/v2".format(self.config_url)
params = {
"appId": self.app_id,
"cluster": self.cluster,
"notifications": json.dumps(notifications, ensure_ascii=False),
}
param_str = url_encode_wrapper(params)
url = url + "?" + param_str
code, body = http_request(url, self._pull_timeout, headers=self._sign_headers(url))
http_code = code
if http_code == 304:
logger.debug("No change, loop...")
return
if http_code == 200:
if not body:
logger.error(f"_long_poll load configs failed,body is {body}")
return
data = json.loads(body)
for entry in data:
namespace = entry[NAMESPACE_NAME]
n_id = entry[NOTIFICATION_ID]
logger.info("%s has changes: notificationId=%d", namespace, n_id)
self._get_net_and_set_local(namespace, n_id, call_change=True)
return
else:
logger.warning("Sleep...")
except Exception as e:
logger.warning(str(e))
def _get_net_and_set_local(self, namespace, n_id, call_change=False):
namespace_data = self.get_json_from_net(namespace)
if not namespace_data:
return
namespace_data[NOTIFICATION_ID] = n_id
old_namespace = self._cache.get(namespace)
self._update_cache_and_file(namespace_data, namespace)
if self._change_listener is not None and call_change and old_namespace:
old_kv = old_namespace.get(CONFIGURATIONS)
new_kv = namespace_data.get(CONFIGURATIONS)
self._call_listener(namespace, old_kv, new_kv)
def _listener(self):
logger.info("start long_poll")
while not self._stopping:
self._long_poll()
time.sleep(self._cycle_time)
logger.info("stopped, long_poll")
# add the need for endorsement to the header
def _sign_headers(self, url):
headers = {}
if self.secret == "":
return headers
uri = url[len(self.config_url) : len(url)]
time_unix_now = str(int(round(time.time() * 1000)))
headers["Authorization"] = "Apollo " + self.app_id + ":" + signature(time_unix_now, uri, self.secret)
headers["Timestamp"] = time_unix_now
return headers
def _heart_beat(self):
while not self._stopping:
for namespace in self._notification_map:
self._do_heart_beat(namespace)
time.sleep(60 * 10)  # 10 minutes
def _do_heart_beat(self, namespace):
url = "{}/configs/{}/{}/{}?ip={}".format(self.config_url, self.app_id, self.cluster, namespace, self.ip)
try:
code, body = http_request(url, timeout=3, headers=self._sign_headers(url))
if code == 200:
if not body:
logger.error(f"_do_heart_beat load configs failed,body is {body}")
return None
data = json.loads(body)
if self.last_release_key == data["releaseKey"]:
return None
self.last_release_key = data["releaseKey"]
data = data["configurations"]
self._update_cache_and_file(data, namespace)
else:
return None
except Exception:
logger.exception("an error occurred in _do_heart_beat")
return None
def get_all_dicts(self, namespace):
namespace_data = self._cache.get(namespace)
if namespace_data is None:
net_namespace_data = self.get_json_from_net(namespace)
if not net_namespace_data:
return namespace_data
namespace_data = net_namespace_data.get(CONFIGURATIONS)
if namespace_data:
self._update_cache_and_file(namespace_data, namespace)
return namespace_data
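A minimal usage sketch of the ApolloClient defined above; the module path is inferred from the imports in this changeset, the endpoint and app id are placeholders, and without a reachable Apollo service the calls only log errors and fall back to defaults.

from configs.remote_settings_sources.apollo.client import ApolloClient

client = ApolloClient(
    config_url="http://apollo-config:8080",  # placeholder Apollo config service
    app_id="dify",                           # placeholder app id
    cluster="default",
    start_hot_update=False,                  # skip the long-poll listener thread
    _notification_map={"application": -1},
)

# get_value falls back from the in-memory cache to the network, then the file cache.
log_level = client.get_value("LOG_LEVEL", default_val="INFO", namespace="application")
all_settings = client.get_all_dicts("application") or {}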

View File

@ -0,0 +1,41 @@
import logging
import os
import ssl
import urllib.request
from urllib import parse
from urllib.error import HTTPError
# Create an SSL context that allows for a lower level of security
ssl_context = ssl.create_default_context()
ssl_context.set_ciphers("HIGH:!DH:!aNULL")
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE
# Create an opener object and pass in a custom SSL context
opener = urllib.request.build_opener(urllib.request.HTTPSHandler(context=ssl_context))
urllib.request.install_opener(opener)
logger = logging.getLogger(__name__)
def http_request(url, timeout, headers={}):
try:
request = urllib.request.Request(url, headers=headers)
res = urllib.request.urlopen(request, timeout=timeout)
body = res.read().decode("utf-8")
return res.code, body
except HTTPError as e:
if e.code == 304:
logger.warning("http_request error,code is 304, maybe you should check secret")
return 304, None
logger.warning("http_request error,code is %d, msg is %s", e.code, e.msg)
raise e
def url_encode(params):
return parse.urlencode(params)
def makedirs_wrapper(path):
os.makedirs(path, exist_ok=True)

View File

@ -0,0 +1,51 @@
import hashlib
import socket
from .python_3x import url_encode
# define constants
CONFIGURATIONS = "configurations"
NOTIFICATION_ID = "notificationId"
NAMESPACE_NAME = "namespaceName"
# add timestamps uris and keys
def signature(timestamp, uri, secret):
import base64
import hmac
string_to_sign = "" + timestamp + "\n" + uri
hmac_code = hmac.new(secret.encode(), string_to_sign.encode(), hashlib.sha1).digest()
return base64.b64encode(hmac_code).decode()
def url_encode_wrapper(params):
return url_encode(params)
def no_key_cache_key(namespace, key):
return "{}{}{}".format(namespace, len(namespace), key)
# Returns whether the obtained value is obtained, and None if it does not
def get_value_from_dict(namespace_cache, key):
if namespace_cache:
kv_data = namespace_cache.get(CONFIGURATIONS)
if kv_data is None:
return None
if key in kv_data:
return kv_data[key]
return None
def init_ip():
ip = ""
s = None
try:
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
s.connect(("8.8.8.8", 53))
ip = s.getsockname()[0]
finally:
if s:
s.close()
return ip

View File

@ -0,0 +1,15 @@
from collections.abc import Mapping
from typing import Any
from pydantic.fields import FieldInfo
class RemoteSettingsSource:
def __init__(self, configs: Mapping[str, Any]):
pass
def get_field_value(self, field: FieldInfo, field_name: str) -> tuple[Any, str, bool]:
raise NotImplementedError
def prepare_field_value(self, field_name: str, field: FieldInfo, value: Any, value_is_complex: bool) -> Any:
return value

View File

@ -0,0 +1,5 @@
from enum import StrEnum
class RemoteSettingsSourceName(StrEnum):
APOLLO = "apollo"

View File

@ -14,11 +14,11 @@ AUDIO_EXTENSIONS.extend([ext.upper() for ext in AUDIO_EXTENSIONS])
if dify_config.ETL_TYPE == "Unstructured":
DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "pdf", "html", "htm", "xlsx", "xls"]
DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "mdx", "pdf", "html", "htm", "xlsx", "xls"]
DOCUMENT_EXTENSIONS.extend(("docx", "csv", "eml", "msg", "pptx", "xml", "epub"))
if dify_config.UNSTRUCTURED_API_URL:
DOCUMENT_EXTENSIONS.append("ppt")
DOCUMENT_EXTENSIONS.extend([ext.upper() for ext in DOCUMENT_EXTENSIONS])
else:
DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "pdf", "html", "htm", "xlsx", "xls", "docx", "csv"]
DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "mdx", "pdf", "html", "htm", "xlsx", "xls", "docx", "csv"]
DOCUMENT_EXTENSIONS.extend([ext.upper() for ext in DOCUMENT_EXTENSIONS])

View File

@ -1,5 +1,6 @@
from datetime import UTC, datetime
from flask import request
from flask_login import current_user
from flask_restful import Resource, inputs, marshal_with, reqparse
from sqlalchemy import and_
@ -20,8 +21,17 @@ class InstalledAppsListApi(Resource):
@account_initialization_required
@marshal_with(installed_app_list_fields)
def get(self):
app_id = request.args.get("app_id", default=None, type=str)
current_tenant_id = current_user.current_tenant_id
installed_apps = db.session.query(InstalledApp).filter(InstalledApp.tenant_id == current_tenant_id).all()
if app_id:
installed_apps = (
db.session.query(InstalledApp)
.filter(and_(InstalledApp.tenant_id == current_tenant_id, InstalledApp.app_id == app_id))
.all()
)
else:
installed_apps = db.session.query(InstalledApp).filter(InstalledApp.tenant_id == current_tenant_id).all()
current_user.role = TenantService.get_user_role(current_user, current_user.current_tenant)
installed_apps = [

View File

@ -368,6 +368,7 @@ class ToolWorkflowProviderCreateApi(Resource):
description=args["description"],
parameters=args["parameters"],
privacy_policy=args["privacy_policy"],
labels=args["labels"],
)

View File

@ -2,7 +2,7 @@
Due to the presence of tasks in App Runner that require long execution times, such as LLM generation and external requests, Flask-Sqlalchemy's strategy for database connection pooling is to allocate one connection (transaction) per request. This approach keeps a connection occupied even during non-DB tasks, leading to the inability to acquire new connections during high concurrency requests due to multiple long-running tasks.
Therefore, the database operations in App Runner and Task Pipeline must ensure connections are closed immediately after use, and it's better to pass IDs rather than Model objects to avoid deattach errors.
Therefore, the database operations in App Runner and Task Pipeline must ensure connections are closed immediately after use, and it's better to pass IDs rather than Model objects to avoid detach errors.
Examples:
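The README's actual examples sit outside this hunk; the snippet below is only an illustrative sketch of the pattern described above (import paths assumed from the api codebase layout): query, copy what you need, close the connection before long-running work, and hand plain IDs to other threads rather than detached model objects.

from extensions.ext_database import db
from models.model import App

def load_app_name(app_id: str) -> str:
    app = db.session.query(App).filter(App.id == app_id).first()
    name = app.name if app else ""
    db.session.close()  # release the connection before any long-running task
    return name

# Downstream tasks receive app_id (a plain str), never the detached App instance.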

View File

@ -3,7 +3,7 @@ import logging
import threading
import uuid
from collections.abc import Generator, Mapping
from typing import Any, Optional, Union
from typing import Any, Literal, Optional, Union, overload
from flask import Flask, current_app
from pydantic import ValidationError
@ -36,6 +36,29 @@ logger = logging.getLogger(__name__)
class AdvancedChatAppGenerator(MessageBasedAppGenerator):
_dialogue_count: int
@overload
def generate(
self,
app_model: App,
workflow: Workflow,
user: Union[Account, EndUser],
args: Mapping[str, Any],
invoke_from: InvokeFrom,
streaming: Literal[True],
) -> Generator[str, None, None]: ...
@overload
def generate(
self,
app_model: App,
workflow: Workflow,
user: Union[Account, EndUser],
args: Mapping[str, Any],
invoke_from: InvokeFrom,
streaming: Literal[False],
) -> Mapping[str, Any]: ...
@overload
def generate(
self,
app_model: App,
@ -44,7 +67,17 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
args: Mapping[str, Any],
invoke_from: InvokeFrom,
streaming: bool = True,
) -> Mapping[str, Any] | Generator[str, None, None]:
) -> Union[Mapping[str, Any], Generator[str, None, None]]: ...
def generate(
self,
app_model: App,
workflow: Workflow,
user: Union[Account, EndUser],
args: Mapping[str, Any],
invoke_from: InvokeFrom,
streaming: bool = True,
):
"""
Generate App response.
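The Literal[True] / Literal[False] overloads added here let a type checker narrow generate()'s return type from the value of the streaming flag instead of reporting a union. A self-contained illustration of the same pattern (not the generator class itself):

from collections.abc import Generator, Mapping
from typing import Any, Literal, Union, overload

@overload
def generate(streaming: Literal[True]) -> Generator[str, None, None]: ...
@overload
def generate(streaming: Literal[False]) -> Mapping[str, Any]: ...
@overload
def generate(streaming: bool) -> Union[Mapping[str, Any], Generator[str, None, None]]: ...
def generate(streaming: bool = True):
    if streaming:
        return (chunk for chunk in ("hel", "lo"))
    return {"answer": "hello"}

chunks = generate(streaming=True)    # typed as Generator[str, None, None]
print("".join(chunks))
answer = generate(streaming=False)   # typed as Mapping[str, Any]
print(answer["answer"])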

View File

@ -19,6 +19,7 @@ from core.app.entities.queue_entities import (
QueueIterationNextEvent,
QueueIterationStartEvent,
QueueMessageReplaceEvent,
QueueNodeExceptionEvent,
QueueNodeFailedEvent,
QueueNodeInIterationFailedEvent,
QueueNodeStartedEvent,
@ -31,6 +32,7 @@ from core.app.entities.queue_entities import (
QueueStopEvent,
QueueTextChunkEvent,
QueueWorkflowFailedEvent,
QueueWorkflowPartialSuccessEvent,
QueueWorkflowStartedEvent,
QueueWorkflowSucceededEvent,
)
@ -127,7 +129,6 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
self._conversation_name_generate_thread = None
self._recorded_files: list[Mapping[str, Any]] = []
self.total_tokens: int = 0
def process(self):
"""
@ -318,7 +319,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
if response:
yield response
elif isinstance(event, QueueNodeFailedEvent | QueueNodeInIterationFailedEvent):
elif isinstance(event, QueueNodeFailedEvent | QueueNodeInIterationFailedEvent | QueueNodeExceptionEvent):
workflow_node_execution = self._handle_workflow_node_execution_failed(event)
response = self._workflow_node_finish_to_stream_response(
@ -361,8 +362,6 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
if not workflow_run:
raise Exception("Workflow run not initialized.")
# FIXME for issue #11221 quick fix maybe have a better solution
self.total_tokens += event.metadata.get("total_tokens", 0) if event.metadata else 0
yield self._workflow_iteration_completed_to_stream_response(
task_id=self._application_generate_entity.task_id, workflow_run=workflow_run, event=event
)
@ -376,7 +375,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
workflow_run = self._handle_workflow_run_success(
workflow_run=workflow_run,
start_at=graph_runtime_state.start_at,
total_tokens=graph_runtime_state.total_tokens or self.total_tokens,
total_tokens=graph_runtime_state.total_tokens,
total_steps=graph_runtime_state.node_run_steps,
outputs=event.outputs,
conversation_id=self._conversation.id,
@ -387,6 +386,29 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
task_id=self._application_generate_entity.task_id, workflow_run=workflow_run
)
self._queue_manager.publish(QueueAdvancedChatMessageEndEvent(), PublishFrom.TASK_PIPELINE)
elif isinstance(event, QueueWorkflowPartialSuccessEvent):
if not workflow_run:
raise Exception("Workflow run not initialized.")
if not graph_runtime_state:
raise Exception("Graph runtime state not initialized.")
workflow_run = self._handle_workflow_run_partial_success(
workflow_run=workflow_run,
start_at=graph_runtime_state.start_at,
total_tokens=graph_runtime_state.total_tokens,
total_steps=graph_runtime_state.node_run_steps,
outputs=event.outputs,
exceptions_count=event.exceptions_count,
conversation_id=None,
trace_manager=trace_manager,
)
yield self._workflow_finish_to_stream_response(
task_id=self._application_generate_entity.task_id, workflow_run=workflow_run
)
self._queue_manager.publish(QueueAdvancedChatMessageEndEvent(), PublishFrom.TASK_PIPELINE)
elif isinstance(event, QueueWorkflowFailedEvent):
if not workflow_run:
@ -404,6 +426,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
error=event.error,
conversation_id=self._conversation.id,
trace_manager=trace_manager,
exceptions_count=event.exceptions_count,
)
yield self._workflow_finish_to_stream_response(

View File

@ -2,7 +2,7 @@ import logging
import threading
import uuid
from collections.abc import Generator, Mapping
from typing import Any, Union
from typing import Any, Literal, Union, overload
from flask import Flask, current_app
from pydantic import ValidationError
@ -28,6 +28,39 @@ logger = logging.getLogger(__name__)
class AgentChatAppGenerator(MessageBasedAppGenerator):
@overload
def generate(
self,
*,
app_model: App,
user: Union[Account, EndUser],
args: Mapping[str, Any],
invoke_from: InvokeFrom,
streaming: Literal[True],
) -> Generator[str, None, None]: ...
@overload
def generate(
self,
*,
app_model: App,
user: Union[Account, EndUser],
args: Mapping[str, Any],
invoke_from: InvokeFrom,
streaming: Literal[False],
) -> Mapping[str, Any]: ...
@overload
def generate(
self,
*,
app_model: App,
user: Union[Account, EndUser],
args: Mapping[str, Any],
invoke_from: InvokeFrom,
streaming: bool,
) -> Mapping[str, Any] | Generator[str, None, None]: ...
def generate(
self,
*,
@ -36,7 +69,7 @@ class AgentChatAppGenerator(MessageBasedAppGenerator):
args: Mapping[str, Any],
invoke_from: InvokeFrom,
streaming: bool = True,
) -> Mapping[str, Any] | Generator[str, None, None]:
):
"""
Generate App response.

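The `@overload` stubs added across the generators let a type checker derive the return type from the `streaming` flag: `Literal[True]` maps to a chunk generator, `Literal[False]` to a blocking mapping, and a plain `bool` to the union. A minimal, self-contained sketch of the pattern (hypothetical class, not the actual Dify generators):

```python
from collections.abc import Generator, Mapping
from typing import Any, Literal, Union, overload


class ExampleGenerator:
    @overload
    def generate(self, *, streaming: Literal[True]) -> Generator[str, None, None]: ...
    @overload
    def generate(self, *, streaming: Literal[False]) -> Mapping[str, Any]: ...
    @overload
    def generate(self, *, streaming: bool) -> Union[Mapping[str, Any], Generator[str, None, None]]: ...
    def generate(self, *, streaming: bool = True):
        # the implementation carries no return annotation on purpose;
        # callers resolve against the matching overload instead
        if streaming:
            return (chunk for chunk in ("a", "b"))
        return {"answer": "ab"}


gen = ExampleGenerator()
chunks = gen.generate(streaming=True)     # typed as Generator[str, None, None]
blocking = gen.generate(streaming=False)  # typed as Mapping[str, Any]
```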
View File

@ -82,7 +82,7 @@ class AppGenerateResponseConverter(ABC):
for resource in metadata["retriever_resources"]:
updated_resources.append(
{
"segment_id": resource["segment_id"],
"segment_id": resource.get("segment_id", ""),
"position": resource["position"],
"document_name": resource["document_name"],
"score": resource["score"],

View File

@ -1,7 +1,7 @@
import logging
import threading
import uuid
from collections.abc import Generator
from collections.abc import Generator, Mapping
from typing import Any, Literal, Union, overload
from flask import Flask, current_app
@ -34,9 +34,9 @@ class ChatAppGenerator(MessageBasedAppGenerator):
self,
app_model: App,
user: Union[Account, EndUser],
args: Any,
args: Mapping[str, Any],
invoke_from: InvokeFrom,
stream: Literal[True] = True,
streaming: Literal[True],
) -> Generator[str, None, None]: ...
@overload
@ -44,19 +44,29 @@ class ChatAppGenerator(MessageBasedAppGenerator):
self,
app_model: App,
user: Union[Account, EndUser],
args: Any,
args: Mapping[str, Any],
invoke_from: InvokeFrom,
stream: Literal[False] = False,
) -> dict: ...
streaming: Literal[False],
) -> Mapping[str, Any]: ...
@overload
def generate(
self,
app_model: App,
user: Union[Account, EndUser],
args: Mapping[str, Any],
invoke_from: InvokeFrom,
streaming: bool,
) -> Union[Mapping[str, Any], Generator[str, None, None]]: ...
def generate(
self,
app_model: App,
user: Union[Account, EndUser],
args: Any,
args: Mapping[str, Any],
invoke_from: InvokeFrom,
streaming: bool = True,
) -> Union[dict, Generator[str, None, None]]:
):
"""
Generate App response.

View File

@ -1,7 +1,7 @@
import logging
import threading
import uuid
from collections.abc import Generator
from collections.abc import Generator, Mapping
from typing import Any, Literal, Union, overload
from flask import Flask, current_app
@ -34,9 +34,9 @@ class CompletionAppGenerator(MessageBasedAppGenerator):
self,
app_model: App,
user: Union[Account, EndUser],
args: dict,
args: Mapping[str, Any],
invoke_from: InvokeFrom,
stream: Literal[True] = True,
streaming: Literal[True],
) -> Generator[str, None, None]: ...
@overload
@ -44,14 +44,29 @@ class CompletionAppGenerator(MessageBasedAppGenerator):
self,
app_model: App,
user: Union[Account, EndUser],
args: dict,
args: Mapping[str, Any],
invoke_from: InvokeFrom,
stream: Literal[False] = False,
) -> dict: ...
streaming: Literal[False],
) -> Mapping[str, Any]: ...
@overload
def generate(
self,
app_model: App,
user: Union[Account, EndUser],
args: Mapping[str, Any],
invoke_from: InvokeFrom,
streaming: bool,
) -> Mapping[str, Any] | Generator[str, None, None]: ...
def generate(
self, app_model: App, user: Union[Account, EndUser], args: Any, invoke_from: InvokeFrom, streaming: bool = True
) -> Union[dict, Generator[str, None, None]]:
self,
app_model: App,
user: Union[Account, EndUser],
args: Mapping[str, Any],
invoke_from: InvokeFrom,
streaming: bool = True,
):
"""
Generate App response.

View File

@ -3,7 +3,7 @@ import logging
import threading
import uuid
from collections.abc import Generator, Mapping, Sequence
from typing import Any, Optional, Union
from typing import Any, Literal, Optional, Union, overload
from flask import Flask, current_app
from pydantic import ValidationError
@ -30,6 +30,35 @@ logger = logging.getLogger(__name__)
class WorkflowAppGenerator(BaseAppGenerator):
@overload
def generate(
self,
*,
app_model: App,
workflow: Workflow,
user: Account | EndUser,
args: Mapping[str, Any],
invoke_from: InvokeFrom,
streaming: Literal[True],
call_depth: int = 0,
workflow_thread_pool_id: Optional[str] = None,
) -> Generator[str, None, None]: ...
@overload
def generate(
self,
*,
app_model: App,
workflow: Workflow,
user: Account | EndUser,
args: Mapping[str, Any],
invoke_from: InvokeFrom,
streaming: Literal[False],
call_depth: int = 0,
workflow_thread_pool_id: Optional[str] = None,
) -> Mapping[str, Any]: ...
@overload
def generate(
self,
*,
@ -41,7 +70,20 @@ class WorkflowAppGenerator(BaseAppGenerator):
streaming: bool = True,
call_depth: int = 0,
workflow_thread_pool_id: Optional[str] = None,
) -> Mapping[str, Any] | Generator[str, None, None]:
) -> Mapping[str, Any] | Generator[str, None, None]: ...
def generate(
self,
*,
app_model: App,
workflow: Workflow,
user: Account | EndUser,
args: Mapping[str, Any],
invoke_from: InvokeFrom,
streaming: bool = True,
call_depth: int = 0,
workflow_thread_pool_id: Optional[str] = None,
):
files: Sequence[Mapping[str, Any]] = args.get("files") or []
# parse files

View File

@ -6,6 +6,7 @@ from core.app.entities.queue_entities import (
QueueMessageEndEvent,
QueueStopEvent,
QueueWorkflowFailedEvent,
QueueWorkflowPartialSuccessEvent,
QueueWorkflowSucceededEvent,
WorkflowQueueMessage,
)
@ -34,7 +35,8 @@ class WorkflowAppQueueManager(AppQueueManager):
| QueueErrorEvent
| QueueMessageEndEvent
| QueueWorkflowSucceededEvent
| QueueWorkflowFailedEvent,
| QueueWorkflowFailedEvent
| QueueWorkflowPartialSuccessEvent,
):
self.stop_listen()

View File

@ -15,6 +15,7 @@ from core.app.entities.queue_entities import (
QueueIterationCompletedEvent,
QueueIterationNextEvent,
QueueIterationStartEvent,
QueueNodeExceptionEvent,
QueueNodeFailedEvent,
QueueNodeInIterationFailedEvent,
QueueNodeStartedEvent,
@ -26,6 +27,7 @@ from core.app.entities.queue_entities import (
QueueStopEvent,
QueueTextChunkEvent,
QueueWorkflowFailedEvent,
QueueWorkflowPartialSuccessEvent,
QueueWorkflowStartedEvent,
QueueWorkflowSucceededEvent,
)
@ -106,7 +108,6 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
self._task_state = WorkflowTaskState()
self._wip_workflow_node_executions = {}
self.total_tokens: int = 0
def process(self) -> Union[WorkflowAppBlockingResponse, Generator[WorkflowAppStreamResponse, None, None]]:
"""
@ -258,36 +259,36 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
workflow_node_execution = self._handle_node_execution_start(workflow_run=workflow_run, event=event)
response = self._workflow_node_start_to_stream_response(
node_start_response = self._workflow_node_start_to_stream_response(
event=event,
task_id=self._application_generate_entity.task_id,
workflow_node_execution=workflow_node_execution,
)
if response:
yield response
if node_start_response:
yield node_start_response
elif isinstance(event, QueueNodeSucceededEvent):
workflow_node_execution = self._handle_workflow_node_execution_success(event)
response = self._workflow_node_finish_to_stream_response(
node_success_response = self._workflow_node_finish_to_stream_response(
event=event,
task_id=self._application_generate_entity.task_id,
workflow_node_execution=workflow_node_execution,
)
if response:
yield response
elif isinstance(event, QueueNodeFailedEvent | QueueNodeInIterationFailedEvent):
if node_success_response:
yield node_success_response
elif isinstance(event, QueueNodeFailedEvent | QueueNodeInIterationFailedEvent | QueueNodeExceptionEvent):
workflow_node_execution = self._handle_workflow_node_execution_failed(event)
response = self._workflow_node_finish_to_stream_response(
node_failed_response = self._workflow_node_finish_to_stream_response(
event=event,
task_id=self._application_generate_entity.task_id,
workflow_node_execution=workflow_node_execution,
)
if response:
yield response
if node_failed_response:
yield node_failed_response
elif isinstance(event, QueueParallelBranchRunStartedEvent):
if not workflow_run:
raise Exception("Workflow run not initialized.")
@ -320,8 +321,6 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
if not workflow_run:
raise Exception("Workflow run not initialized.")
# FIXME for issue #11221 quick fix maybe have a better solution
self.total_tokens += event.metadata.get("total_tokens", 0) if event.metadata else 0
yield self._workflow_iteration_completed_to_stream_response(
task_id=self._application_generate_entity.task_id, workflow_run=workflow_run, event=event
)
@ -335,7 +334,7 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
workflow_run = self._handle_workflow_run_success(
workflow_run=workflow_run,
start_at=graph_runtime_state.start_at,
total_tokens=graph_runtime_state.total_tokens or self.total_tokens,
total_tokens=graph_runtime_state.total_tokens,
total_steps=graph_runtime_state.node_run_steps,
outputs=event.outputs,
conversation_id=None,
@ -345,6 +344,30 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
# save workflow app log
self._save_workflow_app_log(workflow_run)
yield self._workflow_finish_to_stream_response(
task_id=self._application_generate_entity.task_id, workflow_run=workflow_run
)
elif isinstance(event, QueueWorkflowPartialSuccessEvent):
if not workflow_run:
raise Exception("Workflow run not initialized.")
if not graph_runtime_state:
raise Exception("Graph runtime state not initialized.")
workflow_run = self._handle_workflow_run_partial_success(
workflow_run=workflow_run,
start_at=graph_runtime_state.start_at,
total_tokens=graph_runtime_state.total_tokens,
total_steps=graph_runtime_state.node_run_steps,
outputs=event.outputs,
exceptions_count=event.exceptions_count,
conversation_id=None,
trace_manager=trace_manager,
)
# save workflow app log
self._save_workflow_app_log(workflow_run)
yield self._workflow_finish_to_stream_response(
task_id=self._application_generate_entity.task_id, workflow_run=workflow_run
)
@ -354,7 +377,6 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
if not graph_runtime_state:
raise Exception("Graph runtime state not initialized.")
workflow_run = self._handle_workflow_run_failed(
workflow_run=workflow_run,
start_at=graph_runtime_state.start_at,
@ -366,6 +388,7 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
error=event.error if isinstance(event, QueueWorkflowFailedEvent) else event.get_stop_reason(),
conversation_id=None,
trace_manager=trace_manager,
exceptions_count=event.exceptions_count if isinstance(event, QueueWorkflowFailedEvent) else 0,
)
# save workflow app log

View File

@ -8,6 +8,7 @@ from core.app.entities.queue_entities import (
QueueIterationCompletedEvent,
QueueIterationNextEvent,
QueueIterationStartEvent,
QueueNodeExceptionEvent,
QueueNodeFailedEvent,
QueueNodeInIterationFailedEvent,
QueueNodeStartedEvent,
@ -18,6 +19,7 @@ from core.app.entities.queue_entities import (
QueueRetrieverResourcesEvent,
QueueTextChunkEvent,
QueueWorkflowFailedEvent,
QueueWorkflowPartialSuccessEvent,
QueueWorkflowStartedEvent,
QueueWorkflowSucceededEvent,
)
@ -25,6 +27,7 @@ from core.workflow.entities.variable_pool import VariablePool
from core.workflow.graph_engine.entities.event import (
GraphEngineEvent,
GraphRunFailedEvent,
GraphRunPartialSucceededEvent,
GraphRunStartedEvent,
GraphRunSucceededEvent,
IterationRunFailedEvent,
@ -32,6 +35,7 @@ from core.workflow.graph_engine.entities.event import (
IterationRunStartedEvent,
IterationRunSucceededEvent,
NodeInIterationFailedEvent,
NodeRunExceptionEvent,
NodeRunFailedEvent,
NodeRunRetrieverResourceEvent,
NodeRunStartedEvent,
@ -176,8 +180,12 @@ class WorkflowBasedAppRunner(AppRunner):
)
elif isinstance(event, GraphRunSucceededEvent):
self._publish_event(QueueWorkflowSucceededEvent(outputs=event.outputs))
elif isinstance(event, GraphRunPartialSucceededEvent):
self._publish_event(
QueueWorkflowPartialSuccessEvent(outputs=event.outputs, exceptions_count=event.exceptions_count)
)
elif isinstance(event, GraphRunFailedEvent):
self._publish_event(QueueWorkflowFailedEvent(error=event.error))
self._publish_event(QueueWorkflowFailedEvent(error=event.error, exceptions_count=event.exceptions_count))
elif isinstance(event, NodeRunStartedEvent):
self._publish_event(
QueueNodeStartedEvent(
@ -253,6 +261,36 @@ class WorkflowBasedAppRunner(AppRunner):
in_iteration_id=event.in_iteration_id,
)
)
elif isinstance(event, NodeRunExceptionEvent):
self._publish_event(
QueueNodeExceptionEvent(
node_execution_id=event.id,
node_id=event.node_id,
node_type=event.node_type,
node_data=event.node_data,
parallel_id=event.parallel_id,
parallel_start_node_id=event.parallel_start_node_id,
parent_parallel_id=event.parent_parallel_id,
parent_parallel_start_node_id=event.parent_parallel_start_node_id,
start_at=event.route_node_state.start_at,
inputs=event.route_node_state.node_run_result.inputs
if event.route_node_state.node_run_result
else {},
process_data=event.route_node_state.node_run_result.process_data
if event.route_node_state.node_run_result
else {},
outputs=event.route_node_state.node_run_result.outputs
if event.route_node_state.node_run_result
else {},
error=event.route_node_state.node_run_result.error
if event.route_node_state.node_run_result and event.route_node_state.node_run_result.error
else "Unknown error",
execution_metadata=event.route_node_state.node_run_result.metadata
if event.route_node_state.node_run_result
else {},
in_iteration_id=event.in_iteration_id,
)
)
elif isinstance(event, NodeInIterationFailedEvent):
self._publish_event(
QueueNodeInIterationFailedEvent(

View File

@ -2,7 +2,7 @@ from datetime import datetime
from enum import Enum, StrEnum
from typing import Any, Optional
from pydantic import BaseModel, field_validator
from pydantic import BaseModel
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk
from core.workflow.entities.node_entities import NodeRunMetadataKey
@ -25,12 +25,14 @@ class QueueEvent(StrEnum):
WORKFLOW_STARTED = "workflow_started"
WORKFLOW_SUCCEEDED = "workflow_succeeded"
WORKFLOW_FAILED = "workflow_failed"
WORKFLOW_PARTIAL_SUCCEEDED = "workflow_partial_succeeded"
ITERATION_START = "iteration_start"
ITERATION_NEXT = "iteration_next"
ITERATION_COMPLETED = "iteration_completed"
NODE_STARTED = "node_started"
NODE_SUCCEEDED = "node_succeeded"
NODE_FAILED = "node_failed"
NODE_EXCEPTION = "node_exception"
RETRIEVER_RESOURCES = "retriever_resources"
ANNOTATION_REPLY = "annotation_reply"
AGENT_THOUGHT = "agent_thought"
@ -113,18 +115,6 @@ class QueueIterationNextEvent(AppQueueEvent):
output: Optional[Any] = None # output for the current iteration
duration: Optional[float] = None
@field_validator("output", mode="before")
@classmethod
def set_output(cls, v):
"""
Set output
"""
if v is None:
return None
if isinstance(v, int | float | str | bool | dict | list):
return v
raise ValueError("output must be a valid type")
class QueueIterationCompletedEvent(AppQueueEvent):
"""
@ -249,6 +239,17 @@ class QueueWorkflowFailedEvent(AppQueueEvent):
event: QueueEvent = QueueEvent.WORKFLOW_FAILED
error: str
exceptions_count: int
class QueueWorkflowPartialSuccessEvent(AppQueueEvent):
"""
QueueWorkflowPartialSuccessEvent entity
"""
event: QueueEvent = QueueEvent.WORKFLOW_PARTIAL_SUCCEEDED
exceptions_count: int
outputs: Optional[dict[str, Any]] = None
class QueueNodeStartedEvent(AppQueueEvent):
@ -343,6 +344,37 @@ class QueueNodeInIterationFailedEvent(AppQueueEvent):
error: str
class QueueNodeExceptionEvent(AppQueueEvent):
"""
QueueNodeExceptionEvent entity
"""
event: QueueEvent = QueueEvent.NODE_EXCEPTION
node_execution_id: str
node_id: str
node_type: NodeType
node_data: BaseNodeData
parallel_id: Optional[str] = None
"""parallel id if node is in parallel"""
parallel_start_node_id: Optional[str] = None
"""parallel start node id if node is in parallel"""
parent_parallel_id: Optional[str] = None
"""parent parallel id if node is in parallel"""
parent_parallel_start_node_id: Optional[str] = None
"""parent parallel start node id if node is in parallel"""
in_iteration_id: Optional[str] = None
"""iteration id if node is in iteration"""
start_at: datetime
inputs: Optional[dict[str, Any]] = None
process_data: Optional[dict[str, Any]] = None
outputs: Optional[dict[str, Any]] = None
execution_metadata: Optional[dict[NodeRunMetadataKey, Any]] = None
error: str
class QueueNodeFailedEvent(AppQueueEvent):
"""
QueueNodeFailedEvent entity

View File

@ -213,6 +213,7 @@ class WorkflowFinishStreamResponse(StreamResponse):
created_by: Optional[dict] = None
created_at: int
finished_at: int
exceptions_count: Optional[int] = 0
files: Optional[Sequence[Mapping[str, Any]]] = []
event: StreamEvent = StreamEvent.WORKFLOW_FINISHED

View File

@ -110,7 +110,7 @@ class RateLimitGenerator:
raise StopIteration
try:
return next(self.generator)
except StopIteration:
except Exception:
self.close()
raise

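Catching `Exception` instead of only `StopIteration` means the rate-limit slot is released whenever the wrapped generator dies, not just when it finishes cleanly. A simplified sketch of the same guard (names here are stand-ins, not the real `RateLimitGenerator` API):

```python
class GuardedGenerator:
    """Simplified stand-in: always release the slot on any error from the inner generator."""

    def __init__(self, generator, on_close):
        self.generator = generator
        self.on_close = on_close  # hypothetical callback, e.g. returning a rate-limit token
        self.closed = False

    def __iter__(self):
        return self

    def __next__(self):
        if self.closed:
            raise StopIteration
        try:
            return next(self.generator)
        except Exception:  # covers StopIteration *and* runtime errors
            self.close()
            raise

    def close(self):
        if not self.closed:
            self.closed = True
            self.on_close()


def failing():
    yield "ok"
    raise RuntimeError("upstream error")


g = GuardedGenerator(failing(), on_close=lambda: print("slot released"))
print(next(g))      # "ok"
try:
    next(g)         # raises RuntimeError, but the slot is released first
except RuntimeError:
    pass
```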
View File

@ -12,6 +12,7 @@ from core.app.entities.queue_entities import (
QueueIterationCompletedEvent,
QueueIterationNextEvent,
QueueIterationStartEvent,
QueueNodeExceptionEvent,
QueueNodeFailedEvent,
QueueNodeInIterationFailedEvent,
QueueNodeStartedEvent,
@ -164,6 +165,55 @@ class WorkflowCycleManage:
return workflow_run
def _handle_workflow_run_partial_success(
self,
workflow_run: WorkflowRun,
start_at: float,
total_tokens: int,
total_steps: int,
outputs: Mapping[str, Any] | None = None,
exceptions_count: int = 0,
conversation_id: Optional[str] = None,
trace_manager: Optional[TraceQueueManager] = None,
) -> WorkflowRun:
"""
Workflow run partial success
:param workflow_run: workflow run
:param start_at: start time
:param total_tokens: total tokens
:param total_steps: total steps
:param outputs: outputs
:param exceptions_count: number of node exceptions tolerated during the run
:param conversation_id: conversation id
:return:
"""
workflow_run = self._refetch_workflow_run(workflow_run.id)
outputs = WorkflowEntry.handle_special_values(outputs)
workflow_run.status = WorkflowRunStatus.PARTIAL_SUCCESSED.value
workflow_run.outputs = json.dumps(outputs or {})
workflow_run.elapsed_time = time.perf_counter() - start_at
workflow_run.total_tokens = total_tokens
workflow_run.total_steps = total_steps
workflow_run.finished_at = datetime.now(UTC).replace(tzinfo=None)
workflow_run.exceptions_count = exceptions_count
db.session.commit()
db.session.refresh(workflow_run)
if trace_manager:
trace_manager.add_trace_task(
TraceTask(
TraceTaskName.WORKFLOW_TRACE,
workflow_run=workflow_run,
conversation_id=conversation_id,
user_id=trace_manager.user_id,
)
)
db.session.close()
return workflow_run
def _handle_workflow_run_failed(
self,
workflow_run: WorkflowRun,
@ -174,6 +224,7 @@ class WorkflowCycleManage:
error: str,
conversation_id: Optional[str] = None,
trace_manager: Optional[TraceQueueManager] = None,
exceptions_count: int = 0,
) -> WorkflowRun:
"""
Workflow run failed
@ -193,7 +244,7 @@ class WorkflowCycleManage:
workflow_run.total_tokens = total_tokens
workflow_run.total_steps = total_steps
workflow_run.finished_at = datetime.now(UTC).replace(tzinfo=None)
workflow_run.exceptions_count = exceptions_count
db.session.commit()
running_workflow_node_executions = (
@ -318,7 +369,7 @@ class WorkflowCycleManage:
return workflow_node_execution
def _handle_workflow_node_execution_failed(
self, event: QueueNodeFailedEvent | QueueNodeInIterationFailedEvent
self, event: QueueNodeFailedEvent | QueueNodeInIterationFailedEvent | QueueNodeExceptionEvent
) -> WorkflowNodeExecution:
"""
Workflow node execution failed
@ -337,7 +388,11 @@ class WorkflowCycleManage:
)
db.session.query(WorkflowNodeExecution).filter(WorkflowNodeExecution.id == workflow_node_execution.id).update(
{
WorkflowNodeExecution.status: WorkflowNodeExecutionStatus.FAILED.value,
WorkflowNodeExecution.status: (
WorkflowNodeExecutionStatus.FAILED.value
if not isinstance(event, QueueNodeExceptionEvent)
else WorkflowNodeExecutionStatus.EXCEPTION.value
),
WorkflowNodeExecution.error: event.error,
WorkflowNodeExecution.inputs: json.dumps(inputs) if inputs else None,
WorkflowNodeExecution.process_data: json.dumps(process_data) if process_data else None,
@ -351,8 +406,11 @@ class WorkflowCycleManage:
db.session.commit()
db.session.close()
process_data = WorkflowEntry.handle_special_values(event.process_data)
workflow_node_execution.status = WorkflowNodeExecutionStatus.FAILED.value
workflow_node_execution.status = (
WorkflowNodeExecutionStatus.FAILED.value
if not isinstance(event, QueueNodeExceptionEvent)
else WorkflowNodeExecutionStatus.EXCEPTION.value
)
workflow_node_execution.error = event.error
workflow_node_execution.inputs = json.dumps(inputs) if inputs else None
workflow_node_execution.process_data = json.dumps(process_data) if process_data else None
@ -433,6 +491,7 @@ class WorkflowCycleManage:
created_at=int(workflow_run.created_at.timestamp()),
finished_at=int(workflow_run.finished_at.timestamp()),
files=self._fetch_files_from_node_outputs(workflow_run.outputs_dict),
exceptions_count=workflow_run.exceptions_count,
),
)
@ -483,7 +542,10 @@ class WorkflowCycleManage:
def _workflow_node_finish_to_stream_response(
self,
event: QueueNodeSucceededEvent | QueueNodeFailedEvent | QueueNodeInIterationFailedEvent,
event: QueueNodeSucceededEvent
| QueueNodeFailedEvent
| QueueNodeInIterationFailedEvent
| QueueNodeExceptionEvent,
task_id: str,
workflow_node_execution: WorkflowNodeExecution,
) -> Optional[NodeFinishStreamResponse]:

View File

@ -141,7 +141,7 @@ def _to_url(f: File, /):
elif f.transfer_method == FileTransferMethod.LOCAL_FILE:
if f.related_id is None:
raise ValueError("Missing file related_id")
return helpers.get_signed_file_url(upload_file_id=f.related_id)
return f.remote_url or helpers.get_signed_file_url(upload_file_id=f.related_id)
elif f.transfer_method == FileTransferMethod.TOOL_FILE:
# add sign url
if f.related_id is None or f.extension is None:

View File

@ -24,6 +24,12 @@ BACKOFF_FACTOR = 0.5
STATUS_FORCELIST = [429, 500, 502, 503, 504]
class MaxRetriesExceededError(Exception):
"""Raised when the maximum number of retries is exceeded."""
pass
def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
if "allow_redirects" in kwargs:
allow_redirects = kwargs.pop("allow_redirects")
@ -64,7 +70,7 @@ def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
if retries <= max_retries:
time.sleep(BACKOFF_FACTOR * (2 ** (retries - 1)))
raise Exception(f"Reached maximum retries ({max_retries}) for URL {url}")
raise MaxRetriesExceededError(f"Reached maximum retries ({max_retries}) for URL {url}")
def get(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):

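The retry loop above sleeps `BACKOFF_FACTOR * 2 ** (retries - 1)` seconds between attempts (0.5 s, 1 s, 2 s, ...) and now raises the dedicated `MaxRetriesExceededError` instead of a bare `Exception`. A condensed sketch of that control flow, with `do_request` standing in for the real SSRF-proxied HTTP call:

```python
import time

BACKOFF_FACTOR = 0.5
SSRF_DEFAULT_MAX_RETRIES = 3


class MaxRetriesExceededError(Exception):
    """Raised when the maximum number of retries is exceeded."""


def request_with_backoff(do_request, max_retries=SSRF_DEFAULT_MAX_RETRIES):
    # do_request is a hypothetical callable standing in for the proxied HTTP request
    retries = 0
    while retries <= max_retries:
        try:
            return do_request()
        except ConnectionError:
            retries += 1
            if retries <= max_retries:
                # exponential backoff: 0.5s, 1s, 2s, ... between attempts
                time.sleep(BACKOFF_FACTOR * (2 ** (retries - 1)))
    raise MaxRetriesExceededError(f"Reached maximum retries ({max_retries})")
```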
View File

@ -91,7 +91,7 @@ class XinferenceProvider(Provider):
"""
```
也可以直接抛出对应Erros并做如下定义这样在之后的调用中可以直接抛出`InvokeConnectionError`等异常。
也可以直接抛出对应 Errors并做如下定义这样在之后的调用中可以直接抛出`InvokeConnectionError`等异常。
```python
@property

View File

@ -10,6 +10,7 @@ from core.model_runtime.entities.llm_entities import (
from core.model_runtime.entities.message_entities import (
AssistantPromptMessage,
PromptMessage,
PromptMessageContentType,
PromptMessageTool,
SystemPromptMessage,
ToolPromptMessage,
@ -105,7 +106,11 @@ class BaichuanLanguageModel(LargeLanguageModel):
if isinstance(message.content, str):
message_dict = {"role": "user", "content": message.content}
else:
raise ValueError("User message content must be str")
for message_content in message.content:
if message_content.type == PromptMessageContentType.TEXT:
message_dict = {"role": "user", "content": message_content.data}
elif message_content.type == PromptMessageContentType.IMAGE:
raise ValueError("Content object type does not support image_url")
elif isinstance(message, AssistantPromptMessage):
message = cast(AssistantPromptMessage, message)
message_dict = {"role": "assistant", "content": message.content}

View File

@ -16,6 +16,7 @@ help:
supported_model_types:
- llm
- text-embedding
- rerank
configurate_methods:
- predefined-model
provider_credential_schema:

View File

@ -0,0 +1,21 @@
import boto3
from botocore.config import Config
def get_bedrock_client(service_name, credentials=None):
client_config = Config(region_name=credentials["aws_region"])
aws_access_key_id = credentials["aws_access_key_id"]
aws_secret_access_key = credentials["aws_secret_access_key"]
if aws_access_key_id and aws_secret_access_key:
# use the access key / secret key pair to call Bedrock
client = boto3.client(
service_name=service_name,
config=client_config,
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
)
else:
# no key pair supplied: fall back to the ambient IAM role
client = boto3.client(service_name=service_name, config=client_config)
return client

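The new helper keeps Bedrock client construction in one place: explicit key pairs are passed through, otherwise boto3 falls back to the ambient IAM role. A hedged usage sketch (credential values are placeholders):

```python
# hypothetical credentials dict in the shape the Bedrock provider stores
credentials = {
    "aws_region": "us-east-1",
    "aws_access_key_id": "",       # leave empty to use instance/role credentials
    "aws_secret_access_key": "",
}

runtime_client = get_bedrock_client("bedrock-runtime", credentials)
rerank_client = get_bedrock_client("bedrock-agent-runtime", credentials)
```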
View File

@ -0,0 +1,53 @@
model: amazon.nova-lite-v1:0
label:
en_US: Nova Lite V1
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 300000
parameter_rules:
- name: max_new_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 5000
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# note: the AWS docs are wrong here; the max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.00006'
output: '0.00024'
unit: '0.001'
currency: USD

View File

@ -0,0 +1,52 @@
model: amazon.nova-micro-v1:0
label:
en_US: Nova Micro V1
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: max_new_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 5000
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# note: the AWS docs are wrong here; the max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.000035'
output: '0.00014'
unit: '0.001'
currency: USD

View File

@ -0,0 +1,53 @@
model: amazon.nova-pro-v1:0
label:
en_US: Nova Pro V1
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 300000
parameter_rules:
- name: max_new_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 5000
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# note: the AWS docs are wrong here; the max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.0008'
output: '0.0032'
unit: '0.001'
currency: USD

View File

@ -40,6 +40,7 @@ from core.model_runtime.errors.invoke import (
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from core.model_runtime.model_providers.bedrock.get_bedrock_client import get_bedrock_client
logger = logging.getLogger(__name__)
ANTHROPIC_BLOCK_MODE_PROMPT = """You should always follow the instructions and output a valid {{block}} object.
@ -70,6 +71,8 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
{"prefix": "cohere.command-r", "support_system_prompts": True, "support_tool_use": True},
{"prefix": "amazon.titan", "support_system_prompts": False, "support_tool_use": False},
{"prefix": "ai21.jamba-1-5", "support_system_prompts": True, "support_tool_use": False},
{"prefix": "amazon.nova", "support_system_prompts": True, "support_tool_use": False},
{"prefix": "us.amazon.nova", "support_system_prompts": True, "support_tool_use": False},
]
@staticmethod
@ -171,13 +174,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
:param stream: is stream response
:return: full response or stream response chunk generator result
"""
bedrock_client = boto3.client(
service_name="bedrock-runtime",
aws_access_key_id=credentials.get("aws_access_key_id"),
aws_secret_access_key=credentials.get("aws_secret_access_key"),
region_name=credentials["aws_region"],
)
bedrock_client = get_bedrock_client("bedrock-runtime", credentials)
system, prompt_message_dicts = self._convert_converse_prompt_messages(prompt_messages)
inference_config, additional_model_fields = self._convert_converse_api_model_parameters(model_parameters, stop)
@ -194,6 +191,13 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
if model_info["support_tool_use"] and tools:
parameters["toolConfig"] = self._convert_converse_tool_config(tools=tools)
try:
# for issue #10976
conversations_list = parameters["messages"]
# if two consecutive messages share the same role, merge them into one message
for i in range(len(conversations_list) - 2, -1, -1):
if conversations_list[i]["role"] == conversations_list[i + 1]["role"]:
conversations_list[i]["content"].extend(conversations_list.pop(i + 1)["content"])
if stream:
response = bedrock_client.converse_stream(**parameters)
return self._handle_converse_stream_response(

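Bedrock's Converse API rejects two consecutive messages with the same role, so the loop above walks the list backwards and folds same-role neighbours into a single message. The same loop on sample data:

```python
conversations_list = [
    {"role": "user", "content": [{"text": "first question"}]},
    {"role": "user", "content": [{"text": "follow-up in the same turn"}]},
    {"role": "assistant", "content": [{"text": "answer"}]},
]

# walk backwards so pops do not disturb the indices still to be visited
for i in range(len(conversations_list) - 2, -1, -1):
    if conversations_list[i]["role"] == conversations_list[i + 1]["role"]:
        conversations_list[i]["content"].extend(conversations_list.pop(i + 1)["content"])

# two messages remain: one "user" entry holding both text blocks, then the "assistant" reply
```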
View File

@ -0,0 +1,53 @@
model: us.amazon.nova-lite-v1:0
label:
en_US: Nova Lite V1 (US.Cross Region Inference)
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 300000
parameter_rules:
- name: max_new_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 5000
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# note: the AWS docs are wrong here; the max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.00006'
output: '0.00024'
unit: '0.001'
currency: USD

View File

@ -0,0 +1,52 @@
model: us.amazon.nova-micro-v1:0
label:
en_US: Nova Micro V1 (US.Cross Region Inference)
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: max_new_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 5000
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# note: the AWS docs are wrong here; the max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.000035'
output: '0.00014'
unit: '0.001'
currency: USD

View File

@ -0,0 +1,53 @@
model: us.amazon.nova-pro-v1:0
label:
en_US: Nova Pro V1 (US.Cross Region Inference)
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 300000
parameter_rules:
- name: max_new_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 5000
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# note: the AWS docs are wrong here; the max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.0008'
output: '0.0032'
unit: '0.001'
currency: USD

View File

@ -0,0 +1,2 @@
- amazon.rerank-v1
- cohere.rerank-v3-5

View File

@ -0,0 +1,4 @@
model: amazon.rerank-v1:0
model_type: rerank
model_properties:
context_size: 5120

View File

@ -0,0 +1,4 @@
model: cohere.rerank-v3-5:0
model_type: rerank
model_properties:
context_size: 5120

View File

@ -0,0 +1,139 @@
from typing import Optional
from core.model_runtime.entities.rerank_entities import RerankDocument, RerankResult
from core.model_runtime.errors.invoke import (
InvokeAuthorizationError,
InvokeBadRequestError,
InvokeConnectionError,
InvokeError,
InvokeRateLimitError,
InvokeServerUnavailableError,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.rerank_model import RerankModel
from core.model_runtime.model_providers.bedrock.get_bedrock_client import get_bedrock_client
class BedrockRerankModel(RerankModel):
"""
Model class for Bedrock rerank models.
"""
def _invoke(
self,
model: str,
credentials: dict,
query: str,
docs: list[str],
score_threshold: Optional[float] = None,
top_n: Optional[int] = None,
user: Optional[str] = None,
) -> RerankResult:
"""
Invoke rerank model
:param model: model name
:param credentials: model credentials
:param query: search query
:param docs: docs for reranking
:param score_threshold: score threshold
:param top_n: top n
:param user: unique user id
:return: rerank result
"""
if len(docs) == 0:
return RerankResult(model=model, docs=docs)
# initialize client
bedrock_runtime = get_bedrock_client("bedrock-agent-runtime", credentials)
queries = [{"type": "TEXT", "textQuery": {"text": query}}]
text_sources = []
for text in docs:
text_sources.append(
{
"type": "INLINE",
"inlineDocumentSource": {
"type": "TEXT",
"textDocument": {
"text": text,
},
},
}
)
modelId = model
region = credentials["aws_region"]
model_package_arn = f"arn:aws:bedrock:{region}::foundation-model/{modelId}"
rerankingConfiguration = {
"type": "BEDROCK_RERANKING_MODEL",
"bedrockRerankingConfiguration": {
"numberOfResults": top_n,
"modelConfiguration": {
"modelArn": model_package_arn,
},
},
}
response = bedrock_runtime.rerank(
queries=queries, sources=text_sources, rerankingConfiguration=rerankingConfiguration
)
rerank_documents = []
for idx, result in enumerate(response["results"]):
# format document
index = result["index"]
rerank_document = RerankDocument(
index=index,
text=docs[index],
score=result["relevanceScore"],
)
# score threshold check
if score_threshold is not None:
if rerank_document.score >= score_threshold:
rerank_documents.append(rerank_document)
else:
rerank_documents.append(rerank_document)
return RerankResult(model=model, docs=rerank_documents)
def validate_credentials(self, model: str, credentials: dict) -> None:
"""
Validate model credentials
:param model: model name
:param credentials: model credentials
:return:
"""
try:
self.invoke(
model=model,
credentials=credentials,
query="What is the capital of the United States?",
docs=[
"Carson City is the capital city of the American state of Nevada. At the 2010 United States "
"Census, Carson City had a population of 55,274.",
"The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that "
"are a political division controlled by the United States. Its capital is Saipan.",
],
score_threshold=0.8,
)
except Exception as ex:
raise CredentialsValidateFailedError(str(ex))
@property
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
"""
Map model invoke error to unified error
The key is the error type thrown to the caller.
The value is the error type thrown by the model,
which needs to be converted into a unified error type for the caller.
:return: Invoke error mapping
"""
return {
InvokeConnectionError: [],
InvokeServerUnavailableError: [],
InvokeRateLimitError: [],
InvokeAuthorizationError: [],
InvokeBadRequestError: [],
}

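The threshold branch above keeps a document only when its `relevanceScore` reaches `score_threshold`; without a threshold every result is kept in the order the service returns them. A standalone sketch of just that filtering step (the response shape is inferred from the code above):

```python
# assumed shape of the bedrock-agent-runtime rerank response, as used above
response = {
    "results": [
        {"index": 0, "relevanceScore": 0.91},
        {"index": 1, "relevanceScore": 0.42},
    ]
}
docs = [
    "Carson City is the capital of Nevada.",
    "Saipan is the capital of the Northern Mariana Islands.",
]
score_threshold = 0.8

kept = []
for result in response["results"]:
    index = result["index"]
    if score_threshold is None or result["relevanceScore"] >= score_threshold:
        kept.append({"index": index, "text": docs[index], "score": result["relevanceScore"]})

# kept -> only the Carson City document survives the 0.8 threshold
```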
View File

@ -3,8 +3,6 @@ import logging
import time
from typing import Optional
import boto3
from botocore.config import Config
from botocore.exceptions import (
ClientError,
EndpointConnectionError,
@ -25,6 +23,7 @@ from core.model_runtime.errors.invoke import (
InvokeServerUnavailableError,
)
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
from core.model_runtime.model_providers.bedrock.get_bedrock_client import get_bedrock_client
logger = logging.getLogger(__name__)
@ -48,14 +47,7 @@ class BedrockTextEmbeddingModel(TextEmbeddingModel):
:param input_type: input type
:return: embeddings result
"""
client_config = Config(region_name=credentials["aws_region"])
bedrock_runtime = boto3.client(
service_name="bedrock-runtime",
config=client_config,
aws_access_key_id=credentials.get("aws_access_key_id"),
aws_secret_access_key=credentials.get("aws_secret_access_key"),
)
bedrock_runtime = get_bedrock_client("bedrock-runtime", credentials)
embeddings = []
token_usage = 0

View File

@ -2,3 +2,4 @@
- rerank-english-v3.0
- rerank-multilingual-v2.0
- rerank-multilingual-v3.0
- rerank-v3.5

View File

@ -0,0 +1,4 @@
model: rerank-v3.5
model_type: rerank
model_properties:
context_size: 5120

View File

@ -24,6 +24,9 @@ class DeepseekLargeLanguageModel(OAIAPICompatLargeLanguageModel):
user: Optional[str] = None,
) -> Union[LLMResult, Generator]:
self._add_custom_parameters(credentials)
# {"response_format": "xx"} needs to be converted to {"response_format": {"type": "xx"}}
if "response_format" in model_parameters:
model_parameters["response_format"] = {"type": model_parameters.get("response_format")}
return super()._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream)
def validate_credentials(self, model: str, credentials: dict) -> None:

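DeepSeek's OpenAI-compatible endpoint expects `response_format` as an object such as `{"type": "json_object"}`, while the parameter rule stores a bare string, hence the wrapping above. In isolation:

```python
model_parameters = {"temperature": 0.7, "response_format": "json_object"}

# wrap the bare string into the object form the OpenAI-compatible endpoint expects
if "response_format" in model_parameters:
    model_parameters["response_format"] = {"type": model_parameters.get("response_format")}

# model_parameters -> {"temperature": 0.7, "response_format": {"type": "json_object"}}
```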
View File

@ -0,0 +1,39 @@
model: gemini-2.0-flash-exp
label:
en_US: Gemini 2.0 Flash Exp
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,38 @@
model: gemini-exp-1206
label:
en_US: Gemini exp 1206
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 2097152
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -1,4 +1,5 @@
- llama-3.1-405b-reasoning
- llama-3.3-70b-versatile
- llama-3.1-70b-versatile
- llama-3.1-8b-instant
- llama3-70b-8192

View File

@ -0,0 +1,25 @@
model: gemma-7b-it
label:
zh_Hans: Gemma 7B Instruction Tuned
en_US: Gemma 7B Instruction Tuned
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,25 @@
model: gemma2-9b-it
label:
zh_Hans: Gemma 2 9B Instruction Tuned
en_US: Gemma 2 9B Instruction Tuned
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@ -1,7 +1,8 @@
model: llama-3.1-70b-versatile
deprecated: true
label:
zh_Hans: Llama-3.1-70b-versatile
en_US: Llama-3.1-70b-versatile
zh_Hans: Llama-3.1-70b-versatile (DEPRECATED)
en_US: Llama-3.1-70b-versatile (DEPRECATED)
model_type: llm
features:
- agent-thought

View File

@ -1,4 +1,5 @@
model: llama-3.2-11b-text-preview
deprecated: true
label:
zh_Hans: Llama 3.2 11B Text (Preview)
en_US: Llama 3.2 11B Text (Preview)

View File

@ -1,4 +1,5 @@
model: llama-3.2-90b-text-preview
deprecated: true
label:
zh_Hans: Llama 3.2 90B Text (Preview)
en_US: Llama 3.2 90B Text (Preview)

View File

@ -0,0 +1,25 @@
model: llama-3.3-70b-specdec
label:
zh_Hans: Llama 3.3 70B Specdec
en_US: Llama 3.3 70B Specdec
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 32768
pricing:
input: "0.05"
output: "0.1"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,25 @@
model: llama-3.3-70b-versatile
label:
zh_Hans: Llama 3.3 70B Versatile
en_US: Llama 3.3 70B Versatile
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 32768
pricing:
input: "0.05"
output: "0.1"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,25 @@
model: llama3-groq-70b-8192-tool-use-preview
label:
zh_Hans: Llama3-groq-70b-8192-tool-use (PREVIEW)
en_US: Llama3-groq-70b-8192-tool-use (PREVIEW)
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
pricing:
input: '0.05'
output: '0.08'
unit: '0.000001'
currency: USD

View File

@ -35,6 +35,7 @@ from core.model_runtime.model_providers.minimax.llm.types import MinimaxMessage
class MinimaxLargeLanguageModel(LargeLanguageModel):
model_apis = {
"abab7-chat-preview": MinimaxChatCompletionPro,
"abab6.5t-chat": MinimaxChatCompletionPro,
"abab6.5s-chat": MinimaxChatCompletionPro,
"abab6.5-chat": MinimaxChatCompletionPro,
"abab6-chat": MinimaxChatCompletionPro,

View File

@ -1,3 +1,5 @@
- pixtral-large-latest
- pixtral-large-2411
- pixtral-12b-2409
- codestral-latest
- mistral-embed

View File

@ -5,6 +5,7 @@ label:
model_type: llm
features:
- agent-thought
- vision
model_properties:
mode: chat
context_size: 128000
@ -21,7 +22,7 @@ parameter_rules:
max: 1
- name: max_tokens
use_template: max_tokens
default: 1024
default: 8192
min: 1
max: 8192
- name: safe_prompt

View File

@ -0,0 +1,52 @@
model: pixtral-large-2411
label:
zh_Hans: pixtral-large-2411
en_US: pixtral-large-2411
model_type: llm
features:
- agent-thought
- vision
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.7
min: 0
max: 1
- name: top_p
use_template: top_p
default: 1
min: 0
max: 1
- name: max_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: safe_prompt
default: false
type: boolean
help:
en_US: Whether to inject a safety prompt before all conversations.
zh_Hans: 是否开启提示词审查
label:
en_US: SafePrompt
zh_Hans: 提示词审查
- name: random_seed
type: int
help:
en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
label:
en_US: RandomSeed
zh_Hans: 随机数种子
default: 0
min: 0
max: 2147483647
pricing:
input: '0.008'
output: '0.024'
unit: '0.001'
currency: USD

View File

@ -0,0 +1,52 @@
model: pixtral-large-latest
label:
zh_Hans: pixtral-large-latest
en_US: pixtral-large-latest
model_type: llm
features:
- agent-thought
- vision
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.7
min: 0
max: 1
- name: top_p
use_template: top_p
default: 1
min: 0
max: 1
- name: max_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: safe_prompt
default: false
type: boolean
help:
en_US: Whether to inject a safety prompt before all conversations.
zh_Hans: 是否开启提示词审查
label:
en_US: SafePrompt
zh_Hans: 提示词审查
- name: random_seed
type: int
help:
en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
label:
en_US: RandomSeed
zh_Hans: 随机数种子
default: 0
min: 0
max: 2147483647
pricing:
input: '0.008'
output: '0.024'
unit: '0.001'
currency: USD

View File

@ -181,9 +181,11 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
# prepare the payload for a simple ping to the model
data = {"model": model, "stream": stream}
if "format" in model_parameters:
data["format"] = model_parameters["format"]
del model_parameters["format"]
if format_schema := model_parameters.pop("format", None):
try:
data["format"] = format_schema if format_schema == "json" else json.loads(format_schema)
except json.JSONDecodeError as e:
raise InvokeBadRequestError(f"Invalid format schema: {str(e)}")
if "keep_alive" in model_parameters:
data["keep_alive"] = model_parameters["keep_alive"]
@ -733,12 +735,12 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
ParameterRule(
name="format",
label=I18nObject(en_US="Format", zh_Hans="返回格式"),
type=ParameterType.STRING,
type=ParameterType.TEXT,
default="json",
help=I18nObject(
en_US="the format to return a response in. Currently the only accepted value is json.",
zh_Hans="返回响应的格式。目前唯一接受的值是json。",
en_US="the format to return a response in. Format can be `json` or a JSON schema.",
zh_Hans="返回响应的格式。目前接受的值是字符串`json`或JSON schema.",
),
options=["json"],
),
],
pricing=PriceConfig(

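With the parameter type widened from a fixed `json` option to free text, the payload builder now accepts either the literal `json` or a JSON schema document and fails fast on anything else. A sketch of that branch on its own (using `ValueError` in place of `InvokeBadRequestError`):

```python
import json


def build_format_field(model_parameters: dict) -> dict:
    data = {}
    if format_schema := model_parameters.pop("format", None):
        try:
            # "json" is passed through as-is; anything else must parse as a JSON schema
            data["format"] = format_schema if format_schema == "json" else json.loads(format_schema)
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid format schema: {e}")
    return data


print(build_format_field({"format": "json"}))               # {'format': 'json'}
print(build_format_field({"format": '{"type": "object"}'}))  # {'format': {'type': 'object'}}
```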
View File

@ -478,6 +478,10 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
usage=usage,
)
break
# handle the error here (see issue #11629)
if chunk_json.get("error") and chunk_json.get("choices") is None:
raise ValueError(chunk_json.get("error"))
if chunk_json:
if u := chunk_json.get("usage"):
usage = u

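Some OpenAI-compatible backends stream an error object with no `choices`, which previously surfaced as a confusing KeyError; checking for it first re-raises the provider's own message. A minimal sketch (the chunk layout is an assumption based on the check above):

```python
def handle_stream_chunk(chunk_json: dict) -> None:
    # an error payload without choices means the upstream call failed mid-stream
    if chunk_json.get("error") and chunk_json.get("choices") is None:
        raise ValueError(chunk_json.get("error"))
    # ... otherwise continue with the usual usage/choices handling


try:
    handle_stream_chunk({"error": {"message": "model overloaded", "code": 503}})
except ValueError as e:
    print(f"stream aborted: {e}")
```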
View File

@ -1,4 +1,5 @@
- Tencent/Hunyuan-A52B-Instruct
- Qwen/QwQ-32B-Preview
- Qwen/Qwen2.5-72B-Instruct
- Qwen/Qwen2.5-32B-Instruct
- Qwen/Qwen2.5-14B-Instruct
@ -19,6 +20,7 @@
- 01-ai/Yi-1.5-6B-Chat
- internlm/internlm2_5-20b-chat
- internlm/internlm2_5-7b-chat
- meta-llama/Llama-3.3-70B-Instruct
- meta-llama/Meta-Llama-3.1-405B-Instruct
- meta-llama/Meta-Llama-3.1-70B-Instruct
- meta-llama/Meta-Llama-3.1-8B-Instruct

View File

@ -0,0 +1,53 @@
model: meta-llama/Llama-3.3-70B-Instruct
label:
en_US: meta-llama/Llama-3.3-70B-Instruct
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
- name: max_tokens
use_template: max_tokens
type: int
default: 512
min: 1
max: 4096
help:
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '4.13'
output: '4.13'
unit: '0.000001'
currency: RMB

View File

@ -0,0 +1,53 @@
model: Qwen/QwQ-32B-Preview
label:
en_US: Qwen/QwQ-32B-Preview
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
- name: max_tokens
use_template: max_tokens
type: int
default: 512
min: 1
max: 4096
help:
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '1.26'
output: '1.26'
unit: '0.000001'
currency: RMB

View File

@ -59,8 +59,6 @@ parameter_rules:
help:
zh_Hans: 生成时使用的随机数种子用户控制模型生成内容的随机性。支持无符号64位整数默认值为 1234。在使用seed时模型将尽可能生成相同或相似的结果但目前不保证每次生成的结果完全相同。
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
- name: response_format
use_template: response_format
- name: repetition_penalty
required: false
type: float

View File

@ -59,8 +59,6 @@ parameter_rules:
help:
zh_Hans: 生成时使用的随机数种子用户控制模型生成内容的随机性。支持无符号64位整数默认值为 1234。在使用seed时模型将尽可能生成相同或相似的结果但目前不保证每次生成的结果完全相同。
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
- name: response_format
use_template: response_format
- name: repetition_penalty
required: false
type: float

View File

@ -58,8 +58,6 @@ parameter_rules:
help:
zh_Hans: 生成时使用的随机数种子用户控制模型生成内容的随机性。支持无符号64位整数默认值为 1234。在使用seed时模型将尽可能生成相同或相似的结果但目前不保证每次生成的结果完全相同。
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
- name: response_format
use_template: response_format
- name: repetition_penalty
required: false
type: float

View File

@ -59,8 +59,6 @@ parameter_rules:
help:
zh_Hans: 生成时使用的随机数种子用户控制模型生成内容的随机性。支持无符号64位整数默认值为 1234。在使用seed时模型将尽可能生成相同或相似的结果但目前不保证每次生成的结果完全相同。
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
- name: response_format
use_template: response_format
- name: repetition_penalty
required: false
type: float

View File

@ -59,8 +59,6 @@ parameter_rules:
help:
zh_Hans: 生成时使用的随机数种子用户控制模型生成内容的随机性。支持无符号64位整数默认值为 1234。在使用seed时模型将尽可能生成相同或相似的结果但目前不保证每次生成的结果完全相同。
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
- name: response_format
use_template: response_format
- name: repetition_penalty
required: false
type: float

View File

@ -0,0 +1,39 @@
model: gemini-2.0-flash-exp
label:
en_US: Gemini 2.0 Flash Exp
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -104,13 +104,14 @@ class VertexAiLargeLanguageModel(LargeLanguageModel):
"""
# use Anthropic official SDK references
# - https://github.com/anthropics/anthropic-sdk-python
service_account_info = json.loads(base64.b64decode(credentials["vertex_service_account_key"]))
service_account_key = credentials.get("vertex_service_account_key", "")
project_id = credentials["vertex_project_id"]
SCOPES = ["https://www.googleapis.com/auth/cloud-platform"]
token = ""
# get access token from service account credential
if service_account_info:
if service_account_key:
service_account_info = json.loads(base64.b64decode(service_account_key))
credentials = service_account.Credentials.from_service_account_info(service_account_info, scopes=SCOPES)
request = google.auth.transport.requests.Request()
credentials.refresh(request)
@@ -478,10 +479,11 @@ class VertexAiLargeLanguageModel(LargeLanguageModel):
         if stop:
             config_kwargs["stop_sequences"] = stop
-        service_account_info = json.loads(base64.b64decode(credentials["vertex_service_account_key"]))
+        service_account_key = credentials.get("vertex_service_account_key", "")
         project_id = credentials["vertex_project_id"]
         location = credentials["vertex_location"]
-        if service_account_info:
+        if service_account_key:
+            service_account_info = json.loads(base64.b64decode(service_account_key))
             service_accountSA = service_account.Credentials.from_service_account_info(service_account_info)
             aiplatform.init(credentials=service_accountSA, project=project_id, location=location)
         else:

View File
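
Both hunks above make the vertex_service_account_key credential optional rather than required. For the Anthropic-on-Vertex path, the resulting pattern looks roughly like the sketch below (the helper name is illustrative; when no key is supplied the token stays empty and the truncated else branch presumably falls back to the environment's default credentials):

import base64
import json

import google.auth.transport.requests
from google.oauth2 import service_account

SCOPES = ["https://www.googleapis.com/auth/cloud-platform"]

def access_token_from_optional_key(credentials: dict) -> str:
    # Decode the base64-encoded service account JSON only when it is actually provided.
    service_account_key = credentials.get("vertex_service_account_key", "")
    token = ""
    if service_account_key:
        service_account_info = json.loads(base64.b64decode(service_account_key))
        sa_credentials = service_account.Credentials.from_service_account_info(service_account_info, scopes=SCOPES)
        request = google.auth.transport.requests.Request()
        sa_credentials.refresh(request)  # fetches an OAuth2 access token
        token = sa_credentials.token
    return token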

@@ -48,10 +48,11 @@ class VertexAiTextEmbeddingModel(_CommonVertexAi, TextEmbeddingModel):
         :param input_type: input type
         :return: embeddings result
         """
-        service_account_info = json.loads(base64.b64decode(credentials["vertex_service_account_key"]))
+        service_account_key = credentials.get("vertex_service_account_key", "")
         project_id = credentials["vertex_project_id"]
         location = credentials["vertex_location"]
-        if service_account_info:
+        if service_account_key:
+            service_account_info = json.loads(base64.b64decode(service_account_key))
             service_accountSA = service_account.Credentials.from_service_account_info(service_account_info)
             aiplatform.init(credentials=service_accountSA, project=project_id, location=location)
         else:
@@ -100,10 +101,11 @@ class VertexAiTextEmbeddingModel(_CommonVertexAi, TextEmbeddingModel):
         :return:
         """
         try:
-            service_account_info = json.loads(base64.b64decode(credentials["vertex_service_account_key"]))
+            service_account_key = credentials.get("vertex_service_account_key", "")
             project_id = credentials["vertex_project_id"]
             location = credentials["vertex_location"]
-            if service_account_info:
+            if service_account_key:
+                service_account_info = json.loads(base64.b64decode(service_account_key))
                 service_accountSA = service_account.Credentials.from_service_account_info(service_account_info)
                 aiplatform.init(credentials=service_accountSA, project=project_id, location=location)
             else:

View File
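
The embedding model repeats the same optional-key handling in both hunks above. Factored out, the shared shape is roughly the following (the helper name is assumed, and the else branch reflects the likely fallback to Application Default Credentials, which the diff truncates):

import base64
import json

from google.cloud import aiplatform
from google.oauth2 import service_account

def init_vertex(credentials: dict) -> None:
    service_account_key = credentials.get("vertex_service_account_key", "")
    project_id = credentials["vertex_project_id"]
    location = credentials["vertex_location"]
    if service_account_key:
        # Explicit key provided: build credentials from the decoded JSON.
        service_account_info = json.loads(base64.b64decode(service_account_key))
        sa_credentials = service_account.Credentials.from_service_account_info(service_account_info)
        aiplatform.init(credentials=sa_credentials, project=project_id, location=location)
    else:
        # No key: assume the SDK resolves Application Default Credentials on its own.
        aiplatform.init(project=project_id, location=location)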

@@ -68,7 +68,12 @@ class MaaSClient(MaasService):
             content = []
             for message_content in message.content:
                 if message_content.type == PromptMessageContentType.TEXT:
-                    raise ValueError("Content object type only support image_url")
+                    content.append(
+                        {
+                            "type": "text",
+                            "text": message_content.data,
+                        }
+                    )
                 elif message_content.type == PromptMessageContentType.IMAGE:
                     message_content = cast(ImagePromptMessageContent, message_content)
                     image_data = re.sub(r"^data:image\/[a-zA-Z]+;base64,", "", message_content.data)

View File
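
Before this fix, a text part in a multimodal message made MaaSClient raise; now text parts are forwarded as {"type": "text", ...} entries alongside images. A self-contained sketch of the conversion (plain dicts stand in for Dify's PromptMessageContent objects, and the image_url payload shape is an assumption, since the hunk is truncated right after the base64 prefix is stripped):

import re

def to_maas_content(parts: list[dict]) -> list[dict]:
    content = []
    for part in parts:
        if part["type"] == "text":
            content.append({"type": "text", "text": part["data"]})
        elif part["type"] == "image":
            # Strip a data-URI prefix so only the raw base64 payload is forwarded.
            image_data = re.sub(r"^data:image\/[a-zA-Z]+;base64,", "", part["data"])
            content.append({"type": "image_url", "image_url": {"url": image_data}})  # assumed shape
    return content

print(to_maas_content([
    {"type": "text", "data": "Describe this picture"},
    {"type": "image", "data": "data:image/png;base64,iVBORw0KGgo..."},
]))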

@@ -0,0 +1,66 @@
model: grok-2-1212
label:
  en_US: grok-2-1212
model_type: llm
features:
  - agent-thought
  - tool-call
  - multi-tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    label:
      en_US: "Temperature"
      zh_Hans: "采样温度"
    type: float
    default: 0.7
    min: 0.0
    max: 2.0
    precision: 1
    required: true
    help:
      en_US: "The randomness of the sampling temperature control output. The temperature value is within the range of [0.0, 1.0]. The higher the value, the more random and creative the output; the lower the value, the more stable it is. It is recommended to adjust either top_p or temperature parameters according to your needs to avoid adjusting both at the same time."
      zh_Hans: "采样温度控制输出的随机性。温度值在 [0.0, 1.0] 范围内，值越高，输出越随机和创造性；值越低，输出越稳定。建议根据需求调整 top_p 或 temperature 参数，避免同时调整两者。"
  - name: top_p
    label:
      en_US: "Top P"
      zh_Hans: "Top P"
    type: float
    default: 0.7
    min: 0.0
    max: 1.0
    precision: 1
    required: true
    help:
      en_US: "The value range of the sampling method is [0.0, 1.0]. The top_p value determines that the model selects tokens from the top p% of candidate words with the highest probability; when top_p is 0, this parameter is invalid. It is recommended to adjust either top_p or temperature parameters according to your needs to avoid adjusting both at the same time."
      zh_Hans: "采样方法的取值范围为 [0.0,1.0]。top_p 值确定模型从概率最高的前p%的候选词中选取 tokens，当 top_p 为 0 时，此参数无效。建议根据需求调整 top_p 或 temperature 参数，避免同时调整两者。"
  - name: frequency_penalty
    use_template: frequency_penalty
    label:
      en_US: "Frequency Penalty"
      zh_Hans: "频率惩罚"
    type: float
    default: 0
    min: 0
    max: 2.0
    precision: 1
    required: false
    help:
      en_US: "Number between 0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim."
      zh_Hans: "介于0和2.0之间的数字。正值会根据新标记在文本中迄今为止的现有频率来惩罚它们，从而降低模型一字不差地重复同一句话的可能性。"
  - name: user
    use_template: text
    label:
      en_US: "User"
      zh_Hans: "用户"
    type: string
    required: false
    help:
      en_US: "Used to track and differentiate conversation requests from different users."
      zh_Hans: "用于追踪和区分不同用户的对话请求。"

View File
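
The grok-2-1212 entry maps onto xAI's OpenAI-compatible chat API. A minimal request using the defaults declared above (the base_url, API key placeholder, and user value are assumptions, not part of the config):

from openai import OpenAI

client = OpenAI(api_key="YOUR_XAI_API_KEY", base_url="https://api.x.ai/v1")  # assumed endpoint

response = client.chat.completions.create(
    model="grok-2-1212",
    messages=[{"role": "user", "content": "Summarise the latest release notes."}],
    temperature=0.7,        # defaults mirrored from the parameter_rules above
    top_p=0.7,
    frequency_penalty=0,
    user="user-123",        # maps to the optional `user` parameter
)
print(response.choices[0].message.content)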

@@ -0,0 +1,64 @@
model: grok-2-vision-1212
label:
  en_US: grok-2-vision-1212
model_type: llm
features:
  - agent-thought
  - vision
model_properties:
  mode: chat
  context_size: 8192
parameter_rules:
  - name: temperature
    label:
      en_US: "Temperature"
      zh_Hans: "采样温度"
    type: float
    default: 0.7
    min: 0.0
    max: 2.0
    precision: 1
    required: true
    help:
      en_US: "The randomness of the sampling temperature control output. The temperature value is within the range of [0.0, 1.0]. The higher the value, the more random and creative the output; the lower the value, the more stable it is. It is recommended to adjust either top_p or temperature parameters according to your needs to avoid adjusting both at the same time."
      zh_Hans: "采样温度控制输出的随机性。温度值在 [0.0, 1.0] 范围内，值越高，输出越随机和创造性；值越低，输出越稳定。建议根据需求调整 top_p 或 temperature 参数，避免同时调整两者。"
  - name: top_p
    label:
      en_US: "Top P"
      zh_Hans: "Top P"
    type: float
    default: 0.7
    min: 0.0
    max: 1.0
    precision: 1
    required: true
    help:
      en_US: "The value range of the sampling method is [0.0, 1.0]. The top_p value determines that the model selects tokens from the top p% of candidate words with the highest probability; when top_p is 0, this parameter is invalid. It is recommended to adjust either top_p or temperature parameters according to your needs to avoid adjusting both at the same time."
      zh_Hans: "采样方法的取值范围为 [0.0,1.0]。top_p 值确定模型从概率最高的前p%的候选词中选取 tokens，当 top_p 为 0 时，此参数无效。建议根据需求调整 top_p 或 temperature 参数，避免同时调整两者。"
  - name: frequency_penalty
    use_template: frequency_penalty
    label:
      en_US: "Frequency Penalty"
      zh_Hans: "频率惩罚"
    type: float
    default: 0
    min: 0
    max: 2.0
    precision: 1
    required: false
    help:
      en_US: "Number between 0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim."
      zh_Hans: "介于0和2.0之间的数字。正值会根据新标记在文本中迄今为止的现有频率来惩罚它们，从而降低模型一字不差地重复同一句话的可能性。"
  - name: user
    use_template: text
    label:
      en_US: "User"
      zh_Hans: "用户"
    type: string
    required: false
    help:
      en_US: "Used to track and differentiate conversation requests from different users."
      zh_Hans: "用于追踪和区分不同用户的对话请求。"

View File
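
grok-2-vision-1212 additionally declares the vision feature, so an image can ride along as an OpenAI-style image_url content part. Again a hedged sketch — the endpoint and message shape are assumed, not taken from this YAML:

from openai import OpenAI

client = OpenAI(api_key="YOUR_XAI_API_KEY", base_url="https://api.x.ai/v1")  # assumed endpoint

response = client.chat.completions.create(
    model="grok-2-vision-1212",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "What is shown in this image?"},
            {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
        ],
    }],
    temperature=0.7,
)
print(response.choices[0].message.content)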

@@ -1,6 +1,6 @@
 model: grok-beta
 label:
-  en_US: Grok Beta
+  en_US: grok-beta
 model_type: llm
 features:
   - agent-thought

Some files were not shown because too many files have changed in this diff.