Mirror of https://github.com/langgenius/dify.git
Synced 2026-01-24 13:56:03 +08:00

Compare commits: feat/rag-e...0.9.0 (89 commits)
| SHA1 |
|---|
| d2ce4960f1 |
| 1af4ca344e |
| fa837b2dfd |
| 824a71388a |
| 4585cffce1 |
| 13046709a9 |
| 9d221a5e19 |
| 77aef9ff1d |
| 503561f464 |
| ada9d408ac |
| 3af65b2f45 |
| 369e1e6f58 |
| fb49413a41 |
| 42dfde6546 |
| c531b4a911 |
| e4ed916baa |
| 4ec977eaba |
| 74f58f29f9 |
| f97607370a |
| 850492dafa |
| 61c89a9168 |
| 49af18fbd6 |
| 6cd22f3bca |
| a2e2f8a8c9 |
| 27e33fb15c |
| 55e6123db9 |
| c828a5dfdf |
| 0603359e2d |
| bb781764b8 |
| 29275c7447 |
| 4c1063e1c5 |
| d6b9587a97 |
| 6fbaabc1bc |
| a36117e12d |
| e5efd09ebb |
| ecc951609d |
| 063474f408 |
| 3dfbc348e3 |
| 9a4b53a212 |
| 03edfbe6f5 |
| 3d2cb25a67 |
| 6df14e50b2 |
| 008e0efeb0 |
| 128a66f7fe |
| 62406991df |
| d1173a69f8 |
| a0b0809b1c |
| 4c9ef6e830 |
| 0c96f0aa51 |
| ac73763726 |
| 5ba19d64e9 |
| fefbc43fb0 |
| a8b837c4a9 |
| 02ff6cca70 |
| ef47f68e4a |
| 2ef8b187fa |
| b0927c39fb |
| d0e0111f88 |
| 2328944987 |
| cb1942c242 |
| bf64ff215b |
| 68c7e68a8a |
| 91f70d0bd9 |
| 4669eb24be |
| debe5953a8 |
| 1c7877b048 |
| 9ca2e2c968 |
| f42ef0624d |
| 64baedb484 |
| 4638f99aaa |
| aebe5fc68c |
| 1ecf70dca0 |
| 7c485f8bb8 |
| 21e9608b23 |
| 7f1b028840 |
| bef83a4d2e |
| 8cc9e68363 |
| d7aada38a1 |
| 4f69adc8ab |
| 52da5b16e7 |
| 11d09a92d0 |
| c7eacd1aac |
| a126d535cf |
| 3554a803e7 |
| c66cecaa55 |
| b37954b966 |
| 86f90fd9ff |
| 4c7beb9d7b |
| 3618a97c20 |
.github/workflows/build-push.yml (vendored; 2 changes)
```diff
@@ -125,7 +125,7 @@ jobs:
         with:
           images: ${{ env[matrix.image_name_env] }}
           tags: |
-            type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags/') }}
+            type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags/') && !contains(github.ref, '-') }}
             type=ref,event=branch
             type=sha,enable=true,priority=100,prefix=,suffix=,format=long
             type=raw,value=${{ github.ref_name }},enable=${{ startsWith(github.ref, 'refs/tags/') }}
```
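The effect of the added condition, reading the expression as written: a release tag such as `0.9.0` still receives the `latest` image tag, while any tag whose ref name contains a hyphen (for example a hypothetical pre-release tag `0.9.0-beta1`) no longer does.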
.github/workflows/web-tests.yml (vendored; new file, 46 lines)
```yaml
name: Web Tests

on:
  pull_request:
    branches:
      - main
    paths:
      - web/**

concurrency:
  group: web-tests-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  test:
    name: Web Tests
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: ./web

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Check changed files
        id: changed-files
        uses: tj-actions/changed-files@v45
        with:
          files: web/**

      - name: Setup Node.js
        uses: actions/setup-node@v4
        if: steps.changed-files.outputs.any_changed == 'true'
        with:
          node-version: 20
          cache: yarn
          cache-dependency-path: ./web/package.json

      - name: Install dependencies
        if: steps.changed-files.outputs.any_changed == 'true'
        run: yarn install --frozen-lockfile

      - name: Run tests
        if: steps.changed-files.outputs.any_changed == 'true'
        run: yarn test
```
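Note on the gating in this workflow: the `paths` filter restricts which pull requests trigger it at all, while the `tj-actions/changed-files` step lets each later step skip itself via `if: steps.changed-files.outputs.any_changed == 'true'`, so the job still completes and reports success when no `web/**` files actually changed.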
```diff
@@ -162,6 +162,8 @@ PGVECTOR_PORT=5433
 PGVECTOR_USER=postgres
 PGVECTOR_PASSWORD=postgres
 PGVECTOR_DATABASE=postgres
+PGVECTOR_MIN_CONNECTION=1
+PGVECTOR_MAX_CONNECTION=5
 
 # Tidb Vector configuration
 TIDB_VECTOR_HOST=xxx.eu-central-1.xxx.aws.tidbcloud.com
```
```diff
@@ -53,11 +53,9 @@ from services.account_service import AccountService
 
 warnings.simplefilter("ignore", ResourceWarning)
 
-# fix windows platform
-if os.name == "nt":
-    os.system('tzutil /s "UTC"')
-else:
-    os.environ["TZ"] = "UTC"
-    time.tzset()
+os.environ["TZ"] = "UTC"
+# windows platform not support tzset
+if hasattr(time, "tzset"):
+    time.tzset()
 
 
```
```diff
@@ -652,7 +652,7 @@ where sites.id is null limit 1000"""
                     app_was_created.send(app, account=account)
                 except Exception as e:
                     failed_app_ids.append(app_id)
-                    click.echo(click.style("FFailed to fix missing site for app {}".format(app_id), fg="red"))
+                    click.echo(click.style("Failed to fix missing site for app {}".format(app_id), fg="red"))
                     logging.exception(f"Fix app related site missing issue failed, error: {e}")
                     continue
```
```diff
@@ -33,3 +33,13 @@ class PGVectorConfig(BaseSettings):
         description="Name of the PostgreSQL database to connect to",
         default=None,
     )
+
+    PGVECTOR_MIN_CONNECTION: PositiveInt = Field(
+        description="Min connection of the PostgreSQL database",
+        default=1,
+    )
+
+    PGVECTOR_MAX_CONNECTION: PositiveInt = Field(
+        description="Max connection of the PostgreSQL database",
+        default=5,
+    )
```
```diff
@@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings):
 
     CURRENT_VERSION: str = Field(
         description="Dify version",
-        default="0.8.3",
+        default="0.9.0",
     )
 
     COMMIT_SHA: str = Field(
```
```diff
@@ -37,7 +37,16 @@ from .auth import activate, data_source_bearer_auth, data_source_oauth, forgot_p
 from .billing import billing
 
 # Import datasets controllers
-from .datasets import data_source, datasets, datasets_document, datasets_segments, file, hit_testing, website
+from .datasets import (
+    data_source,
+    datasets,
+    datasets_document,
+    datasets_segments,
+    external,
+    file,
+    hit_testing,
+    website,
+)
 
 # Import explore controllers
 from .explore import (
```
```diff
@@ -49,7 +49,7 @@ class DatasetListApi(Resource):
         page = request.args.get("page", default=1, type=int)
         limit = request.args.get("limit", default=20, type=int)
         ids = request.args.getlist("ids")
-        provider = request.args.get("provider", default="vendor")
+        # provider = request.args.get("provider", default="vendor")
         search = request.args.get("keyword", default=None, type=str)
         tag_ids = request.args.getlist("tag_ids")
@@ -57,7 +57,7 @@ class DatasetListApi(Resource):
             datasets, total = DatasetService.get_datasets_by_ids(ids, current_user.current_tenant_id)
         else:
             datasets, total = DatasetService.get_datasets(
-                page, limit, provider, current_user.current_tenant_id, current_user, search, tag_ids
+                page, limit, current_user.current_tenant_id, current_user, search, tag_ids
             )
 
         # check embedding setting
@@ -110,6 +110,26 @@ class DatasetListApi(Resource):
             nullable=True,
             help="Invalid indexing technique.",
         )
+        parser.add_argument(
+            "external_knowledge_api_id",
+            type=str,
+            nullable=True,
+            required=False,
+        )
+        parser.add_argument(
+            "provider",
+            type=str,
+            nullable=True,
+            choices=Dataset.PROVIDER_LIST,
+            required=False,
+            default="vendor",
+        )
+        parser.add_argument(
+            "external_knowledge_id",
+            type=str,
+            nullable=True,
+            required=False,
+        )
         args = parser.parse_args()
 
         # The role of the current user in the ta table must be admin, owner, or editor, or dataset_operator
@@ -123,6 +143,9 @@ class DatasetListApi(Resource):
                 indexing_technique=args["indexing_technique"],
                 account=current_user,
                 permission=DatasetPermissionEnum.ONLY_ME,
+                provider=args["provider"],
+                external_knowledge_api_id=args["external_knowledge_api_id"],
+                external_knowledge_id=args["external_knowledge_id"],
             )
         except services.errors.dataset.DatasetNameDuplicateError:
             raise DatasetNameDuplicateError()
@@ -211,6 +234,33 @@ class DatasetApi(Resource):
         )
         parser.add_argument("retrieval_model", type=dict, location="json", help="Invalid retrieval model.")
         parser.add_argument("partial_member_list", type=list, location="json", help="Invalid parent user list.")
+
+        parser.add_argument(
+            "external_retrieval_model",
+            type=dict,
+            required=False,
+            nullable=True,
+            location="json",
+            help="Invalid external retrieval model.",
+        )
+
+        parser.add_argument(
+            "external_knowledge_id",
+            type=str,
+            required=False,
+            nullable=True,
+            location="json",
+            help="Invalid external knowledge id.",
+        )
+
+        parser.add_argument(
+            "external_knowledge_api_id",
+            type=str,
+            required=False,
+            nullable=True,
+            location="json",
+            help="Invalid external knowledge api id.",
+        )
         args = parser.parse_args()
         data = request.get_json()
@@ -563,10 +613,10 @@ class DatasetRetrievalSettingApi(Resource):
             case (
                 VectorType.MILVUS
                 | VectorType.RELYT
-                | VectorType.PGVECTOR
                 | VectorType.TIDB_VECTOR
                 | VectorType.CHROMA
                 | VectorType.TENCENT
                 | VectorType.PGVECTO_RS
             ):
                 return {"retrieval_method": [RetrievalMethod.SEMANTIC_SEARCH.value]}
             case (
@@ -577,6 +627,7 @@ class DatasetRetrievalSettingApi(Resource):
                 | VectorType.MYSCALE
                 | VectorType.ORACLE
                 | VectorType.ELASTICSEARCH
+                | VectorType.PGVECTOR
             ):
                 return {
                     "retrieval_method": [
```
api/controllers/console/datasets/external.py (new file, 239 lines)
```python
from flask import request
from flask_login import current_user
from flask_restful import Resource, marshal, reqparse
from werkzeug.exceptions import Forbidden, InternalServerError, NotFound

import services
from controllers.console import api
from controllers.console.datasets.error import DatasetNameDuplicateError
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required
from fields.dataset_fields import dataset_detail_fields
from libs.login import login_required
from services.dataset_service import DatasetService
from services.external_knowledge_service import ExternalDatasetService
from services.hit_testing_service import HitTestingService


def _validate_name(name):
    if not name or len(name) < 1 or len(name) > 100:
        raise ValueError("Name must be between 1 to 100 characters.")
    return name


def _validate_description_length(description):
    if description and len(description) > 400:
        raise ValueError("Description cannot exceed 400 characters.")
    return description


class ExternalApiTemplateListApi(Resource):
    @setup_required
    @login_required
    @account_initialization_required
    def get(self):
        page = request.args.get("page", default=1, type=int)
        limit = request.args.get("limit", default=20, type=int)
        search = request.args.get("keyword", default=None, type=str)

        external_knowledge_apis, total = ExternalDatasetService.get_external_knowledge_apis(
            page, limit, current_user.current_tenant_id, search
        )
        response = {
            "data": [item.to_dict() for item in external_knowledge_apis],
            "has_more": len(external_knowledge_apis) == limit,
            "limit": limit,
            "total": total,
            "page": page,
        }
        return response, 200

    @setup_required
    @login_required
    @account_initialization_required
    def post(self):
        parser = reqparse.RequestParser()
        parser.add_argument(
            "name",
            nullable=False,
            required=True,
            help="Name is required. Name must be between 1 to 100 characters.",
            type=_validate_name,
        )
        parser.add_argument(
            "settings",
            type=dict,
            location="json",
            nullable=False,
            required=True,
        )
        args = parser.parse_args()

        ExternalDatasetService.validate_api_list(args["settings"])

        # The role of the current user in the ta table must be admin, owner, or editor, or dataset_operator
        if not current_user.is_dataset_editor:
            raise Forbidden()

        try:
            external_knowledge_api = ExternalDatasetService.create_external_knowledge_api(
                tenant_id=current_user.current_tenant_id, user_id=current_user.id, args=args
            )
        except services.errors.dataset.DatasetNameDuplicateError:
            raise DatasetNameDuplicateError()

        return external_knowledge_api.to_dict(), 201


class ExternalApiTemplateApi(Resource):
    @setup_required
    @login_required
    @account_initialization_required
    def get(self, external_knowledge_api_id):
        external_knowledge_api_id = str(external_knowledge_api_id)
        external_knowledge_api = ExternalDatasetService.get_external_knowledge_api(external_knowledge_api_id)
        if external_knowledge_api is None:
            raise NotFound("API template not found.")

        return external_knowledge_api.to_dict(), 200

    @setup_required
    @login_required
    @account_initialization_required
    def patch(self, external_knowledge_api_id):
        external_knowledge_api_id = str(external_knowledge_api_id)

        parser = reqparse.RequestParser()
        parser.add_argument(
            "name",
            nullable=False,
            required=True,
            help="type is required. Name must be between 1 to 100 characters.",
            type=_validate_name,
        )
        parser.add_argument(
            "settings",
            type=dict,
            location="json",
            nullable=False,
            required=True,
        )
        args = parser.parse_args()
        ExternalDatasetService.validate_api_list(args["settings"])

        external_knowledge_api = ExternalDatasetService.update_external_knowledge_api(
            tenant_id=current_user.current_tenant_id,
            user_id=current_user.id,
            external_knowledge_api_id=external_knowledge_api_id,
            args=args,
        )

        return external_knowledge_api.to_dict(), 200

    @setup_required
    @login_required
    @account_initialization_required
    def delete(self, external_knowledge_api_id):
        external_knowledge_api_id = str(external_knowledge_api_id)

        # The role of the current user in the ta table must be admin, owner, or editor
        if not current_user.is_editor or current_user.is_dataset_operator:
            raise Forbidden()

        ExternalDatasetService.delete_external_knowledge_api(current_user.current_tenant_id, external_knowledge_api_id)
        return {"result": "success"}, 200


class ExternalApiUseCheckApi(Resource):
    @setup_required
    @login_required
    @account_initialization_required
    def get(self, external_knowledge_api_id):
        external_knowledge_api_id = str(external_knowledge_api_id)

        external_knowledge_api_is_using, count = ExternalDatasetService.external_knowledge_api_use_check(
            external_knowledge_api_id
        )
        return {"is_using": external_knowledge_api_is_using, "count": count}, 200


class ExternalDatasetCreateApi(Resource):
    @setup_required
    @login_required
    @account_initialization_required
    def post(self):
        # The role of the current user in the ta table must be admin, owner, or editor
        if not current_user.is_editor:
            raise Forbidden()

        parser = reqparse.RequestParser()
        parser.add_argument("external_knowledge_api_id", type=str, required=True, nullable=False, location="json")
        parser.add_argument("external_knowledge_id", type=str, required=True, nullable=False, location="json")
        parser.add_argument(
            "name",
            nullable=False,
            required=True,
            help="name is required. Name must be between 1 to 100 characters.",
            type=_validate_name,
        )
        parser.add_argument("description", type=str, required=False, nullable=True, location="json")
        parser.add_argument("external_retrieval_model", type=dict, required=False, location="json")

        args = parser.parse_args()

        # The role of the current user in the ta table must be admin, owner, or editor, or dataset_operator
        if not current_user.is_dataset_editor:
            raise Forbidden()

        try:
            dataset = ExternalDatasetService.create_external_dataset(
                tenant_id=current_user.current_tenant_id,
                user_id=current_user.id,
                args=args,
            )
        except services.errors.dataset.DatasetNameDuplicateError:
            raise DatasetNameDuplicateError()

        return marshal(dataset, dataset_detail_fields), 201


class ExternalKnowledgeHitTestingApi(Resource):
    @setup_required
    @login_required
    @account_initialization_required
    def post(self, dataset_id):
        dataset_id_str = str(dataset_id)
        dataset = DatasetService.get_dataset(dataset_id_str)
        if dataset is None:
            raise NotFound("Dataset not found.")

        try:
            DatasetService.check_dataset_permission(dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            raise Forbidden(str(e))

        parser = reqparse.RequestParser()
        parser.add_argument("query", type=str, location="json")
        parser.add_argument("external_retrieval_model", type=dict, required=False, location="json")
        args = parser.parse_args()

        HitTestingService.hit_testing_args_check(args)

        try:
            response = HitTestingService.external_retrieve(
                dataset=dataset,
                query=args["query"],
                account=current_user,
                external_retrieval_model=args["external_retrieval_model"],
            )

            return response
        except Exception as e:
            raise InternalServerError(str(e))


api.add_resource(ExternalKnowledgeHitTestingApi, "/datasets/<uuid:dataset_id>/external-hit-testing")
api.add_resource(ExternalDatasetCreateApi, "/datasets/external")
api.add_resource(ExternalApiTemplateListApi, "/datasets/external-knowledge-api")
api.add_resource(ExternalApiTemplateApi, "/datasets/external-knowledge-api/<uuid:external_knowledge_api_id>")
api.add_resource(ExternalApiUseCheckApi, "/datasets/external-knowledge-api/<uuid:external_knowledge_api_id>/use-check")
```
```diff
@@ -47,6 +47,7 @@ class HitTestingApi(Resource):
         parser = reqparse.RequestParser()
         parser.add_argument("query", type=str, location="json")
         parser.add_argument("retrieval_model", type=dict, required=False, location="json")
+        parser.add_argument("external_retrieval_model", type=dict, required=False, location="json")
         args = parser.parse_args()
 
         HitTestingService.hit_testing_args_check(args)
@@ -57,6 +58,7 @@ class HitTestingApi(Resource):
                 query=args["query"],
                 account=current_user,
                 retrieval_model=args["retrieval_model"],
+                external_retrieval_model=args["external_retrieval_model"],
                 limit=10,
             )
```
```diff
@@ -14,7 +14,9 @@ class WebsiteCrawlApi(Resource):
     @account_initialization_required
     def post(self):
         parser = reqparse.RequestParser()
-        parser.add_argument("provider", type=str, choices=["firecrawl"], required=True, nullable=True, location="json")
+        parser.add_argument(
+            "provider", type=str, choices=["firecrawl", "jinareader"], required=True, nullable=True, location="json"
+        )
         parser.add_argument("url", type=str, required=True, nullable=True, location="json")
         parser.add_argument("options", type=dict, required=True, nullable=True, location="json")
         args = parser.parse_args()
@@ -33,7 +35,7 @@ class WebsiteCrawlStatusApi(Resource):
     @account_initialization_required
     def get(self, job_id: str):
         parser = reqparse.RequestParser()
-        parser.add_argument("provider", type=str, choices=["firecrawl"], required=True, location="args")
+        parser.add_argument("provider", type=str, choices=["firecrawl", "jinareader"], required=True, location="args")
         args = parser.parse_args()
         # get crawl status
         try:
```
```diff
@@ -38,11 +38,52 @@ class VersionApi(Resource):
             return result
 
         content = json.loads(response.content)
-        result["version"] = content["version"]
-        result["release_date"] = content["releaseDate"]
-        result["release_notes"] = content["releaseNotes"]
-        result["can_auto_update"] = content["canAutoUpdate"]
+        if _has_new_version(latest_version=content["version"], current_version=f"{args.get('current_version')}"):
+            result["version"] = content["version"]
+            result["release_date"] = content["releaseDate"]
+            result["release_notes"] = content["releaseNotes"]
+            result["can_auto_update"] = content["canAutoUpdate"]
         return result
 
 
+def _has_new_version(*, latest_version: str, current_version: str) -> bool:
+    def parse_version(version: str) -> tuple:
+        # Split version into parts and pre-release suffix if any
+        parts = version.split("-")
+        version_parts = parts[0].split(".")
+        pre_release = parts[1] if len(parts) > 1 else None
+
+        # Validate version format
+        if len(version_parts) != 3:
+            raise ValueError(f"Invalid version format: {version}")
+
+        try:
+            # Convert version parts to integers
+            major, minor, patch = map(int, version_parts)
+            return (major, minor, patch, pre_release)
+        except ValueError:
+            raise ValueError(f"Invalid version format: {version}")
+
+    latest = parse_version(latest_version)
+    current = parse_version(current_version)
+
+    # Compare major, minor, and patch versions
+    for latest_part, current_part in zip(latest[:3], current[:3]):
+        if latest_part > current_part:
+            return True
+        elif latest_part < current_part:
+            return False
+
+    # If versions are equal, check pre-release suffixes
+    if latest[3] is None and current[3] is not None:
+        return True
+    elif latest[3] is not None and current[3] is None:
+        return False
+    elif latest[3] is not None and current[3] is not None:
+        # Simple string comparison for pre-release versions
+        return latest[3] > current[3]
+
+    return False
+
+
 api.add_resource(VersionApi, "/version")
```
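For reference, the comparison semantics of `_has_new_version` introduced above, shown as a few sanity checks (the expected values follow from the function's own logic; this snippet is illustrative and not part of the change):

```python
# Numeric parts are compared first, major.minor.patch:
assert _has_new_version(latest_version="0.9.0", current_version="0.8.3")
# A final release counts as newer than a pre-release of the same number:
assert _has_new_version(latest_version="0.9.0", current_version="0.9.0-beta1")
# Pre-release suffixes fall back to plain string comparison:
assert _has_new_version(latest_version="0.9.0-rc2", current_version="0.9.0-rc1")
# Equal versions report no update:
assert not _has_new_version(latest_version="0.9.0", current_version="0.9.0")
```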
```diff
@@ -72,8 +72,9 @@ class DefaultModelApi(Resource):
                     provider=model_setting["provider"],
                     model=model_setting["model"],
                 )
-            except Exception:
-                logging.warning(f"{model_setting['model_type']} save error")
+            except Exception as ex:
+                logging.exception(f"{model_setting['model_type']} save error: {ex}")
+                raise ex
 
         return {"result": "success"}
```
```diff
@@ -28,11 +28,11 @@ class DatasetListApi(DatasetApiResource):
 
         page = request.args.get("page", default=1, type=int)
         limit = request.args.get("limit", default=20, type=int)
-        provider = request.args.get("provider", default="vendor")
+        # provider = request.args.get("provider", default="vendor")
         search = request.args.get("keyword", default=None, type=str)
         tag_ids = request.args.getlist("tag_ids")
 
-        datasets, total = DatasetService.get_datasets(page, limit, provider, tenant_id, current_user, search, tag_ids)
+        datasets, total = DatasetService.get_datasets(page, limit, tenant_id, current_user, search, tag_ids)
         # check embedding setting
         provider_manager = ProviderManager()
         configurations = provider_manager.get_configurations(tenant_id=current_user.current_tenant_id)
@@ -82,6 +82,26 @@ class DatasetListApi(DatasetApiResource):
             required=False,
             nullable=False,
         )
+        parser.add_argument(
+            "external_knowledge_api_id",
+            type=str,
+            nullable=True,
+            required=False,
+            default="_validate_name",
+        )
+        parser.add_argument(
+            "provider",
+            type=str,
+            nullable=True,
+            required=False,
+            default="vendor",
+        )
+        parser.add_argument(
+            "external_knowledge_id",
+            type=str,
+            nullable=True,
+            required=False,
+        )
         args = parser.parse_args()
 
         try:
@@ -91,6 +111,9 @@ class DatasetListApi(DatasetApiResource):
                 indexing_technique=args["indexing_technique"],
                 account=current_user,
                 permission=args["permission"],
+                provider=args["provider"],
+                external_knowledge_api_id=args["external_knowledge_api_id"],
+                external_knowledge_id=args["external_knowledge_id"],
             )
         except services.errors.dataset.DatasetNameDuplicateError:
             raise DatasetNameDuplicateError()
```
```diff
@@ -231,7 +231,8 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
             except Exception as e:
                 logger.error(e)
                 break
-        yield MessageAudioEndStreamResponse(audio="", task_id=task_id)
+        if tts_publisher:
+            yield MessageAudioEndStreamResponse(audio="", task_id=task_id)
 
     def _process_stream_response(
         self,
```
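The added guard makes the stream emit `MessageAudioEndStreamResponse` only when a TTS publisher was actually created, instead of unconditionally after the loop; the same fix is applied to the workflow and EasyUI pipelines below.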
```diff
@@ -75,10 +75,10 @@ class AppGenerateResponseConverter(ABC):
         :return:
         """
         # show_retrieve_source
+        updated_resources = []
         if "retriever_resources" in metadata:
-            metadata["retriever_resources"] = []
             for resource in metadata["retriever_resources"]:
-                metadata["retriever_resources"].append(
+                updated_resources.append(
                     {
                         "segment_id": resource["segment_id"],
                         "position": resource["position"],
@@ -87,6 +87,7 @@ class AppGenerateResponseConverter(ABC):
                         "content": resource["content"],
                     }
                 )
+            metadata["retriever_resources"] = updated_resources
 
         # show annotation reply
         if "annotation_reply" in metadata:
```
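The reasoning behind this change: the previous code reset `metadata["retriever_resources"]` to an empty list and then iterated over it, so the loop body never executed and the resources were silently dropped; accumulating into `updated_resources` first and assigning it back afterwards restores the intended rewrite of each resource.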
```diff
@@ -309,7 +309,7 @@ class AppRunner:
             if not prompt_messages:
                 prompt_messages = result.prompt_messages
 
-            if not usage and result.delta.usage:
+            if result.delta.usage:
                 usage = result.delta.usage
 
         if not usage:
```
```diff
@@ -212,7 +212,8 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
             except Exception as e:
                 logger.error(e)
                 break
-        yield MessageAudioEndStreamResponse(audio="", task_id=task_id)
+        if tts_publisher:
+            yield MessageAudioEndStreamResponse(audio="", task_id=task_id)
 
     def _process_stream_response(
         self,
```
```diff
@@ -1,2 +1,2 @@
-class VariableError(Exception):
+class VariableError(ValueError):
     pass
```
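Because `VariableError` now subclasses `ValueError`, call sites that already catch `ValueError` will also catch variable errors, while handlers that catch `VariableError` directly are unaffected.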
```diff
@@ -248,7 +248,8 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleMan
                 else:
                     start_listener_time = time.time()
                     yield MessageAudioStreamResponse(audio=audio.audio, task_id=task_id)
-        yield MessageAudioEndStreamResponse(audio="", task_id=task_id)
+        if publisher:
+            yield MessageAudioEndStreamResponse(audio="", task_id=task_id)
 
     def _process_stream_response(
         self, publisher: AppGeneratorTTSPublisher, trace_manager: Optional[TraceQueueManager] = None
```
```diff
@@ -59,7 +59,7 @@ class DatasetIndexToolCallbackHandler:
             for item in resource:
                 dataset_retriever_resource = DatasetRetrieverResource(
                     message_id=self._message_id,
-                    position=item.get("position"),
+                    position=item.get("position") or 0,
                     dataset_id=item.get("dataset_id"),
                     dataset_name=item.get("dataset_name"),
                     document_id=item.get("document_id"),
```
```diff
@@ -5,6 +5,7 @@ from typing import Optional, cast
 import numpy as np
 from sqlalchemy.exc import IntegrityError
 
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_manager import ModelInstance
 from core.model_runtime.entities.model_entities import ModelPropertyKey
 from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
@@ -56,7 +57,9 @@ class CacheEmbedding(Embeddings):
             for i in range(0, len(embedding_queue_texts), max_chunks):
                 batch_texts = embedding_queue_texts[i : i + max_chunks]
 
-                embedding_result = self._model_instance.invoke_text_embedding(texts=batch_texts, user=self._user)
+                embedding_result = self._model_instance.invoke_text_embedding(
+                    texts=batch_texts, user=self._user, input_type=EmbeddingInputType.DOCUMENT
+                )
 
                 for vector in embedding_result.embeddings:
                     try:
@@ -100,7 +103,9 @@ class CacheEmbedding(Embeddings):
                 redis_client.expire(embedding_cache_key, 600)
                 return list(np.frombuffer(base64.b64decode(embedding), dtype="float"))
         try:
-            embedding_result = self._model_instance.invoke_text_embedding(texts=[text], user=self._user)
+            embedding_result = self._model_instance.invoke_text_embedding(
+                texts=[text], user=self._user, input_type=EmbeddingInputType.QUERY
+            )
 
             embedding_results = embedding_result.embeddings[0]
             embedding_results = (embedding_results / np.linalg.norm(embedding_results)).tolist()
```
api/core/embedding/embedding_constant.py (new file, 10 lines)
```python
from enum import Enum


class EmbeddingInputType(Enum):
    """
    Enum for embedding input type.
    """

    DOCUMENT = "document"
    QUERY = "query"
```
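A hedged usage sketch of the new enum: `model_instance` below stands for an already-configured `ModelInstance` (not shown here), and the keyword arguments match the `invoke_text_embedding` signature extended later in this changeset.

```python
from core.embedding.embedding_constant import EmbeddingInputType

# Index time: embed stored passages as documents (the default input type).
doc_result = model_instance.invoke_text_embedding(
    texts=["chunk one", "chunk two"],
    user="user-123",
    input_type=EmbeddingInputType.DOCUMENT,
)

# Query time: providers may prefix or optimize query embeddings differently.
query_result = model_instance.invoke_text_embedding(
    texts=["what changed in 0.9.0?"],
    user="user-123",
    input_type=EmbeddingInputType.QUERY,
)
```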
```diff
@@ -119,7 +119,7 @@ class ProviderConfiguration(BaseModel):
                     credentials = model_configuration.credentials
                     break
 
-        if self.custom_configuration.provider:
+        if not credentials and self.custom_configuration.provider:
             credentials = self.custom_configuration.provider.credentials
 
         return credentials
```
```diff
@@ -65,7 +65,6 @@ SUGGESTED_QUESTIONS_AFTER_ANSWER_INSTRUCTION_PROMPT = (
     "Please help me predict the three most likely questions that human would ask, "
     "and keeping each question under 20 characters.\n"
     "MAKE SURE your output is the SAME language as the Assistant's latest response"
     "(if the main response is written in Chinese, then the language of your output must be using Chinese.)!\n"
     "The output must be an array in JSON format following the specified schema:\n"
     '["question1","question2","question3"]\n'
 )
```
```diff
@@ -3,6 +3,7 @@ import os
 from collections.abc import Callable, Generator, Sequence
 from typing import IO, Optional, Union, cast
 
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle
 from core.entities.provider_entities import ModelLoadBalancingConfiguration
 from core.errors.error import ProviderTokenNotInitError
@@ -158,12 +159,15 @@ class ModelInstance:
             tools=tools,
         )
 
-    def invoke_text_embedding(self, texts: list[str], user: Optional[str] = None) -> TextEmbeddingResult:
+    def invoke_text_embedding(
+        self, texts: list[str], user: Optional[str] = None, input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT
+    ) -> TextEmbeddingResult:
         """
         Invoke large language model
 
         :param texts: texts to embed
         :param user: unique user id
+        :param input_type: input type
         :return: embeddings result
         """
         if not isinstance(self.model_type_instance, TextEmbeddingModel):
@@ -176,6 +180,7 @@ class ModelInstance:
             credentials=self.credentials,
             texts=texts,
             user=user,
+            input_type=input_type,
         )
 
     def get_text_embedding_num_tokens(self, texts: list[str]) -> int:
```
```diff
@@ -1,3 +1,4 @@
+from abc import ABC, abstractmethod
 from typing import Optional
 
 from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk
@@ -13,7 +14,7 @@ _TEXT_COLOR_MAPPING = {
 }
 
 
-class Callback:
+class Callback(ABC):
     """
     Base class for callbacks.
     Only for LLM.
@@ -21,6 +22,7 @@ class Callback:
 
     raise_error: bool = False
 
+    @abstractmethod
     def on_before_invoke(
         self,
         llm_instance: AIModel,
@@ -48,6 +50,7 @@ class Callback:
         """
         raise NotImplementedError()
 
+    @abstractmethod
     def on_new_chunk(
         self,
         llm_instance: AIModel,
@@ -77,6 +80,7 @@ class Callback:
         """
         raise NotImplementedError()
 
+    @abstractmethod
     def on_after_invoke(
         self,
         llm_instance: AIModel,
@@ -106,6 +110,7 @@ class Callback:
         """
         raise NotImplementedError()
 
+    @abstractmethod
     def on_invoke_error(
         self,
         llm_instance: AIModel,
```
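Since `Callback` is now an abstract base class, every concrete callback must override all four hooks. A minimal sketch (hook parameter lists are abbreviated to `**kwargs` here; the full signatures are in the file):

```python
class LoggingCallback(Callback):
    """Illustrative subclass; each @abstractmethod hook must be implemented."""

    def on_before_invoke(self, llm_instance, **kwargs):
        print("before invoke")

    def on_new_chunk(self, llm_instance, chunk, **kwargs):
        print("received a chunk")

    def on_after_invoke(self, llm_instance, result, **kwargs):
        print("after invoke")

    def on_invoke_error(self, llm_instance, ex, **kwargs):
        print(f"invoke error: {ex}")
```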
New documentation file (310 lines):

## Custom Integration of Pre-defined Models

### Introduction

After completing the vendor integration, the next step is to connect the vendor's models. To illustrate the entire connection process, we will use Xinference as an example to demonstrate a complete vendor integration.

It is important to note that for custom models, each model connection requires a complete vendor credential.

Unlike pre-defined models, a custom vendor integration always includes the following two parameters, which do not need to be defined in the vendor YAML file.



As mentioned earlier, vendors do not need to implement `validate_provider_credential`. The runtime will automatically call the corresponding model layer's `validate_credentials` to validate the credentials based on the model type and name selected by the user.

### Writing the Vendor YAML

First, we need to identify the types of models supported by the vendor we are integrating.

Currently supported model types are as follows:

- `llm` Text Generation Models
- `text_embedding` Text Embedding Models
- `rerank` Rerank Models
- `speech2text` Speech-to-Text
- `tts` Text-to-Speech
- `moderation` Moderation

Xinference supports LLM, Text Embedding, and Rerank, so we will start by writing `xinference.yaml`.

```yaml
provider: xinference # Define the vendor identifier
label: # Vendor display name, supports both en_US (English) and zh_Hans (Simplified Chinese). If zh_Hans is not set, en_US is used by default.
  en_US: Xorbits Inference
icon_small: # Small icon; refer to other vendors' icons stored in the _assets directory within the vendor implementation directory; follows the same language policy as the label
  en_US: icon_s_en.svg
icon_large: # Large icon
  en_US: icon_l_en.svg
help: # Help information
  title:
    en_US: How to deploy Xinference
    zh_Hans: 如何部署 Xinference
  url:
    en_US: https://github.com/xorbitsai/inference
supported_model_types: # Supported model types. Xinference supports LLM, Text Embedding, and Rerank
  - llm
  - text-embedding
  - rerank
configurate_methods: # Since Xinference is a locally deployed vendor with no predefined models, users need to deploy whatever models they need according to the Xinference documentation. Thus, it only supports custom models.
  - customizable-model
provider_credential_schema:
  credential_form_schemas:
```

Then, we need to determine what credentials are required to define a model in Xinference.

- Since it supports three different types of models, we need a `model_type` credential to denote the model type. Here is how we can define it:

```yaml
provider_credential_schema:
  credential_form_schemas:
    - variable: model_type
      type: select
      label:
        en_US: Model type
        zh_Hans: 模型类型
      required: true
      options:
        - value: text-generation
          label:
            en_US: Language Model
            zh_Hans: 语言模型
        - value: embeddings
          label:
            en_US: Text Embedding
        - value: reranking
          label:
            en_US: Rerank
```

- Next, each model has its own `model_name`, so we need to define that here:

```yaml
- variable: model_name
  type: text-input
  label:
    en_US: Model name
    zh_Hans: 模型名称
  required: true
  placeholder:
    zh_Hans: 填写模型名称
    en_US: Input model name
```

- Specify the Xinference local deployment address:

```yaml
- variable: server_url
  label:
    zh_Hans: 服务器URL
    en_US: Server url
  type: text-input
  required: true
  placeholder:
    zh_Hans: 在此输入Xinference的服务器地址,如 https://example.com/xxx
    en_US: Enter the url of your Xinference, for example https://example.com/xxx
```

- Each model has a unique `model_uid`, so we also need to define that here:

```yaml
- variable: model_uid
  label:
    zh_Hans: 模型UID
    en_US: Model uid
  type: text-input
  required: true
  placeholder:
    zh_Hans: 在此输入您的Model UID
    en_US: Enter the model uid
```

Now, we have completed the basic definition of the vendor.

### Writing the Model Code

Next, let's take the `llm` type as an example and write `xinference.llm.llm.py`.

In `llm.py`, create a Xinference LLM class, named `XinferenceAILargeLanguageModel` (the name is arbitrary), inheriting from the `__base.large_language_model.LargeLanguageModel` base class, and implement the following methods:

- LLM Invocation

Implement the core method for LLM invocation, supporting both stream and synchronous responses.

```python
def _invoke(self, model: str, credentials: dict,
            prompt_messages: list[PromptMessage], model_parameters: dict,
            tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
            stream: bool = True, user: Optional[str] = None) \
        -> Union[LLMResult, Generator]:
    """
    Invoke large language model

    :param model: model name
    :param credentials: model credentials
    :param prompt_messages: prompt messages
    :param model_parameters: model parameters
    :param tools: tools for tool usage
    :param stop: stop words
    :param stream: is the response a stream
    :param user: unique user id
    :return: full response or stream response chunk generator result
    """
```

When implementing, ensure to use two functions to return data separately for synchronous and stream responses. This is important because Python treats functions containing the `yield` keyword as generator functions, mandating them to return `Generator` types. Here's an example (note that the example uses simplified parameters; in a real implementation, use the parameter list as defined above):

```python
def _invoke(self, stream: bool, **kwargs) \
        -> Union[LLMResult, Generator]:
    if stream:
        return self._handle_stream_response(**kwargs)
    return self._handle_sync_response(**kwargs)

def _handle_stream_response(self, **kwargs) -> Generator:
    for chunk in response:
        yield chunk

def _handle_sync_response(self, **kwargs) -> LLMResult:
    return LLMResult(**response)
```

- Pre-compute Input Tokens

If the model does not provide an interface for pre-computing tokens, you can return 0 directly.

```python
def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
                   tools: Optional[list[PromptMessageTool]] = None) -> int:
    """
    Get number of tokens for given prompt messages

    :param model: model name
    :param credentials: model credentials
    :param prompt_messages: prompt messages
    :param tools: tools for tool usage
    :return: token count
    """
```

Sometimes you might not want to return 0 directly. In such cases, you can use `self._get_num_tokens_by_gpt2(text: str)` to get a pre-computed token count. This method is provided by the `AIModel` base class and uses GPT-2's tokenizer for the calculation. Note that this is only a substitute and may not be fully accurate; a sketch follows below.
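For example, a rough fallback using that helper might look like the following (a sketch, not Xinference's actual implementation; it assumes plain-text message contents):

```python
def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
                   tools: Optional[list[PromptMessageTool]] = None) -> int:
    # Rough estimate only: join the message contents and count GPT-2 tokens.
    text = "\n".join(str(message.content) for message in prompt_messages)
    return self._get_num_tokens_by_gpt2(text)
```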
- Model Credentials Validation

Similar to vendor credentials validation, this method validates individual model credentials.

```python
def validate_credentials(self, model: str, credentials: dict) -> None:
    """
    Validate model credentials

    :param model: model name
    :param credentials: model credentials
    :return: None
    """
```

- Model Parameter Schema

Unlike the pre-defined case, since the YAML file does not define which parameters a model supports, we need to dynamically generate the model parameter schema.

For instance, Xinference supports the `max_tokens`, `temperature`, and `top_p` parameters.

However, some vendors may support different parameters for different models. For example, the `OpenLLM` vendor supports `top_k`, but not every model provided by this vendor supports `top_k`. Let's say model A supports `top_k` but model B does not. In such cases, we need to dynamically generate the model parameter schema, as illustrated below:

```python
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity | None:
    """
    used to define customizable model schema
    """
    rules = [
        ParameterRule(
            name='temperature', type=ParameterType.FLOAT,
            use_template='temperature',
            label=I18nObject(
                zh_Hans='温度', en_US='Temperature'
            )
        ),
        ParameterRule(
            name='top_p', type=ParameterType.FLOAT,
            use_template='top_p',
            label=I18nObject(
                zh_Hans='Top P', en_US='Top P'
            )
        ),
        ParameterRule(
            name='max_tokens', type=ParameterType.INT,
            use_template='max_tokens',
            min=1,
            default=512,
            label=I18nObject(
                zh_Hans='最大生成长度', en_US='Max Tokens'
            )
        )
    ]

    # if model is A, add top_k to rules
    if model == 'A':
        rules.append(
            ParameterRule(
                name='top_k', type=ParameterType.INT,
                use_template='top_k',
                min=1,
                default=50,
                label=I18nObject(
                    zh_Hans='Top K', en_US='Top K'
                )
            )
        )

    """
    some NOT IMPORTANT code here
    """

    entity = AIModelEntity(
        model=model,
        label=I18nObject(
            en_US=model
        ),
        fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
        model_type=model_type,
        model_properties={
            ModelPropertyKey.MODE: ModelType.LLM,
        },
        parameter_rules=rules
    )

    return entity
```

- Exception Error Mapping

When a model invocation error occurs, it should be mapped to the runtime's specified `InvokeError` type, enabling Dify to handle different errors appropriately.

Runtime Errors:

- `InvokeConnectionError` Connection error during invocation
- `InvokeServerUnavailableError` Service provider unavailable
- `InvokeRateLimitError` Rate limit reached
- `InvokeAuthorizationError` Authorization failure
- `InvokeBadRequestError` Invalid request parameters

```python
@property
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
    """
    Map model invoke error to unified error
    The key is the error type thrown to the caller
    The value is the error type thrown by the model,
    which needs to be converted into a unified error type for the caller.

    :return: Invoke error mapping
    """
```
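As an illustration, a mapping for an HTTP-based vendor might look like the sketch below; the provider-side exception classes are examples taken from the `requests` library, not Xinference's actual mapping:

```python
import requests

# (method body inside the model class)
@property
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
    return {
        InvokeConnectionError: [requests.exceptions.ConnectionError, requests.exceptions.Timeout],
        InvokeServerUnavailableError: [requests.exceptions.HTTPError],
        InvokeRateLimitError: [],
        InvokeAuthorizationError: [],
        InvokeBadRequestError: [ValueError, KeyError],
    }
```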
For interface method details, see: [Interfaces](./interfaces.md). For specific implementations, refer to: [llm.py](https://github.com/langgenius/dify-runtime/blob/main/lib/model_providers/anthropic/llm/llm.py).
Binary files added (not shown):

- api/core/model_runtime/docs/en_US/images/index/image-1.png (new, 230 KiB)
- api/core/model_runtime/docs/en_US/images/index/image-2.png (new, 205 KiB)
- api/core/model_runtime/docs/en_US/images/index/image-3.png (new, 44 KiB)
- api/core/model_runtime/docs/en_US/images/index/image.png (new, 262 KiB)
api/core/model_runtime/docs/en_US/predefined_model_scale_out.md (new file, 173 lines)
## Predefined Model Integration

After completing the vendor integration, the next step is to integrate the models from the vendor.

First, we need to determine the type of model to be integrated and create the corresponding model type `module` under the respective vendor's directory.

Currently supported model types are:

- `llm` Text Generation Model
- `text_embedding` Text Embedding Model
- `rerank` Rerank Model
- `speech2text` Speech-to-Text
- `tts` Text-to-Speech
- `moderation` Moderation

Continuing with `Anthropic` as an example: `Anthropic` only supports LLM, so create a `module` named `llm` under `model_providers.anthropic`.

For predefined models, we first need to create a YAML file named after the model under the `llm` `module`, such as `claude-2.1.yaml`.

### Prepare Model YAML

```yaml
model: claude-2.1 # Model identifier
# Display name of the model, which can be set to en_US English or zh_Hans Chinese. If zh_Hans is not set, it will default to en_US.
# This can also be omitted, in which case the model identifier will be used as the label
label:
  en_US: claude-2.1
model_type: llm # Model type; claude-2.1 is an LLM
features: # Supported features; agent-thought supports Agent reasoning, vision supports image understanding
  - agent-thought
model_properties: # Model properties
  mode: chat # LLM mode: complete for text completion models, chat for conversation models
  context_size: 200000 # Maximum context size
parameter_rules: # Parameter rules for the model call; only LLM requires this
  - name: temperature # Parameter variable name
    # Five default configuration templates are provided: temperature/top_p/max_tokens/presence_penalty/frequency_penalty
    # The template variable name can be set directly in use_template, which will use the default configuration in entities.defaults.PARAMETER_RULE_TEMPLATE
    # Additional configuration parameters will override the default configuration if set
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label: # Display name of the parameter
      zh_Hans: 取样数量
      en_US: Top k
    type: int # Parameter type, supports float/int/string/boolean
    help: # Help information, describing the parameter's function
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false # Whether the parameter is mandatory; can be omitted
  - name: max_tokens_to_sample
    use_template: max_tokens
    default: 4096 # Default value of the parameter
    min: 1 # Minimum value of the parameter, applicable to float/int only
    max: 4096 # Maximum value of the parameter, applicable to float/int only
pricing: # Pricing information
  input: '8.00' # Input unit price, i.e., prompt price
  output: '24.00' # Output unit price, i.e., response content price
  unit: '0.000001' # Price unit; with a unit of 0.000001, the prices above are per 1M tokens
  currency: USD # Price currency
```
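Concretely, the charge for a call under this schema is tokens × price × unit: with the values above, 1,000,000 prompt tokens cost 1,000,000 × 8.00 × 0.000001 = $8.00.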
It is recommended to prepare all model configurations before starting the implementation of the model code.

You can also refer to the YAML configuration information under the corresponding model type directories of other vendors in the `model_providers` directory. For the complete YAML rules, refer to: [Schema](schema.md#aimodelentity).

### Implement the Model Call Code

Next, create a Python file named `llm.py` under the `llm` `module` to write the implementation code.

Create an Anthropic LLM class named `AnthropicLargeLanguageModel` (or any other name), inheriting from the `__base.large_language_model.LargeLanguageModel` base class, and implement the following methods:

- LLM Call

Implement the core method for calling the LLM, supporting both streaming and synchronous responses.

```python
def _invoke(self, model: str, credentials: dict,
            prompt_messages: list[PromptMessage], model_parameters: dict,
            tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
            stream: bool = True, user: Optional[str] = None) \
        -> Union[LLMResult, Generator]:
    """
    Invoke large language model

    :param model: model name
    :param credentials: model credentials
    :param prompt_messages: prompt messages
    :param model_parameters: model parameters
    :param tools: tools for tool calling
    :param stop: stop words
    :param stream: is stream response
    :param user: unique user id
    :return: full response or stream response chunk generator result
    """
```

Ensure to use two functions for returning data, one for synchronous returns and the other for streaming returns, because Python identifies functions containing the `yield` keyword as generator functions, fixing the return type to `Generator`. Thus, synchronous and streaming returns need to be implemented separately, as shown below (note that the example uses simplified parameters; for an actual implementation, follow the parameter list above):

```python
def _invoke(self, stream: bool, **kwargs) \
        -> Union[LLMResult, Generator]:
    if stream:
        return self._handle_stream_response(**kwargs)
    return self._handle_sync_response(**kwargs)

def _handle_stream_response(self, **kwargs) -> Generator:
    for chunk in response:
        yield chunk

def _handle_sync_response(self, **kwargs) -> LLMResult:
    return LLMResult(**response)
```

- Pre-compute Input Tokens

If the model does not provide an interface to precompute tokens, return 0 directly.

```python
def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
                   tools: Optional[list[PromptMessageTool]] = None) -> int:
    """
    Get number of tokens for given prompt messages

    :param model: model name
    :param credentials: model credentials
    :param prompt_messages: prompt messages
    :param tools: tools for tool calling
    :return:
    """
```

- Validate Model Credentials

Similar to vendor credential validation, but specific to a single model.

```python
def validate_credentials(self, model: str, credentials: dict) -> None:
    """
    Validate model credentials

    :param model: model name
    :param credentials: model credentials
    :return:
    """
```

- Map Invoke Errors

When a model call fails, map it to a specific `InvokeError` type as required by the runtime, allowing Dify to handle different errors accordingly.

Runtime Errors:

- `InvokeConnectionError` Connection error
- `InvokeServerUnavailableError` Service provider unavailable
- `InvokeRateLimitError` Rate limit reached
- `InvokeAuthorizationError` Authorization failed
- `InvokeBadRequestError` Parameter error

```python
@property
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
    """
    Map model invoke error to unified error
    The key is the error type thrown to the caller
    The value is the error type thrown by the model,
    which needs to be converted into a unified error type for the caller.

    :return: Invoke error mapping
    """
```

For interface method explanations, see: [Interfaces](./interfaces.md). For detailed implementation, refer to: [llm.py](https://github.com/langgenius/dify-runtime/blob/main/lib/model_providers/anthropic/llm/llm.py).
````diff
@@ -58,7 +58,7 @@ provider_credential_schema: # Provider credential rules, as Anthropic only supp
       en_US: Enter your API URL
 ```
 
-You can also refer to the YAML configuration information under other provider directories in `model_providers`. The complete YAML rules are available at: [Schema](schema.md#Provider).
+You can also refer to the YAML configuration information under other provider directories in `model_providers`. The complete YAML rules are available at: [Schema](schema.md#provider).
 
 ### Implementing Provider Code
````
````diff
@@ -117,7 +117,7 @@ model_credential_schema:
       en_US: Enter your API Base
 ```
 
-也可以参考 `model_providers` 目录下其他供应商目录下的 YAML 配置信息,完整的 YAML 规则见:[Schema](schema.md#Provider)。
+也可以参考 `model_providers` 目录下其他供应商目录下的 YAML 配置信息,完整的 YAML 规则见:[Schema](schema.md#provider)。
 
 #### 实现供应商代码
````
@ -4,6 +4,7 @@ from typing import Optional

from pydantic import ConfigDict

from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
from core.model_runtime.model_providers.__base.ai_model import AIModel
@ -20,35 +21,47 @@ class TextEmbeddingModel(AIModel):
    model_config = ConfigDict(protected_namespaces=())

    def invoke(
        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke large language model
        Invoke text embedding model

        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        self.started_at = time.perf_counter()

        try:
            return self._invoke(model, credentials, texts, user)
            return self._invoke(model, credentials, texts, user, input_type)
        except Exception as e:
            raise self._transform_invoke_error(e)

    @abstractmethod
    def _invoke(
        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke large language model
        Invoke text embedding model

        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        raise NotImplementedError
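
The new `input_type` parameter defaults to `EmbeddingInputType.DOCUMENT`, so existing call sites keep working unchanged; retrieval code can opt in to query-side embeddings explicitly. A minimal caller sketch, where the model name and credentials are placeholders and `embedding_model` stands for any concrete `TextEmbeddingModel` subclass instance:

```python
from core.embedding.embedding_constant import EmbeddingInputType

# `embedding_model` is assumed to be an instance of a concrete
# TextEmbeddingModel subclass; model name and credentials are placeholders.
result = embedding_model.invoke(
    model="example-embedding-model",
    credentials={"api_key": "..."},
    texts=["What is the capital of the United States?"],
    input_type=EmbeddingInputType.QUERY,  # embed as a search query, not a document
)
print(result.usage.tokens, len(result.embeddings))
```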

@ -38,3 +38,6 @@
- perfxcloud
- zhinao
- fireworks
- mixedbread
- nomic
- voyage

@ -7,6 +7,7 @@ import numpy as np
import tiktoken
from openai import AzureOpenAI

from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import AIModelEntity, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError
@ -17,8 +18,23 @@ from core.model_runtime.model_providers.azure_openai._constant import EMBEDDING_

class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
    def _invoke(
        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model

        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        base_model_name = credentials["base_model_name"]
        credentials_kwargs = self._to_credential_kwargs(credentials)
        client = AzureOpenAI(**credentials_kwargs)

@ -4,6 +4,7 @@ from typing import Optional

from requests import post

from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@ -35,7 +36,12 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
    api_base: str = "http://api.baichuan-ai.com/v1/embeddings"

    def _invoke(
        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -44,6 +50,7 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        api_key = credentials["api_key"]
@ -6,6 +6,8 @@
- anthropic.claude-v2:1
- anthropic.claude-3-sonnet-v1:0
- anthropic.claude-3-haiku-v1:0
- ai21.jamba-1-5-large-v1:0
- ai21.jamba-1-5-mini-v1:0
- cohere.command-light-text-v14
- cohere.command-text-v14
- cohere.command-r-plus-v1.0
@ -15,6 +17,10 @@
- meta.llama3-1-405b-instruct-v1:0
- meta.llama3-8b-instruct-v1:0
- meta.llama3-70b-instruct-v1:0
- us.meta.llama3-2-1b-instruct-v1:0
- us.meta.llama3-2-3b-instruct-v1:0
- us.meta.llama3-2-11b-instruct-v1:0
- us.meta.llama3-2-90b-instruct-v1:0
- meta.llama2-13b-chat-v1
- meta.llama2-70b-chat-v1
- mistral.mistral-large-2407-v1:0

@ -0,0 +1,26 @@
model: ai21.jamba-1-5-large-v1:0
label:
  en_US: Jamba 1.5 Large
model_type: llm
model_properties:
  mode: completion
  context_size: 256000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 1
    min: 0.0
    max: 2.0
  - name: top_p
    use_template: top_p
  - name: max_gen_len
    use_template: max_tokens
    required: true
    default: 4096
    min: 1
    max: 4096
pricing:
  input: '0.002'
  output: '0.008'
  unit: '0.001'
  currency: USD

@ -0,0 +1,26 @@
model: ai21.jamba-1-5-mini-v1:0
label:
  en_US: Jamba 1.5 Mini
model_type: llm
model_properties:
  mode: completion
  context_size: 256000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 1
    min: 0.0
    max: 2.0
  - name: top_p
    use_template: top_p
  - name: max_gen_len
    use_template: max_tokens
    required: true
    default: 4096
    min: 1
    max: 4096
pricing:
  input: '0.0002'
  output: '0.0004'
  unit: '0.001'
  currency: USD

@ -63,6 +63,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
        {"prefix": "us.anthropic.claude-3", "support_system_prompts": True, "support_tool_use": True},
        {"prefix": "eu.anthropic.claude-3", "support_system_prompts": True, "support_tool_use": True},
        {"prefix": "anthropic.claude-3", "support_system_prompts": True, "support_tool_use": True},
        {"prefix": "us.meta.llama3-2", "support_system_prompts": True, "support_tool_use": True},
        {"prefix": "meta.llama", "support_system_prompts": True, "support_tool_use": False},
        {"prefix": "mistral.mistral-7b-instruct", "support_system_prompts": False, "support_tool_use": False},
        {"prefix": "mistral.mixtral-8x7b-instruct", "support_system_prompts": False, "support_tool_use": False},
@ -70,6 +71,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
        {"prefix": "mistral.mistral-small", "support_system_prompts": True, "support_tool_use": True},
        {"prefix": "cohere.command-r", "support_system_prompts": True, "support_tool_use": True},
        {"prefix": "amazon.titan", "support_system_prompts": False, "support_tool_use": False},
        {"prefix": "ai21.jamba-1-5", "support_system_prompts": True, "support_tool_use": False},
    ]

    @staticmethod
@ -0,0 +1,29 @@
model: us.meta.llama3-2-11b-instruct-v1:0
label:
  en_US: US Meta Llama 3.2 11B Instruct
model_type: llm
features:
  - vision
  - tool-call
model_properties:
  mode: completion
  context_size: 128000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.5
    min: 0.0
    max: 1
  - name: top_p
    use_template: top_p
  - name: max_gen_len
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 2048
pricing:
  input: '0.00035'
  output: '0.00035'
  unit: '0.001'
  currency: USD

@ -0,0 +1,26 @@
model: us.meta.llama3-2-1b-instruct-v1:0
label:
  en_US: US Meta Llama 3.2 1B Instruct
model_type: llm
model_properties:
  mode: completion
  context_size: 128000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.5
    min: 0.0
    max: 1
  - name: top_p
    use_template: top_p
  - name: max_gen_len
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 2048
pricing:
  input: '0.0001'
  output: '0.0001'
  unit: '0.001'
  currency: USD

@ -0,0 +1,26 @@
model: us.meta.llama3-2-3b-instruct-v1:0
label:
  en_US: US Meta Llama 3.2 3B Instruct
model_type: llm
model_properties:
  mode: completion
  context_size: 128000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.5
    min: 0.0
    max: 1
  - name: top_p
    use_template: top_p
  - name: max_gen_len
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 2048
pricing:
  input: '0.00015'
  output: '0.00015'
  unit: '0.001'
  currency: USD

@ -0,0 +1,31 @@
model: us.meta.llama3-2-90b-instruct-v1:0
label:
  en_US: US Meta Llama 3.2 90B Instruct
model_type: llm
features:
  - tool-call
model_properties:
  mode: completion
  context_size: 128000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.5
    min: 0.0
    max: 1
  - name: top_p
    use_template: top_p
    default: 0.9
    min: 0
    max: 1
  - name: max_gen_len
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 2048
pricing:
  input: '0.002'
  output: '0.002'
  unit: '0.001'
  currency: USD

@ -13,6 +13,7 @@ from botocore.exceptions import (
    UnknownServiceError,
)

from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@ -30,7 +31,12 @@ logger = logging.getLogger(__name__)

class BedrockTextEmbeddingModel(TextEmbeddingModel):
    def _invoke(
        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -39,6 +45,7 @@ class BedrockTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        client_config = Config(region_name=credentials["aws_region"])
@ -5,6 +5,7 @@ import cohere
import numpy as np
from cohere.core import RequestOptions

from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@ -25,7 +26,12 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
    """

    def _invoke(
        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -34,6 +40,7 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        # get model properties

@ -15,6 +15,7 @@ help:
    en_US: https://fireworks.ai/account/api-keys
supported_model_types:
  - llm
  - text-embedding
configurate_methods:
  - predefined-model
provider_credential_schema:
@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
label:
  zh_Hans: Llama 3.2 11B Vision Instruct
  en_US: Llama 3.2 11B Vision Instruct
model_type: llm
features:
  - agent-thought
  - tool-call
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
  - name: max_tokens
    use_template: max_tokens
  - name: context_length_exceeded_behavior
    default: None
    label:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    help:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    type: string
    options:
      - None
      - truncate
      - error
  - name: response_format
    use_template: response_format
pricing:
  input: '0.2'
  output: '0.2'
  unit: '0.000001'
  currency: USD

@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-1b-instruct
label:
  zh_Hans: Llama 3.2 1B Instruct
  en_US: Llama 3.2 1B Instruct
model_type: llm
features:
  - agent-thought
  - tool-call
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
  - name: max_tokens
    use_template: max_tokens
  - name: context_length_exceeded_behavior
    default: None
    label:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    help:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    type: string
    options:
      - None
      - truncate
      - error
  - name: response_format
    use_template: response_format
pricing:
  input: '0.1'
  output: '0.1'
  unit: '0.000001'
  currency: USD

@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-3b-instruct
label:
  zh_Hans: Llama 3.2 3B Instruct
  en_US: Llama 3.2 3B Instruct
model_type: llm
features:
  - agent-thought
  - tool-call
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
  - name: max_tokens
    use_template: max_tokens
  - name: context_length_exceeded_behavior
    default: None
    label:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    help:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    type: string
    options:
      - None
      - truncate
      - error
  - name: response_format
    use_template: response_format
pricing:
  input: '0.1'
  output: '0.1'
  unit: '0.000001'
  currency: USD

@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
label:
  zh_Hans: Llama 3.2 90B Vision Instruct
  en_US: Llama 3.2 90B Vision Instruct
model_type: llm
features:
  - agent-thought
  - tool-call
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
  - name: max_tokens
    use_template: max_tokens
  - name: context_length_exceeded_behavior
    default: None
    label:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    help:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    type: string
    options:
      - None
      - truncate
      - error
  - name: response_format
    use_template: response_format
pricing:
  input: '0.9'
  output: '0.9'
  unit: '0.000001'
  currency: USD

@ -0,0 +1,12 @@
model: WhereIsAI/UAE-Large-V1
label:
  zh_Hans: UAE-Large-V1
  en_US: UAE-Large-V1
model_type: text-embedding
model_properties:
  context_size: 512
  max_chunks: 1
pricing:
  input: '0.008'
  unit: '0.000001'
  currency: 'USD'

@ -0,0 +1,12 @@
model: thenlper/gte-base
label:
  zh_Hans: GTE-base
  en_US: GTE-base
model_type: text-embedding
model_properties:
  context_size: 512
  max_chunks: 1
pricing:
  input: '0.008'
  unit: '0.000001'
  currency: 'USD'

@ -0,0 +1,12 @@
model: thenlper/gte-large
label:
  zh_Hans: GTE-large
  en_US: GTE-large
model_type: text-embedding
model_properties:
  context_size: 512
  max_chunks: 1
pricing:
  input: '0.008'
  unit: '0.000001'
  currency: 'USD'

@ -0,0 +1,12 @@
model: nomic-ai/nomic-embed-text-v1.5
label:
  zh_Hans: nomic-embed-text-v1.5
  en_US: nomic-embed-text-v1.5
model_type: text-embedding
model_properties:
  context_size: 8192
  max_chunks: 16
pricing:
  input: '0.008'
  unit: '0.000001'
  currency: 'USD'

@ -0,0 +1,12 @@
model: nomic-ai/nomic-embed-text-v1
label:
  zh_Hans: nomic-embed-text-v1
  en_US: nomic-embed-text-v1
model_type: text-embedding
model_properties:
  context_size: 8192
  max_chunks: 16
pricing:
  input: '0.008'
  unit: '0.000001'
  currency: 'USD'
@ -0,0 +1,151 @@
import time
from collections.abc import Mapping
from typing import Optional, Union

import numpy as np
from openai import OpenAI

from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
from core.model_runtime.model_providers.fireworks._common import _CommonFireworks


class FireworksTextEmbeddingModel(_CommonFireworks, TextEmbeddingModel):
    """
    Model class for Fireworks text embedding model.
    """

    def _invoke(
        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model

        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """

        credentials_kwargs = self._to_credential_kwargs(credentials)
        client = OpenAI(**credentials_kwargs)

        extra_model_kwargs = {}
        if user:
            extra_model_kwargs["user"] = user

        extra_model_kwargs["encoding_format"] = "float"

        context_size = self._get_context_size(model, credentials)
        max_chunks = self._get_max_chunks(model, credentials)

        inputs = []
        indices = []
        used_tokens = 0

        for i, text in enumerate(texts):
            # Here token count is only an approximation based on the GPT2 tokenizer
            # TODO: Optimize for better token estimation and chunking
            num_tokens = self._get_num_tokens_by_gpt2(text)

            if num_tokens >= context_size:
                cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
                # if num tokens is larger than context length, only use the start
                inputs.append(text[0:cutoff])
            else:
                inputs.append(text)
            indices += [i]

        batched_embeddings = []
        _iter = range(0, len(inputs), max_chunks)

        for i in _iter:
            embeddings_batch, embedding_used_tokens = self._embedding_invoke(
                model=model,
                client=client,
                texts=inputs[i : i + max_chunks],
                extra_model_kwargs=extra_model_kwargs,
            )
            used_tokens += embedding_used_tokens
            batched_embeddings += embeddings_batch

        usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens)
        return TextEmbeddingResult(embeddings=batched_embeddings, usage=usage, model=model)

    def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
        """
        Get number of tokens for given prompt messages

        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :return:
        """
        return sum(self._get_num_tokens_by_gpt2(text) for text in texts)

    def validate_credentials(self, model: str, credentials: Mapping) -> None:
        """
        Validate model credentials

        :param model: model name
        :param credentials: model credentials
        :return:
        """
        try:
            # transform credentials to kwargs for model instance
            credentials_kwargs = self._to_credential_kwargs(credentials)
            client = OpenAI(**credentials_kwargs)

            # call embedding model
            self._embedding_invoke(model=model, client=client, texts=["ping"], extra_model_kwargs={})
        except Exception as ex:
            raise CredentialsValidateFailedError(str(ex))

    def _embedding_invoke(
        self, model: str, client: OpenAI, texts: Union[list[str], str], extra_model_kwargs: dict
    ) -> tuple[list[list[float]], int]:
        """
        Invoke embedding model
        :param model: model name
        :param client: model client
        :param texts: texts to embed
        :param extra_model_kwargs: extra model kwargs
        :return: embeddings and used tokens
        """
        response = client.embeddings.create(model=model, input=texts, **extra_model_kwargs)
        return [data.embedding for data in response.data], response.usage.total_tokens

    def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
        """
        Calculate response usage

        :param model: model name
        :param credentials: model credentials
        :param tokens: input tokens
        :return: usage
        """
        input_price_info = self.get_price(
            model=model, credentials=credentials, tokens=tokens, price_type=PriceType.INPUT
        )

        usage = EmbeddingUsage(
            tokens=tokens,
            total_tokens=tokens,
            unit_price=input_price_info.unit_price,
            price_unit=input_price_info.unit,
            total_price=input_price_info.total_amount,
            currency=input_price_info.currency,
            latency=time.perf_counter() - self.started_at,
        )

        return usage
@ -0,0 +1,48 @@
model: gemini-1.5-flash-001
label:
  en_US: Gemini 1.5 Flash 001
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 1048576
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens_to_sample
    use_template: max_tokens
    required: true
    default: 8192
    min: 1
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD

@ -0,0 +1,48 @@
model: gemini-1.5-flash-002
label:
  en_US: Gemini 1.5 Flash 002
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 1048576
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens_to_sample
    use_template: max_tokens
    required: true
    default: 8192
    min: 1
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD

@ -32,6 +32,15 @@ parameter_rules:
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
pricing:
  input: '0.00'
  output: '0.00'

@ -0,0 +1,48 @@
model: gemini-1.5-flash-8b-exp-0924
label:
  en_US: Gemini 1.5 Flash 8B 0924
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 1048576
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens_to_sample
    use_template: max_tokens
    required: true
    default: 8192
    min: 1
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD

@ -32,6 +32,15 @@ parameter_rules:
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
pricing:
  input: '0.00'
  output: '0.00'

@ -1,6 +1,6 @@
model: gemini-1.5-flash-latest
label:
  en_US: Gemini 1.5 Flash
  en_US: Gemini 1.5 Flash Latest
model_type: llm
features:
  - agent-thought
@ -32,6 +32,15 @@ parameter_rules:
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
pricing:
  input: '0.00'
  output: '0.00'

@ -0,0 +1,48 @@
model: gemini-1.5-flash
label:
  en_US: Gemini 1.5 Flash
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 1048576
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens_to_sample
    use_template: max_tokens
    required: true
    default: 8192
    min: 1
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD

@ -0,0 +1,48 @@
model: gemini-1.5-pro-001
label:
  en_US: Gemini 1.5 Pro 001
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 2097152
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens_to_sample
    use_template: max_tokens
    required: true
    default: 8192
    min: 1
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD

@ -0,0 +1,48 @@
model: gemini-1.5-pro-002
label:
  en_US: Gemini 1.5 Pro 002
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 2097152
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens_to_sample
    use_template: max_tokens
    required: true
    default: 8192
    min: 1
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD

@ -32,6 +32,15 @@ parameter_rules:
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
pricing:
  input: '0.00'
  output: '0.00'

@ -32,6 +32,15 @@ parameter_rules:
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
pricing:
  input: '0.00'
  output: '0.00'

@ -1,6 +1,6 @@
model: gemini-1.5-pro-latest
label:
  en_US: Gemini 1.5 Pro
  en_US: Gemini 1.5 Pro Latest
model_type: llm
features:
  - agent-thought
@ -32,6 +32,15 @@ parameter_rules:
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
pricing:
  input: '0.00'
  output: '0.00'

@ -0,0 +1,48 @@
model: gemini-1.5-pro
label:
  en_US: Gemini 1.5 Pro
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 2097152
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens_to_sample
    use_template: max_tokens
    required: true
    default: 8192
    min: 1
    max: 8192
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD

@ -27,6 +27,15 @@ parameter_rules:
    default: 4096
    min: 1
    max: 4096
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
pricing:
  input: '0.00'
  output: '0.00'

@ -31,6 +31,15 @@ parameter_rules:
    max: 2048
  - name: response_format
    use_template: response_format
  - name: stream
    label:
      zh_Hans: 流式输出
      en_US: Stream
    type: boolean
    help:
      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
    default: false
pricing:
  input: '0.00'
  output: '0.00'
@ -9,8 +9,8 @@ import google.ai.generativelanguage as glm
import google.generativeai as genai
import requests
from google.api_core import exceptions
from google.generativeai import client
from google.generativeai.types import ContentType, GenerateContentResponse, HarmBlockThreshold, HarmCategory
from google.generativeai.client import _ClientManager
from google.generativeai.types import ContentType, GenerateContentResponse
from google.generativeai.types.content_types import to_part
from PIL import Image

@ -200,24 +200,16 @@ class GoogleLargeLanguageModel(LargeLanguageModel):
            history.append(content)

        # Create a new ClientManager with tenant's API key
        new_client_manager = client._ClientManager()
        new_client_manager = _ClientManager()
        new_client_manager.configure(api_key=credentials["google_api_key"])
        new_custom_client = new_client_manager.make_client("generative")

        google_model._client = new_custom_client

        safety_settings = {
            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
        }

        response = google_model.generate_content(
            contents=history,
            generation_config=genai.types.GenerationConfig(**config_kwargs),
            stream=stream,
            safety_settings=safety_settings,
            tools=self._convert_tools_to_glm_tool(tools) if tools else None,
            request_options={"timeout": 600},
        )
@ -0,0 +1,25 @@
model: llama-3.2-11b-text-preview
label:
  zh_Hans: Llama 3.2 11B Text (Preview)
  en_US: Llama 3.2 11B Text (Preview)
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 8192
pricing:
  input: '0.05'
  output: '0.1'
  unit: '0.000001'
  currency: USD

@ -0,0 +1,25 @@
model: llama-3.2-1b-preview
label:
  zh_Hans: Llama 3.2 1B Text (Preview)
  en_US: Llama 3.2 1B Text (Preview)
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 8192
pricing:
  input: '0.05'
  output: '0.1'
  unit: '0.000001'
  currency: USD

@ -0,0 +1,25 @@
model: llama-3.2-3b-preview
label:
  zh_Hans: Llama 3.2 3B Text (Preview)
  en_US: Llama 3.2 3B Text (Preview)
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 8192
pricing:
  input: '0.05'
  output: '0.1'
  unit: '0.000001'
  currency: USD

@ -0,0 +1,25 @@
model: llama-3.2-90b-text-preview
label:
  zh_Hans: Llama 3.2 90B Text (Preview)
  en_US: Llama 3.2 90B Text (Preview)
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 8192
pricing:
  input: '0.05'
  output: '0.1'
  unit: '0.000001'
  currency: USD
@ -6,6 +6,7 @@ import numpy as np
import requests
from huggingface_hub import HfApi, InferenceClient

from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -18,8 +19,23 @@ HUGGINGFACE_ENDPOINT_API = "https://api.endpoints.huggingface.cloud/v2/endpoint/

class HuggingfaceHubTextEmbeddingModel(_CommonHuggingfaceHub, TextEmbeddingModel):
    def _invoke(
        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model

        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        client = InferenceClient(token=credentials["huggingfacehub_api_token"])

        execute_model = model

@ -1,6 +1,7 @@
import time
from typing import Optional

from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -23,7 +24,12 @@ class HuggingfaceTeiTextEmbeddingModel(TextEmbeddingModel):
    """

    def _invoke(
        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -38,6 +44,7 @@ class HuggingfaceTeiTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        server_url = credentials["server_url"]

@ -9,6 +9,7 @@ from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.hunyuan.v20230901 import hunyuan_client, models

from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@ -26,7 +27,12 @@ class HunyuanTextEmbeddingModel(TextEmbeddingModel):
    """

    def _invoke(
        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -35,6 +41,7 @@ class HunyuanTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """

@ -1,6 +1,6 @@
provider: jina
label:
  en_US: Jina
  en_US: Jina AI
description:
  en_US: Embedding and Rerank Model Supported
icon_small:
@ -11,7 +11,7 @@ background: "#EFFDFD"
help:
  title:
    en_US: Get your API key from Jina AI
    zh_Hans: 从 Jina 获取 API Key
    zh_Hans: 从 Jina AI 获取 API Key
  url:
    en_US: https://jina.ai/
supported_model_types:

@ -4,6 +4,7 @@ from typing import Optional

from requests import post

from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -27,8 +28,37 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):

    api_base: str = "https://api.jina.ai/v1"

    def _to_payload(self, model: str, texts: list[str], credentials: dict, input_type: EmbeddingInputType) -> dict:
        """
        Build the embedding request payload

        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :param input_type: input type
        :return: payload dict
        """

        def transform_jina_input_text(model, text):
            if model == "jina-clip-v1":
                return {"text": text}
            return text

        data = {"model": model, "input": [transform_jina_input_text(model, text) for text in texts]}

        # model specific parameters
        if model == "jina-embeddings-v3":
            # set `task` type according to input type for the best performance
            data["task"] = "retrieval.query" if input_type == EmbeddingInputType.QUERY else "retrieval.passage"

        return data

    def _invoke(
        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -37,6 +67,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        api_key = credentials["api_key"]
@ -49,15 +80,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
        url = base_url + "/embeddings"
        headers = {"Authorization": "Bearer " + api_key, "Content-Type": "application/json"}

        def transform_jina_input_text(model, text):
            if model == "jina-clip-v1":
                return {"text": text}
            return text

        data = {"model": model, "input": [transform_jina_input_text(model, text) for text in texts]}

        if model == "jina-embeddings-v3":
            data["task"] = "text-matching"
        data = self._to_payload(model=model, texts=texts, credentials=credentials, input_type=input_type)

        try:
            response = post(url, headers=headers, data=dumps(data))
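
The effect of the new `_to_payload` helper is easiest to see with `jina-embeddings-v3`, where the `task` field now tracks the input type instead of the previous fixed `text-matching` value. A sketch of the two payload shapes it produces (the input strings are placeholders):

```python
# input_type=EmbeddingInputType.QUERY -> task "retrieval.query"
{"model": "jina-embeddings-v3", "input": ["what is the capital of the United States?"], "task": "retrieval.query"}

# input_type=EmbeddingInputType.DOCUMENT (the default) -> task "retrieval.passage"
{"model": "jina-embeddings-v3", "input": ["Carson City is the capital city of Nevada."], "task": "retrieval.passage"}
```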

@ -5,6 +5,7 @@ from typing import Optional
from requests import post
from yarl import URL

from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -22,11 +23,16 @@ from core.model_runtime.model_providers.__base.text_embedding_model import TextE

class LocalAITextEmbeddingModel(TextEmbeddingModel):
    """
    Model class for Jina text embedding model.
    Model class for LocalAI text embedding model.
    """

    def _invoke(
        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -35,6 +41,7 @@ class LocalAITextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        if len(texts) != 1:

@ -4,6 +4,7 @@ from typing import Optional

from requests import post

from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@ -34,7 +35,12 @@ class MinimaxTextEmbeddingModel(TextEmbeddingModel):
    api_base: str = "https://api.minimax.chat/v1/embeddings"

    def _invoke(
        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
@ -43,6 +49,7 @@ class MinimaxTextEmbeddingModel(TextEmbeddingModel):
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        api_key = credentials["minimax_api_key"]
Binary file not shown. (added image, 121 KiB)
Binary file not shown. (added image, 36 KiB)
@ -0,0 +1,27 @@
import logging

from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.model_provider import ModelProvider

logger = logging.getLogger(__name__)


class MixedBreadProvider(ModelProvider):
    def validate_provider_credentials(self, credentials: dict) -> None:
        """
        Validate provider credentials;
        if validation fails, raise an exception

        :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
        """
        try:
            model_instance = self.get_model_instance(ModelType.TEXT_EMBEDDING)

            # Use the `mxbai-embed-large-v1` model for validation
            model_instance.validate_credentials(model="mxbai-embed-large-v1", credentials=credentials)
        except CredentialsValidateFailedError as ex:
            raise ex
        except Exception as ex:
            logger.exception(f"{self.get_provider_schema().provider} credentials validate failed")
            raise ex
@ -0,0 +1,31 @@
provider: mixedbread
label:
  en_US: MixedBread
description:
  en_US: Embedding and Rerank Model Supported
icon_small:
  en_US: icon_s_en.png
icon_large:
  en_US: icon_l_en.png
background: "#EFFDFD"
help:
  title:
    en_US: Get your API key from MixedBread AI
    zh_Hans: 从 MixedBread 获取 API Key
  url:
    en_US: https://www.mixedbread.ai/
supported_model_types:
  - text-embedding
  - rerank
configurate_methods:
  - predefined-model
provider_credential_schema:
  credential_form_schemas:
    - variable: api_key
      label:
        en_US: API Key
      type: secret-input
      required: true
      placeholder:
        zh_Hans: 在此输入您的 API Key
        en_US: Enter your API Key

@ -0,0 +1,4 @@
model: mxbai-rerank-large-v1
model_type: rerank
model_properties:
  context_size: 512
@ -0,0 +1,125 @@
from typing import Optional

import httpx

from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType
from core.model_runtime.entities.rerank_entities import RerankDocument, RerankResult
from core.model_runtime.errors.invoke import (
    InvokeAuthorizationError,
    InvokeBadRequestError,
    InvokeConnectionError,
    InvokeError,
    InvokeRateLimitError,
    InvokeServerUnavailableError,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.rerank_model import RerankModel


class MixedBreadRerankModel(RerankModel):
    """
    Model class for MixedBread rerank model.
    """

    def _invoke(
        self,
        model: str,
        credentials: dict,
        query: str,
        docs: list[str],
        score_threshold: Optional[float] = None,
        top_n: Optional[int] = None,
        user: Optional[str] = None,
    ) -> RerankResult:
        """
        Invoke rerank model

        :param model: model name
        :param credentials: model credentials
        :param query: search query
        :param docs: docs for reranking
        :param score_threshold: score threshold
        :param top_n: top n documents to return
        :param user: unique user id
        :return: rerank result
        """
        if len(docs) == 0:
            return RerankResult(model=model, docs=[])

        base_url = credentials.get("base_url", "https://api.mixedbread.ai/v1")
        base_url = base_url.removesuffix("/")

        try:
            response = httpx.post(
                base_url + "/reranking",
                json={"model": model, "query": query, "input": docs, "top_k": top_n, "return_input": True},
                headers={"Authorization": f"Bearer {credentials.get('api_key')}", "Content-Type": "application/json"},
            )
            response.raise_for_status()
            results = response.json()

            rerank_documents = []
            for result in results["data"]:
                rerank_document = RerankDocument(
                    index=result["index"],
                    text=result["input"],
                    score=result["score"],
                )
                if score_threshold is None or result["score"] >= score_threshold:
                    rerank_documents.append(rerank_document)

            return RerankResult(model=model, docs=rerank_documents)
        except httpx.HTTPStatusError as e:
            raise InvokeServerUnavailableError(str(e))

    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
        Validate model credentials

        :param model: model name
        :param credentials: model credentials
        :return:
        """
        try:
            self._invoke(
                model=model,
                credentials=credentials,
                query="What is the capital of the United States?",
                docs=[
                    "Carson City is the capital city of the American state of Nevada. At the 2010 United States "
                    "Census, Carson City had a population of 55,274.",
                    "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that "
                    "are a political division controlled by the United States. Its capital is Saipan.",
                ],
                score_threshold=0.8,
            )
        except Exception as ex:
            raise CredentialsValidateFailedError(str(ex))

    @property
    def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
        """
        Map model invoke error to unified error
        """
        return {
            InvokeConnectionError: [httpx.ConnectError],
            InvokeServerUnavailableError: [httpx.RemoteProtocolError],
            InvokeRateLimitError: [],
            InvokeAuthorizationError: [httpx.HTTPStatusError],
            InvokeBadRequestError: [httpx.RequestError],
        }

    def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
        """
        generate custom model entities from credentials
        """
        entity = AIModelEntity(
            model=model,
            label=I18nObject(en_US=model),
            model_type=ModelType.RERANK,
            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
            model_properties={ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", "512"))},
        )

        return entity
@ -0,0 +1,8 @@
model: mxbai-embed-2d-large-v1
model_type: text-embedding
model_properties:
  context_size: 512
pricing:
  input: '0.0001'
  unit: '0.001'
  currency: USD

@ -0,0 +1,8 @@
model: mxbai-embed-large-v1
model_type: text-embedding
model_properties:
  context_size: 512
pricing:
  input: '0.0001'
  unit: '0.001'
  currency: USD
Some files were not shown because too many files have changed in this diff.