r2

Merge branch 'main' into feat/r2
2026-02-16 08:15:18 +08:00 · 2025-07-02 18:46:36 +08:00 · 2025-07-02 18:20:41 +08:00 · 2025-07-02 18:20:05 +08:00 · 2025-07-02 18:15:23 +08:00 · 2025-07-02 17:48:23 +08:00
1032 changed files with 44629 additions and 6793 deletions
--- a/.github/workflows/api-tests.yml
+++ b/.github/workflows/api-tests.yml
@ -47,15 +47,17 @@ jobs:
      - name: Run Unit tests
        run: |
          uv run --project api bash dev/pytest/pytest_unit_tests.sh
+
+      - name: Coverage Summary
+        run: |
+          set -x
          # Extract coverage percentage and create a summary
          TOTAL_COVERAGE=$(python -c 'import json; print(json.load(open("coverage.json"))["totals"]["percent_covered_display"])')

          # Create a detailed coverage summary
          echo "### Test Coverage Summary :test_tube:" >> $GITHUB_STEP_SUMMARY
          echo "Total Coverage: ${TOTAL_COVERAGE}%" >> $GITHUB_STEP_SUMMARY
-          echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
-          uv run --project api coverage report >> $GITHUB_STEP_SUMMARY
-          echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
+          uv run --project api coverage report --format=markdown >> $GITHUB_STEP_SUMMARY

      - name: Run dify config tests
        run: uv run --project api dev/pytest/pytest_config_tests.py
--- a/.github/workflows/build-push.yml
+++ b/.github/workflows/build-push.yml
@ -6,6 +6,7 @@ on:
      - "main"
      - "deploy/dev"
      - "deploy/enterprise"
+      - "deploy/rag-dev"
    tags:
      - "*"

--- a/.github/workflows/deploy-dev.yml
+++ b/.github/workflows/deploy-dev.yml
@ -4,7 +4,7 @@ on:
  workflow_run:
    workflows: ["Build and Push API & Web"]
    branches:
-      - "deploy/dev"
+      - "deploy/rag-dev"
    types:
      - completed

@ -12,12 +12,13 @@ jobs:
  deploy:
    runs-on: ubuntu-latest
    if: |
-      github.event.workflow_run.conclusion == 'success'
+      github.event.workflow_run.conclusion == 'success' &&
+      github.event.workflow_run.head_branch == 'deploy/rag-dev'
    steps:
      - name: Deploy to server
        uses: appleboy/ssh-action@v0.1.8
        with:
-          host: ${{ secrets.SSH_HOST }}
+          host: ${{ secrets.RAG_SSH_HOST }}
          username: ${{ secrets.SSH_USER }}
          key: ${{ secrets.SSH_PRIVATE_KEY }}
          script: |
--- a/.gitignore
+++ b/.gitignore
@ -214,3 +214,4 @@ mise.toml

 # AI Assistant
 .roo/
+api/.env.backup
--- a/api/app.py
+++ b/api/app.py
@ -1,4 +1,3 @@
-import os
 import sys


@ -17,20 +16,20 @@ else:
    # It seems that JetBrains Python debugger does not work well with gevent,
    # so we need to disable gevent in debug mode.
    # If you are using debugpy and set GEVENT_SUPPORT=True, you can debug with gevent.
-    if (flask_debug := os.environ.get("FLASK_DEBUG", "0")) and flask_debug.lower() in {"false", "0", "no"}:
-        from gevent import monkey
+    # if (flask_debug := os.environ.get("FLASK_DEBUG", "0")) and flask_debug.lower() in {"false", "0", "no"}:
+    # from gevent import monkey
+    #
+    # # gevent
+    # monkey.patch_all()
+    #
+    # from grpc.experimental import gevent as grpc_gevent  # type: ignore
+    #
+    # # grpc gevent
+    # grpc_gevent.init_gevent()

-        # gevent
-        monkey.patch_all()
-
-        from grpc.experimental import gevent as grpc_gevent  # type: ignore
-
-        # grpc gevent
-        grpc_gevent.init_gevent()
-
-        import psycogreen.gevent  # type: ignore
-
-        psycogreen.gevent.patch_psycopg()
+    # import psycogreen.gevent  # type: ignore
+    #
+    # psycogreen.gevent.patch_psycopg()

    from app_factory import create_app

--- a/api/configs/app_config.py
+++ b/api/configs/app_config.py
@ -1,8 +1,11 @@
 import logging
+from pathlib import Path
 from typing import Any

 from pydantic.fields import FieldInfo
-from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict
+from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict, TomlConfigSettingsSource
+
+from libs.file_utils import search_file_upwards

 from .deploy import DeploymentConfig
 from .enterprise import EnterpriseFeatureConfig
@ -99,4 +102,12 @@ class DifyConfig(
            RemoteSettingsSourceFactory(settings_cls),
            dotenv_settings,
            file_secret_settings,
+            TomlConfigSettingsSource(
+                settings_cls=settings_cls,
+                toml_file=search_file_upwards(
+                    base_dir_path=Path(__file__).parent,
+                    target_file_name="pyproject.toml",
+                    max_search_parent_depth=2,
+                ),
+            ),
        )
--- a/api/configs/feature/hosted_service/init.py
+++ b/api/configs/feature/hosted_service/init.py
@ -222,11 +222,28 @@ class HostedFetchAppTemplateConfig(BaseSettings):
    )


+class HostedFetchPipelineTemplateConfig(BaseSettings):
+    """
+    Configuration for fetching pipeline templates
+
+    Both settings are plain strings; the mode defaults to "database" and the
+    remote domain defaults to "https://tmpl.dify.ai".
+    """
+
+    # The description previously advertised "db" and "default to remote", which contradicted the default below.
+    HOSTED_FETCH_PIPELINE_TEMPLATES_MODE: str = Field(
+        description="Mode for fetching pipeline templates: remote, database, or builtin; defaults to database.",
+        default="database",
+    )
+
+    HOSTED_FETCH_PIPELINE_TEMPLATES_REMOTE_DOMAIN: str = Field(
+        description="Domain for fetching remote pipeline templates",
+        default="https://tmpl.dify.ai",
+    )
+
+
 class HostedServiceConfig(
    # place the configs in alphabet order
    HostedAnthropicConfig,
    HostedAzureOpenAiConfig,
    HostedFetchAppTemplateConfig,
+    HostedFetchPipelineTemplateConfig,
    HostedMinmaxConfig,
    HostedOpenAiConfig,
    HostedSparkConfig,
--- a/api/configs/middleware/init.py
+++ b/api/configs/middleware/init.py
@ -223,6 +223,10 @@ class CeleryConfig(DatabaseConfig):
        default=None,
    )

+    CELERY_SENTINEL_PASSWORD: Optional[str] = Field(
+        description="Password of the Redis Sentinel master.",
+        default=None,
+    )
    CELERY_SENTINEL_SOCKET_TIMEOUT: Optional[PositiveFloat] = Field(
        description="Timeout for Redis Sentinel socket operations in seconds.",
        default=0.1,
--- a/api/configs/packaging/init.py
+++ b/api/configs/packaging/init.py
@ -1,17 +1,13 @@
 from pydantic import Field
-from pydantic_settings import BaseSettings
+
+from configs.packaging.pyproject import PyProjectConfig, PyProjectTomlConfig


-class PackagingInfo(BaseSettings):
+class PackagingInfo(PyProjectTomlConfig):
    """
    Packaging build information
    """

-    CURRENT_VERSION: str = Field(
-        description="Dify version",
-        default="1.5.0",
-    )
-
    COMMIT_SHA: str = Field(
        description="SHA-1 checksum of the git commit used to build the app",
        default="",
--- a/api/configs/packaging/pyproject.py
+++ b/api/configs/packaging/pyproject.py
@ -0,0 +1,17 @@
+from pydantic import BaseModel, Field
+from pydantic_settings import BaseSettings
+
+
+class PyProjectConfig(BaseModel):
+    """Model holding the ``version`` string; it defaults to an empty string."""
+
+    version: str = Field(default="", description="Dify version")
+
+
+class PyProjectTomlConfig(BaseSettings):
+    """
+    Settings model for the values read from api/pyproject.toml.
+
+    Only the ``project`` section is modelled; it falls back to an empty
+    ``PyProjectConfig`` when nothing is provided.
+    """
+
+    project: PyProjectConfig = Field(
+        default=PyProjectConfig(),
+        description="configs in the project section of pyproject.toml",
+    )
--- a/api/contexts/init.py
+++ b/api/contexts/init.py
@ -3,6 +3,7 @@ from threading import Lock
 from typing import TYPE_CHECKING

 from contexts.wrapper import RecyclableContextVar
+from core.datasource.__base.datasource_provider import DatasourcePluginProviderController

 if TYPE_CHECKING:
    from core.model_runtime.entities.model_entities import AIModelEntity
@ -33,3 +34,11 @@ plugin_model_schema_lock: RecyclableContextVar[Lock] = RecyclableContextVar(Cont
 plugin_model_schemas: RecyclableContextVar[dict[str, "AIModelEntity"]] = RecyclableContextVar(
    ContextVar("plugin_model_schemas")
 )
+
+# Context variable holding a dict of str keys to DatasourcePluginProviderController instances.
+# NOTE(review): the annotation is a quoted forward reference, so the module-level import of
+# DatasourcePluginProviderController could presumably live under TYPE_CHECKING - confirm.
+datasource_plugin_providers: RecyclableContextVar[dict[str, "DatasourcePluginProviderController"]] = (
+    RecyclableContextVar(ContextVar("datasource_plugin_providers"))
+)
+
+# Context variable holding a Lock; presumably guards datasource_plugin_providers - verify against callers.
+datasource_plugin_providers_lock: RecyclableContextVar[Lock] = RecyclableContextVar(
+    ContextVar("datasource_plugin_providers_lock")
+)
--- a/api/controllers/console/init.py
+++ b/api/controllers/console/init.py
@ -76,7 +76,6 @@ from .billing import billing, compliance

 # Import datasets controllers
 from .datasets import (
-    data_source,
    datasets,
    datasets_document,
    datasets_segments,
@ -85,6 +84,14 @@ from .datasets import (
    metadata,
    website,
 )
+from .datasets.rag_pipeline import (
+    datasource_auth,
+    datasource_content_preview,
+    rag_pipeline,
+    rag_pipeline_datasets,
+    rag_pipeline_import,
+    rag_pipeline_workflow,
+)

 # Import explore controllers
 from .explore import (
--- a/api/controllers/console/auth/data_source_oauth.py
+++ b/api/controllers/console/auth/data_source_oauth.py
@ -41,7 +41,7 @@ class OAuthDataSource(Resource):
            if not internal_secret:
                return ({"error": "Internal secret is not set"},)
            oauth_provider.save_internal_access_token(internal_secret)
-            return {"data": ""}
+            return {"data": "internal"}
        else:
            auth_url = oauth_provider.get_authorization_url()
            return {"data": auth_url}, 200
--- a/api/controllers/console/datasets/datasets.py
+++ b/api/controllers/console/datasets/datasets.py
@ -283,6 +283,15 @@ class DatasetApi(Resource):
            location="json",
            help="Invalid external knowledge api id.",
        )
+
+        parser.add_argument(
+            "icon_info",
+            type=dict,
+            required=False,
+            nullable=True,
+            location="json",
+            help="Invalid icon info.",
+        )
        args = parser.parse_args()
        data = request.get_json()

--- a/api/controllers/console/datasets/datasets_document.py
+++ b/api/controllers/console/datasets/datasets_document.py
@ -1,3 +1,4 @@
+import json
 import logging
 from argparse import ArgumentTypeError
 from datetime import UTC, datetime
@ -51,6 +52,7 @@ from fields.document_fields import (
 )
 from libs.login import login_required
 from models import Dataset, DatasetProcessRule, Document, DocumentSegment, UploadFile
+from models.dataset import DocumentPipelineExecutionLog
 from services.dataset_service import DatasetService, DocumentService
 from services.entities.knowledge_entities.knowledge_entities import KnowledgeConfig

@ -661,7 +663,7 @@ class DocumentDetailApi(DocumentResource):
            response = {"id": document.id, "doc_type": document.doc_type, "doc_metadata": document.doc_metadata_details}
        elif metadata == "without":
            dataset_process_rules = DatasetService.get_process_rules(dataset_id)
-            document_process_rules = document.dataset_process_rule.to_dict()
+            document_process_rules = document.dataset_process_rule.to_dict() if document.dataset_process_rule else {}
            data_source_info = document.data_source_detail_dict
            response = {
                "id": document.id,
@ -1028,6 +1030,41 @@ class WebsiteDocumentSyncApi(DocumentResource):
        return {"result": "success"}, 200


+class DocumentPipelineExecutionLogApi(DocumentResource):
+    """Expose the most recent pipeline execution log stored for a document."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    def get(self, dataset_id, document_id):
+        """
+        Return the newest DocumentPipelineExecutionLog of the document.
+
+        Raises NotFound when the dataset or the document cannot be found.
+        When the document has no log, every field of the payload is None.
+        """
+        dataset_id = str(dataset_id)
+        document_id = str(document_id)
+
+        dataset = DatasetService.get_dataset(dataset_id)
+        if not dataset:
+            raise NotFound("Dataset not found.")
+        document = DocumentService.get_document(dataset.id, document_id)
+        if not document:
+            raise NotFound("Document not found.")
+        # Newest entry first; only the latest execution is reported.
+        log = (
+            db.session.query(DocumentPipelineExecutionLog)
+            .filter_by(document_id=document_id)
+            .order_by(DocumentPipelineExecutionLog.created_at.desc())
+            .first()
+        )
+        if not log:
+            return {
+                "datasource_info": None,
+                "datasource_type": None,
+                "input_data": None,
+                "datasource_node_id": None,
+            }, 200
+        return {
+            # An empty or NULL column is reported as None (as in the "no log" payload)
+            # instead of letting json.loads() raise.
+            "datasource_info": json.loads(log.datasource_info) if log.datasource_info else None,
+            "datasource_type": log.datasource_type,
+            "input_data": log.input_data,
+            "datasource_node_id": log.datasource_node_id,
+        }, 200
+
+
 api.add_resource(GetProcessRuleApi, "/datasets/process-rule")
 api.add_resource(DatasetDocumentListApi, "/datasets/<uuid:dataset_id>/documents")
 api.add_resource(DatasetInitApi, "/datasets/init")
@ -1050,3 +1087,6 @@ api.add_resource(DocumentRetryApi, "/datasets/<uuid:dataset_id>/retry")
 api.add_resource(DocumentRenameApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/rename")

 api.add_resource(WebsiteDocumentSyncApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/website-sync")
+api.add_resource(
+    DocumentPipelineExecutionLogApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/pipeline-execution-log"
+)
--- a/api/controllers/console/datasets/error.py
+++ b/api/controllers/console/datasets/error.py
@ -101,3 +101,9 @@ class ChildChunkDeleteIndexError(BaseHTTPException):
    error_code = "child_chunk_delete_index_error"
    description = "Delete child chunk index failed: {message}"
    code = 500
+
+
+class PipelineNotFoundError(BaseHTTPException):
+    """HTTP 404 error carrying the ``pipeline_not_found`` error code."""
+
+    code = 404
+    error_code = "pipeline_not_found"
+    description = "Pipeline not found."
--- a/api/controllers/console/datasets/rag_pipeline/datasource_auth.py
+++ b/api/controllers/console/datasets/rag_pipeline/datasource_auth.py
@ -0,0 +1,197 @@
+from flask import redirect, request
+from flask_login import current_user  # type: ignore
+from flask_restful import (  # type: ignore
+    Resource,  # type: ignore
+    reqparse,
+)
+from werkzeug.exceptions import Forbidden, NotFound
+
+from configs import dify_config
+from controllers.console import api
+from controllers.console.wraps import (
+    account_initialization_required,
+    setup_required,
+)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.plugin.impl.oauth import OAuthHandler
+from extensions.ext_database import db
+from libs.login import login_required
+from models.oauth import DatasourceOauthParamConfig, DatasourceProvider
+from services.datasource_provider_service import DatasourceProviderService
+
+
+class DatasourcePluginOauthApi(Resource):
+    """Start the OAuth authorization flow for a datasource plugin."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    def get(self):
+        """
+        Return the dumped response of ``OAuthHandler.get_authorization_url``.
+
+        Query args: ``provider`` and ``plugin_id`` (both required).
+
+        Raises Forbidden when the current user is not an editor and NotFound
+        when no OAuth parameter config exists for the provider/plugin pair.
+        """
+        # Check user role first, before any request parsing or database access.
+        if not current_user.is_editor:
+            raise Forbidden()
+
+        parser = reqparse.RequestParser()
+        parser.add_argument("provider", type=str, required=True, nullable=False, location="args")
+        parser.add_argument("plugin_id", type=str, required=True, nullable=False, location="args")
+        args = parser.parse_args()
+        provider = args["provider"]
+        plugin_id = args["plugin_id"]
+        # get all plugin oauth configs
+        plugin_oauth_config = (
+            db.session.query(DatasourceOauthParamConfig).filter_by(provider=provider, plugin_id=plugin_id).first()
+        )
+        if not plugin_oauth_config:
+            raise NotFound()
+        oauth_handler = OAuthHandler()
+        redirect_url = (
+            f"{dify_config.CONSOLE_WEB_URL}/oauth/datasource/callback?provider={provider}&plugin_id={plugin_id}"
+        )
+        system_credentials = plugin_oauth_config.system_credentials
+        if system_credentials:
+            # Pass a copy so the redirect URL is not written into the dict held by the loaded row.
+            system_credentials = {**system_credentials, "redirect_url": redirect_url}
+        response = oauth_handler.get_authorization_url(
+            current_user.current_tenant.id, current_user.id, plugin_id, provider, system_credentials=system_credentials
+        )
+        return response.model_dump()
+
+
+class DatasourceOauthCallback(Resource):
+    """OAuth callback for datasource plugins: stores the obtained credentials, then redirects to the console."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    def get(self):
+        """
+        Obtain credentials via ``OAuthHandler.get_credentials`` and persist them.
+
+        Query args: ``provider`` and ``plugin_id`` (both required).
+
+        Raises NotFound when no OAuth parameter config matches the pair.
+        """
+        query_parser = reqparse.RequestParser()
+        for field_name in ("provider", "plugin_id"):
+            query_parser.add_argument(field_name, type=str, required=True, nullable=False, location="args")
+        query = query_parser.parse_args()
+        provider, plugin_id = query["provider"], query["plugin_id"]
+
+        oauth_handler = OAuthHandler()
+        oauth_param_config = (
+            db.session.query(DatasourceOauthParamConfig).filter_by(provider=provider, plugin_id=plugin_id).first()
+        )
+        if not oauth_param_config:
+            raise NotFound()
+
+        credentials = oauth_handler.get_credentials(
+            current_user.current_tenant.id,
+            current_user.id,
+            plugin_id,
+            provider,
+            system_credentials=oauth_param_config.system_credentials,
+            request=request,
+        )
+        # NOTE(review): only plugin_id, provider, auth_type and encrypted_credentials are set on the row;
+        # confirm whether DatasourceProvider also needs tenant scoping here.
+        db.session.add(
+            DatasourceProvider(
+                plugin_id=plugin_id, provider=provider, auth_type="oauth", encrypted_credentials=credentials
+            )
+        )
+        db.session.commit()
+        return redirect(f"{dify_config.CONSOLE_WEB_URL}")
+
+
+class DatasourceAuth(Resource):
+    """Validate (POST) and list (GET) credentials of a datasource plugin provider."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    def post(self):
+        """
+        Pass the submitted credentials to ``datasource_provider_credentials_validate``.
+
+        JSON body: ``provider``, ``plugin_id``, ``credentials`` (required) and
+        ``name`` (optional, defaults to "test").
+
+        Raises Forbidden for non-editors and ValueError when validation fails.
+        """
+        if not current_user.is_editor:
+            raise Forbidden()
+
+        parser = reqparse.RequestParser()
+        parser.add_argument("provider", type=str, required=True, nullable=False, location="json")
+        parser.add_argument("name", type=str, required=False, nullable=False, location="json", default="test")
+        parser.add_argument("plugin_id", type=str, required=True, nullable=False, location="json")
+        parser.add_argument("credentials", type=dict, required=True, nullable=False, location="json")
+        args = parser.parse_args()
+
+        datasource_provider_service = DatasourceProviderService()
+
+        try:
+            datasource_provider_service.datasource_provider_credentials_validate(
+                tenant_id=current_user.current_tenant_id,
+                provider=args["provider"],
+                plugin_id=args["plugin_id"],
+                credentials=args["credentials"],
+                name=args["name"],
+            )
+        except CredentialsValidateFailedError as ex:
+            # Chain the cause so the original validation traceback is not lost.
+            raise ValueError(str(ex)) from ex
+
+        return {"result": "success"}, 201
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    def get(self):
+        """
+        Return the result of ``get_datasource_credentials`` for the current tenant.
+
+        Query args: ``provider`` and ``plugin_id`` (both required).
+        """
+        parser = reqparse.RequestParser()
+        parser.add_argument("provider", type=str, required=True, nullable=False, location="args")
+        parser.add_argument("plugin_id", type=str, required=True, nullable=False, location="args")
+        args = parser.parse_args()
+        datasource_provider_service = DatasourceProviderService()
+        datasources = datasource_provider_service.get_datasource_credentials(
+            tenant_id=current_user.current_tenant_id, provider=args["provider"], plugin_id=args["plugin_id"]
+        )
+        return {"result": datasources}, 200
+
+
+class DatasourceAuthUpdateDeleteApi(Resource):
+    """Delete or update one stored datasource credential identified by ``auth_id``."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    def delete(self, auth_id: str):
+        """
+        Remove the credential via ``remove_datasource_credentials``.
+
+        Query args: ``provider`` and ``plugin_id`` (both required).
+
+        Raises Forbidden when the current user is not an editor.
+        """
+        parser = reqparse.RequestParser()
+        parser.add_argument("provider", type=str, required=True, nullable=False, location="args")
+        parser.add_argument("plugin_id", type=str, required=True, nullable=False, location="args")
+        args = parser.parse_args()
+        if not current_user.is_editor:
+            raise Forbidden()
+        datasource_provider_service = DatasourceProviderService()
+        datasource_provider_service.remove_datasource_credentials(
+            tenant_id=current_user.current_tenant_id,
+            auth_id=auth_id,
+            provider=args["provider"],
+            plugin_id=args["plugin_id"],
+        )
+        return {"result": "success"}, 200
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    def patch(self, auth_id: str):
+        """
+        Update the credential via ``update_datasource_credentials``.
+
+        Query args: ``provider`` and ``plugin_id``; JSON body: ``credentials``.
+
+        Raises Forbidden for non-editors and ValueError when the service
+        reports a credential validation failure.
+        """
+        parser = reqparse.RequestParser()
+        parser.add_argument("provider", type=str, required=True, nullable=False, location="args")
+        parser.add_argument("plugin_id", type=str, required=True, nullable=False, location="args")
+        parser.add_argument("credentials", type=dict, required=True, nullable=False, location="json")
+        args = parser.parse_args()
+        if not current_user.is_editor:
+            raise Forbidden()
+        # Built outside the try block so only the update call is covered by the handler.
+        datasource_provider_service = DatasourceProviderService()
+        try:
+            datasource_provider_service.update_datasource_credentials(
+                tenant_id=current_user.current_tenant_id,
+                auth_id=auth_id,
+                provider=args["provider"],
+                plugin_id=args["plugin_id"],
+                credentials=args["credentials"],
+            )
+        except CredentialsValidateFailedError as ex:
+            # Chain the cause so the original validation traceback is not lost.
+            raise ValueError(str(ex)) from ex
+
+        # NOTE(review): an update answering 201 looks unusual; kept as-is for existing clients.
+        return {"result": "success"}, 201
+
+
+# Datasource plugin OAuth and credential routes
+api.add_resource(
+    DatasourcePluginOauthApi,
+    "/oauth/plugin/datasource",
+)
+api.add_resource(
+    DatasourceOauthCallback,
+    "/oauth/plugin/datasource/callback",
+)
+api.add_resource(
+    DatasourceAuth,
+    "/auth/plugin/datasource",
+)
+
+api.add_resource(
+    DatasourceAuthUpdateDeleteApi,
+    "/auth/plugin/datasource/<string:auth_id>",
+)
--- a/api/controllers/console/datasets/rag_pipeline/datasource_content_preview.py
+++ b/api/controllers/console/datasets/rag_pipeline/datasource_content_preview.py
@ -0,0 +1,55 @@
+from flask_restful import (  # type: ignore
+    Resource,  # type: ignore
+    reqparse,
+)
+from werkzeug.exceptions import Forbidden
+
+from controllers.console import api
+from controllers.console.datasets.wraps import get_rag_pipeline
+from controllers.console.wraps import account_initialization_required, setup_required
+from libs.login import current_user, login_required
+from models import Account
+from models.dataset import Pipeline
+from services.rag_pipeline.rag_pipeline import RagPipelineService
+
+
+class DataSourceContentPreviewApi(Resource):
+    """Preview endpoint for a datasource node, run with ``is_published=True``."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @get_rag_pipeline
+    def post(self, pipeline: Pipeline, node_id: str):
+        """
+        Run datasource content preview
+
+        JSON body: ``inputs`` (dict) and ``datasource_type`` (str).
+
+        Raises Forbidden when the current user is not an Account and
+        ValueError when either body field resolves to None.
+        """
+        if not isinstance(current_user, Account):
+            raise Forbidden()
+
+        body_parser = reqparse.RequestParser()
+        body_parser.add_argument("inputs", type=dict, required=True, nullable=False, location="json")
+        body_parser.add_argument("datasource_type", type=str, required=True, location="json")
+        payload = body_parser.parse_args()
+
+        user_inputs = payload.get("inputs")
+        if user_inputs is None:
+            raise ValueError("missing inputs")
+        datasource_type = payload.get("datasource_type")
+        if datasource_type is None:
+            raise ValueError("missing datasource_type")
+
+        preview = RagPipelineService().run_datasource_node_preview(
+            pipeline=pipeline,
+            node_id=node_id,
+            user_inputs=user_inputs,
+            account=current_user,
+            datasource_type=datasource_type,
+            is_published=True,
+        )
+        return preview, 200
+
+
+# Preview route for a datasource node of a pipeline's published workflow
+api.add_resource(
+    DataSourceContentPreviewApi,
+    "/rag/pipelines/<uuid:pipeline_id>/workflows/published/datasource/nodes/<string:node_id>/preview",
+)
--- a/api/controllers/console/datasets/rag_pipeline/rag_pipeline.py
+++ b/api/controllers/console/datasets/rag_pipeline/rag_pipeline.py
@ -0,0 +1,162 @@
+import logging
+
+from flask import request
+from flask_restful import Resource, reqparse
+from sqlalchemy.orm import Session
+
+from controllers.console import api
+from controllers.console.wraps import (
+    account_initialization_required,
+    enterprise_license_required,
+    setup_required,
+)
+from extensions.ext_database import db
+from libs.login import login_required
+from models.dataset import PipelineCustomizedTemplate
+from services.entities.knowledge_entities.rag_pipeline_entities import PipelineTemplateInfoEntity
+from services.rag_pipeline.rag_pipeline import RagPipelineService
+
+logger = logging.getLogger(__name__)
+
+
+def _validate_name(name):
+    """Return ``name`` unchanged when it is non-empty and at most 40 characters; raise ValueError otherwise."""
+    if name and 1 <= len(name) <= 40:
+        return name
+    raise ValueError("Name must be between 1 to 40 characters.")
+
+
+def _validate_description_length(description):
+    """Return ``description`` unchanged, or raise ValueError when it is longer than 400 characters."""
+    if len(description) <= 400:
+        return description
+    raise ValueError("Description cannot exceed 400 characters.")
+
+
+class PipelineTemplateListApi(Resource):
+    """List pipeline templates."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @enterprise_license_required
+    def get(self):
+        """
+        Return ``RagPipelineService.get_pipeline_templates`` for the query.
+
+        Query args: ``type`` (default "built-in") and ``language``
+        (default "en-US").
+        """
+        query = request.args
+        template_type = query.get("type", default="built-in", type=str)
+        template_language = query.get("language", default="en-US", type=str)
+        return RagPipelineService.get_pipeline_templates(template_type, template_language), 200
+
+
+class PipelineTemplateDetailApi(Resource):
+    """Fetch the detail of one pipeline template."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @enterprise_license_required
+    def get(self, template_id: str):
+        """
+        Return ``get_pipeline_template_detail`` for ``template_id``.
+
+        Query args: ``type`` (default "built-in").
+        """
+        template_type = request.args.get("type", default="built-in", type=str)
+        detail = RagPipelineService().get_pipeline_template_detail(template_id, template_type)
+        return detail, 200
+
+
+class CustomizedPipelineTemplateApi(Resource):
+    """Update (PATCH), delete (DELETE) or export the YAML (POST) of a customized pipeline template."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @enterprise_license_required
+    def patch(self, template_id: str):
+        """
+        Update the template through ``update_customized_pipeline_template``.
+
+        Arguments: ``name`` (required, validated by ``_validate_name``),
+        ``description`` (optional, defaults to "") and ``icon_info`` (JSON dict).
+        """
+        parser = reqparse.RequestParser()
+        parser.add_argument(
+            "name",
+            nullable=False,
+            required=True,
+            help="Name must be between 1 to 40 characters.",
+            type=_validate_name,
+        )
+        parser.add_argument(
+            "description",
+            type=str,
+            nullable=True,
+            required=False,
+            default="",
+        )
+        parser.add_argument(
+            "icon_info",
+            type=dict,
+            location="json",
+            nullable=True,
+        )
+        args = parser.parse_args()
+        pipeline_template_info = PipelineTemplateInfoEntity(**args)
+        RagPipelineService.update_customized_pipeline_template(template_id, pipeline_template_info)
+        # NOTE(review): a bare 200 is returned as the response body; kept for existing clients.
+        return 200
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @enterprise_license_required
+    def delete(self, template_id: str):
+        """Delete the template through ``delete_customized_pipeline_template``."""
+        RagPipelineService.delete_customized_pipeline_template(template_id)
+        # NOTE(review): a bare 200 is returned as the response body; kept for existing clients.
+        return 200
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @enterprise_license_required
+    def post(self, template_id: str):
+        """
+        Return the stored ``yaml_content`` of the template as ``{"data": ...}``.
+
+        Raises ValueError when no PipelineCustomizedTemplate has this id.
+        """
+        with Session(db.engine) as session:
+            template = (
+                session.query(PipelineCustomizedTemplate).filter(PipelineCustomizedTemplate.id == template_id).first()
+            )
+            if not template:
+                raise ValueError("Customized pipeline template not found.")
+            # Read the column while the session is open; the instance is detached once the block exits.
+            yaml_content = template.yaml_content
+
+        return {"data": yaml_content}, 200
+
+
+class PublishCustomizedPipelineTemplateApi(Resource):
+    """Publish a pipeline as a customized pipeline template."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @enterprise_license_required
+    def post(self, pipeline_id: str):
+        """
+        Forward the parsed arguments to ``publish_customized_pipeline_template``.
+
+        Arguments: ``name`` (required, validated by ``_validate_name``),
+        ``description`` (optional, defaults to "") and ``icon_info`` (JSON dict).
+        """
+        template_parser = reqparse.RequestParser()
+        template_parser.add_argument(
+            "name",
+            type=_validate_name,
+            required=True,
+            nullable=False,
+            help="Name must be between 1 to 40 characters.",
+        )
+        template_parser.add_argument("description", type=str, required=False, nullable=True, default="")
+        template_parser.add_argument("icon_info", type=dict, nullable=True, location="json")
+        template_args = template_parser.parse_args()
+
+        RagPipelineService().publish_customized_pipeline_template(pipeline_id, template_args)
+        return {"result": "success"}
+
+
+# Pipeline template routes: list, detail, customized template management, publish-as-template
+api.add_resource(
+    PipelineTemplateListApi,
+    "/rag/pipeline/templates",
+)
+api.add_resource(
+    PipelineTemplateDetailApi,
+    "/rag/pipeline/templates/<string:template_id>",
+)
+api.add_resource(
+    CustomizedPipelineTemplateApi,
+    "/rag/pipeline/customized/templates/<string:template_id>",
+)
+api.add_resource(
+    PublishCustomizedPipelineTemplateApi,
+    "/rag/pipelines/<string:pipeline_id>/customized/publish",
+)
--- a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_datasets.py
+++ b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_datasets.py
@ -0,0 +1,171 @@
+from flask_login import current_user  # type: ignore  # type: ignore
+from flask_restful import Resource, marshal, reqparse  # type: ignore
+from werkzeug.exceptions import Forbidden
+
+import services
+from controllers.console import api
+from controllers.console.datasets.error import DatasetNameDuplicateError
+from controllers.console.wraps import (
+    account_initialization_required,
+    cloud_edition_billing_rate_limit_check,
+    setup_required,
+)
+from fields.dataset_fields import dataset_detail_fields
+from libs.login import login_required
+from models.dataset import DatasetPermissionEnum
+from services.dataset_service import DatasetPermissionService, DatasetService
+from services.entities.knowledge_entities.rag_pipeline_entities import RagPipelineDatasetCreateEntity
+from services.rag_pipeline.rag_pipeline_dsl_service import RagPipelineDslService
+
+
+def _validate_name(name):
+    """Return ``name`` unchanged when it is non-empty and at most 40 characters.
+
+    Raises:
+        ValueError: if ``name`` is falsy or longer than 40 characters.
+    """
+    if name and 1 <= len(name) <= 40:
+        return name
+    raise ValueError("Name must be between 1 to 40 characters.")
+
+
+def _validate_description_length(description):
+    """Return ``description`` unchanged when it is at most 400 characters long.
+
+    Raises:
+        ValueError: if ``description`` is longer than 400 characters.
+    """
+    if len(description) <= 400:
+        return description
+    raise ValueError("Description cannot exceed 400 characters.")
+
+
+class CreateRagPipelineDatasetApi(Resource):
+    """Create a dataset together with its RAG pipeline from DSL ``yaml_content``."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @cloud_edition_billing_rate_limit_check("knowledge")
+    def post(self):
+        """Parse the request, create the pipeline dataset and return the import info.
+
+        Returns:
+            ``(import_info, 201)`` where ``import_info`` is whatever
+            ``RagPipelineDslService.create_rag_pipeline_dataset`` returns.
+
+        Raises:
+            Forbidden: the current user is not a dataset editor.
+            DatasetNameDuplicateError: the service reported a duplicate dataset name.
+        """
+        # Reject unauthorized callers before validating the body, matching
+        # CreateEmptyRagPipelineDatasetApi, so they get 403 rather than
+        # field-level validation errors.
+        if not current_user.is_dataset_editor:
+            raise Forbidden()
+
+        parser = reqparse.RequestParser()
+        parser.add_argument(
+            "name",
+            nullable=False,
+            required=True,
+            help="type is required. Name must be between 1 to 40 characters.",
+            type=_validate_name,
+        )
+        parser.add_argument(
+            "description",
+            type=str,
+            nullable=True,
+            required=False,
+            default="",
+        )
+
+        parser.add_argument(
+            "icon_info",
+            type=dict,
+            nullable=True,
+            required=False,
+            default={},
+        )
+
+        parser.add_argument(
+            "permission",
+            type=str,
+            choices=(DatasetPermissionEnum.ONLY_ME, DatasetPermissionEnum.ALL_TEAM, DatasetPermissionEnum.PARTIAL_TEAM),
+            nullable=True,
+            required=False,
+            default=DatasetPermissionEnum.ONLY_ME,
+        )
+
+        parser.add_argument(
+            "partial_member_list",
+            type=list,
+            nullable=True,
+            required=False,
+            default=[],
+        )
+
+        parser.add_argument(
+            "yaml_content",
+            type=str,
+            nullable=False,
+            required=True,
+            help="yaml_content is required.",
+        )
+
+        args = parser.parse_args()
+
+        rag_pipeline_dataset_create_entity = RagPipelineDatasetCreateEntity(**args)
+        try:
+            import_info = RagPipelineDslService.create_rag_pipeline_dataset(
+                tenant_id=current_user.current_tenant_id,
+                rag_pipeline_dataset_create_entity=rag_pipeline_dataset_create_entity,
+            )
+            # Compare against the enum member used in the parser choices instead
+            # of repeating its string value.
+            # NOTE(review): assumes PARTIAL_TEAM's value is "partial_members" — confirm.
+            if rag_pipeline_dataset_create_entity.permission == DatasetPermissionEnum.PARTIAL_TEAM:
+                DatasetPermissionService.update_partial_member_list(
+                    current_user.current_tenant_id,
+                    import_info["dataset_id"],
+                    rag_pipeline_dataset_create_entity.partial_member_list,
+                )
+        except services.errors.dataset.DatasetNameDuplicateError:
+            # Translate the service-layer error into the controller-level one.
+            raise DatasetNameDuplicateError()
+
+        return import_info, 201
+
+
+class CreateEmptyRagPipelineDatasetApi(Resource):
+    # POST handler that creates a RAG pipeline dataset without any DSL content
+    # and returns it marshalled with dataset_detail_fields (HTTP 201).
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @cloud_edition_billing_rate_limit_check("knowledge")
+    def post(self):
+        # Only users flagged as dataset editors may create datasets; the check
+        # runs before the request body is parsed.
+        if not current_user.is_dataset_editor:
+            raise Forbidden()
+
+        parser = reqparse.RequestParser()
+        # "name" is mandatory and checked by _validate_name.
+        parser.add_argument(
+            "name",
+            nullable=False,
+            required=True,
+            help="type is required. Name must be between 1 to 40 characters.",
+            type=_validate_name,
+        )
+        parser.add_argument(
+            "description",
+            type=str,
+            nullable=True,
+            required=False,
+            default="",
+        )
+
+        parser.add_argument(
+            "icon_info",
+            type=dict,
+            nullable=True,
+            required=False,
+            default={},
+        )
+
+        # Permission defaults to ONLY_ME when the caller omits it.
+        parser.add_argument(
+            "permission",
+            type=str,
+            choices=(DatasetPermissionEnum.ONLY_ME, DatasetPermissionEnum.ALL_TEAM, DatasetPermissionEnum.PARTIAL_TEAM),
+            nullable=True,
+            required=False,
+            default=DatasetPermissionEnum.ONLY_ME,
+        )
+
+        # NOTE(review): unlike CreateRagPipelineDatasetApi, this handler does not call
+        # DatasetPermissionService.update_partial_member_list itself — confirm the
+        # service applies partial_member_list.
+        parser.add_argument(
+            "partial_member_list",
+            type=list,
+            nullable=True,
+            required=False,
+            default=[],
+        )
+
+        args = parser.parse_args()
+        # All parsed arguments are forwarded to the entity constructor as keywords.
+        dataset = DatasetService.create_empty_rag_pipeline_dataset(
+            tenant_id=current_user.current_tenant_id,
+            rag_pipeline_dataset_create_entity=RagPipelineDatasetCreateEntity(**args),
+        )
+        return marshal(dataset, dataset_detail_fields), 201
+
+
+# Route registration: dataset creation from DSL, and empty dataset creation.
+api.add_resource(CreateRagPipelineDatasetApi, "/rag/pipeline/dataset")
+api.add_resource(CreateEmptyRagPipelineDatasetApi, "/rag/pipeline/empty-dataset")
--- a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_import.py
+++ b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_import.py
@ -0,0 +1,146 @@
+from typing import cast
+
+from flask_login import current_user  # type: ignore
+from flask_restful import Resource, marshal_with, reqparse  # type: ignore
+from sqlalchemy.orm import Session
+from werkzeug.exceptions import Forbidden
+
+from controllers.console import api
+from controllers.console.datasets.wraps import get_rag_pipeline
+from controllers.console.wraps import (
+    account_initialization_required,
+    setup_required,
+)
+from extensions.ext_database import db
+from fields.rag_pipeline_fields import pipeline_import_check_dependencies_fields, pipeline_import_fields
+from libs.login import login_required
+from models import Account
+from models.dataset import Pipeline
+from services.app_dsl_service import ImportStatus
+from services.rag_pipeline.rag_pipeline_dsl_service import RagPipelineDslService
+
+
+class RagPipelineImportApi(Resource):
+    """Import a RAG pipeline from DSL described in the JSON request body."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @marshal_with(pipeline_import_fields)
+    def post(self):
+        """Run the import and map its status to an HTTP code.
+
+        Returns:
+            ``(result_json, code)`` with code 400 for FAILED, 202 for PENDING
+            and 200 otherwise.
+
+        Raises:
+            Forbidden: the current user is not an editor.
+        """
+        # Role check comes first, before any body parsing.
+        if not current_user.is_editor:
+            raise Forbidden()
+
+        body_parser = reqparse.RequestParser()
+        body_parser.add_argument("mode", type=str, required=True, location="json")
+        # Every remaining field is an optional JSON string.
+        for optional_field in (
+            "yaml_content",
+            "yaml_url",
+            "name",
+            "description",
+            "icon_type",
+            "icon",
+            "icon_background",
+            "pipeline_id",
+        ):
+            body_parser.add_argument(optional_field, type=str, location="json")
+        payload = body_parser.parse_args()
+
+        # The service gets its own session, committed once the import call returns.
+        with Session(db.engine) as session:
+            outcome = RagPipelineDslService(session).import_rag_pipeline(
+                account=cast(Account, current_user),
+                import_mode=payload["mode"],
+                yaml_content=payload.get("yaml_content"),
+                yaml_url=payload.get("yaml_url"),
+                pipeline_id=payload.get("pipeline_id"),
+            )
+            session.commit()
+
+        # Pick the status code from the import outcome.
+        if outcome.status == ImportStatus.FAILED.value:
+            http_code = 400
+        elif outcome.status == ImportStatus.PENDING.value:
+            http_code = 202
+        else:
+            http_code = 200
+        return outcome.model_dump(mode="json"), http_code
+
+
+class RagPipelineImportConfirmApi(Resource):
+    """Confirm a previously started RAG pipeline import."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @marshal_with(pipeline_import_fields)
+    def post(self, import_id):
+        """Confirm the import identified by ``import_id``.
+
+        Returns:
+            ``(result_json, 400)`` when the confirmation failed, otherwise
+            ``(result_json, 200)``.
+
+        Raises:
+            Forbidden: the current user is not an editor.
+        """
+        # Role check comes first.
+        if not current_user.is_editor:
+            raise Forbidden()
+
+        # The service gets its own session, committed after confirmation returns.
+        with Session(db.engine) as session:
+            outcome = RagPipelineDslService(session).confirm_import(
+                import_id=import_id,
+                account=cast(Account, current_user),
+            )
+            session.commit()
+
+        http_code = 400 if outcome.status == ImportStatus.FAILED.value else 200
+        return outcome.model_dump(mode="json"), http_code
+
+
+class RagPipelineImportCheckDependenciesApi(Resource):
+    """Report the dependency check result for an existing RAG pipeline."""
+
+    @setup_required
+    @login_required
+    @get_rag_pipeline
+    @account_initialization_required
+    @marshal_with(pipeline_import_check_dependencies_fields)
+    def get(self, pipeline: Pipeline):
+        """Return ``(check_result_json, 200)`` for ``pipeline``.
+
+        ``pipeline`` is supplied by the ``get_rag_pipeline`` decorator.
+
+        Raises:
+            Forbidden: the current user is not an editor.
+        """
+        if not current_user.is_editor:
+            raise Forbidden()
+
+        # No commit is issued on this session.
+        with Session(db.engine) as session:
+            dependency_report = RagPipelineDslService(session).check_dependencies(pipeline=pipeline)
+
+        body = dependency_report.model_dump(mode="json")
+        return body, 200
+
+
+class RagPipelineExportApi(Resource):
+    """Export a RAG pipeline as DSL."""
+
+    @setup_required
+    @login_required
+    @get_rag_pipeline
+    @account_initialization_required
+    def get(self, pipeline: Pipeline):
+        """Return ``({"data": <exported dsl>}, 200)`` for ``pipeline``.
+
+        Query parameters:
+            include_secret: boolean text ("true"/"false"/"1"/"0"); defaults to False.
+
+        Raises:
+            Forbidden: the current user is not an editor.
+        """
+        # Imported locally so this handler does not depend on edits to the
+        # module-level import block.
+        from flask_restful import inputs  # type: ignore
+
+        if not current_user.is_editor:
+            raise Forbidden()
+
+        # ``type=bool`` would turn every non-empty query value (including "false"
+        # and "0") into True and export secrets unintentionally;
+        # ``inputs.boolean`` parses the text instead.
+        parser = reqparse.RequestParser()
+        parser.add_argument("include_secret", type=inputs.boolean, default=False, location="args")
+        args = parser.parse_args()
+
+        with Session(db.engine) as session:
+            export_service = RagPipelineDslService(session)
+            result = export_service.export_rag_pipeline_dsl(pipeline=pipeline, include_secret=args["include_secret"])
+
+        return {"data": result}, 200
+
+
+# Route registration: RAG pipeline import, import confirmation,
+# dependency check and DSL export.
+api.add_resource(
+    RagPipelineImportApi,
+    "/rag/pipelines/imports",
+)
+api.add_resource(
+    RagPipelineImportConfirmApi,
+    "/rag/pipelines/imports/<string:import_id>/confirm",
+)
+api.add_resource(
+    RagPipelineImportCheckDependenciesApi,
+    "/rag/pipelines/imports/<string:pipeline_id>/check-dependencies",
+)
+api.add_resource(
+    RagPipelineExportApi,
+    "/rag/pipelines/<string:pipeline_id>/exports",
+)
--- a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py
+++ b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py
--- a/api/controllers/console/datasets/wraps.py
+++ b/api/controllers/console/datasets/wraps.py
@ -0,0 +1,43 @@
+from collections.abc import Callable
+from functools import wraps
+from typing import Optional
+
+from controllers.console.datasets.error import PipelineNotFoundError
+from extensions.ext_database import db
+from libs.login import current_user
+from models.dataset import Pipeline
+
+
+def get_rag_pipeline(
+    view: Optional[Callable] = None,
+):
+    """Decorator that swaps the ``pipeline_id`` keyword argument for a ``pipeline`` row.
+
+    Usable both bare (``@get_rag_pipeline``) and called (``@get_rag_pipeline()``).
+    The wrapped view receives ``pipeline=<Pipeline>`` and no ``pipeline_id``.
+
+    Raises (from the wrapped view):
+        ValueError: ``pipeline_id`` is absent or falsy in the keyword arguments.
+        PipelineNotFoundError: no pipeline with that id exists for the current
+            user's tenant.
+    """
+
+    def decorator(view_func):
+        @wraps(view_func)
+        def decorated_view(*args, **kwargs):
+            raw_pipeline_id = kwargs.get("pipeline_id")
+            if not raw_pipeline_id:
+                raise ValueError("missing pipeline_id in path parameters")
+
+            # The view must not see the raw id, only the loaded row.
+            kwargs.pop("pipeline_id")
+            wanted_id = str(raw_pipeline_id)
+
+            # The lookup is scoped to the current user's tenant.
+            tenant_scoped_query = db.session.query(Pipeline).filter(
+                Pipeline.id == wanted_id,
+                Pipeline.tenant_id == current_user.current_tenant_id,
+            )
+            found = tenant_scoped_query.first()
+            if not found:
+                raise PipelineNotFoundError()
+
+            kwargs["pipeline"] = found
+            return view_func(*args, **kwargs)
+
+        return decorated_view
+
+    return decorator if view is None else decorator(view)
--- a/api/controllers/console/version.py
+++ b/api/controllers/console/version.py
@ -18,7 +18,7 @@ class VersionApi(Resource):
        check_update_url = dify_config.CHECK_UPDATE_URL

        result = {
-            "version": dify_config.CURRENT_VERSION,
+            "version": dify_config.project.version,
            "release_date": "",
            "release_notes": "",
            "can_auto_update": False,
--- a/api/controllers/console/workspace/members.py
+++ b/api/controllers/console/workspace/members.py
@ -85,6 +85,7 @@ class MemberInviteEmailApi(Resource):
        return {
            "result": "success",
            "invitation_results": invitation_results,
+            "tenant_id": str(current_user.current_tenant.id),
        }, 201


@ -110,7 +111,7 @@ class MemberCancelInviteApi(Resource):
            except Exception as e:
                raise ValueError(str(e))

-        return {"result": "success"}, 204
+        return {"result": "success", "tenant_id": str(current_user.current_tenant.id)}, 200


 class MemberUpdateRoleApi(Resource):
--- a/api/controllers/console/workspace/plugin.py
+++ b/api/controllers/console/workspace/plugin.py
@ -13,6 +13,7 @@ from core.model_runtime.utils.encoders import jsonable_encoder
 from core.plugin.impl.exc import PluginDaemonClientSideError
 from libs.login import login_required
 from models.account import TenantPluginPermission
+from services.plugin.plugin_parameter_service import PluginParameterService
 from services.plugin.plugin_permission_service import PluginPermissionService
 from services.plugin.plugin_service import PluginService

@ -497,6 +498,42 @@ class PluginFetchPermissionApi(Resource):
        )


+class PluginFetchDynamicSelectOptionsApi(Resource):
+    """Fetch the dynamic select options of a plugin parameter."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    def get(self):
+        """Return ``{"options": ...}`` passed through ``jsonable_encoder``.
+
+        All five query arguments (plugin_id, provider, action, parameter,
+        provider_type) are required strings.
+
+        Raises:
+            Forbidden: the current user is neither admin nor owner.
+            ValueError: the plugin daemon reported a client-side error.
+        """
+        # check if the user is admin or owner
+        if not current_user.is_admin_or_owner:
+            raise Forbidden()
+
+        tenant_id = current_user.current_tenant_id
+        user_id = current_user.id
+
+        query_parser = reqparse.RequestParser()
+        for required_field in ("plugin_id", "provider", "action", "parameter", "provider_type"):
+            query_parser.add_argument(required_field, type=str, required=True, location="args")
+        query = query_parser.parse_args()
+
+        try:
+            options = PluginParameterService.get_dynamic_select_options(
+                tenant_id,
+                user_id,
+                query["plugin_id"],
+                query["provider"],
+                query["action"],
+                query["parameter"],
+                query["provider_type"],
+            )
+        except PluginDaemonClientSideError as e:
+            raise ValueError(e)
+
+        return jsonable_encoder({"options": options})
+
+
 api.add_resource(PluginDebuggingKeyApi, "/workspaces/current/plugin/debugging-key")
 api.add_resource(PluginListApi, "/workspaces/current/plugin/list")
 api.add_resource(PluginListLatestVersionsApi, "/workspaces/current/plugin/list/latest-versions")
@ -521,3 +558,5 @@ api.add_resource(PluginFetchMarketplacePkgApi, "/workspaces/current/plugin/marke

 api.add_resource(PluginChangePermissionApi, "/workspaces/current/plugin/permission/change")
 api.add_resource(PluginFetchPermissionApi, "/workspaces/current/plugin/permission/fetch")
+
+api.add_resource(PluginFetchDynamicSelectOptionsApi, "/workspaces/current/plugin/parameters/dynamic-options")
--- a/api/controllers/inner_api/plugin/plugin.py
+++ b/api/controllers/inner_api/plugin/plugin.py
@ -17,6 +17,7 @@ from core.plugin.entities.request import (
    RequestInvokeApp,
    RequestInvokeEncrypt,
    RequestInvokeLLM,
+    RequestInvokeLLMWithStructuredOutput,
    RequestInvokeModeration,
    RequestInvokeParameterExtractorNode,
    RequestInvokeQuestionClassifierNode,
@ -47,6 +48,21 @@ class PluginInvokeLLMApi(Resource):
        return length_prefixed_response(0xF, generator())


+class PluginInvokeLLMWithStructuredOutputApi(Resource):
+    # Inner-API endpoint through which a plugin invokes an LLM with structured output.
+    @setup_required
+    @plugin_inner_api_only
+    @get_user_tenant
+    @plugin_data(payload_type=RequestInvokeLLMWithStructuredOutput)
+    def post(self, user_model: Account | EndUser, tenant_model: Tenant, payload: RequestInvokeLLMWithStructuredOutput):
+        # The invocation is wrapped in a function so that it only runs when
+        # generator() is called for the response below.
+        def generator():
+            response = PluginModelBackwardsInvocation.invoke_llm_with_structured_output(
+                user_model.id, tenant_model, payload
+            )
+            return PluginModelBackwardsInvocation.convert_to_event_stream(response)
+
+        # Same length-prefixed framing (0xF) as PluginInvokeLLMApi uses.
+        return length_prefixed_response(0xF, generator())
+
+
 class PluginInvokeTextEmbeddingApi(Resource):
    @setup_required
    @plugin_inner_api_only
@ -291,6 +307,7 @@ class PluginFetchAppInfoApi(Resource):


 api.add_resource(PluginInvokeLLMApi, "/invoke/llm")
+api.add_resource(PluginInvokeLLMWithStructuredOutputApi, "/invoke/llm/structured-output")
 api.add_resource(PluginInvokeTextEmbeddingApi, "/invoke/text-embedding")
 api.add_resource(PluginInvokeRerankApi, "/invoke/rerank")
 api.add_resource(PluginInvokeTTSApi, "/invoke/tts")
--- a/api/controllers/inner_api/workspace/workspace.py
+++ b/api/controllers/inner_api/workspace/workspace.py
@ -29,7 +29,19 @@ class EnterpriseWorkspace(Resource):

        tenant_was_created.send(tenant)

-        return {"message": "enterprise workspace created."}
+        resp = {
+            "id": tenant.id,
+            "name": tenant.name,
+            "plan": tenant.plan,
+            "status": tenant.status,
+            "created_at": tenant.created_at.isoformat() + "Z" if tenant.created_at else None,
+            "updated_at": tenant.updated_at.isoformat() + "Z" if tenant.updated_at else None,
+        }
+
+        return {
+            "message": "enterprise workspace created.",
+            "tenant": resp,
+        }


 class EnterpriseWorkspaceNoOwnerEmail(Resource):
--- a/api/controllers/service_api/dataset/dataset.py
+++ b/api/controllers/service_api/dataset/dataset.py
@ -133,6 +133,22 @@ class DatasetListApi(DatasetApiResource):
        parser.add_argument("embedding_model_provider", type=str, required=False, nullable=True, location="json")

        args = parser.parse_args()
+
+        if args.get("embedding_model_provider"):
+            DatasetService.check_embedding_model_setting(
+                tenant_id, args.get("embedding_model_provider"), args.get("embedding_model")
+            )
+        if (
+            args.get("retrieval_model")
+            and args.get("retrieval_model").get("reranking_model")
+            and args.get("retrieval_model").get("reranking_model").get("reranking_provider_name")
+        ):
+            DatasetService.check_reranking_model_setting(
+                tenant_id,
+                args.get("retrieval_model").get("reranking_model").get("reranking_provider_name"),
+                args.get("retrieval_model").get("reranking_model").get("reranking_model_name"),
+            )
+
        try:
            dataset = DatasetService.create_empty_dataset(
                tenant_id=tenant_id,
@ -265,10 +281,20 @@ class DatasetApi(DatasetApiResource):
        data = request.get_json()

        # check embedding model setting
-        if data.get("indexing_technique") == "high_quality":
+        if data.get("indexing_technique") == "high_quality" or data.get("embedding_model_provider"):
            DatasetService.check_embedding_model_setting(
                dataset.tenant_id, data.get("embedding_model_provider"), data.get("embedding_model")
            )
+        if (
+            data.get("retrieval_model")
+            and data.get("retrieval_model").get("reranking_model")
+            and data.get("retrieval_model").get("reranking_model").get("reranking_provider_name")
+        ):
+            DatasetService.check_reranking_model_setting(
+                dataset.tenant_id,
+                data.get("retrieval_model").get("reranking_model").get("reranking_provider_name"),
+                data.get("retrieval_model").get("reranking_model").get("reranking_model_name"),
+            )

        # The role of the current user in the ta table must be admin, owner, editor, or dataset_operator
        DatasetPermissionService.check_permission(
--- a/api/controllers/service_api/dataset/document.py
+++ b/api/controllers/service_api/dataset/document.py
@ -3,7 +3,7 @@ import json
 from flask import request
 from flask_restful import marshal, reqparse
 from sqlalchemy import desc, select
-from werkzeug.exceptions import NotFound
+from werkzeug.exceptions import Forbidden, NotFound

 import services
 from controllers.common.errors import FilenameNotExistsError
@ -18,6 +18,7 @@ from controllers.service_api.app.error import (
 from controllers.service_api.dataset.error import (
    ArchivedDocumentImmutableError,
    DocumentIndexingError,
+    InvalidMetadataError,
 )
 from controllers.service_api.wraps import (
    DatasetApiResource,
@ -29,7 +30,7 @@ from extensions.ext_database import db
 from fields.document_fields import document_fields, document_status_fields
 from libs.login import current_user
 from models.dataset import Dataset, Document, DocumentSegment
-from services.dataset_service import DocumentService
+from services.dataset_service import DatasetService, DocumentService
 from services.entities.knowledge_entities.knowledge_entities import KnowledgeConfig
 from services.file_service import FileService

@ -59,6 +60,7 @@ class DocumentAddByTextApi(DatasetApiResource):
        parser.add_argument("embedding_model_provider", type=str, required=False, nullable=True, location="json")

        args = parser.parse_args()
+
        dataset_id = str(dataset_id)
        tenant_id = str(tenant_id)
        dataset = db.session.query(Dataset).filter(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
@ -74,6 +76,21 @@ class DocumentAddByTextApi(DatasetApiResource):
        if text is None or name is None:
            raise ValueError("Both 'text' and 'name' must be non-null values.")

+        if args.get("embedding_model_provider"):
+            DatasetService.check_embedding_model_setting(
+                tenant_id, args.get("embedding_model_provider"), args.get("embedding_model")
+            )
+        if (
+            args.get("retrieval_model")
+            and args.get("retrieval_model").get("reranking_model")
+            and args.get("retrieval_model").get("reranking_model").get("reranking_provider_name")
+        ):
+            DatasetService.check_reranking_model_setting(
+                tenant_id,
+                args.get("retrieval_model").get("reranking_model").get("reranking_provider_name"),
+                args.get("retrieval_model").get("reranking_model").get("reranking_model_name"),
+            )
+
        upload_file = FileService.upload_text(text=str(text), text_name=str(name))
        data_source = {
            "type": "upload_file",
@ -124,6 +141,17 @@ class DocumentUpdateByTextApi(DatasetApiResource):
        if not dataset:
            raise ValueError("Dataset does not exist.")

+        if (
+            args.get("retrieval_model")
+            and args.get("retrieval_model").get("reranking_model")
+            and args.get("retrieval_model").get("reranking_model").get("reranking_provider_name")
+        ):
+            DatasetService.check_reranking_model_setting(
+                tenant_id,
+                args.get("retrieval_model").get("reranking_model").get("reranking_provider_name"),
+                args.get("retrieval_model").get("reranking_model").get("reranking_model_name"),
+            )
+
        # indexing_technique is already set in dataset since this is an update
        args["indexing_technique"] = dataset.indexing_technique

@ -188,6 +216,21 @@ class DocumentAddByFileApi(DatasetApiResource):
            raise ValueError("indexing_technique is required.")
        args["indexing_technique"] = indexing_technique

+        if "embedding_model_provider" in args:
+            DatasetService.check_embedding_model_setting(
+                tenant_id, args["embedding_model_provider"], args["embedding_model"]
+            )
+        if (
+            "retrieval_model" in args
+            and args["retrieval_model"].get("reranking_model")
+            and args["retrieval_model"].get("reranking_model").get("reranking_provider_name")
+        ):
+            DatasetService.check_reranking_model_setting(
+                tenant_id,
+                args["retrieval_model"].get("reranking_model").get("reranking_provider_name"),
+                args["retrieval_model"].get("reranking_model").get("reranking_model_name"),
+            )
+
        # save file info
        file = request.files["file"]
        # check file
@ -424,6 +467,101 @@ class DocumentIndexingStatusApi(DatasetApiResource):
        return data


+class DocumentDetailApi(DatasetApiResource):
+    """Return the details of a single document in a dataset."""
+
+    # Accepted values of the ``metadata`` query parameter.
+    METADATA_CHOICES = {"all", "only", "without"}
+
+    def get(self, tenant_id, dataset_id, document_id):
+        """Build the detail payload for one document.
+
+        The ``metadata`` query parameter (default ``"all"``) selects the shape:
+        ``"only"`` returns just id / doc_type / doc_metadata, ``"without"``
+        returns every detail field except doc_type / doc_metadata, and
+        ``"all"`` returns everything.
+
+        Raises:
+            NotFound: the dataset or the document does not exist.
+            Forbidden: the document belongs to a different tenant.
+            InvalidMetadataError: ``metadata`` is not one of METADATA_CHOICES.
+        """
+        dataset_id = str(dataset_id)
+        document_id = str(document_id)
+
+        dataset = self.get_dataset(dataset_id, tenant_id)
+
+        document = DocumentService.get_document(dataset.id, document_id)
+
+        if not document:
+            raise NotFound("Document not found.")
+
+        if document.tenant_id != str(tenant_id):
+            raise Forbidden("No permission.")
+
+        metadata = request.args.get("metadata", "all")
+        if metadata not in self.METADATA_CHOICES:
+            raise InvalidMetadataError(f"Invalid metadata value: {metadata}")
+
+        if metadata == "only":
+            return {"id": document.id, "doc_type": document.doc_type, "doc_metadata": document.doc_metadata_details}
+
+        dataset_process_rules = DatasetService.get_process_rules(dataset_id)
+        # Guard against a document without a process rule instead of failing
+        # with AttributeError on ``None.to_dict()``.
+        # NOTE(review): assumes dataset_process_rule may be None — confirm against the model.
+        process_rule = document.dataset_process_rule
+        document_process_rules = process_rule.to_dict() if process_rule else {}
+        data_source_info = document.data_source_detail_dict
+
+        # Fields shared by the "without" and "all" shapes, in response order.
+        response = {
+            "id": document.id,
+            "position": document.position,
+            "data_source_type": document.data_source_type,
+            "data_source_info": data_source_info,
+            "dataset_process_rule_id": document.dataset_process_rule_id,
+            "dataset_process_rule": dataset_process_rules,
+            "document_process_rule": document_process_rules,
+            "name": document.name,
+            "created_from": document.created_from,
+            "created_by": document.created_by,
+            # NOTE(review): unlike the other timestamps this one is not cast to int.
+            "created_at": document.created_at.timestamp(),
+            "tokens": document.tokens,
+            "indexing_status": document.indexing_status,
+            "completed_at": int(document.completed_at.timestamp()) if document.completed_at else None,
+            "updated_at": int(document.updated_at.timestamp()) if document.updated_at else None,
+            "indexing_latency": document.indexing_latency,
+            "error": document.error,
+            "enabled": document.enabled,
+            "disabled_at": int(document.disabled_at.timestamp()) if document.disabled_at else None,
+            "disabled_by": document.disabled_by,
+            "archived": document.archived,
+        }
+        # Metadata fields are only read and emitted for the "all" shape; they
+        # are inserted here to keep the original key order.
+        if metadata == "all":
+            response["doc_type"] = document.doc_type
+            response["doc_metadata"] = document.doc_metadata_details
+        response.update(
+            {
+                "segment_count": document.segment_count,
+                "average_segment_length": document.average_segment_length,
+                "hit_count": document.hit_count,
+                "display_status": document.display_status,
+                "doc_form": document.doc_form,
+                "doc_language": document.doc_language,
+            }
+        )
+
+        return response
+
+
 api.add_resource(
    DocumentAddByTextApi,
    "/datasets/<uuid:dataset_id>/document/create_by_text",
@ -447,3 +585,4 @@ api.add_resource(
 api.add_resource(DocumentDeleteApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
 api.add_resource(DocumentListApi, "/datasets/<uuid:dataset_id>/documents")
 api.add_resource(DocumentIndexingStatusApi, "/datasets/<uuid:dataset_id>/documents/<string:batch>/indexing-status")
+api.add_resource(DocumentDetailApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
--- a/api/controllers/service_api/index.py
+++ b/api/controllers/service_api/index.py
@ -9,7 +9,7 @@ class IndexApi(Resource):
        return {
            "welcome": "Dify OpenAPI",
            "api_version": "v1",
-            "server_version": dify_config.CURRENT_VERSION,
+            "server_version": dify_config.project.version,
        }


--- a/api/controllers/service_api/wraps.py
+++ b/api/controllers/service_api/wraps.py
@ -11,13 +11,13 @@ from flask_restful import Resource
 from pydantic import BaseModel
 from sqlalchemy import select, update
 from sqlalchemy.orm import Session
-from werkzeug.exceptions import Forbidden, Unauthorized
+from werkzeug.exceptions import Forbidden, NotFound, Unauthorized

 from extensions.ext_database import db
 from extensions.ext_redis import redis_client
 from libs.login import _get_user
 from models.account import Account, Tenant, TenantAccountJoin, TenantStatus
-from models.dataset import RateLimitLog
+from models.dataset import Dataset, RateLimitLog
 from models.model import ApiToken, App, EndUser
 from services.feature_service import FeatureService

@ -317,3 +317,11 @@ def create_or_update_end_user_for_user_id(app_model: App, user_id: Optional[str]

 class DatasetApiResource(Resource):
    method_decorators = [validate_dataset_token]
+
+    def get_dataset(self, dataset_id: str, tenant_id: str) -> Dataset:
+        """
+        Fetch a dataset by id, scoped to the given tenant.
+
+        :param dataset_id: id of the dataset to look up
+        :param tenant_id: id of the tenant the dataset must belong to
+        :return: the first dataset matching both the id and the tenant
+        :raises NotFound: if no dataset matches both the id and the tenant
+        """
+        # The tenant filter means a dataset owned by another tenant is reported as missing.
+        dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id, Dataset.tenant_id == tenant_id).first()
+
+        if not dataset:
+            raise NotFound("Dataset not found.")
+
+        return dataset
--- a/api/core/app/app_config/entities.py
+++ b/api/core/app/app_config/entities.py
@ -113,9 +113,9 @@ class VariableEntity(BaseModel):
    hide: bool = False
    max_length: Optional[int] = None
    options: Sequence[str] = Field(default_factory=list)
-    allowed_file_types: Sequence[FileType] = Field(default_factory=list)
-    allowed_file_extensions: Sequence[str] = Field(default_factory=list)
-    allowed_file_upload_methods: Sequence[FileTransferMethod] = Field(default_factory=list)
+    allowed_file_types: Optional[Sequence[FileType]] = Field(default_factory=list)
+    allowed_file_extensions: Optional[Sequence[str]] = Field(default_factory=list)
+    allowed_file_upload_methods: Optional[Sequence[FileTransferMethod]] = Field(default_factory=list)

    @field_validator("description", mode="before")
    @classmethod
@ -128,6 +128,16 @@ class VariableEntity(BaseModel):
        return v or []


+class RagPipelineVariableEntity(VariableEntity):
+    """
+    Rag Pipeline Variable Entity.
+
+    Extends VariableEntity with the three fields declared below.
+    """
+
+    # Optional, defaults to None. Presumably help text for the input - TODO confirm with the frontend.
+    tooltips: Optional[str] = None
+    # Optional, defaults to None. Presumably placeholder text for the input - TODO confirm with the frontend.
+    placeholder: Optional[str] = None
+    # Required (no default). Presumably the id of the node that owns the variable - TODO confirm with callers.
+    belong_to_node_id: str
+
+
 class ExternalDataVariableEntity(BaseModel):
    """
    External Data Variable Entity.
@ -285,7 +295,7 @@ class AppConfig(BaseModel):
    tenant_id: str
    app_id: str
    app_mode: AppMode
-    additional_features: AppAdditionalFeatures
+    additional_features: Optional[AppAdditionalFeatures] = None
    variables: list[VariableEntity] = []
    sensitive_word_avoidance: Optional[SensitiveWordAvoidanceEntity] = None

--- a/api/core/app/app_config/workflow_ui_based_app/variables/manager.py
+++ b/api/core/app/app_config/workflow_ui_based_app/variables/manager.py
@ -1,4 +1,4 @@
-from core.app.app_config.entities import VariableEntity
+from core.app.app_config.entities import RagPipelineVariableEntity, VariableEntity
 from models.workflow import Workflow


@ -20,3 +20,19 @@ class WorkflowVariablesConfigManager:
            variables.append(VariableEntity.model_validate(variable))

        return variables
+
+    @classmethod
+    def convert_rag_pipeline_variable(cls, workflow: Workflow) -> list[RagPipelineVariableEntity]:
+        """
+        Build rag pipeline variable entities from the workflow's rag pipeline user input form.
+
+        :param workflow: workflow instance
+        :return: one validated RagPipelineVariableEntity per entry of the user input form
+        """
+        return [
+            RagPipelineVariableEntity.model_validate(form_item)
+            for form_item in workflow.rag_pipeline_user_input_form()
+        ]
--- a/api/core/app/apps/advanced_chat/app_generator.py
+++ b/api/core/app/apps/advanced_chat/app_generator.py
@ -27,6 +27,9 @@ from core.ops.ops_trace_manager import TraceQueueManager
 from core.prompt.utils.get_thread_messages_length import get_thread_messages_length
 from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository
 from core.repositories.sqlalchemy_workflow_execution_repository import SQLAlchemyWorkflowExecutionRepository
+from core.workflow.repositories.draft_variable_repository import (
+    DraftVariableSaverFactory,
+)
 from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
 from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository
 from core.workflow.variable_loader import DUMMY_VARIABLE_LOADER, VariableLoader
@ -36,8 +39,10 @@ from libs.flask_utils import preserve_flask_contexts
 from models import Account, App, Conversation, EndUser, Message, Workflow, WorkflowNodeExecutionTriggeredFrom
 from models.enums import WorkflowRunTriggeredFrom
 from services.conversation_service import ConversationService
-from services.errors.message import MessageNotExistsError
-from services.workflow_draft_variable_service import DraftVarLoader, WorkflowDraftVariableService
+from services.workflow_draft_variable_service import (
+    DraftVarLoader,
+    WorkflowDraftVariableService,
+)

 logger = logging.getLogger(__name__)

@ -451,6 +456,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
            workflow_execution_repository=workflow_execution_repository,
            workflow_node_execution_repository=workflow_node_execution_repository,
            stream=stream,
+            draft_var_saver_factory=self._get_draft_var_saver_factory(invoke_from),
        )

        return AdvancedChatAppGenerateResponseConverter.convert(response=response, invoke_from=invoke_from)
@ -480,8 +486,6 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
                # get conversation and message
                conversation = self._get_conversation(conversation_id)
                message = self._get_message(message_id)
-                if message is None:
-                    raise MessageNotExistsError("Message not exists")

                # chatbot app
                runner = AdvancedChatAppRunner(
@ -524,6 +528,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
        user: Union[Account, EndUser],
        workflow_execution_repository: WorkflowExecutionRepository,
        workflow_node_execution_repository: WorkflowNodeExecutionRepository,
+        draft_var_saver_factory: DraftVariableSaverFactory,
        stream: bool = False,
    ) -> Union[ChatbotAppBlockingResponse, Generator[ChatbotAppStreamResponse, None, None]]:
        """
@ -550,6 +555,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
            workflow_execution_repository=workflow_execution_repository,
            workflow_node_execution_repository=workflow_node_execution_repository,
            stream=stream,
+            draft_var_saver_factory=draft_var_saver_factory,
        )

        try:
--- a/api/core/app/apps/advanced_chat/generate_task_pipeline.py
+++ b/api/core/app/apps/advanced_chat/generate_task_pipeline.py
@ -64,6 +64,7 @@ from core.workflow.entities.workflow_execution import WorkflowExecutionStatus, W
 from core.workflow.enums import SystemVariableKey
 from core.workflow.graph_engine.entities.graph_runtime_state import GraphRuntimeState
 from core.workflow.nodes import NodeType
+from core.workflow.repositories.draft_variable_repository import DraftVariableSaverFactory
 from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
 from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository
 from core.workflow.workflow_cycle_manager import CycleManagerWorkflowInfo, WorkflowCycleManager
@ -94,6 +95,7 @@ class AdvancedChatAppGenerateTaskPipeline:
        dialogue_count: int,
        workflow_execution_repository: WorkflowExecutionRepository,
        workflow_node_execution_repository: WorkflowNodeExecutionRepository,
+        draft_var_saver_factory: DraftVariableSaverFactory,
    ) -> None:
        self._base_task_pipeline = BasedGenerateTaskPipeline(
            application_generate_entity=application_generate_entity,
@ -153,6 +155,7 @@ class AdvancedChatAppGenerateTaskPipeline:
        self._conversation_name_generate_thread: Thread | None = None
        self._recorded_files: list[Mapping[str, Any]] = []
        self._workflow_run_id: str = ""
+        self._draft_var_saver_factory = draft_var_saver_factory

    def process(self) -> Union[ChatbotAppBlockingResponse, Generator[ChatbotAppStreamResponse, None, None]]:
        """
@ -371,6 +374,7 @@ class AdvancedChatAppGenerateTaskPipeline:
                        workflow_node_execution=workflow_node_execution,
                    )
                    session.commit()
+                self._save_output_for_event(event, workflow_node_execution.id)

                if node_finish_resp:
                    yield node_finish_resp
@ -390,6 +394,8 @@ class AdvancedChatAppGenerateTaskPipeline:
                    task_id=self._application_generate_entity.task_id,
                    workflow_node_execution=workflow_node_execution,
                )
+                if isinstance(event, QueueNodeExceptionEvent):
+                    self._save_output_for_event(event, workflow_node_execution.id)

                if node_finish_resp:
                    yield node_finish_resp
@ -759,3 +765,15 @@ class AdvancedChatAppGenerateTaskPipeline:
        if not message:
            raise ValueError(f"Message not found: {self._message_id}")
        return message
+
+    def _save_output_for_event(self, event: QueueNodeSucceededEvent | QueueNodeExceptionEvent, node_execution_id: str):
+        """
+        Save a finished node's process data and outputs through the draft variable saver.
+
+        :param event: node succeeded / node exception event whose process_data and outputs are saved
+        :param node_execution_id: id of the workflow node execution the event belongs to
+        """
+        # A dedicated session is opened for the save; session.begin() scopes a transaction to this block.
+        with Session(db.engine) as session, session.begin():
+            saver = self._draft_var_saver_factory(
+                session=session,
+                app_id=self._application_generate_entity.app_config.app_id,
+                node_id=event.node_id,
+                node_type=event.node_type,
+                node_execution_id=node_execution_id,
+                # The loop id wins when both the loop id and the iteration id are set.
+                enclosing_node_id=event.in_loop_id or event.in_iteration_id,
+            )
+            saver.save(event.process_data, event.outputs)
--- a/api/core/app/apps/agent_chat/app_generator.py
+++ b/api/core/app/apps/agent_chat/app_generator.py
@ -26,7 +26,6 @@ from factories import file_factory
 from libs.flask_utils import preserve_flask_contexts
 from models import Account, App, EndUser
 from services.conversation_service import ConversationService
-from services.errors.message import MessageNotExistsError

 logger = logging.getLogger(__name__)

@ -238,8 +237,6 @@ class AgentChatAppGenerator(MessageBasedAppGenerator):
                # get conversation and message
                conversation = self._get_conversation(conversation_id)
                message = self._get_message(message_id)
-                if message is None:
-                    raise MessageNotExistsError("Message not exists")

                # chatbot app
                runner = AgentChatAppRunner()
--- a/api/core/app/apps/base_app_generator.py
+++ b/api/core/app/apps/base_app_generator.py
@ -1,10 +1,20 @@
 import json
 from collections.abc import Generator, Mapping, Sequence
-from typing import TYPE_CHECKING, Any, Optional, Union
+from typing import TYPE_CHECKING, Any, Optional, Union, final
+
+from sqlalchemy.orm import Session

 from core.app.app_config.entities import VariableEntityType
+from core.app.entities.app_invoke_entities import InvokeFrom
 from core.file import File, FileUploadConfig
+from core.workflow.nodes.enums import NodeType
+from core.workflow.repositories.draft_variable_repository import (
+    DraftVariableSaver,
+    DraftVariableSaverFactory,
+    NoopDraftVariableSaver,
+)
 from factories import file_factory
+from services.workflow_draft_variable_service import DraftVariableSaver as DraftVariableSaverImpl

 if TYPE_CHECKING:
    from core.app.app_config.entities import VariableEntity
@ -159,3 +169,38 @@ class BaseAppGenerator:
                        yield f"event: {message}\n\n"

            return gen()
+
+    @final
+    @staticmethod
+    def _get_draft_var_saver_factory(invoke_from: InvokeFrom) -> DraftVariableSaverFactory:
+        """
+        Pick the draft variable saver factory for the given invocation source.
+
+        :param invoke_from: where the app was invoked from
+        :return: for InvokeFrom.DEBUGGER, a factory that builds the DraftVariableSaver implementation
+            imported from services.workflow_draft_variable_service; for any other source, a factory
+            that always returns a NoopDraftVariableSaver
+        """
+        if invoke_from == InvokeFrom.DEBUGGER:
+
+            def draft_var_saver_factory(
+                session: Session,
+                app_id: str,
+                node_id: str,
+                node_type: NodeType,
+                node_execution_id: str,
+                enclosing_node_id: str | None = None,
+            ) -> DraftVariableSaver:
+                return DraftVariableSaverImpl(
+                    session=session,
+                    app_id=app_id,
+                    node_id=node_id,
+                    node_type=node_type,
+                    node_execution_id=node_execution_id,
+                    enclosing_node_id=enclosing_node_id,
+                )
+        else:
+
+            # Same signature as the debugger factory so callers can use either one; all arguments are ignored.
+            def draft_var_saver_factory(
+                session: Session,
+                app_id: str,
+                node_id: str,
+                node_type: NodeType,
+                node_execution_id: str,
+                enclosing_node_id: str | None = None,
+            ) -> DraftVariableSaver:
+                return NoopDraftVariableSaver()
+
+        return draft_var_saver_factory
--- a/api/core/app/apps/chat/app_generator.py
+++ b/api/core/app/apps/chat/app_generator.py
@ -25,7 +25,6 @@ from factories import file_factory
 from models.account import Account
 from models.model import App, EndUser
 from services.conversation_service import ConversationService
-from services.errors.message import MessageNotExistsError

 logger = logging.getLogger(__name__)

@ -224,8 +223,6 @@ class ChatAppGenerator(MessageBasedAppGenerator):
                # get conversation and message
                conversation = self._get_conversation(conversation_id)
                message = self._get_message(message_id)
-                if message is None:
-                    raise MessageNotExistsError("Message not exists")

                # chatbot app
                runner = ChatAppRunner()
--- a/api/core/app/apps/common/workflow_response_converter.py
+++ b/api/core/app/apps/common/workflow_response_converter.py
@ -43,10 +43,13 @@ from core.app.entities.task_entities import (
    WorkflowStartStreamResponse,
 )
 from core.file import FILE_MODEL_IDENTITY, File
+from core.plugin.impl.datasource import PluginDatasourceManager
 from core.tools.tool_manager import ToolManager
+from core.variables.segments import ArrayFileSegment, FileSegment, Segment
 from core.workflow.entities.workflow_execution import WorkflowExecution
 from core.workflow.entities.workflow_node_execution import WorkflowNodeExecution, WorkflowNodeExecutionStatus
 from core.workflow.nodes import NodeType
+from core.workflow.nodes.datasource.entities import DatasourceNodeData
 from core.workflow.nodes.tool.entities import ToolNodeData
 from core.workflow.workflow_type_encoder import WorkflowRuntimeTypeConverter
 from models import (
@ -182,6 +185,14 @@ class WorkflowResponseConverter:
                provider_type=node_data.provider_type,
                provider_id=node_data.provider_id,
            )
+        elif event.node_type == NodeType.DATASOURCE:
+            node_data = cast(DatasourceNodeData, event.node_data)
+            manager = PluginDatasourceManager()
+            provider_entity = manager.fetch_datasource_provider(
+                self._application_generate_entity.app_config.tenant_id,
+                f"{node_data.plugin_id}/{node_data.provider_name}",
+            )
+            response.data.extras["icon"] = provider_entity.declaration.identity.icon

        return response

@ -506,7 +517,8 @@ class WorkflowResponseConverter:
        # Convert to tuple to match Sequence type
        return tuple(flattened_files)

-    def _fetch_files_from_variable_value(self, value: Union[dict, list]) -> Sequence[Mapping[str, Any]]:
+    @classmethod
+    def _fetch_files_from_variable_value(cls, value: Union[dict, list, Segment]) -> Sequence[Mapping[str, Any]]:
        """
        Fetch files from variable value
        :param value: variable value
@ -515,20 +527,30 @@ class WorkflowResponseConverter:
        if not value:
            return []

-        files = []
-        if isinstance(value, list):
+        files: list[Mapping[str, Any]] = []
+        if isinstance(value, FileSegment):
+            files.append(value.value.to_dict())
+        elif isinstance(value, ArrayFileSegment):
+            files.extend([i.to_dict() for i in value.value])
+        elif isinstance(value, File):
+            files.append(value.to_dict())
+        elif isinstance(value, list):
            for item in value:
-                file = self._get_file_var_from_value(item)
+                file = cls._get_file_var_from_value(item)
                if file:
                    files.append(file)
-        elif isinstance(value, dict):
-            file = self._get_file_var_from_value(value)
+        elif isinstance(
+            value,
+            dict,
+        ):
+            file = cls._get_file_var_from_value(value)
            if file:
                files.append(file)

        return files

-    def _get_file_var_from_value(self, value: Union[dict, list]) -> Mapping[str, Any] | None:
+    @classmethod
+    def _get_file_var_from_value(cls, value: Union[dict, list]) -> Mapping[str, Any] | None:
        """
        Get file var from value
        :param value: variable value
--- a/api/core/app/apps/completion/app_generator.py
+++ b/api/core/app/apps/completion/app_generator.py
@ -201,8 +201,6 @@ class CompletionAppGenerator(MessageBasedAppGenerator):
            try:
                # get message
                message = self._get_message(message_id)
-                if message is None:
-                    raise MessageNotExistsError()

                # chatbot app
                runner = CompletionAppRunner()
--- a/api/core/app/apps/message_based_app_generator.py
+++ b/api/core/app/apps/message_based_app_generator.py
@ -29,6 +29,7 @@ from models.enums import CreatorUserRole
 from models.model import App, AppMode, AppModelConfig, Conversation, EndUser, Message, MessageFile
 from services.errors.app_model_config import AppModelConfigBrokenError
 from services.errors.conversation import ConversationNotExistsError
+from services.errors.message import MessageNotExistsError

 logger = logging.getLogger(__name__)

@ -251,7 +252,7 @@ class MessageBasedAppGenerator(BaseAppGenerator):

        return introduction or ""

-    def _get_conversation(self, conversation_id: str):
+    def _get_conversation(self, conversation_id: str) -> Conversation:
        """
        Get conversation by conversation id
        :param conversation_id: conversation id
@ -260,11 +261,11 @@ class MessageBasedAppGenerator(BaseAppGenerator):
        conversation = db.session.query(Conversation).filter(Conversation.id == conversation_id).first()

        if not conversation:
-            raise ConversationNotExistsError()
+            raise ConversationNotExistsError("Conversation not exists")

        return conversation

-    def _get_message(self, message_id: str) -> Optional[Message]:
+    def _get_message(self, message_id: str) -> Message:
        """
        Get message by message id
        :param message_id: message id
@ -272,4 +273,7 @@ class MessageBasedAppGenerator(BaseAppGenerator):
        """
        message = db.session.query(Message).filter(Message.id == message_id).first()

+        if message is None:
+            raise MessageNotExistsError("Message not exists")
+
        return message
--- a/api/core/app/apps/pipeline/init.py
+++ b/api/core/app/apps/pipeline/init.py
--- a/api/core/app/apps/pipeline/generate_response_converter.py
+++ b/api/core/app/apps/pipeline/generate_response_converter.py
@ -0,0 +1,95 @@
+from collections.abc import Generator
+from typing import cast
+
+from core.app.apps.base_app_generate_response_converter import AppGenerateResponseConverter
+from core.app.entities.task_entities import (
+    AppStreamResponse,
+    ErrorStreamResponse,
+    NodeFinishStreamResponse,
+    NodeStartStreamResponse,
+    PingStreamResponse,
+    WorkflowAppBlockingResponse,
+    WorkflowAppStreamResponse,
+)
+
+
+class WorkflowAppGenerateResponseConverter(AppGenerateResponseConverter):
+    """
+    Convert workflow app generate responses into plain dicts (blocking) or stream chunks (streaming).
+    """
+
+    _blocking_response_type = WorkflowAppBlockingResponse
+
+    @classmethod
+    def convert_blocking_full_response(cls, blocking_response: WorkflowAppBlockingResponse) -> dict:  # type: ignore[override]
+        """
+        Convert blocking full response.
+        :param blocking_response: blocking response
+        :return: a new dict built from the response's to_dict()
+        """
+        return dict(blocking_response.to_dict())
+
+    @classmethod
+    def convert_blocking_simple_response(cls, blocking_response: WorkflowAppBlockingResponse) -> dict:  # type: ignore[override]
+        """
+        Convert blocking simple response.
+        :param blocking_response: blocking response
+        :return: the same payload as the full response
+        """
+        return cls.convert_blocking_full_response(blocking_response)
+
+    @classmethod
+    def convert_stream_full_response(
+        cls, stream_response: Generator[AppStreamResponse, None, None]
+    ) -> Generator[dict | str, None, None]:
+        """
+        Convert stream full response.
+        :param stream_response: stream response
+        :return: generator yielding "ping" for ping chunks and a dict for every other chunk
+        """
+        yield from cls._convert_stream_response(stream_response, ignore_node_detail=False)
+
+    @classmethod
+    def convert_stream_simple_response(
+        cls, stream_response: Generator[AppStreamResponse, None, None]
+    ) -> Generator[dict | str, None, None]:
+        """
+        Convert stream simple response.
+
+        Same as the full conversion, except that node start / node finish chunks are
+        serialized with to_ignore_detail_dict() instead of to_dict().
+        :param stream_response: stream response
+        :return: generator yielding "ping" for ping chunks and a dict for every other chunk
+        """
+        yield from cls._convert_stream_response(stream_response, ignore_node_detail=True)
+
+    @classmethod
+    def _convert_stream_response(
+        cls, stream_response: Generator[AppStreamResponse, None, None], *, ignore_node_detail: bool
+    ) -> Generator[dict | str, None, None]:
+        """
+        Shared implementation of the full and simple stream conversions.
+        :param stream_response: stream response
+        :param ignore_node_detail: serialize node start / finish chunks with to_ignore_detail_dict()
+        :return: generator yielding "ping" for ping chunks and a dict for every other chunk
+        """
+        for chunk in stream_response:
+            chunk = cast(WorkflowAppStreamResponse, chunk)
+            sub_stream_response = chunk.stream_response
+
+            if isinstance(sub_stream_response, PingStreamResponse):
+                yield "ping"
+                continue
+
+            response_chunk = {
+                "event": sub_stream_response.event.value,
+                "workflow_run_id": chunk.workflow_run_id,
+            }
+
+            if isinstance(sub_stream_response, ErrorStreamResponse):
+                data = cls._error_to_stream_response(sub_stream_response.err)
+                response_chunk.update(data)
+            elif ignore_node_detail and isinstance(
+                sub_stream_response, NodeStartStreamResponse | NodeFinishStreamResponse
+            ):
+                response_chunk.update(sub_stream_response.to_ignore_detail_dict())
+            else:
+                response_chunk.update(sub_stream_response.to_dict())
+            yield response_chunk
--- a/api/core/app/apps/pipeline/pipeline_config_manager.py
+++ b/api/core/app/apps/pipeline/pipeline_config_manager.py
@ -0,0 +1,64 @@
+from core.app.app_config.base_app_config_manager import BaseAppConfigManager
+from core.app.app_config.common.sensitive_word_avoidance.manager import SensitiveWordAvoidanceConfigManager
+from core.app.app_config.entities import RagPipelineVariableEntity, WorkflowUIBasedAppConfig
+from core.app.app_config.features.file_upload.manager import FileUploadConfigManager
+from core.app.app_config.features.text_to_speech.manager import TextToSpeechConfigManager
+from core.app.app_config.workflow_ui_based_app.variables.manager import WorkflowVariablesConfigManager
+from models.dataset import Pipeline
+from models.model import AppMode
+from models.workflow import Workflow
+
+
+class PipelineConfig(WorkflowUIBasedAppConfig):
+    """
+    Pipeline Config Entity.
+    """
+
+    # Rag pipeline variable entities for this pipeline; defaults to an empty list.
+    rag_pipeline_variables: list[RagPipelineVariableEntity] = []
+
+
+class PipelineConfigManager(BaseAppConfigManager):
+    """
+    Builds and validates the app config used by rag pipelines.
+    """
+
+    @classmethod
+    def get_pipeline_config(cls, pipeline: Pipeline, workflow: Workflow) -> PipelineConfig:
+        """
+        Build the pipeline config for a pipeline and one of its workflows.
+
+        :param pipeline: pipeline providing the tenant id and the app id
+        :param workflow: workflow providing the workflow id and the rag pipeline variables
+        :return: pipeline config with app_mode set to AppMode.RAG_PIPELINE
+        """
+        pipeline_config = PipelineConfig(
+            tenant_id=pipeline.tenant_id,
+            app_id=pipeline.id,
+            app_mode=AppMode.RAG_PIPELINE,
+            workflow_id=workflow.id,
+            rag_pipeline_variables=WorkflowVariablesConfigManager.convert_rag_pipeline_variable(workflow=workflow),
+        )
+
+        return pipeline_config
+
+    @classmethod
+    def config_validate(cls, tenant_id: str, config: dict, only_structure_validate: bool = False) -> dict:
+        """
+        Validate for pipeline config
+
+        Runs the file upload, text to speech and moderation validators in turn, then keeps only
+        the config keys those validators reported.
+
+        :param tenant_id: tenant id
+        :param config: app model config args
+        :param only_structure_validate: only validate the structure of the config
+        :return: config restricted to the keys reported by the validators
+        """
+        related_config_keys = []
+
+        # file upload validation
+        config, current_related_config_keys = FileUploadConfigManager.validate_and_set_defaults(config=config)
+        related_config_keys.extend(current_related_config_keys)
+
+        # text_to_speech
+        config, current_related_config_keys = TextToSpeechConfigManager.validate_and_set_defaults(config)
+        related_config_keys.extend(current_related_config_keys)
+
+        # moderation validation
+        config, current_related_config_keys = SensitiveWordAvoidanceConfigManager.validate_and_set_defaults(
+            tenant_id=tenant_id, config=config, only_structure_validate=only_structure_validate
+        )
+        related_config_keys.extend(current_related_config_keys)
+
+        # drop keys reported by more than one validator
+        related_config_keys = list(set(related_config_keys))
+
+        # Filter out extra parameters
+        filtered_config = {key: config.get(key) for key in related_config_keys}
+
+        return filtered_config
--- a/api/core/app/apps/pipeline/pipeline_generator.py
+++ b/api/core/app/apps/pipeline/pipeline_generator.py
@ -0,0 +1,621 @@
+import contextvars
+import datetime
+import json
+import logging
+import secrets
+import threading
+import time
+import uuid
+from collections.abc import Generator, Mapping
+from typing import Any, Literal, Optional, Union, overload
+
+from flask import Flask, current_app
+from pydantic import ValidationError
+from sqlalchemy.orm import sessionmaker
+
+import contexts
+from configs import dify_config
+from core.app.apps.base_app_generator import BaseAppGenerator
+from core.app.apps.base_app_queue_manager import AppQueueManager, GenerateTaskStoppedError, PublishFrom
+from core.app.apps.pipeline.pipeline_config_manager import PipelineConfigManager
+from core.app.apps.pipeline.pipeline_queue_manager import PipelineQueueManager
+from core.app.apps.pipeline.pipeline_runner import PipelineRunner
+from core.app.apps.workflow.generate_response_converter import WorkflowAppGenerateResponseConverter
+from core.app.apps.workflow.generate_task_pipeline import WorkflowAppGenerateTaskPipeline
+from core.app.entities.app_invoke_entities import InvokeFrom, RagPipelineGenerateEntity
+from core.app.entities.task_entities import WorkflowAppBlockingResponse, WorkflowAppStreamResponse
+from core.entities.knowledge_entities import PipelineDataset, PipelineDocument
+from core.model_runtime.errors.invoke import InvokeAuthorizationError
+from core.rag.index_processor.constant.built_in_field import BuiltInField
+from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository
+from core.repositories.sqlalchemy_workflow_execution_repository import SQLAlchemyWorkflowExecutionRepository
+from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
+from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository
+from extensions.ext_database import db
+from libs.flask_utils import preserve_flask_contexts
+from models import Account, EndUser, Workflow, WorkflowNodeExecutionTriggeredFrom
+from models.dataset import Document, DocumentPipelineExecutionLog, Pipeline
+from models.enums import WorkflowRunTriggeredFrom
+from models.model import AppMode
+from services.dataset_service import DocumentService
+
+logger = logging.getLogger(__name__)
+
+
+class PipelineGenerator(BaseAppGenerator):
+    # Typing overload: ``streaming`` is statically known to be ``True``.
+    @overload
+    def generate(
+        self,
+        *,
+        pipeline: Pipeline,
+        workflow: Workflow,
+        user: Union[Account, EndUser],
+        args: Mapping[str, Any],
+        invoke_from: InvokeFrom,
+        streaming: Literal[True],
+        call_depth: int,
+        workflow_thread_pool_id: Optional[str],
+    ) -> Mapping[str, Any] | Generator[Mapping | str, None, None] | None: ...
+
+    # Typing overload: ``streaming`` is statically known to be ``False``.
+    @overload
+    def generate(
+        self,
+        *,
+        pipeline: Pipeline,
+        workflow: Workflow,
+        user: Union[Account, EndUser],
+        args: Mapping[str, Any],
+        invoke_from: InvokeFrom,
+        streaming: Literal[False],
+        call_depth: int,
+        workflow_thread_pool_id: Optional[str],
+    ) -> Mapping[str, Any]: ...
+
+    # Typing overload: ``streaming`` is only known to be a ``bool``.
+    @overload
+    def generate(
+        self,
+        *,
+        pipeline: Pipeline,
+        workflow: Workflow,
+        user: Union[Account, EndUser],
+        args: Mapping[str, Any],
+        invoke_from: InvokeFrom,
+        streaming: bool,
+        call_depth: int,
+        workflow_thread_pool_id: Optional[str],
+    ) -> Union[Mapping[str, Any], Generator[Mapping | str, None, None]]: ...
+
+    def generate(
+        self,
+        *,
+        pipeline: Pipeline,
+        workflow: Workflow,
+        user: Union[Account, EndUser],
+        args: Mapping[str, Any],
+        invoke_from: InvokeFrom,
+        streaming: bool = True,
+        call_depth: int = 0,
+        workflow_thread_pool_id: Optional[str] = None,
+    ) -> Union[Mapping[str, Any], Generator[Mapping | str, None, None], None]:
+        """
+        Launch one pipeline run per entry of ``args["datasource_info_list"]``.
+
+        For ``InvokeFrom.PUBLISHED`` a ``Document`` row and a
+        ``DocumentPipelineExecutionLog`` row are persisted for every
+        datasource entry before its run is started.
+
+        For ``InvokeFrom.DEBUGGER`` the result of ``_generate`` for the FIRST
+        datasource entry is returned directly. For every other source each run
+        is started on its own thread (never joined here) and a summary mapping
+        is returned.
+
+        :param pipeline: pipeline to run; its ``dataset`` must be set
+        :param workflow: workflow providing the id and the RAG pipeline variables
+        :param user: account or end user
+        :param args: request args; the keys ``inputs``, ``start_node_id``,
+            ``datasource_type`` and ``datasource_info_list`` are read with
+            ``args[...]`` and are therefore required
+        :param invoke_from: invoke from source
+        :param streaming: is stream
+        :param call_depth: stored on the generate entity
+        :param workflow_thread_pool_id: workflow thread pool id
+        :return: the ``_generate`` result in DEBUGGER mode, otherwise a mapping
+            with the keys ``batch``, ``dataset`` and ``documents``
+        :raises ValueError: if the pipeline has no dataset
+        """
+        # convert to app config
+        pipeline_config = PipelineConfigManager.get_pipeline_config(
+            pipeline=pipeline,
+            workflow=workflow,
+        )
+        # the pipeline must be attached to a dataset
+        dataset = pipeline.dataset
+        if not dataset:
+            raise ValueError("Pipeline dataset is required")
+        inputs: Mapping[str, Any] = args["inputs"]
+        start_node_id: str = args["start_node_id"]
+        datasource_type: str = args["datasource_type"]
+        datasource_info_list: list[Mapping[str, Any]] = args["datasource_info_list"]
+        # batch id shared by every document of this call: timestamp + 6-digit random number
+        batch = time.strftime("%Y%m%d%H%M%S") + str(secrets.randbelow(900000) + 100000)
+        # stays empty unless invoke_from is PUBLISHED
+        documents = []
+        if invoke_from == InvokeFrom.PUBLISHED:
+            for datasource_info in datasource_info_list:
+                # NOTE(review): the position is queried before the previously added documents
+                # are committed; distinct positions presumably rely on session autoflush — confirm
+                position = DocumentService.get_documents_position(dataset.id)
+                document = self._build_document(
+                    tenant_id=pipeline.tenant_id,
+                    dataset_id=dataset.id,
+                    built_in_field_enabled=dataset.built_in_field_enabled,
+                    datasource_type=datasource_type,
+                    datasource_info=datasource_info,
+                    created_from="rag-pipeline",
+                    position=position,
+                    account=user,
+                    batch=batch,
+                    document_form=dataset.chunk_structure,
+                )
+                db.session.add(document)
+                documents.append(document)
+            # a single commit persists all new documents (their ids are read below)
+            db.session.commit()
+
+        # one workflow run per datasource entry
+        for i, datasource_info in enumerate(datasource_info_list):
+            workflow_run_id = str(uuid.uuid4())
+            document_id = None
+            if invoke_from == InvokeFrom.PUBLISHED:
+                # documents[i] was built from datasource_info_list[i] above
+                document_id = documents[i].id
+                document_pipeline_execution_log = DocumentPipelineExecutionLog(
+                    document_id=document_id,
+                    datasource_type=datasource_type,
+                    datasource_info=json.dumps(datasource_info),
+                    datasource_node_id=start_node_id,
+                    input_data=inputs,
+                    pipeline_id=pipeline.id,
+                    created_by=user.id,
+                )
+                db.session.add(document_pipeline_execution_log)
+                db.session.commit()
+            application_generate_entity = RagPipelineGenerateEntity(
+                task_id=str(uuid.uuid4()),
+                app_config=pipeline_config,
+                pipeline_config=pipeline_config,
+                datasource_type=datasource_type,
+                datasource_info=datasource_info,
+                dataset_id=dataset.id,
+                start_node_id=start_node_id,
+                batch=batch,
+                document_id=document_id,
+                inputs=self._prepare_user_inputs(
+                    user_inputs=inputs,
+                    variables=pipeline_config.rag_pipeline_variables,
+                    tenant_id=pipeline.tenant_id,
+                    strict_type_validation=True if invoke_from == InvokeFrom.SERVICE_API else False,
+                ),
+                files=[],
+                user_id=user.id,
+                stream=streaming,
+                invoke_from=invoke_from,
+                call_depth=call_depth,
+                workflow_execution_id=workflow_run_id,
+            )
+
+            # reset the plugin tool provider context vars before the context is copied for this run
+            contexts.plugin_tool_providers.set({})
+            contexts.plugin_tool_providers_lock.set(threading.Lock())
+            if invoke_from == InvokeFrom.DEBUGGER:
+                workflow_triggered_from = WorkflowRunTriggeredFrom.RAG_PIPELINE_DEBUGGING
+            else:
+                workflow_triggered_from = WorkflowRunTriggeredFrom.RAG_PIPELINE_RUN
+            # Create the workflow execution and workflow node execution repositories
+            session_factory = sessionmaker(bind=db.engine, expire_on_commit=False)
+            workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository(
+                session_factory=session_factory,
+                user=user,
+                app_id=application_generate_entity.app_config.app_id,
+                triggered_from=workflow_triggered_from,
+            )
+
+            # NOTE(review): node executions are tagged RAG_PIPELINE_RUN even for DEBUGGER runs — confirm intended
+            workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository(
+                session_factory=session_factory,
+                user=user,
+                app_id=application_generate_entity.app_config.app_id,
+                triggered_from=WorkflowNodeExecutionTriggeredFrom.RAG_PIPELINE_RUN,
+            )
+            if invoke_from == InvokeFrom.DEBUGGER:
+                # returns during the first loop iteration: later datasource entries are not run
+                return self._generate(
+                    flask_app=current_app._get_current_object(),  # type: ignore
+                    context=contextvars.copy_context(),
+                    pipeline=pipeline,
+                    workflow_id=workflow.id,
+                    user=user,
+                    application_generate_entity=application_generate_entity,
+                    invoke_from=invoke_from,
+                    workflow_execution_repository=workflow_execution_repository,
+                    workflow_node_execution_repository=workflow_node_execution_repository,
+                    streaming=streaming,
+                    workflow_thread_pool_id=workflow_thread_pool_id,
+                )
+            else:
+                # run in child thread
+                context = contextvars.copy_context()
+
+                # NOTE(review): a thread target's return value is discarded; if `_generate` returns
+                # a lazy generator when `streaming` is True it is never consumed here — confirm
+                worker_thread = threading.Thread(
+                    target=self._generate,
+                    kwargs={
+                        "flask_app": current_app._get_current_object(),  # type: ignore
+                        "context": context,
+                        "pipeline": pipeline,
+                        "workflow_id": workflow.id,
+                        "user": user,
+                        "application_generate_entity": application_generate_entity,
+                        "invoke_from": invoke_from,
+                        "workflow_execution_repository": workflow_execution_repository,
+                        "workflow_node_execution_repository": workflow_node_execution_repository,
+                        "streaming": streaming,
+                        "workflow_thread_pool_id": workflow_thread_pool_id,
+                    },
+                )
+
+                # fire and forget: the thread is not joined
+                worker_thread.start()
+        # return batch, dataset, documents
+        return {
+            "batch": batch,
+            "dataset": PipelineDataset(
+                id=dataset.id,
+                name=dataset.name,
+                description=dataset.description,
+                chunk_structure=dataset.chunk_structure,
+            ).model_dump(),
+            "documents": [
+                PipelineDocument(
+                    id=document.id,
+                    position=document.position,
+                    data_source_type=document.data_source_type,
+                    data_source_info=json.loads(document.data_source_info) if document.data_source_info else None,
+                    name=document.name,
+                    indexing_status=document.indexing_status,
+                    error=document.error,
+                    enabled=document.enabled,
+                ).model_dump()
+                for document in documents
+            ],
+        }
+
+    def _generate(
+        self,
+        *,
+        flask_app: Flask,
+        context: contextvars.Context,
+        pipeline: Pipeline,
+        workflow_id: str,
+        user: Union[Account, EndUser],
+        application_generate_entity: RagPipelineGenerateEntity,
+        invoke_from: InvokeFrom,
+        workflow_execution_repository: WorkflowExecutionRepository,
+        workflow_node_execution_repository: WorkflowNodeExecutionRepository,
+        streaming: bool = True,
+        workflow_thread_pool_id: Optional[str] = None,
+    ) -> Union[Mapping[str, Any], Generator[str | Mapping[str, Any], None, None]]:
+        """
+        Generate App response.
+
+        Loads the workflow by id, starts ``_generate_worker`` on a new thread
+        and returns the converted result of ``_handle_response``.
+
+        :param flask_app: Flask app whose contexts are restored for this call
+        :param context: context variables restored together with the Flask contexts
+        :param pipeline: Pipeline (not read by this method's body)
+        :param workflow_id: id of the workflow to load from the database
+        :param user: account or end user
+        :param application_generate_entity: application generate entity
+        :param invoke_from: invoke from source
+        :param workflow_execution_repository: repository for workflow execution
+        :param workflow_node_execution_repository: repository for workflow node execution
+        :param streaming: is stream
+        :param workflow_thread_pool_id: workflow thread pool id
+        :raises ValueError: if no workflow with ``workflow_id`` exists
+        """
+        with preserve_flask_contexts(flask_app, context_vars=context):
+            # load the workflow inside the restored Flask contexts
+            workflow = db.session.query(Workflow).filter(Workflow.id == workflow_id).first()
+            if not workflow:
+                raise ValueError(f"Workflow not found: {workflow_id}")
+            # init queue manager
+            queue_manager = PipelineQueueManager(
+                task_id=application_generate_entity.task_id,
+                user_id=application_generate_entity.user_id,
+                invoke_from=application_generate_entity.invoke_from,
+                app_mode=AppMode.RAG_PIPELINE,
+            )
+            # rebinds the `context` parameter: the worker gets a fresh copy of the current context
+            context = contextvars.copy_context()
+
+            # new thread
+            worker_thread = threading.Thread(
+                target=self._generate_worker,
+                kwargs={
+                    "flask_app": current_app._get_current_object(),  # type: ignore
+                    "context": context,
+                    "queue_manager": queue_manager,
+                    "application_generate_entity": application_generate_entity,
+                    "workflow_thread_pool_id": workflow_thread_pool_id,
+                },
+            )
+
+            worker_thread.start()
+
+            # return response or stream generator
+            response = self._handle_response(
+                application_generate_entity=application_generate_entity,
+                workflow=workflow,
+                queue_manager=queue_manager,
+                user=user,
+                workflow_execution_repository=workflow_execution_repository,
+                workflow_node_execution_repository=workflow_node_execution_repository,
+                stream=streaming,
+            )
+
+            return WorkflowAppGenerateResponseConverter.convert(response=response, invoke_from=invoke_from)
+
+    def single_iteration_generate(
+        self,
+        pipeline: Pipeline,
+        workflow: Workflow,
+        node_id: str,
+        user: Account | EndUser,
+        args: Mapping[str, Any],
+        streaming: bool = True,
+    ) -> Mapping[str, Any] | Generator[str | Mapping[str, Any], None, None]:
+        """
+        Start a DEBUGGER-mode pipeline run requested for a single node.
+
+        :param pipeline: Pipeline; its ``dataset`` must be set
+        :param workflow: Workflow
+        :param node_id: the node id
+        :param user: account or end user
+        :param args: request args; ``inputs`` is required, while
+            ``datasource_type``, ``datasource_info``, ``batch`` and
+            ``document_id`` are optional
+        :param streaming: is streamed
+        :return: the result of ``_generate``
+        :raises ValueError: if ``node_id`` is empty, ``args["inputs"]`` is
+            missing or ``None``, or the pipeline has no dataset
+        """
+        if not node_id:
+            raise ValueError("node_id is required")
+
+        if args.get("inputs") is None:
+            raise ValueError("inputs is required")
+
+        # convert to app config
+        pipeline_config = PipelineConfigManager.get_pipeline_config(pipeline=pipeline, workflow=workflow)
+
+        dataset = pipeline.dataset
+        if not dataset:
+            raise ValueError("Pipeline dataset is required")
+
+        # init application generate entity - use RagPipelineGenerateEntity instead
+        # NOTE(review): `node_id` and `args["inputs"]` are validated above but not forwarded to the
+        # entity, unlike `single_loop_generate`, which sets `single_loop_run` — confirm intended
+        application_generate_entity = RagPipelineGenerateEntity(
+            task_id=str(uuid.uuid4()),
+            app_config=pipeline_config,
+            pipeline_config=pipeline_config,
+            datasource_type=args.get("datasource_type", ""),
+            datasource_info=args.get("datasource_info", {}),
+            dataset_id=dataset.id,
+            batch=args.get("batch", ""),
+            document_id=args.get("document_id"),
+            inputs={},
+            files=[],
+            user_id=user.id,
+            stream=streaming,
+            invoke_from=InvokeFrom.DEBUGGER,
+            call_depth=0,
+            workflow_execution_id=str(uuid.uuid4()),
+        )
+        contexts.plugin_tool_providers.set({})
+        contexts.plugin_tool_providers_lock.set(threading.Lock())
+        # Create the workflow execution and workflow node execution repositories
+        session_factory = sessionmaker(bind=db.engine, expire_on_commit=False)
+
+        workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository(
+            session_factory=session_factory,
+            user=user,
+            app_id=application_generate_entity.app_config.app_id,
+            triggered_from=WorkflowRunTriggeredFrom.RAG_PIPELINE_DEBUGGING,
+        )
+
+        workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository(
+            session_factory=session_factory,
+            user=user,
+            app_id=application_generate_entity.app_config.app_id,
+            triggered_from=WorkflowNodeExecutionTriggeredFrom.SINGLE_STEP,
+        )
+
+        return self._generate(
+            flask_app=current_app._get_current_object(),  # type: ignore
+            # `context` is a required keyword-only argument of `_generate`; copy it after the
+            # plugin tool provider context vars were reset above so the run sees the fresh values
+            context=contextvars.copy_context(),
+            pipeline=pipeline,
+            workflow_id=workflow.id,
+            user=user,
+            invoke_from=InvokeFrom.DEBUGGER,
+            application_generate_entity=application_generate_entity,
+            workflow_execution_repository=workflow_execution_repository,
+            workflow_node_execution_repository=workflow_node_execution_repository,
+            streaming=streaming,
+        )
+
+    def single_loop_generate(
+        self,
+        pipeline: Pipeline,
+        workflow: Workflow,
+        node_id: str,
+        user: Account | EndUser,
+        args: Mapping[str, Any],
+        streaming: bool = True,
+    ) -> Mapping[str, Any] | Generator[str | Mapping[str, Any], None, None]:
+        """
+        Start a DEBUGGER-mode pipeline run for a single loop node.
+
+        ``node_id`` and ``args["inputs"]`` are forwarded to the run through the
+        entity's ``single_loop_run`` field.
+
+        :param pipeline: Pipeline; its ``dataset`` must be set
+        :param workflow: Workflow
+        :param node_id: the node id
+        :param user: account or end user
+        :param args: request args; ``inputs`` is required, while
+            ``datasource_type``, ``datasource_info``, ``batch`` and
+            ``document_id`` are optional
+        :param streaming: is streamed
+        :return: the result of ``_generate``
+        :raises ValueError: if ``node_id`` is empty, ``args["inputs"]`` is
+            missing or ``None``, or the pipeline has no dataset
+        """
+        if not node_id:
+            raise ValueError("node_id is required")
+
+        if args.get("inputs") is None:
+            raise ValueError("inputs is required")
+
+        dataset = pipeline.dataset
+        if not dataset:
+            raise ValueError("Pipeline dataset is required")
+
+        # convert to app config
+        pipeline_config = PipelineConfigManager.get_pipeline_config(pipeline=pipeline, workflow=workflow)
+
+        # init application generate entity
+        application_generate_entity = RagPipelineGenerateEntity(
+            task_id=str(uuid.uuid4()),
+            app_config=pipeline_config,
+            pipeline_config=pipeline_config,
+            datasource_type=args.get("datasource_type", ""),
+            datasource_info=args.get("datasource_info", {}),
+            batch=args.get("batch", ""),
+            document_id=args.get("document_id"),
+            dataset_id=dataset.id,
+            inputs={},
+            files=[],
+            user_id=user.id,
+            stream=streaming,
+            invoke_from=InvokeFrom.DEBUGGER,
+            extras={"auto_generate_conversation_name": False},
+            single_loop_run=RagPipelineGenerateEntity.SingleLoopRunEntity(node_id=node_id, inputs=args["inputs"]),
+            workflow_execution_id=str(uuid.uuid4()),
+        )
+        contexts.plugin_tool_providers.set({})
+        contexts.plugin_tool_providers_lock.set(threading.Lock())
+
+        # Create the workflow execution and workflow node execution repositories
+        session_factory = sessionmaker(bind=db.engine, expire_on_commit=False)
+
+        workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository(
+            session_factory=session_factory,
+            user=user,
+            app_id=application_generate_entity.app_config.app_id,
+            triggered_from=WorkflowRunTriggeredFrom.RAG_PIPELINE_DEBUGGING,
+        )
+
+        workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository(
+            session_factory=session_factory,
+            user=user,
+            app_id=application_generate_entity.app_config.app_id,
+            triggered_from=WorkflowNodeExecutionTriggeredFrom.SINGLE_STEP,
+        )
+
+        return self._generate(
+            flask_app=current_app._get_current_object(),  # type: ignore
+            # `context` is a required keyword-only argument of `_generate`; copy it after the
+            # plugin tool provider context vars were reset above so the run sees the fresh values
+            context=contextvars.copy_context(),
+            pipeline=pipeline,
+            # `_generate` takes the workflow id and reloads the workflow itself
+            workflow_id=workflow.id,
+            user=user,
+            invoke_from=InvokeFrom.DEBUGGER,
+            application_generate_entity=application_generate_entity,
+            workflow_execution_repository=workflow_execution_repository,
+            workflow_node_execution_repository=workflow_node_execution_repository,
+            streaming=streaming,
+        )
+
+    def _generate_worker(
+        self,
+        flask_app: Flask,
+        application_generate_entity: RagPipelineGenerateEntity,
+        queue_manager: AppQueueManager,
+        context: contextvars.Context,
+        workflow_thread_pool_id: Optional[str] = None,
+    ) -> None:
+        """
+        Thread entry point: run the pipeline and report failures through the queue.
+
+        :param flask_app: Flask app whose contexts are restored in this thread
+        :param application_generate_entity: application generate entity
+        :param queue_manager: queue manager that receives published errors
+        :param context: context variables restored together with the Flask contexts
+        :param workflow_thread_pool_id: workflow thread pool id
+        :return:
+        """
+
+        with preserve_flask_contexts(flask_app, context_vars=context):
+            try:
+                PipelineRunner(
+                    application_generate_entity=application_generate_entity,
+                    queue_manager=queue_manager,
+                    workflow_thread_pool_id=workflow_thread_pool_id,
+                ).run()
+            except GenerateTaskStoppedError:
+                # a stopped task is not an error: nothing is published
+                pass
+            except InvokeAuthorizationError:
+                # publish a fresh error carrying a fixed message instead of the caught one
+                auth_error = InvokeAuthorizationError("Incorrect API key provided")
+                queue_manager.publish_error(auth_error, PublishFrom.APPLICATION_MANAGER)
+            except ValidationError as exc:
+                # handled before ValueError so validation failures are always logged
+                logger.exception("Validation Error when generating")
+                queue_manager.publish_error(exc, PublishFrom.APPLICATION_MANAGER)
+            except ValueError as exc:
+                # logged only when debugging, but always published
+                if dify_config.DEBUG:
+                    logger.exception("Error when generating")
+                queue_manager.publish_error(exc, PublishFrom.APPLICATION_MANAGER)
+            except Exception as exc:
+                logger.exception("Unknown Error when generating")
+                queue_manager.publish_error(exc, PublishFrom.APPLICATION_MANAGER)
+            finally:
+                # always close the session used on this thread
+                db.session.close()
+
+    def _handle_response(
+        self,
+        application_generate_entity: RagPipelineGenerateEntity,
+        workflow: Workflow,
+        queue_manager: AppQueueManager,
+        user: Union[Account, EndUser],
+        workflow_execution_repository: WorkflowExecutionRepository,
+        workflow_node_execution_repository: WorkflowNodeExecutionRepository,
+        stream: bool = False,
+    ) -> Union[WorkflowAppBlockingResponse, Generator[WorkflowAppStreamResponse, None, None]]:
+        """
+        Build the generate task pipeline and return what its ``process()`` yields.
+
+        :param application_generate_entity: application generate entity
+        :param workflow: workflow
+        :param queue_manager: queue manager
+        :param user: account or end user
+        :param workflow_execution_repository: repository for workflow execution
+        :param workflow_node_execution_repository: repository for workflow node execution
+        :param stream: is stream
+        :return: the value returned by the task pipeline's ``process()``
+        :raises GenerateTaskStoppedError: when ``process()`` fails with the
+            "I/O operation on closed file." ``ValueError``
+        :raises ValueError: any other ``ValueError`` from ``process()``, after logging it
+        """
+        task_pipeline = WorkflowAppGenerateTaskPipeline(
+            application_generate_entity=application_generate_entity,
+            workflow=workflow,
+            queue_manager=queue_manager,
+            user=user,
+            stream=stream,
+            workflow_node_execution_repository=workflow_node_execution_repository,
+            workflow_execution_repository=workflow_execution_repository,
+        )
+
+        try:
+            return task_pipeline.process()
+        except ValueError as exc:
+            # this one message is not treated as a failure: surface it as a stopped task
+            if exc.args and exc.args[0] == "I/O operation on closed file.":
+                raise GenerateTaskStoppedError()
+
+            logger.exception(
+                f"Fails to process generate task pipeline, task_id: {application_generate_entity.task_id}"
+            )
+            raise exc
+
+    def _build_document(
+        self,
+        tenant_id: str,
+        dataset_id: str,
+        built_in_field_enabled: bool,
+        datasource_type: str,
+        datasource_info: Mapping[str, Any],
+        created_from: str,
+        position: int,
+        account: Union[Account, EndUser],
+        batch: str,
+        document_form: str,
+    ) -> Document:
+        """
+        Build (without persisting) the ``Document`` for one datasource entry.
+
+        The document name is taken from ``datasource_info``:
+        ``["name"]`` for ``local_file``, ``["page"]["page_name"]`` for
+        ``online_document`` and ``["title"]`` for ``website_crawl``.
+
+        :param tenant_id: tenant id
+        :param dataset_id: dataset id
+        :param built_in_field_enabled: when true, built-in metadata fields are attached
+        :param datasource_type: one of ``local_file``, ``online_document``, ``website_crawl``
+        :param datasource_info: datasource payload; stored JSON-encoded on the document
+        :param created_from: stored as the document's ``created_from``
+        :param position: stored as the document's ``position``
+        :param account: creator; ``id`` is stored as ``created_by`` and ``name`` as the uploader
+        :param batch: batch identifier
+        :param document_form: stored as the document's ``doc_form``
+        :return: the new, not yet added ``Document``
+        :raises ValueError: if ``datasource_type`` is not supported
+        :raises KeyError: if ``datasource_info`` lacks the key holding the name
+        """
+        if datasource_type == "local_file":
+            name = datasource_info["name"]
+        elif datasource_type == "online_document":
+            name = datasource_info["page"]["page_name"]
+        elif datasource_type == "website_crawl":
+            name = datasource_info["title"]
+        else:
+            raise ValueError(f"Unsupported datasource type: {datasource_type}")
+
+        document = Document(
+            tenant_id=tenant_id,
+            dataset_id=dataset_id,
+            position=position,
+            data_source_type=datasource_type,
+            data_source_info=json.dumps(datasource_info),
+            batch=batch,
+            name=name,
+            created_from=created_from,
+            created_by=account.id,
+            doc_form=document_form,
+        )
+        if built_in_field_enabled:
+            # read the clock once so upload_date and last_update_date can never disagree
+            timestamp = datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%d %H:%M:%S")
+            document.doc_metadata = {
+                BuiltInField.document_name: name,
+                BuiltInField.uploader: account.name,
+                BuiltInField.upload_date: timestamp,
+                BuiltInField.last_update_date: timestamp,
+                BuiltInField.source: datasource_type,
+            }
+        return document
--- a/api/core/app/apps/pipeline/pipeline_queue_manager.py
+++ b/api/core/app/apps/pipeline/pipeline_queue_manager.py
@ -0,0 +1,44 @@
+from core.app.apps.base_app_queue_manager import AppQueueManager, GenerateTaskStoppedError, PublishFrom
+from core.app.entities.app_invoke_entities import InvokeFrom
+from core.app.entities.queue_entities import (
+    AppQueueEvent,
+    QueueErrorEvent,
+    QueueMessageEndEvent,
+    QueueStopEvent,
+    QueueWorkflowFailedEvent,
+    QueueWorkflowPartialSuccessEvent,
+    QueueWorkflowSucceededEvent,
+    WorkflowQueueMessage,
+)
+
+
+class PipelineQueueManager(AppQueueManager):
+    """Queue manager that wraps every pipeline event in a ``WorkflowQueueMessage``."""
+
+    def __init__(self, task_id: str, user_id: str, invoke_from: InvokeFrom, app_mode: str) -> None:
+        """
+        :param task_id: task id, forwarded to the base queue manager
+        :param user_id: user id, forwarded to the base queue manager
+        :param invoke_from: invoke from source, forwarded to the base queue manager
+        :param app_mode: app mode stamped on every published message
+        """
+        super().__init__(task_id, user_id, invoke_from)
+
+        self._app_mode = app_mode
+
+    def _publish(self, event: AppQueueEvent, pub_from: PublishFrom) -> None:
+        """
+        Enqueue the event and stop listening once a terminal event is seen.
+
+        :param event: event to publish
+        :param pub_from: who is publishing the event
+        :return:
+        :raises GenerateTaskStoppedError: when the application manager
+            publishes while the task is stopped
+        """
+        self._q.put(WorkflowQueueMessage(task_id=self._task_id, app_mode=self._app_mode, event=event))
+
+        # any of these events ends the listening loop
+        terminal_events = (
+            QueueStopEvent,
+            QueueErrorEvent,
+            QueueMessageEndEvent,
+            QueueWorkflowSucceededEvent,
+            QueueWorkflowFailedEvent,
+            QueueWorkflowPartialSuccessEvent,
+        )
+        if isinstance(event, terminal_events):
+            self.stop_listen()
+
+        from_app_manager = pub_from == PublishFrom.APPLICATION_MANAGER
+        if from_app_manager and self._is_stopped():
+            raise GenerateTaskStoppedError()
--- a/api/core/app/apps/pipeline/pipeline_runner.py
+++ b/api/core/app/apps/pipeline/pipeline_runner.py
@ -0,0 +1,221 @@
+import logging
+from collections.abc import Mapping
+from typing import Any, Optional, cast
+
+from configs import dify_config
+from core.app.apps.base_app_queue_manager import AppQueueManager
+from core.app.apps.pipeline.pipeline_config_manager import PipelineConfig
+from core.app.apps.workflow_app_runner import WorkflowBasedAppRunner
+from core.app.entities.app_invoke_entities import (
+    InvokeFrom,
+    RagPipelineGenerateEntity,
+)
+from core.variables.variables import RAGPipelineVariable, RAGPipelineVariableInput
+from core.workflow.callbacks import WorkflowCallback, WorkflowLoggingCallback
+from core.workflow.entities.variable_pool import VariablePool
+from core.workflow.enums import SystemVariableKey
+from core.workflow.graph_engine.entities.graph import Graph
+from core.workflow.workflow_entry import WorkflowEntry
+from extensions.ext_database import db
+from models.dataset import Pipeline
+from models.enums import UserFrom
+from models.model import EndUser
+from models.workflow import Workflow, WorkflowType
+
+logger = logging.getLogger(__name__)
+
+
+class PipelineRunner(WorkflowBasedAppRunner):
+    """
+    Pipeline Application Runner
+    """
+
+    def __init__(
+        self,
+        application_generate_entity: RagPipelineGenerateEntity,
+        queue_manager: AppQueueManager,
+        workflow_thread_pool_id: Optional[str] = None,
+    ) -> None:
+        """
+        Keep references to everything ``run`` needs.
+
+        NOTE(review): the base-class initializer is not invoked here - confirm that is intended.
+
+        :param application_generate_entity: entity describing the pipeline invocation
+        :param queue_manager: queue manager that receives the published events
+        :param workflow_thread_pool_id: optional id of the workflow thread pool
+        """
+        self.workflow_thread_pool_id = workflow_thread_pool_id
+        self.queue_manager = queue_manager
+        self.application_generate_entity = application_generate_entity
+
+    def _get_app_id(self) -> str:
+        """Return the app id stored in the generate entity's app config."""
+        app_config = self.application_generate_entity.app_config
+        return app_config.app_id
+
+    def run(self) -> None:
+        """
+        Run the pipeline workflow and pass every event it yields to ``_handle_event``.
+
+        Depending on the generate entity this runs a single iteration node, a single
+        loop node, or the pipeline graph starting from the selected datasource node.
+
+        :raises ValueError: if the pipeline or its workflow cannot be found
+        """
+        app_config = self.application_generate_entity.app_config
+        app_config = cast(PipelineConfig, app_config)
+
+        # Resolve the user id that is exposed as a system variable: web-app and
+        # service-API invocations use the end user's session id (left as None when
+        # no EndUser row matches); all other invocations use the entity's user id.
+        user_id = None
+        if self.application_generate_entity.invoke_from in {InvokeFrom.WEB_APP, InvokeFrom.SERVICE_API}:
+            end_user = db.session.query(EndUser).filter(EndUser.id == self.application_generate_entity.user_id).first()
+            if end_user:
+                user_id = end_user.session_id
+        else:
+            user_id = self.application_generate_entity.user_id
+
+        # The pipeline is looked up by the app id of the app config.
+        pipeline = db.session.query(Pipeline).filter(Pipeline.id == app_config.app_id).first()
+        if not pipeline:
+            raise ValueError("Pipeline not found")
+
+        workflow = self.get_workflow(pipeline=pipeline, workflow_id=app_config.workflow_id)
+        if not workflow:
+            raise ValueError("Workflow not initialized")
+
+        # All queries are done; the session is closed before the graph is built
+        # and the workflow is executed.
+        db.session.close()
+
+        # Workflow logging is only attached in debug mode.
+        workflow_callbacks: list[WorkflowCallback] = []
+        if dify_config.DEBUG:
+            workflow_callbacks.append(WorkflowLoggingCallback())
+
+        # Build the graph and variable pool for the requested kind of run.
+        if self.application_generate_entity.single_iteration_run:
+            # if only single iteration run is requested
+            graph, variable_pool = self._get_graph_and_variable_pool_of_single_iteration(
+                workflow=workflow,
+                node_id=self.application_generate_entity.single_iteration_run.node_id,
+                user_inputs=self.application_generate_entity.single_iteration_run.inputs,
+            )
+        elif self.application_generate_entity.single_loop_run:
+            # if only single loop run is requested
+            graph, variable_pool = self._get_graph_and_variable_pool_of_single_loop(
+                workflow=workflow,
+                node_id=self.application_generate_entity.single_loop_run.node_id,
+                user_inputs=self.application_generate_entity.single_loop_run.inputs,
+            )
+        else:
+            inputs = self.application_generate_entity.inputs
+            files = self.application_generate_entity.files
+
+            # Create a variable pool.
+            system_inputs = {
+                SystemVariableKey.FILES: files,
+                SystemVariableKey.USER_ID: user_id,
+                SystemVariableKey.APP_ID: app_config.app_id,
+                SystemVariableKey.WORKFLOW_ID: app_config.workflow_id,
+                SystemVariableKey.WORKFLOW_EXECUTION_ID: self.application_generate_entity.workflow_execution_id,
+                SystemVariableKey.DOCUMENT_ID: self.application_generate_entity.document_id,
+                SystemVariableKey.BATCH: self.application_generate_entity.batch,
+                SystemVariableKey.DATASET_ID: self.application_generate_entity.dataset_id,
+                SystemVariableKey.DATASOURCE_TYPE: self.application_generate_entity.datasource_type,
+                SystemVariableKey.DATASOURCE_INFO: self.application_generate_entity.datasource_info,
+                SystemVariableKey.INVOKE_FROM: self.application_generate_entity.invoke_from.value,
+            }
+            # Keep only the pipeline variables that belong to the start node or are
+            # marked "shared", and for which the caller supplied an input value.
+            rag_pipeline_variables = []
+            if workflow.rag_pipeline_variables:
+                for v in workflow.rag_pipeline_variables:
+                    rag_pipeline_variable = RAGPipelineVariable(**v)
+                    if (
+                        rag_pipeline_variable.belong_to_node_id
+                        in (self.application_generate_entity.start_node_id, "shared")
+                    ) and rag_pipeline_variable.variable in inputs:
+                        rag_pipeline_variables.append(
+                            RAGPipelineVariableInput(
+                                variable=rag_pipeline_variable,
+                                value=inputs[rag_pipeline_variable.variable],
+                            )
+                        )
+
+            variable_pool = VariablePool(
+                system_variables=system_inputs,
+                user_inputs=inputs,
+                environment_variables=workflow.environment_variables,
+                conversation_variables=[],
+                rag_pipeline_variables=rag_pipeline_variables,
+            )
+
+            # init graph
+            graph = self._init_rag_pipeline_graph(
+                graph_config=workflow.graph_dict,
+                start_node_id=self.application_generate_entity.start_node_id,
+            )
+
+        # RUN WORKFLOW
+        # Note that the entry receives the entity's raw user id, not the resolved
+        # ``user_id`` above (that one is only used as a system variable).
+        workflow_entry = WorkflowEntry(
+            tenant_id=workflow.tenant_id,
+            app_id=workflow.app_id,
+            workflow_id=workflow.id,
+            workflow_type=WorkflowType.value_of(workflow.type),
+            graph=graph,
+            graph_config=workflow.graph_dict,
+            user_id=self.application_generate_entity.user_id,
+            # Explore and debugger invocations are attributed to an account,
+            # everything else to an end user.
+            user_from=(
+                UserFrom.ACCOUNT
+                if self.application_generate_entity.invoke_from in {InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER}
+                else UserFrom.END_USER
+            ),
+            invoke_from=self.application_generate_entity.invoke_from,
+            call_depth=self.application_generate_entity.call_depth,
+            variable_pool=variable_pool,
+            thread_pool_id=self.workflow_thread_pool_id,
+        )
+
+        generator = workflow_entry.run(callbacks=workflow_callbacks)
+
+        # Hand each event produced by the run to the event handler.
+        for event in generator:
+            self._handle_event(workflow_entry, event)
+
+    def get_workflow(self, pipeline: Pipeline, workflow_id: str) -> Optional[Workflow]:
+        """
+        Look up the workflow with the given id that belongs to the pipeline.
+
+        :param pipeline: pipeline whose tenant id and id must match the workflow
+        :param workflow_id: id of the workflow to fetch
+        :return: the first matching workflow, or None when there is none
+        """
+        belongs_to_pipeline = (
+            Workflow.tenant_id == pipeline.tenant_id,
+            Workflow.app_id == pipeline.id,
+            Workflow.id == workflow_id,
+        )
+        return db.session.query(Workflow).filter(*belongs_to_pipeline).first()
+
+    def _init_rag_pipeline_graph(self, graph_config: Mapping[str, Any], start_node_id: Optional[str] = None) -> Graph:
+        """
+        Build the run graph, keeping only the datasource node the run starts from.
+
+        Every other node of type "datasource" is removed together with the edges
+        that originate from it; the passed-in config itself is not modified.
+
+        :param graph_config: workflow graph config with "nodes" and "edges" lists
+        :param start_node_id: id of the datasource node to keep
+        :return: the initialised graph
+        :raises ValueError: if the config is malformed or no graph could be built
+        """
+        has_both_sections = "nodes" in graph_config and "edges" in graph_config
+        if not has_both_sections:
+            raise ValueError("nodes or edges not found in workflow graph")
+
+        if not isinstance(graph_config.get("nodes"), list):
+            raise ValueError("nodes in workflow graph must be a list")
+
+        if not isinstance(graph_config.get("edges"), list):
+            raise ValueError("edges in workflow graph must be a list")
+
+        all_nodes = graph_config.get("nodes", [])
+        all_edges = graph_config.get("edges", [])
+
+        kept_nodes = []
+        dropped_node_ids = []
+        for candidate in all_nodes:
+            candidate_id = candidate.get("id")
+            is_datasource = candidate.get("data", {}).get("type", "") == "datasource"
+            if is_datasource and start_node_id != candidate_id:
+                # a datasource node other than the selected start node
+                dropped_node_ids.append(candidate_id)
+            else:
+                kept_nodes.append(candidate)
+
+        kept_edges = [link for link in all_edges if link.get("source") not in dropped_node_ids]
+
+        # Work on a shallow copy so the caller's mapping stays untouched.
+        pruned_config = {**graph_config, "nodes": kept_nodes, "edges": kept_edges}
+
+        # init graph
+        graph = Graph.init(graph_config=pruned_config)
+        if not graph:
+            raise ValueError("graph not found in workflow")
+
+        return graph
--- a/api/core/app/apps/workflow/app_generator.py
+++ b/api/core/app/apps/workflow/app_generator.py
@ -25,6 +25,7 @@ from core.model_runtime.errors.invoke import InvokeAuthorizationError
 from core.ops.ops_trace_manager import TraceQueueManager
 from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository
 from core.repositories.sqlalchemy_workflow_execution_repository import SQLAlchemyWorkflowExecutionRepository
+from core.workflow.repositories.draft_variable_repository import DraftVariableSaverFactory
 from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
 from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository
 from core.workflow.variable_loader import DUMMY_VARIABLE_LOADER, VariableLoader
@ -219,6 +220,9 @@ class WorkflowAppGenerator(BaseAppGenerator):
        # new thread with request context and contextvars
        context = contextvars.copy_context()

+        # release database connection, because the following new thread operations may take a long time
+        db.session.close()
+
        worker_thread = threading.Thread(
            target=self._generate_worker,
            kwargs={
@ -233,6 +237,10 @@ class WorkflowAppGenerator(BaseAppGenerator):

        worker_thread.start()

+        draft_var_saver_factory = self._get_draft_var_saver_factory(
+            invoke_from,
+        )
+
        # return response or stream generator
        response = self._handle_response(
            application_generate_entity=application_generate_entity,
@ -241,6 +249,7 @@ class WorkflowAppGenerator(BaseAppGenerator):
            user=user,
            workflow_execution_repository=workflow_execution_repository,
            workflow_node_execution_repository=workflow_node_execution_repository,
+            draft_var_saver_factory=draft_var_saver_factory,
            stream=streaming,
        )

@ -471,6 +480,7 @@ class WorkflowAppGenerator(BaseAppGenerator):
        user: Union[Account, EndUser],
        workflow_execution_repository: WorkflowExecutionRepository,
        workflow_node_execution_repository: WorkflowNodeExecutionRepository,
+        draft_var_saver_factory: DraftVariableSaverFactory,
        stream: bool = False,
    ) -> Union[WorkflowAppBlockingResponse, Generator[WorkflowAppStreamResponse, None, None]]:
        """
@ -491,6 +501,7 @@ class WorkflowAppGenerator(BaseAppGenerator):
            user=user,
            workflow_execution_repository=workflow_execution_repository,
            workflow_node_execution_repository=workflow_node_execution_repository,
+            draft_var_saver_factory=draft_var_saver_factory,
            stream=stream,
        )

--- a/api/core/app/apps/workflow/generate_task_pipeline.py
+++ b/api/core/app/apps/workflow/generate_task_pipeline.py
@ -56,6 +56,7 @@ from core.base.tts import AppGeneratorTTSPublisher, AudioTrunk
 from core.ops.ops_trace_manager import TraceQueueManager
 from core.workflow.entities.workflow_execution import WorkflowExecution, WorkflowExecutionStatus, WorkflowType
 from core.workflow.enums import SystemVariableKey
+from core.workflow.repositories.draft_variable_repository import DraftVariableSaverFactory
 from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
 from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository
 from core.workflow.workflow_cycle_manager import CycleManagerWorkflowInfo, WorkflowCycleManager
@ -87,6 +88,7 @@ class WorkflowAppGenerateTaskPipeline:
        stream: bool,
        workflow_execution_repository: WorkflowExecutionRepository,
        workflow_node_execution_repository: WorkflowNodeExecutionRepository,
+        draft_var_saver_factory: DraftVariableSaverFactory,
    ) -> None:
        self._base_task_pipeline = BasedGenerateTaskPipeline(
            application_generate_entity=application_generate_entity,
@ -131,6 +133,8 @@ class WorkflowAppGenerateTaskPipeline:
        self._application_generate_entity = application_generate_entity
        self._workflow_features_dict = workflow.features_dict
        self._workflow_run_id = ""
+        self._invoke_from = queue_manager._invoke_from
+        self._draft_var_saver_factory = draft_var_saver_factory

    def process(self) -> Union[WorkflowAppBlockingResponse, Generator[WorkflowAppStreamResponse, None, None]]:
        """
@ -322,6 +326,8 @@ class WorkflowAppGenerateTaskPipeline:
                    workflow_node_execution=workflow_node_execution,
                )

+                self._save_output_for_event(event, workflow_node_execution.id)
+
                if node_success_response:
                    yield node_success_response
            elif isinstance(
@ -339,6 +345,8 @@ class WorkflowAppGenerateTaskPipeline:
                    task_id=self._application_generate_entity.task_id,
                    workflow_node_execution=workflow_node_execution,
                )
+                if isinstance(event, QueueNodeExceptionEvent):
+                    self._save_output_for_event(event, workflow_node_execution.id)

                if node_failed_response:
                    yield node_failed_response
@ -593,3 +601,15 @@ class WorkflowAppGenerateTaskPipeline:
        )

        return response
+
+    def _save_output_for_event(self, event: QueueNodeSucceededEvent | QueueNodeExceptionEvent, node_execution_id: str):
+        """
+        Save a node's process data and outputs through the draft variable saver.
+
+        A saver is built by the injected factory inside its own session and
+        transaction.
+
+        :param event: node event carrying the process data and outputs
+        :param node_execution_id: id of the node execution the data belongs to
+        """
+        # The enclosing loop id is preferred; the iteration id is the fallback.
+        enclosing_node_id = event.in_loop_id or event.in_iteration_id
+        app_id = self._application_generate_entity.app_config.app_id
+
+        with Session(db.engine) as session:
+            with session.begin():
+                saver = self._draft_var_saver_factory(
+                    session=session,
+                    app_id=app_id,
+                    node_id=event.node_id,
+                    node_type=event.node_type,
+                    node_execution_id=node_execution_id,
+                    enclosing_node_id=enclosing_node_id,
+                )
+                saver.save(event.process_data, event.outputs)
--- a/api/core/app/apps/workflow_app_runner.py
+++ b/api/core/app/apps/workflow_app_runner.py
@ -1,8 +1,6 @@
 from collections.abc import Mapping
 from typing import Any, Optional, cast

-from sqlalchemy.orm import Session
-
 from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
 from core.app.apps.base_app_runner import AppRunner
 from core.app.entities.queue_entities import (
@ -35,7 +33,6 @@ from core.workflow.entities.variable_pool import VariablePool
 from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionMetadataKey
 from core.workflow.graph_engine.entities.event import (
    AgentLogEvent,
-    BaseNodeEvent,
    GraphEngineEvent,
    GraphRunFailedEvent,
    GraphRunPartialSucceededEvent,
@ -70,9 +67,6 @@ from core.workflow.workflow_entry import WorkflowEntry
 from extensions.ext_database import db
 from models.model import App
 from models.workflow import Workflow
-from services.workflow_draft_variable_service import (
-    DraftVariableSaver,
-)


 class WorkflowBasedAppRunner(AppRunner):
@ -400,7 +394,6 @@ class WorkflowBasedAppRunner(AppRunner):
                    in_loop_id=event.in_loop_id,
                )
            )
-            self._save_draft_var_for_event(event)

        elif isinstance(event, NodeRunFailedEvent):
            self._publish_event(
@ -464,7 +457,6 @@ class WorkflowBasedAppRunner(AppRunner):
                    in_loop_id=event.in_loop_id,
                )
            )
-            self._save_draft_var_for_event(event)

        elif isinstance(event, NodeInIterationFailedEvent):
            self._publish_event(
@ -718,30 +710,3 @@ class WorkflowBasedAppRunner(AppRunner):

    def _publish_event(self, event: AppQueueEvent) -> None:
        self.queue_manager.publish(event, PublishFrom.APPLICATION_MANAGER)
-
-    def _save_draft_var_for_event(self, event: BaseNodeEvent):
-        run_result = event.route_node_state.node_run_result
-        if run_result is None:
-            return
-        process_data = run_result.process_data
-        outputs = run_result.outputs
-        with Session(bind=db.engine) as session, session.begin():
-            draft_var_saver = DraftVariableSaver(
-                session=session,
-                app_id=self._get_app_id(),
-                node_id=event.node_id,
-                node_type=event.node_type,
-                # FIXME(QuantumGhost): rely on private state of queue_manager is not ideal.
-                invoke_from=self.queue_manager._invoke_from,
-                node_execution_id=event.id,
-                enclosing_node_id=event.in_loop_id or event.in_iteration_id or None,
-            )
-            draft_var_saver.save(process_data=process_data, outputs=outputs)
-
-
-def _remove_first_element_from_variable_string(key: str) -> str:
-    """
-    Remove the first element from the prefix.
-    """
-    prefix, remaining = key.split(".", maxsplit=1)
-    return remaining
--- a/api/core/app/entities/app_invoke_entities.py
+++ b/api/core/app/entities/app_invoke_entities.py
@ -36,6 +36,7 @@ class InvokeFrom(Enum):
    # DEBUGGER indicates that this invocation is from
    # the workflow (or chatflow) edit page.
    DEBUGGER = "debugger"
+    PUBLISHED = "published"

    @classmethod
    def value_of(cls, value: str):
@ -240,3 +241,38 @@ class WorkflowAppGenerateEntity(AppGenerateEntity):
        inputs: dict

    single_loop_run: Optional[SingleLoopRunEntity] = None
+
+
+class RagPipelineGenerateEntity(WorkflowAppGenerateEntity):
+    """
+    RAG Pipeline Application Generate Entity.
+
+    Extends the workflow generate entity with the datasource and dataset
+    information needed for a pipeline run.
+    """
+
+    # pipeline config
+    pipeline_config: WorkflowUIBasedAppConfig
+    # The fields below are passed to the run as system variables by PipelineRunner.run
+    # (DATASOURCE_TYPE, DATASOURCE_INFO, DATASET_ID, BATCH, DOCUMENT_ID).
+    datasource_type: str
+    datasource_info: Mapping[str, Any]
+    dataset_id: str
+    batch: str
+    document_id: Optional[str] = None
+    # id of the datasource node the run starts from; PipelineRunner drops the
+    # other datasource nodes from the graph
+    start_node_id: Optional[str] = None
+
+    # NOTE(review): the parent class also declares single_loop_run with its own
+    # SingleLoopRunEntity - confirm these nested redefinitions are needed.
+    class SingleIterationRunEntity(BaseModel):
+        """
+        Single Iteration Run Entity.
+        """
+
+        # id of the iteration node to run
+        node_id: str
+        # user inputs for that node
+        inputs: dict
+
+    # set when only a single iteration node should be run
+    single_iteration_run: Optional[SingleIterationRunEntity] = None
+
+    class SingleLoopRunEntity(BaseModel):
+        """
+        Single Loop Run Entity.
+        """
+
+        # id of the loop node to run
+        node_id: str
+        # user inputs for that node
+        inputs: dict
+
+    # set when only a single loop node should be run
+    single_loop_run: Optional[SingleLoopRunEntity] = None
--- a/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py
+++ b/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py
@ -395,6 +395,7 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline):
        message.provider_response_latency = time.perf_counter() - self._start_at
        message.total_price = usage.total_price
        message.currency = usage.currency
+        self._task_state.llm_result.usage.latency = message.provider_response_latency
        message.message_metadata = self._task_state.metadata.model_dump_json()

        if trace_manager:
--- a/api/core/callback_handler/agent_tool_callback_handler.py
+++ b/api/core/callback_handler/agent_tool_callback_handler.py
@ -105,6 +105,14 @@ class DifyAgentCallbackHandler(BaseModel):

        self.current_loop += 1

+    def on_datasource_start(self, datasource_name: str, datasource_inputs: Mapping[str, Any]) -> None:
+        """Print the datasource name and its inputs; does nothing unless debug mode is on."""
+        if not dify_config.DEBUG:
+            return
+
+        header = "\n[on_datasource_start] DatasourceCall:" + datasource_name + "\n"
+        print_text(header + str(datasource_inputs) + "\n", color=self.color)
+
    @property
    def ignore_agent(self) -> bool:
        """Whether to ignore agent callbacks."""
--- a/api/core/datasource/__base/datasource_plugin.py
+++ b/api/core/datasource/__base/datasource_plugin.py
@ -0,0 +1,33 @@
+from abc import ABC, abstractmethod
+
+from core.datasource.__base.datasource_runtime import DatasourceRuntime
+from core.datasource.entities.datasource_entities import (
+    DatasourceEntity,
+    DatasourceProviderType,
+)
+
+
+class DatasourcePlugin(ABC):
+    """Abstract base pairing a datasource entity with the runtime it is used with."""
+
+    entity: DatasourceEntity
+    runtime: DatasourceRuntime
+
+    def __init__(self, entity: DatasourceEntity, runtime: DatasourceRuntime) -> None:
+        self.entity, self.runtime = entity, runtime
+
+    @abstractmethod
+    def datasource_provider_type(self) -> str:
+        """
+        Return the type of the datasource provider.
+
+        Subclasses must override this; the base body returns the local-file type.
+        """
+        return DatasourceProviderType.LOCAL_FILE
+
+    def fork_datasource_runtime(self, runtime: DatasourceRuntime) -> "DatasourcePlugin":
+        """
+        Create a new plugin of the same class with a copy of the entity and the given runtime.
+
+        :param runtime: runtime for the forked plugin
+        :return: the forked plugin instance
+        """
+        forked_entity = self.entity.model_copy()
+        return self.__class__(entity=forked_entity, runtime=runtime)
--- a/api/core/datasource/__base/datasource_provider.py
+++ b/api/core/datasource/__base/datasource_provider.py
@ -0,0 +1,118 @@
+from abc import ABC, abstractmethod
+from typing import Any
+
+from core.datasource.__base.datasource_plugin import DatasourcePlugin
+from core.datasource.entities.datasource_entities import DatasourceProviderEntityWithPlugin, DatasourceProviderType
+from core.entities.provider_entities import ProviderConfig
+from core.plugin.impl.tool import PluginToolManager
+from core.tools.errors import ToolProviderCredentialValidationError
+
+
+class DatasourcePluginProviderController(ABC):
+    """
+    Base controller for a datasource plugin provider of one tenant.
+
+    Holds the provider entity and offers credential validation helpers;
+    subclasses supply ``get_datasource``.
+    """
+
+    entity: DatasourceProviderEntityWithPlugin
+    tenant_id: str
+
+    def __init__(self, entity: DatasourceProviderEntityWithPlugin, tenant_id: str) -> None:
+        self.entity = entity
+        self.tenant_id = tenant_id
+
+    @property
+    def need_credentials(self) -> bool:
+        """
+        returns whether the provider needs credentials
+
+        :return: True when the provider declares a non-empty credentials schema
+        """
+        return self.entity.credentials_schema is not None and len(self.entity.credentials_schema) != 0
+
+    def _validate_credentials(self, user_id: str, credentials: dict[str, Any]) -> None:
+        """
+        validate the credentials of the provider through the plugin manager
+
+        :param user_id: id of the user the validation is performed for
+        :param credentials: credentials to validate
+        :raises ToolProviderCredentialValidationError: if the manager rejects the credentials
+        """
+        manager = PluginToolManager()
+        if not manager.validate_datasource_credentials(
+            tenant_id=self.tenant_id,
+            user_id=user_id,
+            provider=self.entity.identity.name,
+            credentials=credentials,
+        ):
+            raise ToolProviderCredentialValidationError("Invalid credentials")
+
+    @property
+    def provider_type(self) -> DatasourceProviderType:
+        """
+        returns the type of the provider
+        """
+        return DatasourceProviderType.LOCAL_FILE
+
+    @abstractmethod
+    def get_datasource(self, datasource_name: str) -> DatasourcePlugin:
+        """
+        return datasource with given name
+        """
+        pass
+
+    def validate_credentials_format(self, credentials: dict[str, Any]) -> None:
+        """
+        validate the format of the credentials of the provider and set the default value if needed
+
+        ``credentials`` is updated in place: schema entries that were not validated
+        from the supplied values and that have a default get that default assigned.
+
+        :param credentials: the credentials of the tool
+        :raises ToolProviderCredentialValidationError: if a credential is unknown, has the wrong
+            type, is not one of the allowed options, or a required credential is missing
+        """
+        provider_schema = self.entity.credentials_schema
+        # A provider without a schema has nothing to validate against. The check
+        # must look at the entity's schema: testing a freshly created local dict
+        # is never true and would let the code below iterate over None.
+        if provider_schema is None:
+            return
+
+        # Schema entries still to be accounted for, keyed by credential name.
+        credentials_need_to_validate: dict[str, ProviderConfig] = {
+            credential.name: credential for credential in provider_schema
+        }
+
+        for credential_name in credentials:
+            if credential_name not in credentials_need_to_validate:
+                raise ToolProviderCredentialValidationError(
+                    f"credential {credential_name} not found in provider {self.entity.identity.name}"
+                )
+
+            # check type
+            credential_schema = credentials_need_to_validate[credential_name]
+            credential_value = credentials[credential_name]
+            # An optional credential explicitly set to None is skipped here and stays
+            # pending, so the default-filling pass below may still replace it.
+            if not credential_schema.required and credential_value is None:
+                continue
+
+            if credential_schema.type in {ProviderConfig.Type.SECRET_INPUT, ProviderConfig.Type.TEXT_INPUT}:
+                if not isinstance(credential_value, str):
+                    raise ToolProviderCredentialValidationError(f"credential {credential_name} should be string")
+
+            elif credential_schema.type == ProviderConfig.Type.SELECT:
+                if not isinstance(credential_value, str):
+                    raise ToolProviderCredentialValidationError(f"credential {credential_name} should be string")
+
+                options = credential_schema.options
+                if not isinstance(options, list):
+                    raise ToolProviderCredentialValidationError(f"credential {credential_name} options should be list")
+
+                if credential_value not in [x.value for x in options]:
+                    raise ToolProviderCredentialValidationError(
+                        f"credential {credential_name} should be one of {options}"
+                    )
+
+            credentials_need_to_validate.pop(credential_name)
+
+        # Whatever is left was not validated from the supplied values.
+        for credential_name, credential_schema in credentials_need_to_validate.items():
+            if credential_schema.required:
+                raise ToolProviderCredentialValidationError(f"credential {credential_name} is required")
+
+            # the credential is not set currently, set the default value if needed
+            if credential_schema.default is not None:
+                default_value = credential_schema.default
+                # parse default value into the correct type
+                if credential_schema.type in {
+                    ProviderConfig.Type.SECRET_INPUT,
+                    ProviderConfig.Type.TEXT_INPUT,
+                    ProviderConfig.Type.SELECT,
+                }:
+                    default_value = str(default_value)
+
+                credentials[credential_name] = default_value
--- a/api/core/datasource/__base/datasource_runtime.py
+++ b/api/core/datasource/__base/datasource_runtime.py
@ -0,0 +1,36 @@
+from typing import Any, Optional
+
+from openai import BaseModel
+from pydantic import Field
+
+from core.app.entities.app_invoke_entities import InvokeFrom
+from core.datasource.entities.datasource_entities import DatasourceInvokeFrom
+
+
+class DatasourceRuntime(BaseModel):
+    """
+    Meta data of a datasource call processing
+    """
+
+    # NOTE(review): BaseModel is imported from `openai` in this module rather than
+    # from `pydantic` - confirm that is intended.
+
+    # the only field without a default
+    tenant_id: str
+    datasource_id: Optional[str] = None
+    invoke_from: Optional[InvokeFrom] = None
+    datasource_invoke_from: Optional[DatasourceInvokeFrom] = None
+    # both mappings default to a fresh empty dict per instance
+    credentials: dict[str, Any] = Field(default_factory=dict)
+    runtime_parameters: dict[str, Any] = Field(default_factory=dict)
+
+
+class FakeDatasourceRuntime(DatasourceRuntime):
+    """
+    Datasource runtime pre-filled with placeholder values, for testing
+    """
+
+    def __init__(self):
+        placeholder_fields = {
+            "tenant_id": "fake_tenant_id",
+            "datasource_id": "fake_datasource_id",
+            "invoke_from": InvokeFrom.DEBUGGER,
+            "datasource_invoke_from": DatasourceInvokeFrom.RAG_PIPELINE,
+            "credentials": {},
+            "runtime_parameters": {},
+        }
+        super().__init__(**placeholder_fields)
--- a/api/core/datasource/init.py
+++ b/api/core/datasource/init.py
--- a/api/core/datasource/datasource_file_manager.py
+++ b/api/core/datasource/datasource_file_manager.py
@ -0,0 +1,244 @@
+import base64
+import hashlib
+import hmac
+import logging
+import os
+import time
+from mimetypes import guess_extension, guess_type
+from typing import Optional, Union
+from uuid import uuid4
+
+import httpx
+
+from configs import dify_config
+from core.helper import ssrf_proxy
+from extensions.ext_database import db
+from extensions.ext_storage import storage
+from models.enums import CreatorUserRole
+from models.model import MessageFile, UploadFile
+from models.tools import ToolFile
+
+logger = logging.getLogger(__name__)
+
+
+class DatasourceFileManager:
+    @staticmethod
+    def sign_file(datasource_file_id: str, extension: str) -> str:
+        """
+        Build a temporary, signed preview url for a datasource file.
+
+        The signature is an HMAC-SHA256 over the file id, the current unix
+        timestamp and a random nonce, keyed with ``dify_config.SECRET_KEY``
+        (an empty key is used when no secret is configured).
+
+        :param datasource_file_id: the id of the datasource file
+        :param extension: appended verbatim to the file id in the url path
+
+        :return: the preview url with ``timestamp``, ``nonce`` and ``sign`` query parameters
+        """
+        issued_at = str(int(time.time()))
+        salt = os.urandom(16).hex()
+
+        key = b""
+        if dify_config.SECRET_KEY:
+            key = dify_config.SECRET_KEY.encode()
+
+        # the extension is part of the url only; it is not covered by the signature
+        payload = f"file-preview|{datasource_file_id}|{issued_at}|{salt}".encode()
+        digest = hmac.new(key, payload, hashlib.sha256).digest()
+        signature = base64.urlsafe_b64encode(digest).decode()
+
+        preview_url = f"{dify_config.FILES_URL}/files/datasources/{datasource_file_id}{extension}"
+        return f"{preview_url}?timestamp={issued_at}&nonce={salt}&sign={signature}"
+
+    @staticmethod
+    def verify_file(datasource_file_id: str, timestamp: str, nonce: str, sign: str) -> bool:
+        """
+        Verify a signature produced by ``sign_file`` and check that it has not expired.
+
+        :param datasource_file_id: the id of the datasource file
+        :param timestamp: the unix timestamp (as a string) taken from the signed url
+        :param nonce: the nonce taken from the signed url
+        :param sign: the url-safe base64 signature taken from the signed url
+
+        :return: True when the signature matches and the timestamp is at most
+            ``dify_config.FILES_ACCESS_TIMEOUT`` seconds old, False otherwise
+        """
+        data_to_sign = f"file-preview|{datasource_file_id}|{timestamp}|{nonce}"
+        secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
+        recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
+        recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode()
+
+        # verify signature in constant time so that response timing does not reveal
+        # how much of a forged signature matched; compare bytes because
+        # compare_digest rejects non-ASCII str arguments
+        if not hmac.compare_digest(sign.encode(), recalculated_encoded_sign.encode()):
+            return False
+
+        # the timestamp is covered by the signature, so it is only parsed once the
+        # signature has been accepted
+        current_time = int(time.time())
+        return current_time - int(timestamp) <= dify_config.FILES_ACCESS_TIMEOUT
+
+    @staticmethod
+    def create_file_by_raw(
+        *,
+        user_id: str,
+        tenant_id: str,
+        conversation_id: Optional[str],
+        file_binary: bytes,
+        mimetype: str,
+        filename: Optional[str] = None,
+    ) -> UploadFile:
+        """
+        Store raw bytes and register them as an upload file.
+
+        :param user_id: recorded as the creator (with the ACCOUNT role)
+        :param tenant_id: the tenant id, also part of the storage key
+        :param conversation_id: accepted but not used by this method
+        :param file_binary: the content to store
+        :param mimetype: the mime type; also used to guess the extension (``.bin`` as fallback)
+        :param filename: optional display name; the guessed extension is appended
+            when the name contains no dot
+
+        :return: the committed and refreshed upload file record
+        """
+        extension = guess_extension(mimetype) or ".bin"
+        storage_name = f"{uuid4().hex}{extension}"
+
+        # the display name defaults to the generated storage name
+        if filename is None:
+            display_name = storage_name
+        elif "." in filename:
+            display_name = filename
+        else:
+            display_name = f"{filename}{extension}"
+
+        storage_key = f"datasources/{tenant_id}/{storage_name}"
+        storage.save(storage_key, file_binary)
+
+        record = UploadFile(
+            tenant_id=tenant_id,
+            storage_type=dify_config.STORAGE_TYPE,
+            key=storage_key,
+            name=display_name,
+            size=len(file_binary),
+            extension=extension,
+            mime_type=mimetype,
+            created_by_role=CreatorUserRole.ACCOUNT,
+            created_by=user_id,
+            used=False,
+            hash=hashlib.sha3_256(file_binary).hexdigest(),
+            source_url="",
+        )
+
+        db.session.add(record)
+        db.session.commit()
+        db.session.refresh(record)
+
+        return record
+
+    @staticmethod
+    def create_file_by_url(
+        user_id: str,
+        tenant_id: str,
+        file_url: str,
+        conversation_id: Optional[str] = None,
+    ) -> UploadFile:
+        """
+        Download a file through the SSRF proxy, store it and register it as an upload file.
+
+        :param user_id: recorded as the creator (with the ACCOUNT role)
+        :param tenant_id: the tenant id, also part of the storage key
+        :param file_url: the url to download; recorded as ``source_url``
+        :param conversation_id: accepted but not used by this method
+
+        :return: the committed upload file record
+        :raises ValueError: when the download times out (the original timeout
+            exception is attached as ``__cause__``)
+        """
+        # download the file; only timeouts are translated, other errors propagate as-is
+        try:
+            response = ssrf_proxy.get(file_url)
+            response.raise_for_status()
+            blob = response.content
+        except httpx.TimeoutException as e:
+            raise ValueError(f"timeout when downloading file from {file_url}") from e
+
+        # prefer the type implied by the url, then the response header, then a generic default
+        mimetype = (
+            guess_type(file_url)[0]
+            or response.headers.get("Content-Type", "").split(";")[0].strip()
+            or "application/octet-stream"
+        )
+        extension = guess_extension(mimetype) or ".bin"
+        unique_name = uuid4().hex
+        filename = f"{unique_name}{extension}"
+        # NOTE(review): this stores under "tools/" while create_file_by_raw stores under
+        # "datasources/" -- looks like a copy-paste leftover; confirm before changing
+        filepath = f"tools/{tenant_id}/{filename}"
+        storage.save(filepath, blob)
+
+        upload_file = UploadFile(
+            tenant_id=tenant_id,
+            storage_type=dify_config.STORAGE_TYPE,
+            key=filepath,
+            name=filename,
+            size=len(blob),
+            extension=extension,
+            mime_type=mimetype,
+            created_by_role=CreatorUserRole.ACCOUNT,
+            created_by=user_id,
+            used=False,
+            hash=hashlib.sha3_256(blob).hexdigest(),
+            source_url=file_url,
+        )
+
+        db.session.add(upload_file)
+        db.session.commit()
+
+        return upload_file
+
+    @staticmethod
+    def get_file_binary(id: str) -> Union[tuple[bytes, str], None]:
+        """
+        Load the whole content of an upload file.
+
+        :param id: the id of the upload file
+
+        :return: ``(binary, mime type)``, or None when no upload file has that id
+        """
+        record: UploadFile | None = db.session.query(UploadFile).filter(UploadFile.id == id).first()
+        if not record:
+            return None
+
+        return storage.load_once(record.key), record.mime_type
+
+    @staticmethod
+    def get_file_binary_by_message_file_id(id: str) -> Union[tuple[bytes, str], None]:
+        """
+        Load the content of the tool file referenced by a message file.
+
+        :param id: the id of the message file
+
+        :return: ``(binary, mime type)``, or None when the message file, its url
+            or the referenced tool file cannot be found
+        """
+        message_file: MessageFile | None = (
+            db.session.query(MessageFile)
+            .filter(
+                MessageFile.id == id,
+            )
+            .first()
+        )
+
+        # without a message file or a url no tool file id can be derived, so return
+        # right away instead of querying ToolFile with ``id == None``
+        if message_file is None or message_file.url is None:
+            return None
+
+        # the tool file id is the last path segment of the url with its extension trimmed
+        tool_file_id = message_file.url.split("/")[-1].split(".")[0]
+        if not tool_file_id:
+            return None
+
+        tool_file: ToolFile | None = (
+            db.session.query(ToolFile)
+            .filter(
+                ToolFile.id == tool_file_id,
+            )
+            .first()
+        )
+
+        if not tool_file:
+            return None
+
+        blob = storage.load_once(tool_file.file_key)
+
+        return blob, tool_file.mimetype
+
+    @staticmethod
+    def get_file_generator_by_upload_file_id(upload_file_id: str):
+        """
+        Open the content of an upload file as a stream.
+
+        :param upload_file_id: the id of the upload file
+
+        :return: ``(stream, mime type)``, or ``(None, None)`` when no upload file has that id
+        """
+        record: UploadFile | None = (
+            db.session.query(UploadFile).filter(UploadFile.id == upload_file_id).first()
+        )
+        if not record:
+            return None, None
+
+        return storage.load_stream(record.key), record.mime_type
+
+
+# init tool_file_parser
+# from core.file.datasource_file_parser import datasource_file_manager
+#
+# datasource_file_manager["manager"] = DatasourceFileManager
--- a/api/core/datasource/datasource_manager.py
+++ b/api/core/datasource/datasource_manager.py
@ -0,0 +1,100 @@
+import logging
+from threading import Lock
+from typing import Union
+
+import contexts
+from core.datasource.__base.datasource_plugin import DatasourcePlugin
+from core.datasource.__base.datasource_provider import DatasourcePluginProviderController
+from core.datasource.entities.common_entities import I18nObject
+from core.datasource.entities.datasource_entities import DatasourceProviderType
+from core.datasource.errors import DatasourceProviderNotFoundError
+from core.datasource.local_file.local_file_provider import LocalFileDatasourcePluginProviderController
+from core.datasource.online_document.online_document_provider import OnlineDocumentDatasourcePluginProviderController
+from core.datasource.website_crawl.website_crawl_provider import WebsiteCrawlDatasourcePluginProviderController
+from core.plugin.impl.datasource import PluginDatasourceManager
+
+logger = logging.getLogger(__name__)
+
+
+class DatasourceManager:
+    _builtin_provider_lock = Lock()
+    _hardcoded_providers: dict[str, DatasourcePluginProviderController] = {}
+    _builtin_providers_loaded = False
+    _builtin_tools_labels: dict[str, Union[I18nObject, None]] = {}
+
+    @classmethod
+    def get_datasource_plugin_provider(
+        cls, provider_id: str, tenant_id: str, datasource_type: DatasourceProviderType
+    ) -> DatasourcePluginProviderController:
+        """
+        get the datasource plugin provider, building and caching it on first use
+
+        Controllers are cached in ``contexts.datasource_plugin_providers`` keyed by
+        ``provider_id`` only, so on a cache hit ``datasource_type`` is not consulted.
+
+        :param provider_id: the id of the provider
+        :param tenant_id: the tenant id
+        :param datasource_type: selects which controller class is built
+
+        :return: the provider controller
+        :raises DatasourceProviderNotFoundError: when the plugin provider cannot be fetched
+        :raises ValueError: when the datasource type has no controller class here
+        """
+        # make sure the context-local cache and its lock exist
+        try:
+            contexts.datasource_plugin_providers.get()
+        except LookupError:
+            contexts.datasource_plugin_providers.set({})
+            contexts.datasource_plugin_providers_lock.set(Lock())
+
+        with contexts.datasource_plugin_providers_lock.get():
+            cached_controllers = contexts.datasource_plugin_providers.get()
+            if provider_id in cached_controllers:
+                return cached_controllers[provider_id]
+
+            provider_entity = PluginDatasourceManager().fetch_datasource_provider(tenant_id, provider_id)
+            if not provider_entity:
+                raise DatasourceProviderNotFoundError(f"plugin provider {provider_id} not found")
+
+            # every supported type is constructed the same way; only the class differs
+            if datasource_type == DatasourceProviderType.ONLINE_DOCUMENT:
+                controller_cls = OnlineDocumentDatasourcePluginProviderController
+            elif datasource_type == DatasourceProviderType.WEBSITE_CRAWL:
+                controller_cls = WebsiteCrawlDatasourcePluginProviderController
+            elif datasource_type == DatasourceProviderType.LOCAL_FILE:
+                controller_cls = LocalFileDatasourcePluginProviderController
+            else:
+                raise ValueError(f"Unsupported datasource type: {datasource_type}")
+
+            controller = controller_cls(
+                entity=provider_entity.declaration,
+                plugin_id=provider_entity.plugin_id,
+                plugin_unique_identifier=provider_entity.plugin_unique_identifier,
+                tenant_id=tenant_id,
+            )
+            cached_controllers[provider_id] = controller
+
+        return controller
+
+    @classmethod
+    def get_datasource_runtime(
+        cls,
+        provider_id: str,
+        datasource_name: str,
+        tenant_id: str,
+        datasource_type: DatasourceProviderType,
+    ) -> DatasourcePlugin:
+        """
+        get the datasource runtime
+
+        :param provider_id: the id of the provider
+        :param datasource_name: the name of the datasource
+        :param tenant_id: the tenant id
+        :param datasource_type: the type of the datasource provider
+
+        :return: the datasource plugin
+        """
+        provider_controller = cls.get_datasource_plugin_provider(provider_id, tenant_id, datasource_type)
+        return provider_controller.get_datasource(datasource_name)
--- a/api/core/datasource/entities/api_entities.py
+++ b/api/core/datasource/entities/api_entities.py
@ -0,0 +1,71 @@
+from typing import Literal, Optional
+
+from pydantic import BaseModel, Field, field_validator
+
+from core.datasource.entities.datasource_entities import DatasourceParameter
+from core.model_runtime.utils.encoders import jsonable_encoder
+from core.tools.entities.common_entities import I18nObject
+
+
+class DatasourceApiEntity(BaseModel):
+    """
+    API representation of a single datasource.
+    """
+
+    author: str
+    name: str  # identifier
+    label: I18nObject  # label
+    description: I18nObject
+    # optional; None when no parameters are given
+    parameters: Optional[list[DatasourceParameter]] = None
+    labels: list[str] = Field(default_factory=list)
+    output_schema: Optional[dict] = None
+
+
+ToolProviderTypeApiLiteral = Optional[Literal["builtin", "api", "workflow"]]
+
+
+class DatasourceProviderApiEntity(BaseModel):
+    """
+    API representation of a datasource provider together with its datasources.
+    """
+
+    id: str
+    author: str
+    name: str  # identifier
+    description: I18nObject
+    icon: str | dict
+    label: I18nObject  # label
+    type: str
+    masked_credentials: Optional[dict] = None
+    original_credentials: Optional[dict] = None
+    is_team_authorization: bool = False
+    allow_delete: bool = True
+    plugin_id: Optional[str] = Field(default="", description="The plugin id of the datasource")
+    plugin_unique_identifier: Optional[str] = Field(default="", description="The unique identifier of the datasource")
+    datasources: list[DatasourceApiEntity] = Field(default_factory=list)
+    labels: list[str] = Field(default_factory=list)
+
+    @field_validator("datasources", mode="before")
+    @classmethod
+    def convert_none_to_empty_list(cls, v):
+        """Treat a ``None`` datasource list as an empty list."""
+        return v if v is not None else []
+
+    def to_dict(self) -> dict:
+        """
+        Serialize the provider to a plain dict.
+
+        ``masked_credentials`` is emitted under the ``team_credentials`` key and
+        ``original_credentials`` is not part of the output.
+
+        :return: the provider as a dict, with datasources JSON-encoded
+        """
+        # -------------
+        # overwrite datasource parameter types for temp fix
+        datasources = jsonable_encoder(self.datasources)
+        for datasource in datasources:
+            for parameter in datasource.get("parameters") or []:
+                if parameter.get("type") == DatasourceParameter.DatasourceParameterType.SYSTEM_FILES.value:
+                    parameter["type"] = "files"
+        # -------------
+
+        return {
+            "id": self.id,
+            "author": self.author,
+            "name": self.name,
+            "plugin_id": self.plugin_id,
+            "plugin_unique_identifier": self.plugin_unique_identifier,
+            "description": self.description.to_dict(),
+            "icon": self.icon,
+            "label": self.label.to_dict(),
+            # ``type`` is declared as ``str``, which has no ``.value``; an enum member
+            # stored without validation is still unwrapped to its value
+            "type": getattr(self.type, "value", self.type),
+            "team_credentials": self.masked_credentials,
+            "is_team_authorization": self.is_team_authorization,
+            "allow_delete": self.allow_delete,
+            "datasources": datasources,
+            "labels": self.labels,
+        }
--- a/api/core/datasource/entities/common_entities.py
+++ b/api/core/datasource/entities/common_entities.py
@ -0,0 +1,23 @@
+from typing import Optional
+
+from pydantic import BaseModel, Field
+
+
+class I18nObject(BaseModel):
+    """
+    Model class for i18n object.
+
+    ``en_US`` is required; any other locale that is missing or empty falls back
+    to the ``en_US`` text.
+    """
+
+    en_US: str
+    zh_Hans: Optional[str] = Field(default=None)
+    pt_BR: Optional[str] = Field(default=None)
+    ja_JP: Optional[str] = Field(default=None)
+
+    def __init__(self, **data):
+        super().__init__(**data)
+        # fill every optional locale that ended up falsy with the English text
+        for locale in ("zh_Hans", "pt_BR", "ja_JP"):
+            setattr(self, locale, getattr(self, locale) or self.en_US)
+
+    def to_dict(self) -> dict:
+        """Return all four locale texts keyed by locale name."""
+        return {locale: getattr(self, locale) for locale in ("zh_Hans", "en_US", "pt_BR", "ja_JP")}
--- a/api/core/datasource/entities/datasource_entities.py
+++ b/api/core/datasource/entities/datasource_entities.py
@ -0,0 +1,361 @@
+import enum
+from enum import Enum
+from typing import Any, Optional
+
+from pydantic import BaseModel, Field, ValidationInfo, field_validator
+
+from core.entities.provider_entities import ProviderConfig
+from core.plugin.entities.oauth import OAuthSchema
+from core.plugin.entities.parameters import (
+    PluginParameter,
+    PluginParameterOption,
+    PluginParameterType,
+    as_normal_type,
+    cast_parameter_value,
+    init_frontend_parameter,
+)
+from core.tools.entities.common_entities import I18nObject
+from core.tools.entities.tool_entities import ToolInvokeMessage, ToolLabelEnum
+
+
+class DatasourceProviderType(enum.StrEnum):
+    """
+    Enum class for datasource provider
+    """
+
+    ONLINE_DOCUMENT = "online_document"
+    LOCAL_FILE = "local_file"
+    WEBSITE_CRAWL = "website_crawl"
+    ONLINE_DRIVE = "online_drive"
+
+    @classmethod
+    def value_of(cls, value: str) -> "DatasourceProviderType":
+        """
+        Look up the member whose value equals the given string.
+
+        :param value: mode value
+        :return: the matching member
+        :raises ValueError: when no member has that value
+        """
+        matched = next((member for member in cls if member.value == value), None)
+        if matched is None:
+            raise ValueError(f"invalid mode value {value}")
+        return matched
+
+
+class DatasourceParameter(PluginParameter):
+    """
+    Plugin parameter whose ``type`` is narrowed to ``DatasourceParameterType``
+    and whose ``description`` is required.
+    """
+
+    class DatasourceParameterType(enum.StrEnum):
+        """
+        removes TOOLS_SELECTOR from PluginParameterType
+        """
+
+        STRING = PluginParameterType.STRING.value
+        NUMBER = PluginParameterType.NUMBER.value
+        BOOLEAN = PluginParameterType.BOOLEAN.value
+        SELECT = PluginParameterType.SELECT.value
+        SECRET_INPUT = PluginParameterType.SECRET_INPUT.value
+        FILE = PluginParameterType.FILE.value
+        FILES = PluginParameterType.FILES.value
+
+        # deprecated, should not use.
+        SYSTEM_FILES = PluginParameterType.SYSTEM_FILES.value
+
+        def as_normal_type(self):
+            # delegates to the module-level helper of the same name
+            return as_normal_type(self)
+
+        def cast_value(self, value: Any):
+            # delegates to the shared plugin-parameter cast helper
+            return cast_parameter_value(self, value)
+
+    type: DatasourceParameterType = Field(..., description="The type of the parameter")
+    description: I18nObject = Field(..., description="The description of the parameter")
+
+    @classmethod
+    def get_simple_instance(
+        cls,
+        name: str,
+        typ: DatasourceParameterType,
+        required: bool,
+        options: Optional[list[str]] = None,
+    ) -> "DatasourceParameter":
+        """
+        get a simple datasource parameter with empty label and description
+
+        :param name: the name of the parameter
+        :param typ: the type of the parameter
+        :param required: if the parameter is required
+        :param options: the options of the parameter; each string is used as both
+            the option value and its label
+        """
+        # convert options to PluginParameterOption
+        # FIXME fix the type error
+        option_objs = [
+            PluginParameterOption(value=choice, label=I18nObject(en_US=choice, zh_Hans=choice))
+            for choice in (options or [])
+        ]
+
+        return cls(
+            name=name,
+            label=I18nObject(en_US="", zh_Hans=""),
+            placeholder=None,
+            type=typ,
+            required=required,
+            options=option_objs,
+            description=I18nObject(en_US="", zh_Hans=""),
+        )
+
+    def init_frontend_parameter(self, value: Any):
+        # delegates to the module-level helper, passing this parameter's own type
+        return init_frontend_parameter(self, self.type, value)
+
+
+class DatasourceIdentity(BaseModel):
+    """
+    Identity of a single datasource: author, name, label and owning provider.
+    """
+
+    author: str = Field(..., description="The author of the datasource")
+    name: str = Field(..., description="The name of the datasource")
+    label: I18nObject = Field(..., description="The label of the datasource")
+    provider: str = Field(..., description="The provider of the datasource")
+    # optional; None when no icon is given
+    icon: Optional[str] = None
+
+
+class DatasourceEntity(BaseModel):
+    """
+    Declaration of a single datasource: its identity, parameters and description.
+    """
+
+    identity: DatasourceIdentity
+    parameters: list[DatasourceParameter] = Field(default_factory=list)
+    description: I18nObject = Field(..., description="The label of the datasource")
+
+    @field_validator("parameters", mode="before")
+    @classmethod
+    def set_parameters(cls, v, validation_info: ValidationInfo) -> list[DatasourceParameter]:
+        # runs before field validation: any falsy input (e.g. None) becomes an
+        # empty list; validation_info is not used
+        return v or []
+
+
+class DatasourceProviderIdentity(BaseModel):
+    """
+    Identity of a datasource provider.
+    """
+
+    # NOTE(review): the field descriptions below say "tool" although this model
+    # describes a datasource provider -- presumably carried over from the tool entities
+    author: str = Field(..., description="The author of the tool")
+    name: str = Field(..., description="The name of the tool")
+    description: I18nObject = Field(..., description="The description of the tool")
+    icon: str = Field(..., description="The icon of the tool")
+    label: I18nObject = Field(..., description="The label of the tool")
+    tags: Optional[list[ToolLabelEnum]] = Field(
+        default=[],
+        description="The tags of the tool",
+    )
+
+
+class DatasourceProviderEntity(BaseModel):
+    """
+    Datasource provider entity: identity, credential/OAuth schemas and provider type.
+    """
+
+    identity: DatasourceProviderIdentity
+    # empty list when the provider declares no credentials
+    credentials_schema: list[ProviderConfig] = Field(default_factory=list)
+    # optional; None when no OAuth schema is given
+    oauth_schema: Optional[OAuthSchema] = None
+    provider_type: DatasourceProviderType
+
+
+class DatasourceProviderEntityWithPlugin(DatasourceProviderEntity):
+    """
+    Datasource provider entity extended with the list of datasources it declares.
+    """
+
+    datasources: list[DatasourceEntity] = Field(default_factory=list)
+
+
+class DatasourceInvokeMeta(BaseModel):
+    """
+    Datasource invoke meta: time cost, optional error text and optional tool config.
+    """
+
+    time_cost: float = Field(..., description="The time cost of the tool invoke")
+    error: Optional[str] = None
+    tool_config: Optional[dict] = None
+
+    @classmethod
+    def empty(cls) -> "DatasourceInvokeMeta":
+        """
+        Build an instance with zero time cost, no error and an empty tool config
+        """
+        blank_fields = {"time_cost": 0.0, "error": None, "tool_config": {}}
+        return cls(**blank_fields)
+
+    @classmethod
+    def error_instance(cls, error: str) -> "DatasourceInvokeMeta":
+        """
+        Build an instance with zero time cost and an empty tool config that carries the given error
+        """
+        error_fields = {"time_cost": 0.0, "error": error, "tool_config": {}}
+        return cls(**error_fields)
+
+    def to_dict(self) -> dict:
+        """Return the three fields as a plain dict."""
+        return {field_name: getattr(self, field_name) for field_name in ("time_cost", "error", "tool_config")}
+
+
+class DatasourceLabel(BaseModel):
+    """
+    Datasource label: a name with its localized label and icon.
+    """
+
+    # all three fields are required
+    name: str = Field(..., description="The name of the tool")
+    label: I18nObject = Field(..., description="The label of the tool")
+    icon: str = Field(..., description="The icon of the tool")
+
+
+class DatasourceInvokeFrom(Enum):
+    """
+    Enum class for datasource invoke
+
+    A plain ``Enum`` (not ``StrEnum``), so members do not compare equal to their
+    string values.
+    """
+
+    RAG_PIPELINE = "rag_pipeline"
+
+
+class OnlineDocumentPage(BaseModel):
+    """
+    A single page of an online document.
+
+    ``page_icon`` and ``parent_id`` are optional; every other field is required.
+    """
+
+    page_id: str = Field(..., description="The page id")
+    page_name: str = Field(..., description="The page title")
+    page_icon: Optional[dict] = Field(None, description="The page icon")
+    type: str = Field(..., description="The type of the page")
+    # kept as a string; no date parsing happens in this model
+    last_edited_time: str = Field(..., description="The last edited time")
+    parent_id: Optional[str] = Field(None, description="The parent page id")
+
+
+class OnlineDocumentInfo(BaseModel):
+    """
+    Online document info: one workspace together with its pages.
+    """
+
+    # all fields are required
+    workspace_id: str = Field(..., description="The workspace id")
+    workspace_name: str = Field(..., description="The workspace name")
+    workspace_icon: str = Field(..., description="The workspace icon")
+    total: int = Field(..., description="The total number of documents")
+    pages: list[OnlineDocumentPage] = Field(..., description="The pages of the online document")
+
+
+class OnlineDocumentPagesMessage(BaseModel):
+    """
+    Get online document pages response
+    """
+
+    # required; one entry per workspace
+    result: list[OnlineDocumentInfo]
+
+
+class GetOnlineDocumentPageContentRequest(BaseModel):
+    """
+    Get online document page content request
+    """
+
+    # all three fields are required
+    workspace_id: str = Field(..., description="The workspace id")
+    page_id: str = Field(..., description="The page id")
+    type: str = Field(..., description="The type of the page")
+
+
+class OnlineDocumentPageContent(BaseModel):
+    """
+    Online document page content, identified by workspace id and page id.
+    """
+
+    # all three fields are required
+    workspace_id: str = Field(..., description="The workspace id")
+    page_id: str = Field(..., description="The page id")
+    content: str = Field(..., description="The content of the page")
+
+
+class GetOnlineDocumentPageContentResponse(BaseModel):
+    """
+    Get online document page content response
+    """
+
+    # required; the content of a single page
+    result: OnlineDocumentPageContent
+
+
+class GetWebsiteCrawlRequest(BaseModel):
+    """
+    Get website crawl request
+    """
+
+    # required; a free-form dict whose keys are not constrained by this model
+    crawl_parameters: dict = Field(..., description="The crawl parameters")
+
+
+class WebSiteInfoDetail(BaseModel):
+    """
+    Details of one website entry: url, content, title and description.
+    """
+
+    # all four fields are required
+    source_url: str = Field(..., description="The url of the website")
+    content: str = Field(..., description="The content of the website")
+    title: str = Field(..., description="The title of the website")
+    description: str = Field(..., description="The description of the website")
+
+
+class WebSiteInfo(BaseModel):
+    """
+    Website info: crawl job status, the website entries and progress counters.
+    """
+
+    # required (no default), but None is an accepted value
+    status: Optional[str] = Field(..., description="crawl job status")
+    web_info_list: Optional[list[WebSiteInfoDetail]] = []
+    total: Optional[int] = Field(default=0, description="The total number of websites")
+    completed: Optional[int] = Field(default=0, description="The number of completed websites")
+
+
+class WebsiteCrawlMessage(BaseModel):
+    """
+    Get website crawl response
+    """
+
+    # defaults to an empty result: empty status, no websites, zero counters
+    result: WebSiteInfo = WebSiteInfo(status="", web_info_list=[], total=0, completed=0)
+
+
+class DatasourceMessage(ToolInvokeMessage):
+    """
+    Datasource message; adds nothing to ``ToolInvokeMessage``.
+    """
+
+    pass
+
+
+#########################
+# Online drive file
+#########################
+
+
+class OnlineDriveFile(BaseModel):
+    """
+    Online drive file
+    """
+
+    # both fields are required
+    key: str = Field(..., description="The key of the file")
+    size: int = Field(..., description="The size of the file")
+
+
+class OnlineDriveFileBucket(BaseModel):
+    """
+    Online drive file bucket
+    """
+
+    # optional; None when no bucket name is given
+    bucket: Optional[str] = Field(None, description="The bucket of the file")
+    files: list[OnlineDriveFile] = Field(..., description="The files of the bucket")
+    # defaults to False
+    is_truncated: bool = Field(False, description="Whether the bucket has more files")
+
+
+class OnlineDriveBrowseFilesRequest(BaseModel):
+    """
+    Get online drive file list request
+
+    Every field is optional; ``max_keys`` defaults to 20.
+    """
+
+    prefix: Optional[str] = Field(None, description="File path prefix for filtering eg: 'docs/dify/'")
+    bucket: Optional[str] = Field(None, description="Storage bucket name")
+    max_keys: int = Field(20, description="Maximum number of files to return")
+    start_after: Optional[str] = Field(
+        None, description="Pagination token for continuing from a specific file eg: 'docs/dify/1.txt'"
+    )
+
+
+class OnlineDriveBrowseFilesResponse(BaseModel):
+    """
+    Get online drive file list response
+    """
+
+    # required; one entry per bucket
+    result: list[OnlineDriveFileBucket] = Field(..., description="The bucket of the files")
+
+
+class OnlineDriveDownloadFileRequest(BaseModel):
+    """
+    Get online drive file
+    """
+
+    # key is required; bucket is optional and defaults to None
+    key: str = Field(..., description="The name of the file")
+    bucket: Optional[str] = Field(None, description="The name of the bucket")
--- a/api/core/datasource/errors.py
+++ b/api/core/datasource/errors.py
@ -0,0 +1,37 @@
+from core.datasource.entities.datasource_entities import DatasourceInvokeMeta
+
+
+class DatasourceProviderNotFoundError(ValueError):
+    """``ValueError`` subclass for a datasource provider that cannot be found."""
+
+    pass
+
+
+class DatasourceNotFoundError(ValueError):
+    """``ValueError`` subclass; presumably for a datasource that cannot be found (raise sites not visible here)."""
+
+    pass
+
+
+class DatasourceParameterValidationError(ValueError):
+    """``ValueError`` subclass; presumably for invalid datasource parameters (raise sites not visible here)."""
+
+    pass
+
+
+class DatasourceProviderCredentialValidationError(ValueError):
+    """``ValueError`` subclass; presumably for invalid provider credentials (raise sites not visible here)."""
+
+    pass
+
+
+class DatasourceNotSupportedError(ValueError):
+    """``ValueError`` subclass; presumably for an unsupported datasource (raise sites not visible here)."""
+
+    pass
+
+
+class DatasourceInvokeError(ValueError):
+    """``ValueError`` subclass; presumably for a failed datasource invocation (raise sites not visible here)."""
+
+    pass
+
+
+class DatasourceApiSchemaError(ValueError):
+    """``ValueError`` subclass; presumably for an invalid API schema (raise sites not visible here)."""
+
+    pass
+
+
+class DatasourceEngineInvokeError(Exception):
+    """
+    Exception that carries the invoke meta of a datasource call.
+
+    The meta's ``error`` text, when present, becomes the exception message so
+    that ``str(exc)`` and tracebacks are not empty.
+    """
+
+    meta: DatasourceInvokeMeta
+
+    def __init__(self, meta, **kwargs):
+        """
+        :param meta: the invoke meta, stored on ``self.meta``
+        :param kwargs: forwarded to ``Exception.__init__`` unchanged (which
+            rejects keyword arguments, so passing any raises ``TypeError``)
+        """
+        self.meta = meta
+        message = getattr(meta, "error", None)
+        if message:
+            super().__init__(message, **kwargs)
+        else:
+            super().__init__(**kwargs)
--- a/api/core/datasource/local_file/local_file_plugin.py
+++ b/api/core/datasource/local_file/local_file_plugin.py
@ -0,0 +1,28 @@
+from core.datasource.__base.datasource_plugin import DatasourcePlugin
+from core.datasource.__base.datasource_runtime import DatasourceRuntime
+from core.datasource.entities.datasource_entities import (
+    DatasourceEntity,
+    DatasourceProviderType,
+)
+
+
+class LocalFileDatasourcePlugin(DatasourcePlugin):
+    """
+    Datasource plugin for the local file provider type.
+    """
+
+    tenant_id: str
+    icon: str
+    plugin_unique_identifier: str
+
+    def __init__(
+        self,
+        entity: DatasourceEntity,
+        runtime: DatasourceRuntime,
+        tenant_id: str,
+        icon: str,
+        plugin_unique_identifier: str,
+    ) -> None:
+        # entity and runtime are handled by the base class; the rest is stored here
+        super().__init__(entity, runtime)
+        self.tenant_id = tenant_id
+        self.icon = icon
+        self.plugin_unique_identifier = plugin_unique_identifier
+
+    def datasource_provider_type(self) -> str:
+        # DatasourceProviderType is a StrEnum, so the member satisfies the str annotation
+        return DatasourceProviderType.LOCAL_FILE
--- a/api/core/datasource/local_file/local_file_provider.py
+++ b/api/core/datasource/local_file/local_file_provider.py
@ -0,0 +1,56 @@
+from typing import Any
+
+from core.datasource.__base.datasource_provider import DatasourcePluginProviderController
+from core.datasource.__base.datasource_runtime import DatasourceRuntime
+from core.datasource.entities.datasource_entities import DatasourceProviderEntityWithPlugin, DatasourceProviderType
+from core.datasource.local_file.local_file_plugin import LocalFileDatasourcePlugin
+
+
+class LocalFileDatasourcePluginProviderController(DatasourcePluginProviderController):
+    """
+    Provider controller for local file datasources.
+    """
+
+    entity: DatasourceProviderEntityWithPlugin
+    plugin_id: str
+    plugin_unique_identifier: str
+
+    def __init__(
+        self, entity: DatasourceProviderEntityWithPlugin, plugin_id: str, plugin_unique_identifier: str, tenant_id: str
+    ) -> None:
+        super().__init__(entity, tenant_id)
+        self.plugin_id = plugin_id
+        self.plugin_unique_identifier = plugin_unique_identifier
+
+    @property
+    def provider_type(self) -> DatasourceProviderType:
+        """
+        the type of this provider, always LOCAL_FILE
+        """
+        return DatasourceProviderType.LOCAL_FILE
+
+    def _validate_credentials(self, user_id: str, credentials: dict[str, Any]) -> None:
+        """
+        validate the credentials of the provider (no-op for this provider)
+        """
+        pass
+
+    def get_datasource(self, datasource_name: str) -> LocalFileDatasourcePlugin:  # type: ignore
+        """
+        return the datasource with the given name, bound to a fresh runtime for this tenant
+
+        :raises ValueError: when the provider declares no datasource with that name
+        """
+        # first declared datasource whose identity name matches
+        matched_entity = None
+        for candidate in self.entity.datasources:
+            if candidate.identity.name == datasource_name:
+                matched_entity = candidate
+                break
+
+        if not matched_entity:
+            raise ValueError(f"Datasource with name {datasource_name} not found")
+
+        tenant_runtime = DatasourceRuntime(tenant_id=self.tenant_id)
+        return LocalFileDatasourcePlugin(
+            entity=matched_entity,
+            runtime=tenant_runtime,
+            tenant_id=self.tenant_id,
+            icon=self.entity.identity.icon,
+            plugin_unique_identifier=self.plugin_unique_identifier,
+        )
--- a/api/core/datasource/online_document/online_document_plugin.py
+++ b/api/core/datasource/online_document/online_document_plugin.py
@ -0,0 +1,73 @@
+from collections.abc import Generator, Mapping
+from typing import Any
+
+from core.datasource.__base.datasource_plugin import DatasourcePlugin
+from core.datasource.__base.datasource_runtime import DatasourceRuntime
+from core.datasource.entities.datasource_entities import (
+    DatasourceEntity,
+    DatasourceMessage,
+    DatasourceProviderType,
+    GetOnlineDocumentPageContentRequest,
+    OnlineDocumentPagesMessage,
+)
+from core.plugin.impl.datasource import PluginDatasourceManager
+
+
+class OnlineDocumentDatasourcePlugin(DatasourcePlugin):
+    """
+    Datasource plugin of type ONLINE_DOCUMENT. Each call is forwarded to a new
+    PluginDatasourceManager together with this plugin's tenant id, entity
+    identity and runtime credentials.
+    """
+
+    tenant_id: str
+    icon: str
+    plugin_unique_identifier: str
+    entity: DatasourceEntity
+    runtime: DatasourceRuntime
+
+    def __init__(
+        self,
+        entity: DatasourceEntity,
+        runtime: DatasourceRuntime,
+        tenant_id: str,
+        icon: str,
+        plugin_unique_identifier: str,
+    ) -> None:
+        # entity and runtime are handled by the base plugin
+        super().__init__(entity, runtime)
+        self.tenant_id = tenant_id
+        self.icon = icon
+        self.plugin_unique_identifier = plugin_unique_identifier
+
+    def get_online_document_pages(
+        self,
+        user_id: str,
+        datasource_parameters: Mapping[str, Any],
+        provider_type: str,
+    ) -> Generator[OnlineDocumentPagesMessage, None, None]:
+        """
+        forward a page listing request to the plugin datasource manager
+        """
+        plugin_manager = PluginDatasourceManager()
+        identity = self.entity.identity
+        runtime_credentials = self.runtime.credentials
+
+        return plugin_manager.get_online_document_pages(
+            tenant_id=self.tenant_id,
+            user_id=user_id,
+            datasource_provider=identity.provider,
+            datasource_name=identity.name,
+            credentials=runtime_credentials,
+            datasource_parameters=datasource_parameters,
+            provider_type=provider_type,
+        )
+
+    def get_online_document_page_content(
+        self,
+        user_id: str,
+        datasource_parameters: GetOnlineDocumentPageContentRequest,
+        provider_type: str,
+    ) -> Generator[DatasourceMessage, None, None]:
+        """
+        forward a page content request to the plugin datasource manager
+        """
+        plugin_manager = PluginDatasourceManager()
+        identity = self.entity.identity
+        runtime_credentials = self.runtime.credentials
+
+        return plugin_manager.get_online_document_page_content(
+            tenant_id=self.tenant_id,
+            user_id=user_id,
+            datasource_provider=identity.provider,
+            datasource_name=identity.name,
+            credentials=runtime_credentials,
+            datasource_parameters=datasource_parameters,
+            provider_type=provider_type,
+        )
+
+    def datasource_provider_type(self) -> str:
+        """
+        provider type of this plugin, always ONLINE_DOCUMENT
+        """
+        return DatasourceProviderType.ONLINE_DOCUMENT
--- a/api/core/datasource/online_document/online_document_provider.py
+++ b/api/core/datasource/online_document/online_document_provider.py
@ -0,0 +1,48 @@
+from core.datasource.__base.datasource_provider import DatasourcePluginProviderController
+from core.datasource.__base.datasource_runtime import DatasourceRuntime
+from core.datasource.entities.datasource_entities import DatasourceProviderEntityWithPlugin, DatasourceProviderType
+from core.datasource.online_document.online_document_plugin import OnlineDocumentDatasourcePlugin
+
+
+class OnlineDocumentDatasourcePluginProviderController(DatasourcePluginProviderController):
+    """
+    Provider controller that reports the ONLINE_DOCUMENT provider type and builds
+    OnlineDocumentDatasourcePlugin instances from the datasources of its entity.
+    """
+
+    entity: DatasourceProviderEntityWithPlugin
+    plugin_id: str
+    plugin_unique_identifier: str
+
+    def __init__(
+        self, entity: DatasourceProviderEntityWithPlugin, plugin_id: str, plugin_unique_identifier: str, tenant_id: str
+    ) -> None:
+        # entity and tenant_id are handled by the base controller
+        super().__init__(entity, tenant_id)
+        self.plugin_id = plugin_id
+        self.plugin_unique_identifier = plugin_unique_identifier
+
+    @property
+    def provider_type(self) -> DatasourceProviderType:
+        """
+        provider type of this controller, always ONLINE_DOCUMENT
+        """
+        return DatasourceProviderType.ONLINE_DOCUMENT
+
+    def get_datasource(self, datasource_name: str) -> OnlineDocumentDatasourcePlugin:  # type: ignore
+        """
+        build the plugin for the datasource called ``datasource_name``
+
+        :raises ValueError: when no datasource of the entity carries that name
+        """
+        matched = None
+        for candidate in self.entity.datasources:
+            # first entity with the requested name wins
+            if candidate.identity.name == datasource_name:
+                matched = candidate
+                break
+
+        if not matched:
+            raise ValueError(f"Datasource with name {datasource_name} not found")
+
+        tenant_id = self.tenant_id
+        return OnlineDocumentDatasourcePlugin(
+            entity=matched,
+            runtime=DatasourceRuntime(tenant_id=tenant_id),
+            tenant_id=tenant_id,
+            icon=self.entity.identity.icon,
+            plugin_unique_identifier=self.plugin_unique_identifier,
+        )
--- a/api/core/datasource/online_drive/online_drive_plugin.py
+++ b/api/core/datasource/online_drive/online_drive_plugin.py
@ -0,0 +1,73 @@
+from collections.abc import Generator
+
+from core.datasource.__base.datasource_plugin import DatasourcePlugin
+from core.datasource.__base.datasource_runtime import DatasourceRuntime
+from core.datasource.entities.datasource_entities import (
+    DatasourceEntity,
+    DatasourceMessage,
+    DatasourceProviderType,
+    OnlineDriveBrowseFilesRequest,
+    OnlineDriveBrowseFilesResponse,
+    OnlineDriveDownloadFileRequest,
+)
+from core.plugin.impl.datasource import PluginDatasourceManager
+
+
+class OnlineDriveDatasourcePlugin(DatasourcePlugin):
+    """
+    Datasource plugin of type ONLINE_DRIVE. Each call is forwarded to a new
+    PluginDatasourceManager together with this plugin's tenant id, entity
+    identity and runtime credentials.
+    """
+
+    tenant_id: str
+    icon: str
+    plugin_unique_identifier: str
+    entity: DatasourceEntity
+    runtime: DatasourceRuntime
+
+    def __init__(
+        self,
+        entity: DatasourceEntity,
+        runtime: DatasourceRuntime,
+        tenant_id: str,
+        icon: str,
+        plugin_unique_identifier: str,
+    ) -> None:
+        # entity and runtime are handled by the base plugin
+        super().__init__(entity, runtime)
+        self.tenant_id = tenant_id
+        self.icon = icon
+        self.plugin_unique_identifier = plugin_unique_identifier
+
+    def online_drive_browse_files(
+        self,
+        user_id: str,
+        request: OnlineDriveBrowseFilesRequest,
+        provider_type: str,
+    ) -> Generator[OnlineDriveBrowseFilesResponse, None, None]:
+        """
+        forward a browse-files request to the plugin datasource manager
+        """
+        plugin_manager = PluginDatasourceManager()
+        identity = self.entity.identity
+        runtime_credentials = self.runtime.credentials
+
+        return plugin_manager.online_drive_browse_files(
+            tenant_id=self.tenant_id,
+            user_id=user_id,
+            datasource_provider=identity.provider,
+            datasource_name=identity.name,
+            credentials=runtime_credentials,
+            request=request,
+            provider_type=provider_type,
+        )
+
+    def online_drive_download_file(
+        self,
+        user_id: str,
+        request: OnlineDriveDownloadFileRequest,
+        provider_type: str,
+    ) -> Generator[DatasourceMessage, None, None]:
+        """
+        forward a download-file request to the plugin datasource manager
+        """
+        plugin_manager = PluginDatasourceManager()
+        identity = self.entity.identity
+        runtime_credentials = self.runtime.credentials
+
+        return plugin_manager.online_drive_download_file(
+            tenant_id=self.tenant_id,
+            user_id=user_id,
+            datasource_provider=identity.provider,
+            datasource_name=identity.name,
+            credentials=runtime_credentials,
+            request=request,
+            provider_type=provider_type,
+        )
+
+    def datasource_provider_type(self) -> str:
+        """
+        provider type of this plugin, always ONLINE_DRIVE
+        """
+        return DatasourceProviderType.ONLINE_DRIVE
--- a/api/core/datasource/online_drive/online_drive_provider.py
+++ b/api/core/datasource/online_drive/online_drive_provider.py
@ -0,0 +1,48 @@
+from core.datasource.__base.datasource_provider import DatasourcePluginProviderController
+from core.datasource.__base.datasource_runtime import DatasourceRuntime
+from core.datasource.entities.datasource_entities import DatasourceProviderEntityWithPlugin, DatasourceProviderType
+from core.datasource.online_drive.online_drive_plugin import OnlineDriveDatasourcePlugin
+
+
+class OnlineDriveDatasourcePluginProviderController(DatasourcePluginProviderController):
+    """
+    Provider controller that reports the ONLINE_DRIVE provider type and builds
+    OnlineDriveDatasourcePlugin instances from the datasources of its entity.
+    """
+
+    entity: DatasourceProviderEntityWithPlugin
+    plugin_id: str
+    plugin_unique_identifier: str
+
+    def __init__(
+        self, entity: DatasourceProviderEntityWithPlugin, plugin_id: str, plugin_unique_identifier: str, tenant_id: str
+    ) -> None:
+        # entity and tenant_id are handled by the base controller
+        super().__init__(entity, tenant_id)
+        self.plugin_id = plugin_id
+        self.plugin_unique_identifier = plugin_unique_identifier
+
+    @property
+    def provider_type(self) -> DatasourceProviderType:
+        """
+        provider type of this controller, always ONLINE_DRIVE
+        """
+        return DatasourceProviderType.ONLINE_DRIVE
+
+    def get_datasource(self, datasource_name: str) -> OnlineDriveDatasourcePlugin:  # type: ignore
+        """
+        build the plugin for the datasource called ``datasource_name``
+
+        :raises ValueError: when no datasource of the entity carries that name
+        """
+        matched = None
+        for candidate in self.entity.datasources:
+            # first entity with the requested name wins
+            if candidate.identity.name == datasource_name:
+                matched = candidate
+                break
+
+        if not matched:
+            raise ValueError(f"Datasource with name {datasource_name} not found")
+
+        tenant_id = self.tenant_id
+        return OnlineDriveDatasourcePlugin(
+            entity=matched,
+            runtime=DatasourceRuntime(tenant_id=tenant_id),
+            tenant_id=tenant_id,
+            icon=self.entity.identity.icon,
+            plugin_unique_identifier=self.plugin_unique_identifier,
+        )
--- a/api/core/datasource/utils/__init__.py
+++ b/api/core/datasource/utils/__init__.py
--- a/api/core/datasource/utils/configuration.py
+++ b/api/core/datasource/utils/configuration.py
@ -0,0 +1,265 @@
+from copy import deepcopy
+from typing import Any
+
+from pydantic import BaseModel
+
+from core.entities.provider_entities import BasicProviderConfig
+from core.helper import encrypter
+from core.helper.tool_parameter_cache import ToolParameterCache, ToolParameterCacheType
+from core.helper.tool_provider_cache import ToolProviderCredentialsCache, ToolProviderCredentialsCacheType
+from core.tools.__base.tool import Tool
+from core.tools.entities.tool_entities import (
+    ToolParameter,
+    ToolProviderType,
+)
+
+
+class ProviderConfigEncrypter(BaseModel):
+    """
+    Encrypts, decrypts and masks the secret fields of a provider's credentials.
+
+    Only fields whose config entry has type ``SECRET_INPUT`` are touched; all
+    other keys are passed through unchanged.
+    """
+
+    tenant_id: str
+    config: list[BasicProviderConfig]
+    provider_type: str
+    provider_identity: str
+
+    def _deep_copy(self, data: dict[str, str]) -> dict[str, str]:
+        """
+        deep copy data
+        """
+        return deepcopy(data)
+
+    def _secret_field_names(self) -> list[str]:
+        """
+        return the names of the config fields declared as SECRET_INPUT
+        """
+        # keyed by name, so a later config entry overrides an earlier one with the same name
+        fields = {credential.name: credential for credential in self.config}
+        return [name for name, field in fields.items() if field.type == BasicProviderConfig.Type.SECRET_INPUT]
+
+    def _credentials_cache(self) -> ToolProviderCredentialsCache:
+        """
+        build the credentials cache handle for this tenant and provider
+        """
+        return ToolProviderCredentialsCache(
+            tenant_id=self.tenant_id,
+            identity_id=f"{self.provider_type}.{self.provider_identity}",
+            cache_type=ToolProviderCredentialsCacheType.PROVIDER,
+        )
+
+    def encrypt(self, data: dict[str, str]) -> dict[str, str]:
+        """
+        encrypt tool credentials with tenant id
+
+        return a deep copy of credentials with encrypted values
+        """
+        data = self._deep_copy(data)
+
+        for field_name in self._secret_field_names():
+            if field_name in data:
+                # a None/empty value is encrypted as an empty string
+                data[field_name] = encrypter.encrypt_token(self.tenant_id, data[field_name] or "")
+
+        return data
+
+    def mask_tool_credentials(self, data: dict[str, Any]) -> dict[str, Any]:
+        """
+        mask tool credentials
+
+        return a deep copy of credentials with masked values: values longer than
+        6 characters keep their first and last 2 characters, shorter ones are
+        fully replaced by ``*``. Missing or ``None`` values are left untouched.
+        """
+        data = self._deep_copy(data)
+
+        for field_name in self._secret_field_names():
+            value = data.get(field_name)
+            if value is None:
+                # nothing to mask; encrypt/decrypt tolerate None as well
+                continue
+
+            if len(value) > 6:
+                data[field_name] = value[:2] + "*" * (len(value) - 4) + value[-2:]
+            else:
+                data[field_name] = "*" * len(value)
+
+        return data
+
+    def decrypt(self, data: dict[str, str]) -> dict[str, str]:
+        """
+        decrypt tool credentials with tenant id
+
+        return the cached credentials when the cache holds any, otherwise a deep
+        copy of credentials with decrypted values (which is then cached)
+        """
+        cache = self._credentials_cache()
+        cached_credentials = cache.get()
+        if cached_credentials:
+            return cached_credentials
+
+        data = self._deep_copy(data)
+
+        for field_name in self._secret_field_names():
+            # if the value is missing, None or empty string, skip decrypt
+            if not data.get(field_name):
+                continue
+
+            try:
+                data[field_name] = encrypter.decrypt_token(self.tenant_id, data[field_name])
+            except Exception:
+                # best effort: a value that cannot be decrypted is kept as stored
+                pass
+
+        cache.set(data)
+        return data
+
+    def delete_tool_credentials_cache(self):
+        """
+        delete the cached credentials of this tenant and provider
+        """
+        self._credentials_cache().delete()
+
+
+class ToolParameterConfigurationManager:
+    """
+    Tool parameter configuration manager
+
+    Masks, encrypts and decrypts the parameters of a tool that are declared as
+    FORM parameters of type SECRET_INPUT. Every public method leaves the
+    caller's dict untouched.
+    """
+
+    tenant_id: str
+    tool_runtime: Tool
+    provider_name: str
+    provider_type: ToolProviderType
+    identity_id: str
+
+    def __init__(
+        self, tenant_id: str, tool_runtime: Tool, provider_name: str, provider_type: ToolProviderType, identity_id: str
+    ) -> None:
+        self.tenant_id = tenant_id
+        self.tool_runtime = tool_runtime
+        self.provider_name = provider_name
+        self.provider_type = provider_type
+        self.identity_id = identity_id
+
+    def _deep_copy(self, parameters: dict[str, Any]) -> dict[str, Any]:
+        """
+        deep copy parameters
+        """
+        return deepcopy(parameters)
+
+    def _merge_parameters(self) -> list[ToolParameter]:
+        """
+        merge the tool's declared parameters with its runtime parameters
+
+        a runtime parameter replaces the declared parameter with the same name
+        and form; an unmatched runtime parameter is appended only when its form
+        is FORM
+        """
+        # get tool parameters
+        tool_parameters = self.tool_runtime.entity.parameters or []
+        # get tool runtime parameters
+        runtime_parameters = self.tool_runtime.get_runtime_parameters()
+        # override parameters on a shallow copy so the entity's list is not modified
+        current_parameters = tool_parameters.copy()
+        for runtime_parameter in runtime_parameters:
+            for index, parameter in enumerate(current_parameters):
+                if parameter.name == runtime_parameter.name and parameter.form == runtime_parameter.form:
+                    current_parameters[index] = runtime_parameter
+                    break
+            else:
+                # no declared parameter matched this runtime parameter
+                if runtime_parameter.form == ToolParameter.ToolParameterForm.FORM:
+                    current_parameters.append(runtime_parameter)
+
+        return current_parameters
+
+    def _secret_parameter_names(self) -> list[str]:
+        """
+        return the names of the merged parameters that are FORM + SECRET_INPUT
+        """
+        return [
+            parameter.name
+            for parameter in self._merge_parameters()
+            if parameter.form == ToolParameter.ToolParameterForm.FORM
+            and parameter.type == ToolParameter.ToolParameterType.SECRET_INPUT
+        ]
+
+    def _parameter_cache(self) -> ToolParameterCache:
+        """
+        build the parameter cache handle for this tenant, provider, tool and identity
+        """
+        return ToolParameterCache(
+            tenant_id=self.tenant_id,
+            provider=f"{self.provider_type.value}.{self.provider_name}",
+            tool_name=self.tool_runtime.entity.identity.name,
+            cache_type=ToolParameterCacheType.PARAMETER,
+            identity_id=self.identity_id,
+        )
+
+    def mask_tool_parameters(self, parameters: dict[str, Any]) -> dict[str, Any]:
+        """
+        mask tool parameters
+
+        return a deep copy of parameters with masked values: values longer than
+        6 characters keep their first and last 2 characters, shorter ones are
+        fully replaced by ``*``. Missing or ``None`` values are left untouched.
+        """
+        parameters = self._deep_copy(parameters)
+
+        for name in self._secret_parameter_names():
+            value = parameters.get(name)
+            if value is None:
+                # nothing to mask; avoids len(None) raising TypeError
+                continue
+
+            if len(value) > 6:
+                parameters[name] = value[:2] + "*" * (len(value) - 4) + value[-2:]
+            else:
+                parameters[name] = "*" * len(value)
+
+        return parameters
+
+    def encrypt_tool_parameters(self, parameters: dict[str, Any]) -> dict[str, Any]:
+        """
+        encrypt tool parameters with tenant id
+
+        return a deep copy of parameters with encrypted values
+        """
+        secret_names = self._secret_parameter_names()
+
+        parameters = self._deep_copy(parameters)
+
+        for name in secret_names:
+            if name in parameters:
+                parameters[name] = encrypter.encrypt_token(self.tenant_id, parameters[name])
+
+        return parameters
+
+    def decrypt_tool_parameters(self, parameters: dict[str, Any]) -> dict[str, Any]:
+        """
+        decrypt tool parameters with tenant id
+
+        return the cached parameters when the cache holds any, otherwise a deep
+        copy of parameters with decrypted values
+        """
+        cache = self._parameter_cache()
+        cached_parameters = cache.get()
+        if cached_parameters:
+            return cached_parameters
+
+        secret_names = self._secret_parameter_names()
+
+        # work on a copy so the caller's dict keeps its encrypted values,
+        # consistent with mask_tool_parameters / encrypt_tool_parameters
+        parameters = self._deep_copy(parameters)
+        has_secret_input = False
+
+        for name in secret_names:
+            if name not in parameters:
+                continue
+
+            # set before decrypting: the result is cached even when decryption fails
+            has_secret_input = True
+            try:
+                parameters[name] = encrypter.decrypt_token(self.tenant_id, parameters[name])
+            except Exception:
+                # best effort: a value that cannot be decrypted is kept as stored
+                pass
+
+        # only cache when at least one secret parameter was present
+        if has_secret_input:
+            cache.set(parameters)
+
+        return parameters
+
+    def delete_tool_parameters_cache(self):
+        """
+        delete the cached parameters of this tenant, provider, tool and identity
+        """
+        self._parameter_cache().delete()
--- a/api/core/datasource/utils/message_transformer.py
+++ b/api/core/datasource/utils/message_transformer.py
@ -0,0 +1,121 @@
+import logging
+from collections.abc import Generator
+from mimetypes import guess_extension
+from typing import Optional
+
+from core.datasource.datasource_file_manager import DatasourceFileManager
+from core.datasource.entities.datasource_entities import DatasourceMessage
+from core.file import File, FileTransferMethod, FileType
+
+logger = logging.getLogger(__name__)
+
+
+class DatasourceFileMessageTransformer:
+    """
+    Rewrites datasource messages that carry files (image URLs, blobs, File
+    objects) into link messages pointing at ``/files/datasources/...``.
+    """
+
+    @classmethod
+    def transform_datasource_invoke_messages(
+        cls,
+        messages: Generator[DatasourceMessage, None, None],
+        user_id: str,
+        tenant_id: str,
+        conversation_id: Optional[str] = None,
+    ) -> Generator[DatasourceMessage, None, None]:
+        """
+        Yield the transformed form of every message in ``messages``
+
+        - TEXT / LINK and any unhandled type: yielded unchanged
+        - IMAGE with a text payload: the URL is downloaded and an IMAGE_LINK is
+          yielded; on any error a TEXT message describing the failure is yielded
+        - BLOB: the bytes are stored and an IMAGE_LINK or BINARY_LINK is yielded
+        - FILE: a TOOL_FILE transfer becomes an IMAGE_LINK or LINK, other File
+          objects are yielded unchanged, and a message whose ``meta["file"]``
+          is not a File yields nothing
+
+        :raises ValueError: when a BLOB message does not carry a BlobMessage
+        """
+        for message in messages:
+            kind = message.type
+
+            if kind in {DatasourceMessage.MessageType.TEXT, DatasourceMessage.MessageType.LINK}:
+                yield message
+                continue
+
+            if kind == DatasourceMessage.MessageType.IMAGE and isinstance(
+                message.message, DatasourceMessage.TextMessage
+            ):
+                # try to download image; the yield stays inside the try block on purpose
+                try:
+                    assert isinstance(message.message, DatasourceMessage.TextMessage)
+
+                    downloaded = DatasourceFileManager.create_file_by_url(
+                        user_id=user_id,
+                        tenant_id=tenant_id,
+                        file_url=message.message.text,
+                        conversation_id=conversation_id,
+                    )
+
+                    # images fall back to '.png' rather than the generic '.bin'
+                    image_url = f"/files/datasources/{downloaded.id}{guess_extension(downloaded.mime_type) or '.png'}"
+
+                    yield DatasourceMessage(
+                        type=DatasourceMessage.MessageType.IMAGE_LINK,
+                        message=DatasourceMessage.TextMessage(text=image_url),
+                        meta=message.meta.copy() if message.meta is not None else {},
+                    )
+                except Exception as e:
+                    yield DatasourceMessage(
+                        type=DatasourceMessage.MessageType.TEXT,
+                        message=DatasourceMessage.TextMessage(
+                            text=f"Failed to download image: {message.message.text}: {e}"
+                        ),
+                        meta=message.meta.copy() if message.meta is not None else {},
+                    )
+                continue
+
+            if kind == DatasourceMessage.MessageType.BLOB:
+                # mime type and filename come from meta; meta is always a dict from here on
+                meta = message.meta or {}
+                mimetype = meta.get("mime_type", "application/octet-stream")
+                filename = meta.get("file_name", None)
+
+                if not isinstance(message.message, DatasourceMessage.BlobMessage):
+                    raise ValueError("unexpected message type")
+
+                # FIXME: should do a type check here.
+                assert isinstance(message.message.blob, bytes)
+                stored = DatasourceFileManager.create_file_by_raw(
+                    user_id=user_id,
+                    tenant_id=tenant_id,
+                    conversation_id=conversation_id,
+                    file_binary=message.message.blob,
+                    mimetype=mimetype,
+                    filename=filename,
+                )
+
+                blob_url = cls.get_datasource_file_url(
+                    datasource_file_id=stored.id, extension=guess_extension(stored.mime_type)
+                )
+
+                # images get an IMAGE_LINK, everything else a BINARY_LINK
+                if "image" in mimetype:
+                    link_type = DatasourceMessage.MessageType.IMAGE_LINK
+                else:
+                    link_type = DatasourceMessage.MessageType.BINARY_LINK
+
+                yield DatasourceMessage(
+                    type=link_type,
+                    message=DatasourceMessage.TextMessage(text=blob_url),
+                    meta=meta.copy(),
+                )
+                continue
+
+            if kind == DatasourceMessage.MessageType.FILE:
+                meta = message.meta or {}
+                attached = meta.get("file", None)
+
+                if not isinstance(attached, File):
+                    # no File object attached: nothing is yielded for this message
+                    continue
+
+                if file_is_tool_file := attached.transfer_method == FileTransferMethod.TOOL_FILE:
+                    assert attached.related_id is not None
+                    file_url = cls.get_datasource_file_url(
+                        datasource_file_id=attached.related_id, extension=attached.extension
+                    )
+
+                    if attached.type == FileType.IMAGE:
+                        link_type = DatasourceMessage.MessageType.IMAGE_LINK
+                    else:
+                        link_type = DatasourceMessage.MessageType.LINK
+
+                    yield DatasourceMessage(
+                        type=link_type,
+                        message=DatasourceMessage.TextMessage(text=file_url),
+                        meta=meta.copy(),
+                    )
+
+                if not file_is_tool_file:
+                    yield message
+                continue
+
+            # any other message type passes through untouched
+            yield message
+
+    @classmethod
+    def get_datasource_file_url(cls, datasource_file_id: str, extension: Optional[str]) -> str:
+        """
+        build the relative URL of a datasource file; a missing or empty
+        extension falls back to '.bin'
+        """
+        suffix = extension or ".bin"
+        return f"/files/datasources/{datasource_file_id}{suffix}"
--- a/api/core/datasource/utils/parser.py
+++ b/api/core/datasource/utils/parser.py
@ -0,0 +1,389 @@
+import re
+import uuid
+from json import dumps as json_dumps
+from json import loads as json_loads
+from json.decoder import JSONDecodeError
+from typing import Optional
+
+from flask import request
+from requests import get
+from yaml import YAMLError, safe_load  # type: ignore
+
+from core.tools.entities.common_entities import I18nObject
+from core.tools.entities.tool_bundle import ApiToolBundle
+from core.tools.entities.tool_entities import ApiProviderSchemaType, ToolParameter
+from core.tools.errors import ToolApiSchemaError, ToolNotSupportedError, ToolProviderNotFoundError
+
+
+class ApiBasedToolSchemaParser:
+    @staticmethod
+    def parse_openapi_to_tool_bundle(
+        openapi: dict, extra_info: dict | None = None, warning: dict | None = None
+    ) -> list[ApiToolBundle]:
+        """
+        parse openapi dict to tool bundle, one bundle per (path, method) operation
+
+        :param openapi: the openapi dict, "info", "servers" and "paths" are read from it
+        :param extra_info: the extra info, its "description" is set from the openapi info
+        :param warning: the warning message, "duplicated_parameter" is set when a parameter name repeats
+        :return: the tool bundle
+        :raises ToolProviderNotFoundError: if the openapi dict declares no server
+        """
+        warning = warning if warning is not None else {}
+        extra_info = extra_info if extra_info is not None else {}
+
+        # set description to extra_info
+        extra_info["description"] = openapi["info"].get("description", "")
+
+        if len(openapi["servers"]) == 0:
+            raise ToolProviderNotFoundError("No server found in the openapi yaml.")
+
+        # the first server is the default, the X-Request-Env header may select another one
+        server_url = openapi["servers"][0]["url"]
+        request_env = request.headers.get("X-Request-Env")
+        if request_env:
+            # "env" is optional on a server entry, servers without it never match
+            matched_servers = [server["url"] for server in openapi["servers"] if server.get("env") == request_env]
+            server_url = matched_servers[0] if matched_servers else server_url
+
+        # list all interfaces
+        methods = ["get", "post", "put", "delete", "patch", "head", "options", "trace"]
+        interfaces = []
+        for path, path_item in openapi["paths"].items():
+            for method in methods:
+                if method in path_item:
+                    interfaces.append(
+                        {
+                            "path": path,
+                            "method": method,
+                            "operation": path_item[method],
+                        }
+                    )
+
+        # get all parameters
+        bundles = []
+        for interface in interfaces:
+            operation = interface["operation"]
+
+            # convert parameters
+            parameters = []
+            if "parameters" in operation:
+                for parameter in operation["parameters"]:
+                    description = parameter.get("description", "")
+                    tool_parameter = ToolParameter(
+                        name=parameter["name"],
+                        label=I18nObject(en_US=parameter["name"], zh_Hans=parameter["name"]),
+                        human_description=I18nObject(en_US=description, zh_Hans=description),
+                        type=ToolParameter.ToolParameterType.STRING,
+                        required=parameter.get("required", False),
+                        form=ToolParameter.ToolParameterForm.LLM,
+                        llm_description=parameter.get("description"),
+                        default=parameter["schema"]["default"]
+                        if "schema" in parameter and "default" in parameter["schema"]
+                        else None,
+                        placeholder=I18nObject(en_US=description, zh_Hans=description),
+                    )
+
+                    # STRING is only the fallback, use the declared type when it can be mapped
+                    typ = ApiBasedToolSchemaParser._get_tool_parameter_type(parameter)
+                    if typ:
+                        tool_parameter.type = typ
+
+                    parameters.append(tool_parameter)
+
+            # check if there is a request body
+            if "requestBody" in operation:
+                request_body = operation["requestBody"]
+                if "content" in request_body:
+                    # content_type stays None when the content mapping is empty, so a value left over
+                    # from a previous operation is never reused
+                    content_type = None
+                    for content_type, content in request_body["content"].items():
+                        if "schema" not in content:
+                            continue
+
+                        # if there is a reference, get the reference and overwrite the content
+                        if "$ref" in content["schema"]:
+                            # walk the reference from the document root, skipping the leading "#"
+                            root = openapi
+                            reference = content["schema"]["$ref"].split("/")[1:]
+                            for ref in reference:
+                                root = root[ref]
+                            # overwrite the content
+                            content["schema"] = root
+
+                    # parse body parameters, only the last declared content type is used
+                    if content_type is not None and "schema" in request_body["content"][content_type]:
+                        body_schema = request_body["content"][content_type]["schema"]
+                        required = body_schema.get("required", [])
+                        properties = body_schema.get("properties", {})
+                        for name, prop in properties.items():
+                            description = prop.get("description", "")
+                            body_parameter = ToolParameter(
+                                name=name,
+                                label=I18nObject(en_US=name, zh_Hans=name),
+                                human_description=I18nObject(en_US=description, zh_Hans=description),
+                                type=ToolParameter.ToolParameterType.STRING,
+                                required=name in required,
+                                form=ToolParameter.ToolParameterForm.LLM,
+                                llm_description=description,
+                                default=prop.get("default", None),
+                                placeholder=I18nObject(en_US=description, zh_Hans=description),
+                            )
+
+                            # STRING is only the fallback, use the declared type when it can be mapped
+                            typ = ApiBasedToolSchemaParser._get_tool_parameter_type(prop)
+                            if typ:
+                                body_parameter.type = typ
+
+                            parameters.append(body_parameter)
+
+            # check if parameters is duplicated
+            parameters_count: dict = {}
+            for parameter in parameters:
+                parameters_count[parameter.name] = parameters_count.get(parameter.name, 0) + 1
+            for name, count in parameters_count.items():
+                if count > 1:
+                    # a single key is used, so only the last duplicated name is reported
+                    warning["duplicated_parameter"] = f"Parameter {name} is duplicated."
+
+            # check if there is a operation id, use $path_$method as operation id if not
+            if "operationId" not in operation:
+                path = interface["path"]
+                if path.startswith("/"):
+                    path = path[1:]
+                # remove special characters like / to ensure the operation id is valid ^[a-zA-Z0-9_-]{1,64}$
+                path = re.sub(r"[^a-zA-Z0-9_-]", "", path)
+                if not path:
+                    # nothing usable is left of the path (e.g. "/"), fall back to a random id
+                    path = str(uuid.uuid4())
+
+                operation["operationId"] = f"{path}_{interface['method']}"
+
+            # create tool bundle
+            bundles.append(
+                ApiToolBundle(
+                    server_url=server_url + interface["path"],
+                    method=interface["method"],
+                    summary=operation["description"]
+                    if "description" in operation
+                    else operation.get("summary", None),
+                    operation_id=operation["operationId"],
+                    parameters=parameters,
+                    author="",
+                    icon=None,
+                    openapi=operation,
+                )
+            )
+
+        return bundles
+
+    @staticmethod
+    def _get_tool_parameter_type(parameter: dict) -> Optional[ToolParameter.ToolParameterType]:
+        """
+        map an openapi parameter or schema property to a tool parameter type
+
+        :param parameter: the parameter or property dict, the type is read from "type" or from "schema.type"
+        :return: the tool parameter type, None if the declared type has no mapping
+        """
+        spec = parameter or {}
+
+        # binary format wins over whatever type is declared
+        if spec.get("format") == "binary":
+            return ToolParameter.ToolParameterType.FILE
+
+        declared: Optional[str] = None
+        if "type" in spec:
+            declared = spec["type"]
+        elif "schema" in spec and "type" in spec["schema"]:
+            declared = spec["schema"]["type"]
+
+        # an array only maps to FILES when its items are binary
+        if declared == "array":
+            items = spec.get("items") or spec.get("schema", {}).get("items")
+            if items and items.get("format") == "binary":
+                return ToolParameter.ToolParameterType.FILES
+            return None
+
+        scalar_types = {
+            "integer": ToolParameter.ToolParameterType.NUMBER,
+            "number": ToolParameter.ToolParameterType.NUMBER,
+            "boolean": ToolParameter.ToolParameterType.BOOLEAN,
+            "string": ToolParameter.ToolParameterType.STRING,
+        }
+        return scalar_types.get(declared)
+
+    @staticmethod
+    def parse_openapi_yaml_to_tool_bundle(
+        yaml: str, extra_info: dict | None = None, warning: dict | None = None
+    ) -> list[ApiToolBundle]:
+        """
+        parse openapi yaml to tool bundle
+
+        :param yaml: the yaml string
+        :param extra_info: the extra info
+        :param warning: the warning message
+        :return: the tool bundle
+        :raises ToolApiSchemaError: if the yaml is empty or its top level is not a mapping
+        """
+        warning = warning if warning is not None else {}
+        extra_info = extra_info if extra_info is not None else {}
+
+        openapi = safe_load(yaml)
+        # safe_load returns None for an empty document and a scalar or list for non-mapping yaml,
+        # neither of them can be an openapi document
+        if not isinstance(openapi, dict):
+            raise ToolApiSchemaError("Invalid openapi yaml.")
+        return ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(openapi, extra_info=extra_info, warning=warning)
+
+    @staticmethod
+    def parse_swagger_to_openapi(swagger: dict, extra_info: dict | None = None, warning: dict | None = None) -> dict:
+        """
+        parse swagger to openapi
+
+        :param swagger: the swagger dict
+        :param extra_info: the extra info, not used by this conversion
+        :param warning: the warning message, "missing_summary" is set when an operation has neither
+            a summary nor a description
+        :return: the openapi dict
+        :raises ToolApiSchemaError: if there is no server, no path, or an operation without operationId
+        """
+        # keep the caller's dict even when it is empty, otherwise the warnings set below are lost
+        warning = warning if warning is not None else {}
+
+        # convert swagger to openapi
+        info = swagger.get("info", {"title": "Swagger", "description": "Swagger", "version": "1.0.0"})
+
+        servers = swagger.get("servers", [])
+
+        if len(servers) == 0:
+            raise ToolApiSchemaError("No server found in the swagger yaml.")
+
+        openapi = {
+            "openapi": "3.0.0",
+            "info": {
+                "title": info.get("title", "Swagger"),
+                "description": info.get("description", "Swagger"),
+                "version": info.get("version", "1.0.0"),
+            },
+            "servers": servers,
+            "paths": {},
+            "components": {"schemas": {}},
+        }
+
+        # check paths
+        if "paths" not in swagger or len(swagger["paths"]) == 0:
+            raise ToolApiSchemaError("No paths found in the swagger yaml.")
+
+        # convert paths
+        for path, path_item in swagger["paths"].items():
+            openapi["paths"][path] = {}
+            for method, operation in path_item.items():
+                if "operationId" not in operation:
+                    raise ToolApiSchemaError(f"No operationId found in operation {method} {path}.")
+
+                if not operation.get("summary") and not operation.get("description"):
+                    warning["missing_summary"] = f"No summary or description found in operation {method} {path}."
+
+                openapi["paths"][path][method] = {
+                    "operationId": operation["operationId"],
+                    "summary": operation.get("summary", ""),
+                    "description": operation.get("description", ""),
+                    "parameters": operation.get("parameters", []),
+                    "responses": operation.get("responses", {}),
+                }
+
+                if "requestBody" in operation:
+                    openapi["paths"][path][method]["requestBody"] = operation["requestBody"]
+
+        # convert definitions, a document without definitions simply has no schemas
+        for name, definition in swagger.get("definitions", {}).items():
+            openapi["components"]["schemas"][name] = definition
+
+        return openapi
+
+    @staticmethod
+    def parse_openai_plugin_json_to_tool_bundle(
+        json: str, extra_info: dict | None = None, warning: dict | None = None
+    ) -> list[ApiToolBundle]:
+        """
+        parse openai plugin json to tool bundle
+
+        the plugin json only points at an openapi yaml, which is downloaded from "api.url" and parsed
+
+        :param json: the json string
+        :param extra_info: the extra info
+        :param warning: the warning message
+        :return: the tool bundle
+        :raises ToolProviderNotFoundError: if the json is malformed, lacks api.url / api.type,
+            or the openapi yaml cannot be downloaded
+        :raises ToolNotSupportedError: if api.type is not "openapi"
+        """
+        warning = warning if warning is not None else {}
+        extra_info = extra_info if extra_info is not None else {}
+
+        try:
+            openai_plugin = json_loads(json)
+            api = openai_plugin["api"]
+            api_url = api["url"]
+            api_type = api["type"]
+        except (JSONDecodeError, KeyError, TypeError) as e:
+            # a json without api.url / api.type is as unusable as malformed json
+            raise ToolProviderNotFoundError("Invalid openai plugin json.") from e
+
+        if api_type != "openapi":
+            raise ToolNotSupportedError("Only openapi is supported now.")
+
+        # get openapi yaml
+        # NOTE(review): api_url is taken from the submitted json and fetched as-is, check whether this
+        # request should go through an SSRF-safe http helper - confirm
+        response = get(api_url, headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "}, timeout=5)
+
+        if response.status_code != 200:
+            raise ToolProviderNotFoundError("cannot get openapi yaml from url.")
+
+        return ApiBasedToolSchemaParser.parse_openapi_yaml_to_tool_bundle(
+            response.text, extra_info=extra_info, warning=warning
+        )
+
+    @staticmethod
+    def auto_parse_to_tool_bundle(
+        content: str, extra_info: dict | None = None, warning: dict | None = None
+    ) -> tuple[list[ApiToolBundle], str]:
+        """
+        auto parse to tool bundle
+
+        the content is loaded as json first and as yaml second, then it is tried as openapi,
+        swagger and openai plugin in that order
+
+        :param content: the content
+        :param extra_info: the extra info
+        :param warning: the warning message
+        :return: tools bundle, schema_type
+        :raises ToolApiSchemaError: if the content is neither json nor yaml, or matches no schema type
+        """
+        warning = warning if warning is not None else {}
+        extra_info = extra_info if extra_info is not None else {}
+
+        content = content.strip()
+        loaded_content = None
+        json_error = None
+        yaml_error = None
+
+        try:
+            loaded_content = json_loads(content)
+        except JSONDecodeError as e:
+            json_error = e
+
+        if loaded_content is None:
+            try:
+                loaded_content = safe_load(content)
+            except YAMLError as e:
+                yaml_error = e
+        if loaded_content is None:
+            raise ToolApiSchemaError(
+                f"Invalid api schema, schema is neither json nor yaml. json error: {str(json_error)},"
+                f" yaml error: {str(yaml_error)}"
+            )
+
+        # a document of the wrong shape (missing "info"/"servers", not a mapping, ...) surfaces as a
+        # lookup or type error inside the parsers, treat it like a schema error so that the next
+        # schema type is still tried instead of aborting the whole fallback chain
+        schema_errors = (ToolApiSchemaError, KeyError, TypeError, AttributeError)
+
+        swagger_error = None
+        openapi_error = None
+        openapi_plugin_error = None
+        schema_type = None
+
+        try:
+            openapi = ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(
+                loaded_content, extra_info=extra_info, warning=warning
+            )
+            schema_type = ApiProviderSchemaType.OPENAPI.value
+            return openapi, schema_type
+        except schema_errors as e:
+            openapi_error = e
+
+        # openapi parse error, fallback to swagger
+        try:
+            converted_swagger = ApiBasedToolSchemaParser.parse_swagger_to_openapi(
+                loaded_content, extra_info=extra_info, warning=warning
+            )
+            schema_type = ApiProviderSchemaType.SWAGGER.value
+            return ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(
+                converted_swagger, extra_info=extra_info, warning=warning
+            ), schema_type
+        except schema_errors as e:
+            swagger_error = e
+
+        # swagger parse error, fallback to openai plugin
+        # the plugin parser downloads api.url, so it is only attempted when the content has that shape
+        api = loaded_content.get("api") if isinstance(loaded_content, dict) else None
+        if isinstance(api, dict) and "url" in api and "type" in api:
+            try:
+                openapi_plugin = ApiBasedToolSchemaParser.parse_openai_plugin_json_to_tool_bundle(
+                    json_dumps(loaded_content), extra_info=extra_info, warning=warning
+                )
+                return openapi_plugin, ApiProviderSchemaType.OPENAI_PLUGIN.value
+            except ToolNotSupportedError as e:
+                # maybe it's not plugin at all
+                openapi_plugin_error = e
+        else:
+            openapi_plugin_error = "api.url or api.type is missing"
+
+        raise ToolApiSchemaError(
+            f"Invalid api schema, openapi error: {str(openapi_error)}, swagger error: {str(swagger_error)},"
+            f" openapi plugin error: {str(openapi_plugin_error)}"
+        )
--- a/api/core/datasource/utils/text_processing_utils.py
+++ b/api/core/datasource/utils/text_processing_utils.py
@ -0,0 +1,17 @@
+import re
+
+
+def remove_leading_symbols(text: str) -> str:
+    """
+    Strip the run of punctuation or symbol characters at the very start of the text.
+
+    Only the beginning of the whole string is considered; symbols after the
+    first other character, including those at the start of later lines, are kept.
+
+    Args:
+        text (str): The input text to process.
+
+    Returns:
+        str: The text without its leading punctuation or symbols.
+    """
+    # Match Unicode ranges for punctuation and symbols
+    # FIXME this pattern is confused quick fix for #11868 maybe refactor it later
+    pattern = r"^[\u2000-\u206F\u2E00-\u2E7F\u3000-\u303F!\"#$%&'()*+,./:;<=>?@^_`~]+"
+    leading = re.match(pattern, text)
+    if leading is None:
+        return text
+    return text[leading.end() :]
--- a/api/core/datasource/utils/uuid_utils.py
+++ b/api/core/datasource/utils/uuid_utils.py
@ -0,0 +1,9 @@
+import uuid
+
+
+def is_valid_uuid(uuid_str: str) -> bool:
+    """
+    Tell whether the given value can be parsed by ``uuid.UUID``.
+
+    :param uuid_str: the candidate uuid string
+    :return: True if parsing succeeds, False if it raises for any reason
+    """
+    try:
+        uuid.UUID(uuid_str)
+    except Exception:
+        # any parsing failure (bad format, wrong type, ...) means the value is not a valid uuid
+        return False
+    else:
+        return True
--- a/api/core/datasource/utils/workflow_configuration_sync.py
+++ b/api/core/datasource/utils/workflow_configuration_sync.py
@ -0,0 +1,43 @@
+from collections.abc import Mapping, Sequence
+from typing import Any
+
+from core.app.app_config.entities import VariableEntity
+from core.tools.entities.tool_entities import WorkflowToolParameterConfiguration
+
+
+class WorkflowToolConfigurationUtils:
+    """
+    helpers to validate workflow tool parameter configurations against the workflow graph
+    """
+
+    @classmethod
+    def check_parameter_configurations(cls, configurations: list[Mapping[str, Any]]):
+        """
+        validate every configuration against WorkflowToolParameterConfiguration
+
+        the validated models are discarded, only the validation error of an invalid entry matters
+        """
+        for raw_configuration in configurations:
+            WorkflowToolParameterConfiguration.model_validate(raw_configuration)
+
+    @classmethod
+    def get_workflow_graph_variables(cls, graph: Mapping[str, Any]) -> Sequence[VariableEntity]:
+        """
+        get the variables declared on the first node of type "start"
+
+        :param graph: the workflow graph, its "nodes" list is searched
+        :return: the variables of the start node, empty if there is no start node
+        """
+        start_node = None
+        for node in graph.get("nodes", []):
+            if node.get("data", {}).get("type") == "start":
+                start_node = node
+                break
+
+        if not start_node:
+            return []
+
+        raw_variables = start_node.get("data", {}).get("variables", [])
+        return [VariableEntity.model_validate(raw_variable) for raw_variable in raw_variables]
+
+    @classmethod
+    def check_is_synced(
+        cls, variables: list[VariableEntity], tool_configurations: list[WorkflowToolParameterConfiguration]
+    ):
+        """
+        check that the tool configurations match the workflow variables
+
+        both lists must have the same length and every configured name must be a variable name
+
+        raise ValueError if not synced
+        """
+        known_names = [variable.variable for variable in variables]
+
+        if len(tool_configurations) != len(variables):
+            raise ValueError("parameter configuration mismatch, please republish the tool to update")
+
+        if any(parameter.name not in known_names for parameter in tool_configurations):
+            raise ValueError("parameter configuration mismatch, please republish the tool to update")
--- a/api/core/datasource/utils/yaml_utils.py
+++ b/api/core/datasource/utils/yaml_utils.py
@ -0,0 +1,35 @@
+import logging
+from pathlib import Path
+from typing import Any
+
+import yaml  # type: ignore
+from yaml import YAMLError
+
+logger = logging.getLogger(__name__)
+
+
+# marks "default_value not passed", so that every such call gets its own fresh dict
+_DEFAULT_VALUE_UNSET: Any = object()
+
+
+def load_yaml_file(file_path: str, ignore_error: bool = True, default_value: Any = _DEFAULT_VALUE_UNSET) -> Any:
+    """
+    Safe loading a YAML file
+    :param file_path: the path of the YAML file
+    :param ignore_error:
+        if True, return default_value if the file is missing or cannot be loaded;
+        a loading error is logged in debug level
+        if False, raise error if error occurs
+    :param default_value: the value returned when errors ignored or the file is empty,
+        a new empty dict if not given
+    :return: an object of the YAML content
+    :raises FileNotFoundError: if the file does not exist and ignore_error is False
+    :raises YAMLError: if the file cannot be loaded and ignore_error is False
+    """
+    if default_value is _DEFAULT_VALUE_UNSET:
+        # a shared `{}` default would leak one caller's mutations into every later call
+        default_value = {}
+
+    if not file_path or not Path(file_path).exists():
+        if ignore_error:
+            return default_value
+        else:
+            raise FileNotFoundError(f"File not found: {file_path}")
+
+    with open(file_path, encoding="utf-8") as yaml_file:
+        try:
+            yaml_content = yaml.safe_load(yaml_file)
+            # an empty file loads as None, fall back to the default in that case
+            return yaml_content or default_value
+        except Exception as e:
+            if ignore_error:
+                logger.debug("Failed to load YAML file %s: %s", file_path, e)
+                return default_value
+            else:
+                raise YAMLError(f"Failed to load YAML file {file_path}: {e}") from e
--- a/api/core/datasource/website_crawl/website_crawl_plugin.py
+++ b/api/core/datasource/website_crawl/website_crawl_plugin.py
@ -0,0 +1,53 @@
+from collections.abc import Generator, Mapping
+from typing import Any
+
+from core.datasource.__base.datasource_plugin import DatasourcePlugin
+from core.datasource.__base.datasource_runtime import DatasourceRuntime
+from core.datasource.entities.datasource_entities import (
+    DatasourceEntity,
+    DatasourceProviderType,
+    WebsiteCrawlMessage,
+)
+from core.plugin.impl.datasource import PluginDatasourceManager
+
+
+class WebsiteCrawlDatasourcePlugin(DatasourcePlugin):
+    """
+    datasource plugin that forwards website crawl requests to PluginDatasourceManager
+    """
+
+    tenant_id: str
+    icon: str
+    plugin_unique_identifier: str
+    entity: DatasourceEntity
+    runtime: DatasourceRuntime
+
+    def __init__(
+        self,
+        entity: DatasourceEntity,
+        runtime: DatasourceRuntime,
+        tenant_id: str,
+        icon: str,
+        plugin_unique_identifier: str,
+    ) -> None:
+        # entity and runtime are handled by the base class, the rest is kept on this instance
+        super().__init__(entity, runtime)
+        self.tenant_id = tenant_id
+        self.icon = icon
+        self.plugin_unique_identifier = plugin_unique_identifier
+
+    def get_website_crawl(
+        self,
+        user_id: str,
+        datasource_parameters: Mapping[str, Any],
+        provider_type: str,
+    ) -> Generator[WebsiteCrawlMessage, None, None]:
+        """
+        run the website crawl through the plugin datasource manager
+
+        :param user_id: the id of the user the crawl is run for
+        :param datasource_parameters: the parameters passed on to the datasource
+        :param provider_type: the provider type passed on to the manager
+        :return: the generator returned by the manager
+        """
+        crawl_manager = PluginDatasourceManager()
+        identity = self.entity.identity
+
+        return crawl_manager.get_website_crawl(
+            tenant_id=self.tenant_id,
+            user_id=user_id,
+            datasource_provider=identity.provider,
+            datasource_name=identity.name,
+            credentials=self.runtime.credentials,
+            datasource_parameters=datasource_parameters,
+            provider_type=provider_type,
+        )
+
+    def datasource_provider_type(self) -> str:
+        """
+        returns the type of the provider
+        """
+        return DatasourceProviderType.WEBSITE_CRAWL
--- a/api/core/datasource/website_crawl/website_crawl_provider.py
+++ b/api/core/datasource/website_crawl/website_crawl_provider.py
@ -0,0 +1,52 @@
+from core.datasource.__base.datasource_provider import DatasourcePluginProviderController
+from core.datasource.__base.datasource_runtime import DatasourceRuntime
+from core.datasource.entities.datasource_entities import DatasourceProviderEntityWithPlugin, DatasourceProviderType
+from core.datasource.website_crawl.website_crawl_plugin import WebsiteCrawlDatasourcePlugin
+
+
+class WebsiteCrawlDatasourcePluginProviderController(DatasourcePluginProviderController):
+    """
+    provider controller that creates WebsiteCrawlDatasourcePlugin instances for its datasources
+    """
+
+    entity: DatasourceProviderEntityWithPlugin
+    plugin_id: str
+    plugin_unique_identifier: str
+
+    def __init__(
+        self,
+        entity: DatasourceProviderEntityWithPlugin,
+        plugin_id: str,
+        plugin_unique_identifier: str,
+        tenant_id: str,
+    ) -> None:
+        # entity and tenant_id are handled by the base class
+        super().__init__(entity, tenant_id)
+        self.plugin_id = plugin_id
+        self.plugin_unique_identifier = plugin_unique_identifier
+
+    @property
+    def provider_type(self) -> DatasourceProviderType:
+        """
+        the type of this provider, always WEBSITE_CRAWL
+        """
+        return DatasourceProviderType.WEBSITE_CRAWL
+
+    def get_datasource(self, datasource_name: str) -> WebsiteCrawlDatasourcePlugin:  # type: ignore
+        """
+        return the datasource with the given name
+
+        :param datasource_name: the identity name of the datasource to look up
+        :return: a plugin built from the first datasource entity with that name
+        :raises ValueError: if no datasource has that name
+        """
+        matched_entity = None
+        for candidate in self.entity.datasources:
+            if candidate.identity.name == datasource_name:
+                matched_entity = candidate
+                break
+
+        if not matched_entity:
+            raise ValueError(f"Datasource with name {datasource_name} not found")
+
+        plugin_runtime = DatasourceRuntime(tenant_id=self.tenant_id)
+        return WebsiteCrawlDatasourcePlugin(
+            entity=matched_entity,
+            runtime=plugin_runtime,
+            tenant_id=self.tenant_id,
+            icon=self.entity.identity.icon,
+            plugin_unique_identifier=self.plugin_unique_identifier,
+        )
--- a/api/core/entities/knowledge_entities.py
+++ b/api/core/entities/knowledge_entities.py
@ -17,3 +17,27 @@ class IndexingEstimate(BaseModel):
    total_segments: int
    preview: list[PreviewDetail]
    qa_preview: Optional[list[QAPreviewDetail]] = None
+
+
+# Dataset part of a PipelineGenerateResponse; all four fields are required strings.
+class PipelineDataset(BaseModel):
+    id: str
+    name: str
+    description: str
+    # NOTE(review): presumably the identifier of the dataset's chunking structure - confirm against the producer
+    chunk_structure: str
+
+
+# One document entry of a PipelineGenerateResponse.
+# data_source_info and error are optional and default to None, every other field is required.
+class PipelineDocument(BaseModel):
+    id: str
+    position: int
+    data_source_type: str
+    data_source_info: Optional[dict] = None
+    name: str
+    # NOTE(review): the set of valid status strings is not visible here - confirm against the producer
+    indexing_status: str
+    error: Optional[str] = None
+    enabled: bool
+
+
+# Response model bundling a batch identifier with one dataset and its documents.
+class PipelineGenerateResponse(BaseModel):
+    # NOTE(review): presumably the id of the generation batch the documents belong to - confirm
+    batch: str
+    dataset: PipelineDataset
+    documents: list[PipelineDocument]
--- a/api/core/entities/parameter_entities.py
+++ b/api/core/entities/parameter_entities.py
@ -15,6 +15,11 @@ class CommonParameterType(StrEnum):
    MODEL_SELECTOR = "model-selector"
    TOOLS_SELECTOR = "array[tools]"

+    # Dynamic select parameter
+    # Used when the available options are not known until authorization is done
+    # e.g. select a Slack channel from a Slack workspace
+    DYNAMIC_SELECT = "dynamic-select"
+
    # TOOL_SELECTOR = "tool-selector"


--- a/api/core/file/datasource_file_parser.py
+++ b/api/core/file/datasource_file_parser.py
@ -0,0 +1,15 @@
+from typing import TYPE_CHECKING, Any, cast
+
+from core.datasource import datasource_file_manager
+from core.datasource.datasource_file_manager import DatasourceFileManager
+
+if TYPE_CHECKING:
+    from core.datasource.datasource_file_manager import DatasourceFileManager
+
+# registry slot for the manager, it starts as None
+# NOTE(review): presumably filled in by the module that defines DatasourceFileManager, and the
+# name `tool_file_manager` is kept in case other modules import it - confirm
+tool_file_manager: dict[str, Any] = {"manager": None}
+
+
+class DatasourceFileParser:
+    @staticmethod
+    def get_datasource_file_manager() -> "DatasourceFileManager":
+        """
+        return whatever is registered under "manager" in the registry dict of this module
+
+        the value is only cast for the type checker, it is None until something is registered
+        """
+        # `datasource_file_manager` imported at the top of this file is a module and cannot be
+        # subscripted, the registry dict defined above is `tool_file_manager`
+        return cast("DatasourceFileManager", tool_file_manager["manager"])
--- a/api/core/file/enums.py
+++ b/api/core/file/enums.py
@ -20,6 +20,7 @@ class FileTransferMethod(StrEnum):
    REMOTE_URL = "remote_url"
    LOCAL_FILE = "local_file"
    TOOL_FILE = "tool_file"
+    DATASOURCE_FILE = "datasource_file"

    @staticmethod
    def value_of(value):
--- a/api/core/indexing_runner.py
+++ b/api/core/indexing_runner.py
@ -534,7 +534,7 @@ class IndexingRunner:
        # chunk nodes by chunk size
        indexing_start_at = time.perf_counter()
        tokens = 0
-        if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX:
+        if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX and dataset.indexing_technique == "economy":
            # create keyword index
            create_keyword_thread = threading.Thread(
                target=self._process_keyword_index,
@ -572,7 +572,7 @@ class IndexingRunner:

                for future in futures:
                    tokens += future.result()
-        if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX:
+        if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX and dataset.indexing_technique == "economy":
            create_keyword_thread.join()
        indexing_end_at = time.perf_counter()

--- a/api/core/llm_generator/output_parser/structured_output.py
+++ b/api/core/llm_generator/output_parser/structured_output.py
@ -0,0 +1,374 @@
+import json
+from collections.abc import Generator, Mapping, Sequence
+from copy import deepcopy
+from enum import StrEnum
+from typing import Any, Literal, Optional, cast, overload
+
+import json_repair
+from pydantic import TypeAdapter, ValidationError
+
+from core.llm_generator.output_parser.errors import OutputParserError
+from core.llm_generator.prompts import STRUCTURED_OUTPUT_PROMPT
+from core.model_manager import ModelInstance
+from core.model_runtime.callbacks.base_callback import Callback
+from core.model_runtime.entities.llm_entities import (
+    LLMResult,
+    LLMResultChunk,
+    LLMResultChunkDelta,
+    LLMResultChunkWithStructuredOutput,
+    LLMResultWithStructuredOutput,
+)
+from core.model_runtime.entities.message_entities import (
+    AssistantPromptMessage,
+    PromptMessage,
+    PromptMessageTool,
+    SystemPromptMessage,
+)
+from core.model_runtime.entities.model_entities import AIModelEntity, ParameterRule
+
+
+class ResponseFormat(StrEnum):
+    """Values written to the ``response_format`` model parameter by this module."""
+
+    JSON_SCHEMA = "json_schema"  # structured-output mode; some models (e.g. gemini, gpt-4o) support it.
+    JSON = "JSON"  # JSON mode; some models (e.g. claude) support it.
+    JSON_OBJECT = "json_object"  # alias of JSON mode used by some models (e.g. deepseek-chat, qwen).
+
+
+class SpecialModelType(StrEnum):
+    """Substrings matched against the model name / provider to pick schema formatting."""
+
+    GEMINI = "gemini"  # checked with `in` against the model name in _prepare_schema_for_model
+    OLLAMA = "ollama"  # checked with `in` against the provider string in _prepare_schema_for_model
+
+
+# Typing overload: stream=True (also the default) -> generator of structured-output chunks.
+@overload
+def invoke_llm_with_structured_output(
+    provider: str,
+    model_schema: AIModelEntity,
+    model_instance: ModelInstance,
+    prompt_messages: Sequence[PromptMessage],
+    json_schema: Mapping[str, Any],
+    model_parameters: Optional[Mapping] = None,
+    tools: Sequence[PromptMessageTool] | None = None,
+    stop: Optional[list[str]] = None,
+    stream: Literal[True] = True,
+    user: Optional[str] = None,
+    callbacks: Optional[list[Callback]] = None,
+) -> Generator[LLMResultChunkWithStructuredOutput, None, None]: ...
+
+
+# Typing overload: stream=False -> a single, complete structured-output result.
+@overload
+def invoke_llm_with_structured_output(
+    provider: str,
+    model_schema: AIModelEntity,
+    model_instance: ModelInstance,
+    prompt_messages: Sequence[PromptMessage],
+    json_schema: Mapping[str, Any],
+    model_parameters: Optional[Mapping] = None,
+    tools: Sequence[PromptMessageTool] | None = None,
+    stop: Optional[list[str]] = None,
+    stream: Literal[False] = False,
+    user: Optional[str] = None,
+    callbacks: Optional[list[Callback]] = None,
+) -> LLMResultWithStructuredOutput: ...
+
+
+# Typing overload: `stream` only known to be a bool -> either of the two return shapes.
+@overload
+def invoke_llm_with_structured_output(
+    provider: str,
+    model_schema: AIModelEntity,
+    model_instance: ModelInstance,
+    prompt_messages: Sequence[PromptMessage],
+    json_schema: Mapping[str, Any],
+    model_parameters: Optional[Mapping] = None,
+    tools: Sequence[PromptMessageTool] | None = None,
+    stop: Optional[list[str]] = None,
+    stream: bool = True,
+    user: Optional[str] = None,
+    callbacks: Optional[list[Callback]] = None,
+) -> LLMResultWithStructuredOutput | Generator[LLMResultChunkWithStructuredOutput, None, None]: ...
+
+
+def invoke_llm_with_structured_output(
+    provider: str,
+    model_schema: AIModelEntity,
+    model_instance: ModelInstance,
+    prompt_messages: Sequence[PromptMessage],
+    json_schema: Mapping[str, Any],
+    model_parameters: Optional[Mapping] = None,
+    tools: Sequence[PromptMessageTool] | None = None,
+    stop: Optional[list[str]] = None,
+    stream: bool = True,
+    user: Optional[str] = None,
+    callbacks: Optional[list[Callback]] = None,
+) -> LLMResultWithStructuredOutput | Generator[LLMResultChunkWithStructuredOutput, None, None]:
+    """
+    Invoke large language model with structured output
+    1. Configure the request: native JSON-schema parameters when the model schema
+       reports ``support_structure_output``, otherwise a JSON response format plus
+       a schema-carrying system prompt
+    2. Invoke ``model_instance.invoke_llm``
+    3. Parse the returned text as structured output
+
+    :param provider: provider name, used to pick the schema format
+    :param model_schema: schema of the invoked model (capabilities, parameter rules, model name)
+    :param model_instance: model instance used for the invocation
+    :param prompt_messages: prompt messages
+    :param json_schema: json schema
+    :param model_parameters: model parameters (copied, never mutated)
+    :param tools: tools for tool calling
+    :param stop: stop words
+    :param stream: is stream response
+    :param user: unique user id
+    :param callbacks: callbacks
+    :return: full response or stream response chunk generator result
+    :raises OutputParserError: if the output cannot be parsed into a mapping, or a
+        non-stream result's content is not a string
+    """
+
+    # Copy so the caller's mapping is never modified by the helpers below.
+    model_parameters_with_json_schema: dict[str, Any] = dict(model_parameters or {})
+
+    if model_schema.support_structure_output:
+        # handle native json schema; the helper updates the dict in place
+        _handle_native_json_schema(
+            provider, model_schema, json_schema, model_parameters_with_json_schema, model_schema.parameter_rules
+        )
+    else:
+        # Set appropriate response format based on model capabilities
+        _set_response_format(model_parameters_with_json_schema, model_schema.parameter_rules)
+
+        # handle prompt based schema
+        prompt_messages = _handle_prompt_based_schema(
+            prompt_messages=prompt_messages,
+            structured_output_schema=json_schema,
+        )
+
+    llm_result = model_instance.invoke_llm(
+        prompt_messages=list(prompt_messages),
+        model_parameters=model_parameters_with_json_schema,
+        tools=tools,
+        stop=stop,
+        stream=stream,
+        user=user,
+        callbacks=callbacks,
+    )
+
+    if isinstance(llm_result, LLMResult):
+        if not isinstance(llm_result.message.content, str):
+            raise OutputParserError(
+                f"Failed to parse structured output, LLM result is not a string: {llm_result.message.content}"
+            )
+
+        return LLMResultWithStructuredOutput(
+            structured_output=_parse_structured_output(llm_result.message.content),
+            model=llm_result.model,
+            message=llm_result.message,
+            usage=llm_result.usage,
+            system_fingerprint=llm_result.system_fingerprint,
+            prompt_messages=llm_result.prompt_messages,
+        )
+    else:
+
+        def generator() -> Generator[LLMResultChunkWithStructuredOutput, None, None]:
+            # Text fragments are collected and joined once at the end.
+            text_parts: list[str] = []
+            last_prompt_messages: Sequence[PromptMessage] = []
+            last_system_fingerprint: Optional[str] = None
+            for event in llm_result:
+                if isinstance(event, LLMResultChunk):
+                    # Track metadata for every chunk, not only those with string
+                    # content, so re-emitted chunks never carry stale values.
+                    last_prompt_messages = event.prompt_messages
+                    last_system_fingerprint = event.system_fingerprint
+                    if isinstance(event.delta.message.content, str):
+                        text_parts.append(event.delta.message.content)
+
+                # Re-emit each upstream chunk; structured_output stays unset here.
+                yield LLMResultChunkWithStructuredOutput(
+                    model=model_schema.model,
+                    prompt_messages=last_prompt_messages,
+                    system_fingerprint=last_system_fingerprint,
+                    delta=event.delta,
+                )
+
+            # Final synthetic chunk: empty delta carrying the parsed structured output.
+            yield LLMResultChunkWithStructuredOutput(
+                structured_output=_parse_structured_output("".join(text_parts)),
+                model=model_schema.model,
+                prompt_messages=last_prompt_messages,
+                system_fingerprint=last_system_fingerprint,
+                delta=LLMResultChunkDelta(
+                    index=0,
+                    message=AssistantPromptMessage(content=""),
+                    usage=None,
+                    finish_reason=None,
+                ),
+            )
+
+        return generator()
+
+
+def _handle_native_json_schema(
+    provider: str,
+    model_schema: AIModelEntity,
+    structured_output_schema: Mapping,
+    model_parameters: dict,
+    rules: list[ParameterRule],
+) -> dict:
+    """
+    Configure model parameters for a model with native JSON schema support.
+
+    :param provider: provider name, forwarded to the schema preparation step
+    :param model_schema: model schema, forwarded to the schema preparation step
+    :param structured_output_schema: JSON schema the output must follow
+    :param model_parameters: Model parameters, updated in place
+    :param rules: Model parameter rules
+    :return: The same ``model_parameters`` dict, now carrying the JSON schema configuration
+    """
+    # Serialize the model-specific form of the schema into the parameters.
+    model_parameters["json_schema"] = json.dumps(
+        _prepare_schema_for_model(provider, model_schema, structured_output_schema),
+        ensure_ascii=False,
+    )
+
+    # Only request the json_schema response format when a rule offers it.
+    json_schema_format = ResponseFormat.JSON_SCHEMA.value
+    for rule in rules:
+        if rule.name != "response_format":
+            continue
+        if json_schema_format in rule.options:
+            model_parameters["response_format"] = json_schema_format
+
+    return model_parameters
+
+
+def _set_response_format(model_parameters: dict, rules: list) -> None:
+    """
+    Pick a JSON response format from the model's ``response_format`` rule, if any.
+
+    ``JSON`` is preferred over ``json_object``; nothing is written when a rule
+    offers neither option.
+
+    :param model_parameters: Model parameters, updated in place
+    :param rules: Model parameter rules
+    """
+    json_mode = ResponseFormat.JSON.value
+    json_object_mode = ResponseFormat.JSON_OBJECT.value
+
+    for rule in rules:
+        if rule.name != "response_format":
+            continue
+        supported = rule.options
+        if json_mode in supported:
+            model_parameters["response_format"] = json_mode
+            continue
+        if json_object_mode in supported:
+            model_parameters["response_format"] = json_object_mode
+
+
+def _handle_prompt_based_schema(
+    prompt_messages: Sequence[PromptMessage], structured_output_schema: Mapping
+) -> list[PromptMessage]:
+    """
+    Build prompt messages for models without native JSON schema support.
+
+    All system messages are replaced by a single new system message placed first:
+    the structured-output instructions (with the schema embedded), followed by the
+    content of the first original system message when that content is a string.
+
+    Args:
+        prompt_messages: Original sequence of prompt messages
+        structured_output_schema: JSON schema embedded into the instructions
+
+    Returns:
+        list[PromptMessage]: The new system message followed by every non-system message
+    """
+    # Embed the serialized schema into the instruction template.
+    instructions = STRUCTURED_OUTPUT_PROMPT.replace(
+        "{{schema}}", json.dumps(structured_output_schema, ensure_ascii=False)
+    )
+
+    # Split the messages: remember the first system message, keep all non-system ones.
+    first_system = None
+    remaining: list[PromptMessage] = []
+    for message in prompt_messages:
+        if isinstance(message, SystemPromptMessage):
+            if first_system is None:
+                first_system = message
+        else:
+            remaining.append(message)
+
+    # Append the original system text only when it is plain string content.
+    if first_system and isinstance(first_system.content, str):
+        combined_content = instructions + "\n\n" + first_system.content
+    else:
+        combined_content = instructions
+
+    return [SystemPromptMessage(content=combined_content)] + remaining
+
+
+def _parse_structured_output(result_text: str) -> Mapping[str, Any]:
+    """
+    Parse LLM output text into a mapping.
+
+    Strict JSON validation is tried first. On a validation error the text is run
+    through ``json_repair``: a repaired dict is returned directly, a repaired list
+    yields its first dict element (or ``{}`` when it has none), anything else fails.
+
+    :param result_text: raw text produced by the model
+    :return: the parsed structured output
+    :raises OutputParserError: if no mapping can be obtained from the text
+    """
+    failure_message = f"Failed to parse structured output: {result_text}"
+    try:
+        strict_result = TypeAdapter(Mapping).validate_json(result_text)
+        if isinstance(strict_result, dict):
+            return strict_result
+        raise OutputParserError(failure_message)
+    except ValidationError:
+        # if the result_text is not a valid json, try to repair it
+        repaired = json_repair.loads(result_text)
+        if isinstance(repaired, dict):
+            return cast(dict, repaired)
+        if isinstance(repaired, list):
+            # handle reasoning model like deepseek-r1 got '<think>\n\n</think>\n' prefix
+            first_dict = next((item for item in repaired if isinstance(item, dict)), {})
+            return cast(dict, first_dict)
+        raise OutputParserError(failure_message)
+
+
+def _prepare_schema_for_model(provider: str, model_schema: AIModelEntity, schema: Mapping) -> dict:
+    """
+    Produce the model-specific form of a JSON schema.
+
+    The input is deep-copied, boolean types are rewritten to string, and then:
+    - model name containing "gemini": ``additionalProperties`` is stripped, bare schema returned
+    - provider containing "ollama": bare schema returned
+    - otherwise: the schema is wrapped as ``{"schema": ..., "name": "llm_response"}``
+
+    :param provider: provider name
+    :param model_schema: schema of the target model (its ``model`` name is inspected)
+    :param schema: The original JSON schema (left unmodified)
+    :return: Processed schema compatible with the current model
+    """
+    # Work on a private deep copy; the helpers below mutate in place.
+    normalized = dict(deepcopy(schema))
+    convert_boolean_to_string(normalized)
+
+    if SpecialModelType.GEMINI in model_schema.model:
+        remove_additional_properties(normalized)
+        return normalized
+
+    if SpecialModelType.OLLAMA in provider:
+        return normalized
+
+    # Default format with name field
+    return {"schema": normalized, "name": "llm_response"}
+
+
+def remove_additional_properties(schema: dict) -> None:
+    """
+    Strip every ``additionalProperties`` key from a JSON schema, in place.
+
+    Descends into dict values and into dicts that are direct items of list
+    values; non-dict input is ignored.
+
+    :param schema: JSON schema to modify in-place
+    """
+    if not isinstance(schema, dict):
+        return
+
+    # Drop the key at this level before visiting children.
+    schema.pop("additionalProperties", None)
+
+    for child in schema.values():
+        # Treat a lone value as a one-item list so both shapes share one loop.
+        candidates = child if isinstance(child, list) else [child]
+        for node in candidates:
+            if isinstance(node, dict):
+                remove_additional_properties(node)
+
+
+def convert_boolean_to_string(schema: dict) -> None:
+    """
+    Rewrite every ``"type": "boolean"`` entry of a JSON schema to ``"type": "string"``, in place.
+
+    Descends into dict values and into dicts that are direct items of list
+    values; non-dict input is ignored.
+
+    :param schema: JSON schema to modify in-place
+    """
+    if not isinstance(schema, dict):
+        return
+
+    # Rewrite the type declared at this level, if it is boolean.
+    if schema.get("type") == "boolean":
+        schema["type"] = "string"
+
+    for child in schema.values():
+        # Treat a lone value as a one-item list so both shapes share one loop.
+        candidates = child if isinstance(child, list) else [child]
+        for node in candidates:
+            if isinstance(node, dict):
+                convert_boolean_to_string(node)
--- a/api/core/llm_generator/prompts.py
+++ b/api/core/llm_generator/prompts.py
@ -291,3 +291,21 @@ Your task is to convert simple user descriptions into properly formatted JSON Sc

 Now, generate a JSON Schema based on my description
 """  # noqa: E501
+
+# System-prompt template for prompt-based structured output. The literal
+# `{{schema}}` placeholder is substituted via str.replace with the serialized
+# JSON schema (see _handle_prompt_based_schema in output_parser/structured_output.py).
+# NOTE(review): the "integer or float ... use number type" constraint reads
+# ambiguously next to the example output (`"age": 30`) — confirm intended wording.
+STRUCTURED_OUTPUT_PROMPT = """You’re a helpful AI assistant. You could answer questions and output in JSON format.
+constraints:
+    - You must output in JSON format.
+    - Do not output boolean value, use string type instead.
+    - Do not output integer or float value, use number type instead.
+eg:
+    Here is the JSON schema:
+    {"additionalProperties": false, "properties": {"age": {"type": "number"}, "name": {"type": "string"}}, "required": ["name", "age"], "type": "object"}
+
+    Here is the user's question:
+    My name is John Doe and I am 30 years old.
+
+    output:
+    {"name": "John Doe", "age": 30}
+Here is the JSON schema:
+{{schema}}
+"""  # noqa: E501
--- a/api/core/model_runtime/entities/llm_entities.py
+++ b/api/core/model_runtime/entities/llm_entities.py
@ -1,7 +1,7 @@
-from collections.abc import Sequence
+from collections.abc import Mapping, Sequence
 from decimal import Decimal
 from enum import StrEnum
-from typing import Optional
+from typing import Any, Optional

 from pydantic import BaseModel, Field

@ -101,6 +101,20 @@ class LLMResult(BaseModel):
    system_fingerprint: Optional[str] = None


+class LLMStructuredOutput(BaseModel):
+    """
+    Model class for llm structured output.
+
+    Used as a mixin base by the result / result-chunk variants in this module.
+    """
+
+    # Parsed structured output as a string-keyed mapping; None when not set.
+    structured_output: Optional[Mapping[str, Any]] = None
+
+
+class LLMResultWithStructuredOutput(LLMResult, LLMStructuredOutput):
+    """
+    Model class for llm result with structured output.
+
+    Combines the fields of LLMResult with the optional ``structured_output`` field.
+    """
+
+
 class LLMResultChunkDelta(BaseModel):
    """
    Model class for llm result chunk delta.
@ -123,6 +137,12 @@ class LLMResultChunk(BaseModel):
    delta: LLMResultChunkDelta


+class LLMResultChunkWithStructuredOutput(LLMResultChunk, LLMStructuredOutput):
+    """
+    Model class for llm result chunk with structured output.
+
+    Combines the fields of LLMResultChunk with the optional ``structured_output`` field.
+    """
+
+
 class NumTokensResult(PriceInfo):
    """
    Model class for number of tokens result.
--- a/api/core/ops/entities/trace_entity.py
+++ b/api/core/ops/entities/trace_entity.py
@ -135,3 +135,4 @@ class TraceTaskName(StrEnum):
    DATASET_RETRIEVAL_TRACE = "dataset_retrieval"
    TOOL_TRACE = "tool"
    GENERATE_NAME_TRACE = "generate_conversation_name"
+    DATASOURCE_TRACE = "datasource"
--- a/api/core/ops/langfuse_trace/langfuse_trace.py
+++ b/api/core/ops/langfuse_trace/langfuse_trace.py
@ -83,6 +83,7 @@ class LangFuseDataTrace(BaseTraceInstance):
                metadata=metadata,
                session_id=trace_info.conversation_id,
                tags=["message", "workflow"],
+                version=trace_info.workflow_run_version,
            )
            self.add_trace(langfuse_trace_data=trace_data)
            workflow_span_data = LangfuseSpan(
@ -108,6 +109,7 @@ class LangFuseDataTrace(BaseTraceInstance):
                metadata=metadata,
                session_id=trace_info.conversation_id,
                tags=["workflow"],
+                version=trace_info.workflow_run_version,
            )
            self.add_trace(langfuse_trace_data=trace_data)

@ -172,37 +174,7 @@ class LangFuseDataTrace(BaseTraceInstance):
                    }
                )

-            # add span
-            if trace_info.message_id:
-                span_data = LangfuseSpan(
-                    id=node_execution_id,
-                    name=node_type,
-                    input=inputs,
-                    output=outputs,
-                    trace_id=trace_id,
-                    start_time=created_at,
-                    end_time=finished_at,
-                    metadata=metadata,
-                    level=(LevelEnum.DEFAULT if status == "succeeded" else LevelEnum.ERROR),
-                    status_message=trace_info.error or "",
-                    parent_observation_id=trace_info.workflow_run_id,
-                )
-            else:
-                span_data = LangfuseSpan(
-                    id=node_execution_id,
-                    name=node_type,
-                    input=inputs,
-                    output=outputs,
-                    trace_id=trace_id,
-                    start_time=created_at,
-                    end_time=finished_at,
-                    metadata=metadata,
-                    level=(LevelEnum.DEFAULT if status == "succeeded" else LevelEnum.ERROR),
-                    status_message=trace_info.error or "",
-                )
-
-            self.add_span(langfuse_span_data=span_data)
-
+            # add generation span
            if process_data and process_data.get("model_mode") == "chat":
                total_token = metadata.get("total_tokens", 0)
                prompt_tokens = 0
@ -226,10 +198,10 @@ class LangFuseDataTrace(BaseTraceInstance):
                )

                node_generation_data = LangfuseGeneration(
-                    name="llm",
+                    id=node_execution_id,
+                    name=node_name,
                    trace_id=trace_id,
                    model=process_data.get("model_name"),
-                    parent_observation_id=node_execution_id,
                    start_time=created_at,
                    end_time=finished_at,
                    input=inputs,
@ -237,11 +209,30 @@ class LangFuseDataTrace(BaseTraceInstance):
                    metadata=metadata,
                    level=(LevelEnum.DEFAULT if status == "succeeded" else LevelEnum.ERROR),
                    status_message=trace_info.error or "",
+                    parent_observation_id=trace_info.workflow_run_id if trace_info.message_id else None,
                    usage=generation_usage,
                )

                self.add_generation(langfuse_generation_data=node_generation_data)

+            # add normal span
+            else:
+                span_data = LangfuseSpan(
+                    id=node_execution_id,
+                    name=node_name,
+                    input=inputs,
+                    output=outputs,
+                    trace_id=trace_id,
+                    start_time=created_at,
+                    end_time=finished_at,
+                    metadata=metadata,
+                    level=(LevelEnum.DEFAULT if status == "succeeded" else LevelEnum.ERROR),
+                    status_message=trace_info.error or "",
+                    parent_observation_id=trace_info.workflow_run_id if trace_info.message_id else None,
+                )
+
+                self.add_span(langfuse_span_data=span_data)
+
    def message_trace(self, trace_info: MessageTraceInfo, **kwargs):
        # get message file data
        file_list = trace_info.file_list
@ -284,7 +275,7 @@ class LangFuseDataTrace(BaseTraceInstance):
        )
        self.add_trace(langfuse_trace_data=trace_data)

-        # start add span
+        # add generation
        generation_usage = GenerationUsage(
            input=trace_info.message_tokens,
            output=trace_info.answer_tokens,
--- a/api/core/plugin/backwards_invocation/model.py
+++ b/api/core/plugin/backwards_invocation/model.py
@ -2,8 +2,15 @@ import tempfile
 from binascii import hexlify, unhexlify
 from collections.abc import Generator

+from core.llm_generator.output_parser.structured_output import invoke_llm_with_structured_output
 from core.model_manager import ModelManager
-from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.llm_entities import (
+    LLMResult,
+    LLMResultChunk,
+    LLMResultChunkDelta,
+    LLMResultChunkWithStructuredOutput,
+    LLMResultWithStructuredOutput,
+)
 from core.model_runtime.entities.message_entities import (
    PromptMessage,
    SystemPromptMessage,
@ -12,6 +19,7 @@ from core.model_runtime.entities.message_entities import (
 from core.plugin.backwards_invocation.base import BaseBackwardsInvocation
 from core.plugin.entities.request import (
    RequestInvokeLLM,
+    RequestInvokeLLMWithStructuredOutput,
    RequestInvokeModeration,
    RequestInvokeRerank,
    RequestInvokeSpeech2Text,
@ -81,6 +89,72 @@ class PluginModelBackwardsInvocation(BaseBackwardsInvocation):

            return handle_non_streaming(response)

+    @classmethod
+    def invoke_llm_with_structured_output(
+        cls, user_id: str, tenant: Tenant, payload: RequestInvokeLLMWithStructuredOutput
+    ):
+        """
+        Invoke an LLM with structured output on behalf of a plugin.
+
+        Resolves the tenant's model instance and its schema, delegates to the
+        module-level ``invoke_llm_with_structured_output`` helper, deducts LLM quota
+        for any reported usage, and always returns a generator of chunks (a
+        non-streaming result is wrapped into a single chunk).
+
+        :raises ValueError: if no model schema is found for ``payload.model``
+        """
+        model_instance = ModelManager().get_model_instance(
+            tenant_id=tenant.id,
+            provider=payload.provider,
+            model_type=payload.model_type,
+            model=payload.model,
+        )
+
+        model_schema = model_instance.model_type_instance.get_model_schema(payload.model, model_instance.credentials)
+        if not model_schema:
+            raise ValueError(f"Model schema not found for {payload.model}")
+
+        # An unset stream flag means "stream".
+        use_stream = payload.stream if payload.stream is not None else True
+
+        response = invoke_llm_with_structured_output(
+            provider=payload.provider,
+            model_schema=model_schema,
+            model_instance=model_instance,
+            prompt_messages=payload.prompt_messages,
+            json_schema=payload.structured_output_schema,
+            tools=payload.tools,
+            stop=payload.stop,
+            stream=use_stream,
+            user=user_id,
+            model_parameters=payload.completion_params,
+        )
+
+        if not isinstance(response, Generator):
+            # Full result: charge quota once, then wrap it as a one-chunk stream.
+            if response.usage:
+                llm_utils.deduct_llm_quota(tenant_id=tenant.id, model_instance=model_instance, usage=response.usage)
+
+            def handle_non_streaming(
+                response: LLMResultWithStructuredOutput,
+            ) -> Generator[LLMResultChunkWithStructuredOutput, None, None]:
+                final_delta = LLMResultChunkDelta(
+                    index=0,
+                    message=response.message,
+                    usage=response.usage,
+                    finish_reason="",
+                )
+                yield LLMResultChunkWithStructuredOutput(
+                    model=response.model,
+                    prompt_messages=[],
+                    system_fingerprint=response.system_fingerprint,
+                    structured_output=response.structured_output,
+                    delta=final_delta,
+                )
+
+            return handle_non_streaming(response)
+
+        def handle() -> Generator[LLMResultChunkWithStructuredOutput, None, None]:
+            for chunk in response:
+                usage = chunk.delta.usage
+                if usage:
+                    llm_utils.deduct_llm_quota(tenant_id=tenant.id, model_instance=model_instance, usage=usage)
+                # Prompt messages are cleared on every chunk before it is passed on.
+                chunk.prompt_messages = []
+                yield chunk
+
+        return handle()
+
    @classmethod
    def invoke_text_embedding(cls, user_id: str, tenant: Tenant, payload: RequestInvokeTextEmbedding):
        """
--- a/api/core/plugin/entities/oauth.py
+++ b/api/core/plugin/entities/oauth.py
@ -0,0 +1,21 @@
+from collections.abc import Sequence
+
+from pydantic import BaseModel, Field
+
+from core.entities.provider_entities import ProviderConfig
+
+
+class OAuthSchema(BaseModel):
+    """
+    OAuth schema
+
+    Holds two sequences of ProviderConfig entries; both default to an empty list.
+    """
+
+    # Config entries for the OAuth client (per the field description: client_id, client_secret, etc.).
+    client_schema: Sequence[ProviderConfig] = Field(
+        default_factory=list,
+        description="client schema like client_id, client_secret, etc.",
+    )
+
+    # Config entries for the credentials (per the field description: access_token, refresh_token, etc.).
+    credentials_schema: Sequence[ProviderConfig] = Field(
+        default_factory=list,
+        description="credentials schema like access_token, refresh_token, etc.",
+    )
--- a/api/core/plugin/entities/parameters.py
+++ b/api/core/plugin/entities/parameters.py
@ -10,6 +10,9 @@ from core.tools.entities.common_entities import I18nObject
 class PluginParameterOption(BaseModel):
    value: str = Field(..., description="The value of the option")
    label: I18nObject = Field(..., description="The label of the option")
+    icon: Optional[str] = Field(
+        default=None, description="The icon of the option, can be a url or a base64 encoded image"
+    )

    @field_validator("value", mode="before")
    @classmethod
@ -35,6 +38,7 @@ class PluginParameterType(enum.StrEnum):
    APP_SELECTOR = CommonParameterType.APP_SELECTOR.value
    MODEL_SELECTOR = CommonParameterType.MODEL_SELECTOR.value
    TOOLS_SELECTOR = CommonParameterType.TOOLS_SELECTOR.value
+    DYNAMIC_SELECT = CommonParameterType.DYNAMIC_SELECT.value

    # deprecated, should not use.
    SYSTEM_FILES = CommonParameterType.SYSTEM_FILES.value
--- a/api/core/plugin/entities/plugin.py
+++ b/api/core/plugin/entities/plugin.py
@ -8,6 +8,7 @@ from pydantic import BaseModel, Field, model_validator
 from werkzeug.exceptions import NotFound

 from core.agent.plugin_entities import AgentStrategyProviderEntity
+from core.datasource.entities.datasource_entities import DatasourceProviderEntity
 from core.model_runtime.entities.provider_entities import ProviderEntity
 from core.plugin.entities.base import BasePluginEntity
 from core.plugin.entities.endpoint import EndpointProviderDeclaration
@ -62,6 +63,7 @@ class PluginCategory(enum.StrEnum):
    Model = "model"
    Extension = "extension"
    AgentStrategy = "agent-strategy"
+    Datasource = "datasource"


 class PluginDeclaration(BaseModel):
@ -69,6 +71,7 @@ class PluginDeclaration(BaseModel):
        tools: Optional[list[str]] = Field(default_factory=list[str])
        models: Optional[list[str]] = Field(default_factory=list[str])
        endpoints: Optional[list[str]] = Field(default_factory=list[str])
+        datasources: Optional[list[str]] = Field(default_factory=list[str])

    class Meta(BaseModel):
        minimum_dify_version: Optional[str] = Field(default=None, pattern=r"^\d{1,4}(\.\d{1,4}){1,3}(-\w{1,16})?$")
@ -90,6 +93,7 @@ class PluginDeclaration(BaseModel):
    model: Optional[ProviderEntity] = None
    endpoint: Optional[EndpointProviderDeclaration] = None
    agent_strategy: Optional[AgentStrategyProviderEntity] = None
+    datasource: Optional[DatasourceProviderEntity] = None
    meta: Meta

    @model_validator(mode="before")
@ -100,6 +104,8 @@ class PluginDeclaration(BaseModel):
            values["category"] = PluginCategory.Tool
        elif values.get("model"):
            values["category"] = PluginCategory.Model
+        elif values.get("datasource"):
+            values["category"] = PluginCategory.Datasource
        elif values.get("agent_strategy"):
            values["category"] = PluginCategory.AgentStrategy
        else:
@ -193,6 +199,11 @@ class ToolProviderID(GenericProviderID):
                self.plugin_name = f"{self.provider_name}_tool"


+class DatasourceProviderID(GenericProviderID):
+    """Provider ID type for datasource providers."""
+
+    def __init__(self, value: str, is_hardcoded: bool = False) -> None:
+        # Pure delegation: no datasource-specific handling is added here.
+        super().__init__(value, is_hardcoded)
+
+
 class PluginDependency(BaseModel):
    class Type(enum.StrEnum):
        Github = PluginInstallationSource.Github.value
--- a/api/core/plugin/entities/plugin_daemon.py
+++ b/api/core/plugin/entities/plugin_daemon.py
@ -1,4 +1,4 @@
-from collections.abc import Mapping
+from collections.abc import Mapping, Sequence
 from datetime import datetime
 from enum import StrEnum
 from typing import Any, Generic, Optional, TypeVar
@ -6,9 +6,11 @@ from typing import Any, Generic, Optional, TypeVar
 from pydantic import BaseModel, ConfigDict, Field

 from core.agent.plugin_entities import AgentProviderEntityWithPlugin
+from core.datasource.entities.datasource_entities import DatasourceProviderEntityWithPlugin
 from core.model_runtime.entities.model_entities import AIModelEntity
 from core.model_runtime.entities.provider_entities import ProviderEntity
 from core.plugin.entities.base import BasePluginEntity
+from core.plugin.entities.parameters import PluginParameterOption
 from core.plugin.entities.plugin import PluginDeclaration, PluginEntity
 from core.tools.entities.common_entities import I18nObject
 from core.tools.entities.tool_entities import ToolProviderEntityWithPlugin
@ -47,6 +49,14 @@ class PluginToolProviderEntity(BaseModel):
    declaration: ToolProviderEntityWithPlugin


+class PluginDatasourceProviderEntity(BaseModel):
+    """Datasource provider entry; PluginDatasourceManager requests a list of these from the plugin daemon."""
+
+    provider: str
+    plugin_unique_identifier: str
+    plugin_id: str
+    # Defaults to False when the field is absent from the input.
+    is_authorized: bool = False
+    declaration: DatasourceProviderEntityWithPlugin
+
+
 class PluginAgentProviderEntity(BaseModel):
    provider: str
    plugin_unique_identifier: str
@ -186,3 +196,7 @@ class PluginOAuthCredentialsResponse(BaseModel):
 class PluginListResponse(BaseModel):
    list: list[PluginEntity]
    total: int
+
+
+class PluginDynamicSelectOptionsResponse(BaseModel):
+    """Response model carrying the options of a dynamic select parameter."""
+
+    # Required field (no default): the sequence of selectable options.
+    options: Sequence[PluginParameterOption] = Field(description="The options of the dynamic select.")
--- a/api/core/plugin/entities/request.py
+++ b/api/core/plugin/entities/request.py
@ -82,6 +82,16 @@ class RequestInvokeLLM(BaseRequestInvokeModel):
        return v


+class RequestInvokeLLMWithStructuredOutput(RequestInvokeLLM):
+    """
+    Request to invoke LLM with structured output
+
+    Extends RequestInvokeLLM with the JSON schema the output should follow.
+    """
+
+    # Defaults to an empty dict when the request does not supply a schema.
+    structured_output_schema: dict[str, Any] = Field(
+        default_factory=dict, description="The schema of the structured output in JSON schema format"
+    )
+
+
 class RequestInvokeTextEmbedding(BaseRequestInvokeModel):
    """
    Request to invoke text embedding
--- a/api/core/plugin/impl/datasource.py
+++ b/api/core/plugin/impl/datasource.py
@ -0,0 +1,329 @@
+from collections.abc import Generator, Mapping
+from typing import Any
+
+from core.datasource.entities.datasource_entities import (
+    DatasourceMessage,
+    GetOnlineDocumentPageContentRequest,
+    OnlineDocumentPagesMessage,
+    OnlineDriveBrowseFilesRequest,
+    OnlineDriveBrowseFilesResponse,
+    OnlineDriveDownloadFileRequest,
+    WebsiteCrawlMessage,
+)
+from core.plugin.entities.plugin import DatasourceProviderID, GenericProviderID
+from core.plugin.entities.plugin_daemon import (
+    PluginBasicBooleanResponse,
+    PluginDatasourceProviderEntity,
+)
+from core.plugin.impl.base import BasePluginClient
+from services.tools.tools_transform_service import ToolTransformService
+
+
+class PluginDatasourceManager(BasePluginClient):
+    def fetch_datasource_providers(self, tenant_id: str) -> list[PluginDatasourceProviderEntity]:
+        """
+        Fetch datasource providers for the given tenant.
+
+        The built-in local file provider is always placed first, followed by the
+        providers returned by the plugin daemon. Each provider's identity name is
+        rewritten to ``plugin_id/provider_name`` and copied onto its datasources.
+
+        :param tenant_id: tenant whose datasource providers are listed
+        :return: the local file provider followed by the daemon's providers
+        """
+
+        def transformer(json_response: dict[str, Any]) -> dict:
+            # Works on the raw JSON payload: stamp every datasource with the name of
+            # the provider that declares it. The ``or`` fallbacks make explicit nulls
+            # behave like missing keys instead of raising.
+            for provider in json_response.get("data") or []:
+                declaration = provider.get("declaration") or {}
+                provider_name = (declaration.get("identity") or {}).get("name")
+                for datasource in declaration.get("datasources") or []:
+                    datasource["identity"]["provider"] = provider_name
+
+            return json_response
+
+        # Only the first page (at most 256 providers) is requested.
+        response = self._request_with_plugin_daemon_response(
+            "GET",
+            f"plugin/{tenant_id}/management/datasources",
+            list[PluginDatasourceProviderEntity],
+            params={"page": 1, "page_size": 256},
+            transformer=transformer,
+        )
+        local_file_datasource_provider = PluginDatasourceProviderEntity(**self._get_local_file_datasource_provider())
+
+        # The local file provider is built in-process and is not repacked.
+        for provider in response:
+            ToolTransformService.repack_provider(tenant_id=tenant_id, provider=provider)
+        all_response = [local_file_datasource_provider] + response
+
+        for provider in all_response:
+            provider.declaration.identity.name = f"{provider.plugin_id}/{provider.declaration.identity.name}"
+
+            # override the provider name for each datasource to plugin_id/provider_name
+            for datasource in provider.declaration.datasources:
+                datasource.identity.provider = provider.declaration.identity.name
+
+        return all_response
+
+    def fetch_datasource_provider(self, tenant_id: str, provider_id: str) -> PluginDatasourceProviderEntity:
+        """
+        Fetch a single datasource provider for the given tenant.
+
+        The built-in local file provider (``langgenius/file/file``) is answered
+        locally without contacting the plugin daemon. For every other provider the
+        identity name is rewritten to ``plugin_id/provider_name`` and copied onto
+        each of its datasources.
+
+        NOTE(review): the local file provider is returned with its bare identity
+        name ("file"), whereas fetch_datasource_providers prefixes it with the
+        plugin id - confirm this difference is intended.
+
+        :param tenant_id: tenant the provider is looked up for
+        :param provider_id: provider identifier, parsed with DatasourceProviderID
+        :return: the matching datasource provider entity
+        """
+        if provider_id == "langgenius/file/file":
+            return PluginDatasourceProviderEntity(**self._get_local_file_datasource_provider())
+
+        datasource_provider_id = DatasourceProviderID(provider_id)
+
+        def transformer(json_response: dict[str, Any]) -> dict:
+            # Works on the raw JSON payload: stamp every datasource with the provider
+            # name. The ``or`` fallbacks make explicit nulls behave like missing keys.
+            data = json_response.get("data")
+            if data:
+                declaration = data.get("declaration") or {}
+                for datasource in declaration.get("datasources") or []:
+                    datasource["identity"]["provider"] = datasource_provider_id.provider_name
+
+            return json_response
+
+        response = self._request_with_plugin_daemon_response(
+            "GET",
+            f"plugin/{tenant_id}/management/datasource",
+            PluginDatasourceProviderEntity,
+            params={
+                "provider": datasource_provider_id.provider_name,
+                "plugin_id": datasource_provider_id.plugin_id,
+            },
+            transformer=transformer,
+        )
+
+        response.declaration.identity.name = f"{response.plugin_id}/{response.declaration.identity.name}"
+
+        # override the provider name for each datasource to plugin_id/provider_name
+        for datasource in response.declaration.datasources:
+            datasource.identity.provider = response.declaration.identity.name
+
+        return response
+
+    def get_website_crawl(
+        self,
+        tenant_id: str,
+        user_id: str,
+        datasource_provider: str,
+        datasource_name: str,
+        credentials: dict[str, Any],
+        datasource_parameters: Mapping[str, Any],
+        provider_type: str,
+    ) -> Generator[WebsiteCrawlMessage, None, None]:
+        """
+        Dispatch a website crawl to the plugin daemon and return its message stream.
+
+        :param tenant_id: tenant the request is dispatched for
+        :param user_id: user id forwarded in the request body
+        :param datasource_provider: provider identifier; split by GenericProviderID into plugin id and provider name
+        :param datasource_name: name of the datasource to invoke
+        :param credentials: credentials forwarded to the plugin
+        :param datasource_parameters: parameters forwarded to the datasource
+        :param provider_type: accepted but not used by this method
+        :return: stream of WebsiteCrawlMessage produced by the plugin daemon request
+        """
+        provider_id = GenericProviderID(datasource_provider)
+
+        payload = {
+            "user_id": user_id,
+            "data": {
+                "provider": provider_id.provider_name,
+                "datasource": datasource_name,
+                "credentials": credentials,
+                "datasource_parameters": datasource_parameters,
+            },
+        }
+        # The plugin is addressed through the X-Plugin-ID header, not the URL.
+        headers = {
+            "X-Plugin-ID": provider_id.plugin_id,
+            "Content-Type": "application/json",
+        }
+
+        return self._request_with_plugin_daemon_response_stream(
+            "POST",
+            f"plugin/{tenant_id}/dispatch/datasource/get_website_crawl",
+            WebsiteCrawlMessage,
+            data=payload,
+            headers=headers,
+        )
+
+    def get_online_document_pages(
+        self,
+        tenant_id: str,
+        user_id: str,
+        datasource_provider: str,
+        datasource_name: str,
+        credentials: dict[str, Any],
+        datasource_parameters: Mapping[str, Any],
+        provider_type: str,
+    ) -> Generator[OnlineDocumentPagesMessage, None, None]:
+        """
+        Ask the plugin daemon for the pages of an online document datasource.
+
+        :param tenant_id: tenant the request is dispatched for
+        :param user_id: user id forwarded in the request body
+        :param datasource_provider: provider identifier; split by GenericProviderID into plugin id and provider name
+        :param datasource_name: name of the datasource to invoke
+        :param credentials: credentials forwarded to the plugin
+        :param datasource_parameters: parameters forwarded to the datasource
+        :param provider_type: accepted but not used by this method
+        :return: stream of OnlineDocumentPagesMessage produced by the plugin daemon request
+        """
+        provider_id = GenericProviderID(datasource_provider)
+
+        payload = {
+            "user_id": user_id,
+            "data": {
+                "provider": provider_id.provider_name,
+                "datasource": datasource_name,
+                "credentials": credentials,
+                "datasource_parameters": datasource_parameters,
+            },
+        }
+        # The plugin is addressed through the X-Plugin-ID header, not the URL.
+        headers = {
+            "X-Plugin-ID": provider_id.plugin_id,
+            "Content-Type": "application/json",
+        }
+
+        return self._request_with_plugin_daemon_response_stream(
+            "POST",
+            f"plugin/{tenant_id}/dispatch/datasource/get_online_document_pages",
+            OnlineDocumentPagesMessage,
+            data=payload,
+            headers=headers,
+        )
+
+    def get_online_document_page_content(
+        self,
+        tenant_id: str,
+        user_id: str,
+        datasource_provider: str,
+        datasource_name: str,
+        credentials: dict[str, Any],
+        datasource_parameters: GetOnlineDocumentPageContentRequest,
+        provider_type: str,
+    ) -> Generator[DatasourceMessage, None, None]:
+        """
+        Ask the plugin daemon for the content of one online document page.
+
+        :param tenant_id: tenant the request is dispatched for
+        :param user_id: user id forwarded in the request body
+        :param datasource_provider: provider identifier; split by GenericProviderID into plugin id and provider name
+        :param datasource_name: name of the datasource to invoke
+        :param credentials: credentials forwarded to the plugin
+        :param datasource_parameters: page request; sent as its ``model_dump()`` under the ``page`` key
+        :param provider_type: accepted but not used by this method
+        :return: stream of DatasourceMessage produced by the plugin daemon request
+        """
+        provider_id = GenericProviderID(datasource_provider)
+
+        payload = {
+            "user_id": user_id,
+            "data": {
+                "provider": provider_id.provider_name,
+                "datasource": datasource_name,
+                "credentials": credentials,
+                "page": datasource_parameters.model_dump(),
+            },
+        }
+        # The plugin is addressed through the X-Plugin-ID header, not the URL.
+        headers = {
+            "X-Plugin-ID": provider_id.plugin_id,
+            "Content-Type": "application/json",
+        }
+
+        return self._request_with_plugin_daemon_response_stream(
+            "POST",
+            f"plugin/{tenant_id}/dispatch/datasource/get_online_document_page_content",
+            DatasourceMessage,
+            data=payload,
+            headers=headers,
+        )
+
+    def online_drive_browse_files(
+        self,
+        tenant_id: str,
+        user_id: str,
+        datasource_provider: str,
+        datasource_name: str,
+        credentials: dict[str, Any],
+        request: OnlineDriveBrowseFilesRequest,
+        provider_type: str,
+    ) -> Generator[OnlineDriveBrowseFilesResponse, None, None]:
+        """
+        Browse the files of an online drive datasource through the plugin daemon.
+
+        This is a generator function: nothing runs (not even parsing
+        ``datasource_provider``) until the returned generator is first iterated.
+
+        :param tenant_id: tenant the request is dispatched for
+        :param user_id: user id forwarded in the request body
+        :param datasource_provider: provider identifier; split by GenericProviderID into plugin id and provider name
+        :param datasource_name: name of the datasource to invoke
+        :param credentials: credentials forwarded to the plugin
+        :param request: browse request; sent as its ``model_dump()`` under the ``request`` key
+        :param provider_type: accepted but not used by this method
+        :return: generator re-yielding every OnlineDriveBrowseFilesResponse of the daemon stream
+        """
+        provider_id = GenericProviderID(datasource_provider)
+
+        payload = {
+            "user_id": user_id,
+            "data": {
+                "provider": provider_id.provider_name,
+                "datasource": datasource_name,
+                "credentials": credentials,
+                "request": request.model_dump(),
+            },
+        }
+        # The plugin is addressed through the X-Plugin-ID header, not the URL.
+        headers = {
+            "X-Plugin-ID": provider_id.plugin_id,
+            "Content-Type": "application/json",
+        }
+
+        yield from self._request_with_plugin_daemon_response_stream(
+            "POST",
+            f"plugin/{tenant_id}/dispatch/datasource/online_drive_browse_files",
+            OnlineDriveBrowseFilesResponse,
+            data=payload,
+            headers=headers,
+        )
+
+    def online_drive_download_file(
+        self,
+        tenant_id: str,
+        user_id: str,
+        datasource_provider: str,
+        datasource_name: str,
+        credentials: dict[str, Any],
+        request: OnlineDriveDownloadFileRequest,
+        provider_type: str,
+    ) -> Generator[DatasourceMessage, None, None]:
+        """
+        Download a file from an online drive datasource through the plugin daemon.
+
+        This is a generator function: nothing runs (not even parsing
+        ``datasource_provider``) until the returned generator is first iterated.
+
+        :param tenant_id: tenant the request is dispatched for
+        :param user_id: user id forwarded in the request body
+        :param datasource_provider: provider identifier; split by GenericProviderID into plugin id and provider name
+        :param datasource_name: name of the datasource to invoke
+        :param credentials: credentials forwarded to the plugin
+        :param request: download request; sent as its ``model_dump()`` under the ``request`` key
+        :param provider_type: accepted but not used by this method
+        :return: generator re-yielding every DatasourceMessage of the daemon stream
+        """
+        provider_id = GenericProviderID(datasource_provider)
+
+        payload = {
+            "user_id": user_id,
+            "data": {
+                "provider": provider_id.provider_name,
+                "datasource": datasource_name,
+                "credentials": credentials,
+                "request": request.model_dump(),
+            },
+        }
+        # The plugin is addressed through the X-Plugin-ID header, not the URL.
+        headers = {
+            "X-Plugin-ID": provider_id.plugin_id,
+            "Content-Type": "application/json",
+        }
+
+        yield from self._request_with_plugin_daemon_response_stream(
+            "POST",
+            f"plugin/{tenant_id}/dispatch/datasource/online_drive_download_file",
+            DatasourceMessage,
+            data=payload,
+            headers=headers,
+        )
+
+    def validate_provider_credentials(
+        self, tenant_id: str, user_id: str, provider: str, plugin_id: str, credentials: dict[str, Any]
+    ) -> bool:
+        """
+        Validate the credentials of a datasource provider.
+
+        Unlike the dispatch methods of this class, ``provider`` and ``plugin_id``
+        are received separately and forwarded as-is (no provider id parsing).
+
+        :param tenant_id: tenant the request is dispatched for
+        :param user_id: user id forwarded in the request body
+        :param provider: provider name sent in the request body
+        :param plugin_id: plugin id sent in the X-Plugin-ID header
+        :param credentials: credentials to validate
+        :return: ``result`` of the first streamed response, or False when the stream is empty
+        """
+        payload = {
+            "user_id": user_id,
+            "data": {
+                "provider": provider,
+                "credentials": credentials,
+            },
+        }
+        headers = {
+            "X-Plugin-ID": plugin_id,
+            "Content-Type": "application/json",
+        }
+
+        stream = self._request_with_plugin_daemon_response_stream(
+            "POST",
+            f"plugin/{tenant_id}/dispatch/datasource/validate_credentials",
+            PluginBasicBooleanResponse,
+            data=payload,
+            headers=headers,
+        )
+
+        # Only the first streamed response decides the outcome.
+        for first in stream:
+            return first.result
+
+        return False
+
+    def _get_local_file_datasource_provider(self) -> dict[str, Any]:
+        """
+        Build the raw declaration of the built-in local file datasource provider.
+
+        The result has the shape accepted by ``PluginDatasourceProviderEntity(**...)``:
+        one provider ("file" of plugin "langgenius/file") with a single
+        "upload-file" datasource, no credentials and no parameters.
+
+        :return: a freshly built dict on every call
+        """
+
+        def file_text() -> dict[str, str]:
+            # A new dict per use so that no two entries of the result share an object.
+            return {"zh_Hans": "File", "en_US": "File", "pt_BR": "File", "ja_JP": "File"}
+
+        upload_file_datasource = {
+            "identity": {
+                "author": "langgenius",
+                "name": "upload-file",
+                "provider": "file",
+                "label": file_text(),
+            },
+            "parameters": [],
+            "description": file_text(),
+        }
+        declaration = {
+            "identity": {
+                "author": "langgenius",
+                "name": "file",
+                "label": file_text(),
+                "icon": "https://assets.dify.ai/images/File%20Upload.svg",
+                "description": file_text(),
+            },
+            "credentials_schema": [],
+            "provider_type": "local_file",
+            "datasources": [upload_file_datasource],
+        }
+
+        return {
+            "id": "langgenius/file/file",
+            "plugin_id": "langgenius/file",
+            "provider": "file",
+            "plugin_unique_identifier": "langgenius/file:0.0.1@dify",
+            "declaration": declaration,
+        }
--- a/api/core/plugin/impl/dynamic_select.py
+++ b/api/core/plugin/impl/dynamic_select.py
@ -0,0 +1,45 @@
+from collections.abc import Mapping
+from typing import Any
+
+from core.plugin.entities.plugin import GenericProviderID
+from core.plugin.entities.plugin_daemon import PluginDynamicSelectOptionsResponse
+from core.plugin.impl.base import BasePluginClient
+
+
+class DynamicSelectClient(BasePluginClient):
+    """
+    Plugin daemon client that resolves the options of dynamic-select parameters.
+    """
+
+    def fetch_dynamic_select_options(
+        self,
+        tenant_id: str,
+        user_id: str,
+        plugin_id: str,
+        provider: str,
+        action: str,
+        credentials: Mapping[str, Any],
+        parameter: str,
+    ) -> PluginDynamicSelectOptionsResponse:
+        """
+        Fetch dynamic select options for a plugin parameter.
+
+        :param tenant_id: tenant the request is dispatched for
+        :param user_id: user id forwarded in the request body
+        :param plugin_id: plugin id sent in the X-Plugin-ID header
+        :param provider: provider identifier; only its provider name (via GenericProviderID) is sent
+        :param action: value sent as ``provider_action``
+        :param credentials: credentials forwarded to the plugin
+        :param parameter: name of the parameter whose options are requested
+        :return: the first response streamed back by the plugin daemon
+        :raises ValueError: when the stream ends without yielding any response
+        """
+        payload = {
+            "user_id": user_id,
+            "data": {
+                "provider": GenericProviderID(provider).provider_name,
+                "credentials": credentials,
+                "provider_action": action,
+                "parameter": parameter,
+            },
+        }
+        headers = {
+            "X-Plugin-ID": plugin_id,
+            "Content-Type": "application/json",
+        }
+
+        stream = self._request_with_plugin_daemon_response_stream(
+            "POST",
+            f"plugin/{tenant_id}/dispatch/dynamic_select/fetch_parameter_options",
+            PluginDynamicSelectOptionsResponse,
+            data=payload,
+            headers=headers,
+        )
+
+        # Only the first streamed item is used; anything after it is ignored.
+        for first in stream:
+            return first
+
+        raise ValueError(f"Plugin service returned no options for parameter '{parameter}' in provider '{provider}'")
--- a/api/core/plugin/impl/tool.py
+++ b/api/core/plugin/impl/tool.py
@ -4,7 +4,10 @@ from typing import Any, Optional
 from pydantic import BaseModel

 from core.plugin.entities.plugin import GenericProviderID, ToolProviderID
-from core.plugin.entities.plugin_daemon import PluginBasicBooleanResponse, PluginToolProviderEntity
+from core.plugin.entities.plugin_daemon import (
+    PluginBasicBooleanResponse,
+    PluginToolProviderEntity,
+)
 from core.plugin.impl.base import BasePluginClient
 from core.tools.entities.tool_entities import ToolInvokeMessage, ToolParameter

@ -197,6 +200,36 @@ class PluginToolManager(BasePluginClient):

        return False

+    def validate_datasource_credentials(
+        self, tenant_id: str, user_id: str, provider: str, credentials: dict[str, Any]
+    ) -> bool:
+        """
+        Validate the credentials of a datasource provider.
+
+        Calls the same ``dispatch/datasource/validate_credentials`` endpoint as
+        ``PluginDatasourceManager.validate_provider_credentials``, but derives the
+        plugin id and provider name from ``provider`` via GenericProviderID.
+
+        :param tenant_id: tenant the request is dispatched for
+        :param user_id: user id forwarded in the request body
+        :param provider: provider identifier; split into plugin id and provider name
+        :param credentials: credentials to validate
+        :return: ``result`` of the first streamed response, or False when the stream is empty
+        """
+        provider_id = GenericProviderID(provider)
+
+        payload = {
+            "user_id": user_id,
+            "data": {
+                "provider": provider_id.provider_name,
+                "credentials": credentials,
+            },
+        }
+        headers = {
+            "X-Plugin-ID": provider_id.plugin_id,
+            "Content-Type": "application/json",
+        }
+
+        stream = self._request_with_plugin_daemon_response_stream(
+            "POST",
+            f"plugin/{tenant_id}/dispatch/datasource/validate_credentials",
+            PluginBasicBooleanResponse,
+            data=payload,
+            headers=headers,
+        )
+
+        # Only the first streamed response decides the outcome.
+        for first in stream:
+            return first.result
+
+        return False
+
    def get_runtime_parameters(
        self,
        tenant_id: str,
--- a/api/core/rag/datasource/keyword/jieba/jieba.py
+++ b/api/core/rag/datasource/keyword/jieba/jieba.py
@ -28,10 +28,12 @@ class Jieba(BaseKeyword):
        with redis_client.lock(lock_name, timeout=600):
            keyword_table_handler = JiebaKeywordTableHandler()
            keyword_table = self._get_dataset_keyword_table()
+            keyword_number = (
+                self.dataset.keyword_number if self.dataset.keyword_number else self._config.max_keywords_per_chunk
+            )
+
            for text in texts:
-                keywords = keyword_table_handler.extract_keywords(
-                    text.page_content, self._config.max_keywords_per_chunk
-                )
+                keywords = keyword_table_handler.extract_keywords(text.page_content, keyword_number)
                if text.metadata is not None:
                    self._update_segment_keywords(self.dataset.id, text.metadata["doc_id"], list(keywords))
                    keyword_table = self._add_text_to_keyword_table(
@ -49,18 +51,17 @@ class Jieba(BaseKeyword):

            keyword_table = self._get_dataset_keyword_table()
            keywords_list = kwargs.get("keywords_list")
+            keyword_number = (
+                self.dataset.keyword_number if self.dataset.keyword_number else self._config.max_keywords_per_chunk
+            )
            for i in range(len(texts)):
                text = texts[i]
                if keywords_list:
                    keywords = keywords_list[i]
                    if not keywords:
-                        keywords = keyword_table_handler.extract_keywords(
-                            text.page_content, self._config.max_keywords_per_chunk
-                        )
+                        keywords = keyword_table_handler.extract_keywords(text.page_content, keyword_number)
                else:
-                    keywords = keyword_table_handler.extract_keywords(
-                        text.page_content, self._config.max_keywords_per_chunk
-                    )
+                    keywords = keyword_table_handler.extract_keywords(text.page_content, keyword_number)
                if text.metadata is not None:
                    self._update_segment_keywords(self.dataset.id, text.metadata["doc_id"], list(keywords))
                    keyword_table = self._add_text_to_keyword_table(
@ -239,7 +240,11 @@ class Jieba(BaseKeyword):
                    keyword_table or {}, segment.index_node_id, pre_segment_data["keywords"]
                )
            else:
-                keywords = keyword_table_handler.extract_keywords(segment.content, self._config.max_keywords_per_chunk)
+                keyword_number = (
+                    self.dataset.keyword_number if self.dataset.keyword_number else self._config.max_keywords_per_chunk
+                )
+
+                keywords = keyword_table_handler.extract_keywords(segment.content, keyword_number)
                segment.keywords = list(keywords)
                keyword_table = self._add_text_to_keyword_table(
                    keyword_table or {}, segment.index_node_id, list(keywords)
--- a/Show More
+++ b/Show More