add build task for release/e-3.0.2

fix: allow update plugin install settings (#22111)
Merge branch 'fix/explore-tabs-change-failed' into fix/e-300
2026-02-11 22:05:43 +08:00 · 2025-07-11 16:11:41 +08:00 · 2025-07-11 16:07:39 +08:00 · 2025-06-30 17:45:59 +08:00 · 2025-06-30 17:45:23 +08:00 · 2025-06-30 17:34:13 +08:00
975 changed files with 6550 additions and 44101 deletions
--- a/.github/workflows/api-tests.yml
+++ b/.github/workflows/api-tests.yml
@@ -47,17 +47,15 @@ jobs:
      - name: Run Unit tests
        run: |
          uv run --project api bash dev/pytest/pytest_unit_tests.sh
-
-      - name: Coverage Summary
-        run: |
-          set -x
          # Extract coverage percentage and create a summary
          TOTAL_COVERAGE=$(python -c 'import json; print(json.load(open("coverage.json"))["totals"]["percent_covered_display"])')

          # Create a detailed coverage summary
          echo "### Test Coverage Summary :test_tube:" >> $GITHUB_STEP_SUMMARY
          echo "Total Coverage: ${TOTAL_COVERAGE}%" >> $GITHUB_STEP_SUMMARY
-          uv run --project api coverage report --format=markdown >> $GITHUB_STEP_SUMMARY
+          echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
+          uv run --project api coverage report >> $GITHUB_STEP_SUMMARY
+          echo "\`\`\`" >> $GITHUB_STEP_SUMMARY

      - name: Run dify config tests
        run: uv run --project api dev/pytest/pytest_config_tests.py
--- a/.github/workflows/build-push.yml
+++ b/.github/workflows/build-push.yml
@@ -6,7 +6,7 @@ on:
      - "main"
      - "deploy/dev"
      - "deploy/enterprise"
-      - "deploy/rag-dev"
+      - "release/e-*"
    tags:
      - "*"

--- a/.github/workflows/deploy-dev.yml
+++ b/.github/workflows/deploy-dev.yml
@@ -4,7 +4,7 @@ on:
  workflow_run:
    workflows: ["Build and Push API & Web"]
    branches:
-      - "deploy/rag-dev"
+      - "deploy/dev"
    types:
      - completed

@@ -12,13 +12,12 @@ jobs:
  deploy:
    runs-on: ubuntu-latest
    if: |
-      github.event.workflow_run.conclusion == 'success' &&
-      github.event.workflow_run.head_branch == 'deploy/rag-dev'
+      github.event.workflow_run.conclusion == 'success'
    steps:
      - name: Deploy to server
        uses: appleboy/ssh-action@v0.1.8
        with:
-          host: ${{ secrets.RAG_SSH_HOST }}
+          host: ${{ secrets.SSH_HOST }}
          username: ${{ secrets.SSH_USER }}
          key: ${{ secrets.SSH_PRIVATE_KEY }}
          script: |
--- a/.gitignore
+++ b/.gitignore
@@ -214,4 +214,3 @@ mise.toml

 # AI Assistant
 .roo/
-api/.env.backup
--- a/api/app.py
+++ b/api/app.py
@@ -1,3 +1,4 @@
+import os
 import sys


@@ -16,20 +17,20 @@ else:
    # It seems that JetBrains Python debugger does not work well with gevent,
    # so we need to disable gevent in debug mode.
    # If you are using debugpy and set GEVENT_SUPPORT=True, you can debug with gevent.
-    # if (flask_debug := os.environ.get("FLASK_DEBUG", "0")) and flask_debug.lower() in {"false", "0", "no"}:
-    # from gevent import monkey
-    #
-    # # gevent
-    # monkey.patch_all()
-    #
-    # from grpc.experimental import gevent as grpc_gevent  # type: ignore
-    #
-    # # grpc gevent
-    # grpc_gevent.init_gevent()
+    if (flask_debug := os.environ.get("FLASK_DEBUG", "0")) and flask_debug.lower() in {"false", "0", "no"}:
+        from gevent import monkey

-    # import psycogreen.gevent  # type: ignore
-    #
-    # psycogreen.gevent.patch_psycopg()
+        # gevent
+        monkey.patch_all()
+
+        from grpc.experimental import gevent as grpc_gevent  # type: ignore
+
+        # grpc gevent
+        grpc_gevent.init_gevent()
+
+        import psycogreen.gevent  # type: ignore
+
+        psycogreen.gevent.patch_psycopg()

    from app_factory import create_app

--- a/api/configs/app_config.py
+++ b/api/configs/app_config.py
@@ -1,11 +1,8 @@
 import logging
-from pathlib import Path
 from typing import Any

 from pydantic.fields import FieldInfo
-from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict, TomlConfigSettingsSource
-
-from libs.file_utils import search_file_upwards
+from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict

 from .deploy import DeploymentConfig
 from .enterprise import EnterpriseFeatureConfig
@@ -102,12 +99,4 @@ class DifyConfig(
            RemoteSettingsSourceFactory(settings_cls),
            dotenv_settings,
            file_secret_settings,
-            TomlConfigSettingsSource(
-                settings_cls=settings_cls,
-                toml_file=search_file_upwards(
-                    base_dir_path=Path(__file__).parent,
-                    target_file_name="pyproject.toml",
-                    max_search_parent_depth=2,
-                ),
-            ),
        )
--- a/api/configs/feature/hosted_service/__init__.py
+++ b/api/configs/feature/hosted_service/__init__.py
@@ -222,28 +222,11 @@ class HostedFetchAppTemplateConfig(BaseSettings):
    )


-class HostedFetchPipelineTemplateConfig(BaseSettings):
-    """
-    Configuration for fetching pipeline templates
-    """
-
-    HOSTED_FETCH_PIPELINE_TEMPLATES_MODE: str = Field(
-        description="Mode for fetching pipeline templates: remote, db, or builtin default to remote,",
-        default="database",
-    )
-
-    HOSTED_FETCH_PIPELINE_TEMPLATES_REMOTE_DOMAIN: str = Field(
-        description="Domain for fetching remote pipeline templates",
-        default="https://tmpl.dify.ai",
-    )
-
-
 class HostedServiceConfig(
    # place the configs in alphabet order
    HostedAnthropicConfig,
    HostedAzureOpenAiConfig,
    HostedFetchAppTemplateConfig,
-    HostedFetchPipelineTemplateConfig,
    HostedMinmaxConfig,
    HostedOpenAiConfig,
    HostedSparkConfig,
--- a/api/configs/packaging/__init__.py
+++ b/api/configs/packaging/__init__.py
@@ -1,13 +1,17 @@
 from pydantic import Field
-
-from configs.packaging.pyproject import PyProjectConfig, PyProjectTomlConfig
+from pydantic_settings import BaseSettings


-class PackagingInfo(PyProjectTomlConfig):
+class PackagingInfo(BaseSettings):
    """
    Packaging build information
    """

+    CURRENT_VERSION: str = Field(
+        description="Dify version",
+        default="1.5.0",
+    )
+
    COMMIT_SHA: str = Field(
        description="SHA-1 checksum of the git commit used to build the app",
        default="",
--- a/api/configs/packaging/pyproject.py
+++ b/api/configs/packaging/pyproject.py
@@ -1,17 +0,0 @@
-from pydantic import BaseModel, Field
-from pydantic_settings import BaseSettings
-
-
-class PyProjectConfig(BaseModel):
-    version: str = Field(description="Dify version", default="")
-
-
-class PyProjectTomlConfig(BaseSettings):
-    """
-    configs in api/pyproject.toml
-    """
-
-    project: PyProjectConfig = Field(
-        description="configs in the project section of pyproject.toml",
-        default=PyProjectConfig(),
-    )
--- a/api/contexts/__init__.py
+++ b/api/contexts/__init__.py
@@ -3,7 +3,6 @@ from threading import Lock
 from typing import TYPE_CHECKING

 from contexts.wrapper import RecyclableContextVar
-from core.datasource.__base.datasource_provider import DatasourcePluginProviderController

 if TYPE_CHECKING:
    from core.model_runtime.entities.model_entities import AIModelEntity
@@ -34,11 +33,3 @@ plugin_model_schema_lock: RecyclableContextVar[Lock] = RecyclableContextVar(Cont
 plugin_model_schemas: RecyclableContextVar[dict[str, "AIModelEntity"]] = RecyclableContextVar(
    ContextVar("plugin_model_schemas")
 )
-
-datasource_plugin_providers: RecyclableContextVar[dict[str, "DatasourcePluginProviderController"]] = (
-    RecyclableContextVar(ContextVar("datasource_plugin_providers"))
-)
-
-datasource_plugin_providers_lock: RecyclableContextVar[Lock] = RecyclableContextVar(
-    ContextVar("datasource_plugin_providers_lock")
-)
--- a/api/controllers/console/__init__.py
+++ b/api/controllers/console/__init__.py
@@ -76,6 +76,7 @@ from .billing import billing, compliance

 # Import datasets controllers
 from .datasets import (
+    data_source,
    datasets,
    datasets_document,
    datasets_segments,
@@ -84,14 +85,6 @@ from .datasets import (
    metadata,
    website,
 )
-from .datasets.rag_pipeline import (
-    datasource_auth,
-    datasource_content_preview,
-    rag_pipeline,
-    rag_pipeline_datasets,
-    rag_pipeline_import,
-    rag_pipeline_workflow,
-)

 # Import explore controllers
 from .explore import (
--- a/api/controllers/console/auth/data_source_oauth.py
+++ b/api/controllers/console/auth/data_source_oauth.py
@@ -41,7 +41,7 @@ class OAuthDataSource(Resource):
            if not internal_secret:
                return ({"error": "Internal secret is not set"},)
            oauth_provider.save_internal_access_token(internal_secret)
-            return {"data": "internal"}
+            return {"data": ""}
        else:
            auth_url = oauth_provider.get_authorization_url()
            return {"data": auth_url}, 200
--- a/api/controllers/console/datasets/datasets.py
+++ b/api/controllers/console/datasets/datasets.py
@@ -283,15 +283,6 @@ class DatasetApi(Resource):
            location="json",
            help="Invalid external knowledge api id.",
        )
-
-        parser.add_argument(
-            "icon_info",
-            type=dict,
-            required=False,
-            nullable=True,
-            location="json",
-            help="Invalid icon info.",
-        )
        args = parser.parse_args()
        data = request.get_json()

--- a/api/controllers/console/datasets/datasets_document.py
+++ b/api/controllers/console/datasets/datasets_document.py
@@ -1,4 +1,3 @@
-import json
 import logging
 from argparse import ArgumentTypeError
 from datetime import UTC, datetime
@@ -52,7 +51,6 @@ from fields.document_fields import (
 )
 from libs.login import login_required
 from models import Dataset, DatasetProcessRule, Document, DocumentSegment, UploadFile
-from models.dataset import DocumentPipelineExecutionLog
 from services.dataset_service import DatasetService, DocumentService
 from services.entities.knowledge_entities.knowledge_entities import KnowledgeConfig

@@ -663,7 +661,7 @@ class DocumentDetailApi(DocumentResource):
            response = {"id": document.id, "doc_type": document.doc_type, "doc_metadata": document.doc_metadata_details}
        elif metadata == "without":
            dataset_process_rules = DatasetService.get_process_rules(dataset_id)
-            document_process_rules = document.dataset_process_rule.to_dict() if document.dataset_process_rule else {}
+            document_process_rules = document.dataset_process_rule.to_dict()
            data_source_info = document.data_source_detail_dict
            response = {
                "id": document.id,
@@ -1030,41 +1028,6 @@ class WebsiteDocumentSyncApi(DocumentResource):
        return {"result": "success"}, 200


-class DocumentPipelineExecutionLogApi(DocumentResource):
-    @setup_required
-    @login_required
-    @account_initialization_required
-    def get(self, dataset_id, document_id):
-        dataset_id = str(dataset_id)
-        document_id = str(document_id)
-
-        dataset = DatasetService.get_dataset(dataset_id)
-        if not dataset:
-            raise NotFound("Dataset not found.")
-        document = DocumentService.get_document(dataset.id, document_id)
-        if not document:
-            raise NotFound("Document not found.")
-        log = (
-            db.session.query(DocumentPipelineExecutionLog)
-            .filter_by(document_id=document_id)
-            .order_by(DocumentPipelineExecutionLog.created_at.desc())
-            .first()
-        )
-        if not log:
-            return {
-                "datasource_info": None,
-                "datasource_type": None,
-                "input_data": None,
-                "datasource_node_id": None,
-            }, 200
-        return {
-            "datasource_info": json.loads(log.datasource_info),
-            "datasource_type": log.datasource_type,
-            "input_data": log.input_data,
-            "datasource_node_id": log.datasource_node_id,
-        }, 200
-
-
 api.add_resource(GetProcessRuleApi, "/datasets/process-rule")
 api.add_resource(DatasetDocumentListApi, "/datasets/<uuid:dataset_id>/documents")
 api.add_resource(DatasetInitApi, "/datasets/init")
@@ -1087,6 +1050,3 @@ api.add_resource(DocumentRetryApi, "/datasets/<uuid:dataset_id>/retry")
 api.add_resource(DocumentRenameApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/rename")

 api.add_resource(WebsiteDocumentSyncApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/website-sync")
-api.add_resource(
-    DocumentPipelineExecutionLogApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/pipeline-execution-log"
-)
--- a/api/controllers/console/datasets/error.py
+++ b/api/controllers/console/datasets/error.py
@@ -101,9 +101,3 @@ class ChildChunkDeleteIndexError(BaseHTTPException):
    error_code = "child_chunk_delete_index_error"
    description = "Delete child chunk index failed: {message}"
    code = 500
-
-
-class PipelineNotFoundError(BaseHTTPException):
-    error_code = "pipeline_not_found"
-    description = "Pipeline not found."
-    code = 404
--- a/api/controllers/console/datasets/rag_pipeline/datasource_auth.py
+++ b/api/controllers/console/datasets/rag_pipeline/datasource_auth.py
@@ -1,197 +0,0 @@
-from flask import redirect, request
-from flask_login import current_user  # type: ignore
-from flask_restful import (  # type: ignore
-    Resource,  # type: ignore
-    reqparse,
-)
-from werkzeug.exceptions import Forbidden, NotFound
-
-from configs import dify_config
-from controllers.console import api
-from controllers.console.wraps import (
-    account_initialization_required,
-    setup_required,
-)
-from core.model_runtime.errors.validate import CredentialsValidateFailedError
-from core.plugin.impl.oauth import OAuthHandler
-from extensions.ext_database import db
-from libs.login import login_required
-from models.oauth import DatasourceOauthParamConfig, DatasourceProvider
-from services.datasource_provider_service import DatasourceProviderService
-
-
-class DatasourcePluginOauthApi(Resource):
-    @setup_required
-    @login_required
-    @account_initialization_required
-    def get(self):
-        parser = reqparse.RequestParser()
-        parser.add_argument("provider", type=str, required=True, nullable=False, location="args")
-        parser.add_argument("plugin_id", type=str, required=True, nullable=False, location="args")
-        args = parser.parse_args()
-        provider = args["provider"]
-        plugin_id = args["plugin_id"]
-        # Check user role first
-        if not current_user.is_editor:
-            raise Forbidden()
-        # get all plugin oauth configs
-        plugin_oauth_config = (
-            db.session.query(DatasourceOauthParamConfig).filter_by(provider=provider, plugin_id=plugin_id).first()
-        )
-        if not plugin_oauth_config:
-            raise NotFound()
-        oauth_handler = OAuthHandler()
-        redirect_url = (
-            f"{dify_config.CONSOLE_WEB_URL}/oauth/datasource/callback?provider={provider}&plugin_id={plugin_id}"
-        )
-        system_credentials = plugin_oauth_config.system_credentials
-        if system_credentials:
-            system_credentials["redirect_url"] = redirect_url
-        response = oauth_handler.get_authorization_url(
-            current_user.current_tenant.id, current_user.id, plugin_id, provider, system_credentials=system_credentials
-        )
-        return response.model_dump()
-
-
-class DatasourceOauthCallback(Resource):
-    @setup_required
-    @login_required
-    @account_initialization_required
-    def get(self):
-        parser = reqparse.RequestParser()
-        parser.add_argument("provider", type=str, required=True, nullable=False, location="args")
-        parser.add_argument("plugin_id", type=str, required=True, nullable=False, location="args")
-        args = parser.parse_args()
-        provider = args["provider"]
-        plugin_id = args["plugin_id"]
-        oauth_handler = OAuthHandler()
-        plugin_oauth_config = (
-            db.session.query(DatasourceOauthParamConfig).filter_by(provider=provider, plugin_id=plugin_id).first()
-        )
-        if not plugin_oauth_config:
-            raise NotFound()
-        credentials = oauth_handler.get_credentials(
-            current_user.current_tenant.id,
-            current_user.id,
-            plugin_id,
-            provider,
-            system_credentials=plugin_oauth_config.system_credentials,
-            request=request,
-        )
-        datasource_provider = DatasourceProvider(
-            plugin_id=plugin_id, provider=provider, auth_type="oauth", encrypted_credentials=credentials
-        )
-        db.session.add(datasource_provider)
-        db.session.commit()
-        return redirect(f"{dify_config.CONSOLE_WEB_URL}")
-
-
-class DatasourceAuth(Resource):
-    @setup_required
-    @login_required
-    @account_initialization_required
-    def post(self):
-        if not current_user.is_editor:
-            raise Forbidden()
-
-        parser = reqparse.RequestParser()
-        parser.add_argument("provider", type=str, required=True, nullable=False, location="json")
-        parser.add_argument("name", type=str, required=False, nullable=False, location="json", default="test")
-        parser.add_argument("plugin_id", type=str, required=True, nullable=False, location="json")
-        parser.add_argument("credentials", type=dict, required=True, nullable=False, location="json")
-        args = parser.parse_args()
-
-        datasource_provider_service = DatasourceProviderService()
-
-        try:
-            datasource_provider_service.datasource_provider_credentials_validate(
-                tenant_id=current_user.current_tenant_id,
-                provider=args["provider"],
-                plugin_id=args["plugin_id"],
-                credentials=args["credentials"],
-                name=args["name"],
-            )
-        except CredentialsValidateFailedError as ex:
-            raise ValueError(str(ex))
-
-        return {"result": "success"}, 201
-
-    @setup_required
-    @login_required
-    @account_initialization_required
-    def get(self):
-        parser = reqparse.RequestParser()
-        parser.add_argument("provider", type=str, required=True, nullable=False, location="args")
-        parser.add_argument("plugin_id", type=str, required=True, nullable=False, location="args")
-        args = parser.parse_args()
-        datasource_provider_service = DatasourceProviderService()
-        datasources = datasource_provider_service.get_datasource_credentials(
-            tenant_id=current_user.current_tenant_id, provider=args["provider"], plugin_id=args["plugin_id"]
-        )
-        return {"result": datasources}, 200
-
-
-class DatasourceAuthUpdateDeleteApi(Resource):
-    @setup_required
-    @login_required
-    @account_initialization_required
-    def delete(self, auth_id: str):
-        parser = reqparse.RequestParser()
-        parser.add_argument("provider", type=str, required=True, nullable=False, location="args")
-        parser.add_argument("plugin_id", type=str, required=True, nullable=False, location="args")
-        args = parser.parse_args()
-        if not current_user.is_editor:
-            raise Forbidden()
-        datasource_provider_service = DatasourceProviderService()
-        datasource_provider_service.remove_datasource_credentials(
-            tenant_id=current_user.current_tenant_id,
-            auth_id=auth_id,
-            provider=args["provider"],
-            plugin_id=args["plugin_id"],
-        )
-        return {"result": "success"}, 200
-
-    @setup_required
-    @login_required
-    @account_initialization_required
-    def patch(self, auth_id: str):
-        parser = reqparse.RequestParser()
-        parser.add_argument("provider", type=str, required=True, nullable=False, location="args")
-        parser.add_argument("plugin_id", type=str, required=True, nullable=False, location="args")
-        parser.add_argument("credentials", type=dict, required=True, nullable=False, location="json")
-        args = parser.parse_args()
-        if not current_user.is_editor:
-            raise Forbidden()
-        try:
-            datasource_provider_service = DatasourceProviderService()
-            datasource_provider_service.update_datasource_credentials(
-                tenant_id=current_user.current_tenant_id,
-                auth_id=auth_id,
-                provider=args["provider"],
-                plugin_id=args["plugin_id"],
-                credentials=args["credentials"],
-            )
-        except CredentialsValidateFailedError as ex:
-            raise ValueError(str(ex))
-
-        return {"result": "success"}, 201
-
-
-# Import Rag Pipeline
-api.add_resource(
-    DatasourcePluginOauthApi,
-    "/oauth/plugin/datasource",
-)
-api.add_resource(
-    DatasourceOauthCallback,
-    "/oauth/plugin/datasource/callback",
-)
-api.add_resource(
-    DatasourceAuth,
-    "/auth/plugin/datasource",
-)
-
-api.add_resource(
-    DatasourceAuthUpdateDeleteApi,
-    "/auth/plugin/datasource/<string:auth_id>",
-)
--- a/api/controllers/console/datasets/rag_pipeline/datasource_content_preview.py
+++ b/api/controllers/console/datasets/rag_pipeline/datasource_content_preview.py
@@ -1,55 +0,0 @@
-from flask_restful import (  # type: ignore
-    Resource,  # type: ignore
-    reqparse,
-)
-from werkzeug.exceptions import Forbidden
-
-from controllers.console import api
-from controllers.console.datasets.wraps import get_rag_pipeline
-from controllers.console.wraps import account_initialization_required, setup_required
-from libs.login import current_user, login_required
-from models import Account
-from models.dataset import Pipeline
-from services.rag_pipeline.rag_pipeline import RagPipelineService
-
-
-class DataSourceContentPreviewApi(Resource):
-    @setup_required
-    @login_required
-    @account_initialization_required
-    @get_rag_pipeline
-    def post(self, pipeline: Pipeline, node_id: str):
-        """
-        Run datasource content preview
-        """
-        if not isinstance(current_user, Account):
-            raise Forbidden()
-
-        parser = reqparse.RequestParser()
-        parser.add_argument("inputs", type=dict, required=True, nullable=False, location="json")
-        parser.add_argument("datasource_type", type=str, required=True, location="json")
-        args = parser.parse_args()
-
-        inputs = args.get("inputs")
-        if inputs is None:
-            raise ValueError("missing inputs")
-        datasource_type = args.get("datasource_type")
-        if datasource_type is None:
-            raise ValueError("missing datasource_type")
-
-        rag_pipeline_service = RagPipelineService()
-        preview_content = rag_pipeline_service.run_datasource_node_preview(
-            pipeline=pipeline,
-            node_id=node_id,
-            user_inputs=inputs,
-            account=current_user,
-            datasource_type=datasource_type,
-            is_published=True,
-        )
-        return preview_content, 200
-
-
-api.add_resource(
-    DataSourceContentPreviewApi,
-    "/rag/pipelines/<uuid:pipeline_id>/workflows/published/datasource/nodes/<string:node_id>/preview",
-)
--- a/api/controllers/console/datasets/rag_pipeline/rag_pipeline.py
+++ b/api/controllers/console/datasets/rag_pipeline/rag_pipeline.py
@@ -1,162 +0,0 @@
-import logging
-
-from flask import request
-from flask_restful import Resource, reqparse
-from sqlalchemy.orm import Session
-
-from controllers.console import api
-from controllers.console.wraps import (
-    account_initialization_required,
-    enterprise_license_required,
-    setup_required,
-)
-from extensions.ext_database import db
-from libs.login import login_required
-from models.dataset import PipelineCustomizedTemplate
-from services.entities.knowledge_entities.rag_pipeline_entities import PipelineTemplateInfoEntity
-from services.rag_pipeline.rag_pipeline import RagPipelineService
-
-logger = logging.getLogger(__name__)
-
-
-def _validate_name(name):
-    if not name or len(name) < 1 or len(name) > 40:
-        raise ValueError("Name must be between 1 to 40 characters.")
-    return name
-
-
-def _validate_description_length(description):
-    if len(description) > 400:
-        raise ValueError("Description cannot exceed 400 characters.")
-    return description
-
-
-class PipelineTemplateListApi(Resource):
-    @setup_required
-    @login_required
-    @account_initialization_required
-    @enterprise_license_required
-    def get(self):
-        type = request.args.get("type", default="built-in", type=str)
-        language = request.args.get("language", default="en-US", type=str)
-        # get pipeline templates
-        pipeline_templates = RagPipelineService.get_pipeline_templates(type, language)
-        return pipeline_templates, 200
-
-
-class PipelineTemplateDetailApi(Resource):
-    @setup_required
-    @login_required
-    @account_initialization_required
-    @enterprise_license_required
-    def get(self, template_id: str):
-        type = request.args.get("type", default="built-in", type=str)
-        rag_pipeline_service = RagPipelineService()
-        pipeline_template = rag_pipeline_service.get_pipeline_template_detail(template_id, type)
-        return pipeline_template, 200
-
-
-class CustomizedPipelineTemplateApi(Resource):
-    @setup_required
-    @login_required
-    @account_initialization_required
-    @enterprise_license_required
-    def patch(self, template_id: str):
-        parser = reqparse.RequestParser()
-        parser.add_argument(
-            "name",
-            nullable=False,
-            required=True,
-            help="Name must be between 1 to 40 characters.",
-            type=_validate_name,
-        )
-        parser.add_argument(
-            "description",
-            type=str,
-            nullable=True,
-            required=False,
-            default="",
-        )
-        parser.add_argument(
-            "icon_info",
-            type=dict,
-            location="json",
-            nullable=True,
-        )
-        args = parser.parse_args()
-        pipeline_template_info = PipelineTemplateInfoEntity(**args)
-        RagPipelineService.update_customized_pipeline_template(template_id, pipeline_template_info)
-        return 200
-
-    @setup_required
-    @login_required
-    @account_initialization_required
-    @enterprise_license_required
-    def delete(self, template_id: str):
-        RagPipelineService.delete_customized_pipeline_template(template_id)
-        return 200
-
-    @setup_required
-    @login_required
-    @account_initialization_required
-    @enterprise_license_required
-    def post(self, template_id: str):
-        with Session(db.engine) as session:
-            template = (
-                session.query(PipelineCustomizedTemplate).filter(PipelineCustomizedTemplate.id == template_id).first()
-            )
-            if not template:
-                raise ValueError("Customized pipeline template not found.")
-
-        return {"data": template.yaml_content}, 200
-
-
-class PublishCustomizedPipelineTemplateApi(Resource):
-    @setup_required
-    @login_required
-    @account_initialization_required
-    @enterprise_license_required
-    def post(self, pipeline_id: str):
-        parser = reqparse.RequestParser()
-        parser.add_argument(
-            "name",
-            nullable=False,
-            required=True,
-            help="Name must be between 1 to 40 characters.",
-            type=_validate_name,
-        )
-        parser.add_argument(
-            "description",
-            type=str,
-            nullable=True,
-            required=False,
-            default="",
-        )
-        parser.add_argument(
-            "icon_info",
-            type=dict,
-            location="json",
-            nullable=True,
-        )
-        args = parser.parse_args()
-        rag_pipeline_service = RagPipelineService()
-        rag_pipeline_service.publish_customized_pipeline_template(pipeline_id, args)
-        return {"result": "success"}
-
-
-api.add_resource(
-    PipelineTemplateListApi,
-    "/rag/pipeline/templates",
-)
-api.add_resource(
-    PipelineTemplateDetailApi,
-    "/rag/pipeline/templates/<string:template_id>",
-)
-api.add_resource(
-    CustomizedPipelineTemplateApi,
-    "/rag/pipeline/customized/templates/<string:template_id>",
-)
-api.add_resource(
-    PublishCustomizedPipelineTemplateApi,
-    "/rag/pipelines/<string:pipeline_id>/customized/publish",
-)
--- a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_datasets.py
+++ b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_datasets.py
@@ -1,171 +0,0 @@
-from flask_login import current_user  # type: ignore  # type: ignore
-from flask_restful import Resource, marshal, reqparse  # type: ignore
-from werkzeug.exceptions import Forbidden
-
-import services
-from controllers.console import api
-from controllers.console.datasets.error import DatasetNameDuplicateError
-from controllers.console.wraps import (
-    account_initialization_required,
-    cloud_edition_billing_rate_limit_check,
-    setup_required,
-)
-from fields.dataset_fields import dataset_detail_fields
-from libs.login import login_required
-from models.dataset import DatasetPermissionEnum
-from services.dataset_service import DatasetPermissionService, DatasetService
-from services.entities.knowledge_entities.rag_pipeline_entities import RagPipelineDatasetCreateEntity
-from services.rag_pipeline.rag_pipeline_dsl_service import RagPipelineDslService
-
-
-def _validate_name(name):
-    if not name or len(name) < 1 or len(name) > 40:
-        raise ValueError("Name must be between 1 to 40 characters.")
-    return name
-
-
-def _validate_description_length(description):
-    if len(description) > 400:
-        raise ValueError("Description cannot exceed 400 characters.")
-    return description
-
-
-class CreateRagPipelineDatasetApi(Resource):
-    @setup_required
-    @login_required
-    @account_initialization_required
-    @cloud_edition_billing_rate_limit_check("knowledge")
-    def post(self):
-        parser = reqparse.RequestParser()
-        parser.add_argument(
-            "name",
-            nullable=False,
-            required=True,
-            help="type is required. Name must be between 1 to 40 characters.",
-            type=_validate_name,
-        )
-        parser.add_argument(
-            "description",
-            type=str,
-            nullable=True,
-            required=False,
-            default="",
-        )
-
-        parser.add_argument(
-            "icon_info",
-            type=dict,
-            nullable=True,
-            required=False,
-            default={},
-        )
-
-        parser.add_argument(
-            "permission",
-            type=str,
-            choices=(DatasetPermissionEnum.ONLY_ME, DatasetPermissionEnum.ALL_TEAM, DatasetPermissionEnum.PARTIAL_TEAM),
-            nullable=True,
-            required=False,
-            default=DatasetPermissionEnum.ONLY_ME,
-        )
-
-        parser.add_argument(
-            "partial_member_list",
-            type=list,
-            nullable=True,
-            required=False,
-            default=[],
-        )
-
-        parser.add_argument(
-            "yaml_content",
-            type=str,
-            nullable=False,
-            required=True,
-            help="yaml_content is required.",
-        )
-
-        args = parser.parse_args()
-
-        # The role of the current user in the ta table must be admin, owner, or editor, or dataset_operator
-        if not current_user.is_dataset_editor:
-            raise Forbidden()
-        rag_pipeline_dataset_create_entity = RagPipelineDatasetCreateEntity(**args)
-        try:
-            import_info = RagPipelineDslService.create_rag_pipeline_dataset(
-                tenant_id=current_user.current_tenant_id,
-                rag_pipeline_dataset_create_entity=rag_pipeline_dataset_create_entity,
-            )
-            if rag_pipeline_dataset_create_entity.permission == "partial_members":
-                DatasetPermissionService.update_partial_member_list(
-                    current_user.current_tenant_id,
-                    import_info["dataset_id"],
-                    rag_pipeline_dataset_create_entity.partial_member_list,
-                )
-        except services.errors.dataset.DatasetNameDuplicateError:
-            raise DatasetNameDuplicateError()
-
-        return import_info, 201
-
-
-class CreateEmptyRagPipelineDatasetApi(Resource):
-    @setup_required
-    @login_required
-    @account_initialization_required
-    @cloud_edition_billing_rate_limit_check("knowledge")
-    def post(self):
-        # The role of the current user in the ta table must be admin, owner, or editor, or dataset_operator
-        if not current_user.is_dataset_editor:
-            raise Forbidden()
-
-        parser = reqparse.RequestParser()
-        parser.add_argument(
-            "name",
-            nullable=False,
-            required=True,
-            help="type is required. Name must be between 1 to 40 characters.",
-            type=_validate_name,
-        )
-        parser.add_argument(
-            "description",
-            type=str,
-            nullable=True,
-            required=False,
-            default="",
-        )
-
-        parser.add_argument(
-            "icon_info",
-            type=dict,
-            nullable=True,
-            required=False,
-            default={},
-        )
-
-        parser.add_argument(
-            "permission",
-            type=str,
-            choices=(DatasetPermissionEnum.ONLY_ME, DatasetPermissionEnum.ALL_TEAM, DatasetPermissionEnum.PARTIAL_TEAM),
-            nullable=True,
-            required=False,
-            default=DatasetPermissionEnum.ONLY_ME,
-        )
-
-        parser.add_argument(
-            "partial_member_list",
-            type=list,
-            nullable=True,
-            required=False,
-            default=[],
-        )
-
-        args = parser.parse_args()
-        dataset = DatasetService.create_empty_rag_pipeline_dataset(
-            tenant_id=current_user.current_tenant_id,
-            rag_pipeline_dataset_create_entity=RagPipelineDatasetCreateEntity(**args),
-        )
-        return marshal(dataset, dataset_detail_fields), 201
-
-
-api.add_resource(CreateRagPipelineDatasetApi, "/rag/pipeline/dataset")
-api.add_resource(CreateEmptyRagPipelineDatasetApi, "/rag/pipeline/empty-dataset")
--- a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_import.py
+++ b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_import.py
@ -1,146 +0,0 @@
-from typing import cast
-
-from flask_login import current_user  # type: ignore
-from flask_restful import Resource, marshal_with, reqparse  # type: ignore
-from sqlalchemy.orm import Session
-from werkzeug.exceptions import Forbidden
-
-from controllers.console import api
-from controllers.console.datasets.wraps import get_rag_pipeline
-from controllers.console.wraps import (
-    account_initialization_required,
-    setup_required,
-)
-from extensions.ext_database import db
-from fields.rag_pipeline_fields import pipeline_import_check_dependencies_fields, pipeline_import_fields
-from libs.login import login_required
-from models import Account
-from models.dataset import Pipeline
-from services.app_dsl_service import ImportStatus
-from services.rag_pipeline.rag_pipeline_dsl_service import RagPipelineDslService
-
-
-class RagPipelineImportApi(Resource):
-    @setup_required
-    @login_required
-    @account_initialization_required
-    @marshal_with(pipeline_import_fields)
-    def post(self):
-        # Check user role first
-        if not current_user.is_editor:
-            raise Forbidden()
-
-        parser = reqparse.RequestParser()
-        parser.add_argument("mode", type=str, required=True, location="json")
-        parser.add_argument("yaml_content", type=str, location="json")
-        parser.add_argument("yaml_url", type=str, location="json")
-        parser.add_argument("name", type=str, location="json")
-        parser.add_argument("description", type=str, location="json")
-        parser.add_argument("icon_type", type=str, location="json")
-        parser.add_argument("icon", type=str, location="json")
-        parser.add_argument("icon_background", type=str, location="json")
-        parser.add_argument("pipeline_id", type=str, location="json")
-        args = parser.parse_args()
-
-        # Create service with session
-        with Session(db.engine) as session:
-            import_service = RagPipelineDslService(session)
-            # Import app
-            account = cast(Account, current_user)
-            result = import_service.import_rag_pipeline(
-                account=account,
-                import_mode=args["mode"],
-                yaml_content=args.get("yaml_content"),
-                yaml_url=args.get("yaml_url"),
-                pipeline_id=args.get("pipeline_id"),
-            )
-            session.commit()
-
-        # Return appropriate status code based on result
-        status = result.status
-        if status == ImportStatus.FAILED.value:
-            return result.model_dump(mode="json"), 400
-        elif status == ImportStatus.PENDING.value:
-            return result.model_dump(mode="json"), 202
-        return result.model_dump(mode="json"), 200
-
-
-class RagPipelineImportConfirmApi(Resource):
-    @setup_required
-    @login_required
-    @account_initialization_required
-    @marshal_with(pipeline_import_fields)
-    def post(self, import_id):
-        # Check user role first
-        if not current_user.is_editor:
-            raise Forbidden()
-
-        # Create service with session
-        with Session(db.engine) as session:
-            import_service = RagPipelineDslService(session)
-            # Confirm import
-            account = cast(Account, current_user)
-            result = import_service.confirm_import(import_id=import_id, account=account)
-            session.commit()
-
-        # Return appropriate status code based on result
-        if result.status == ImportStatus.FAILED.value:
-            return result.model_dump(mode="json"), 400
-        return result.model_dump(mode="json"), 200
-
-
-class RagPipelineImportCheckDependenciesApi(Resource):
-    @setup_required
-    @login_required
-    @get_rag_pipeline
-    @account_initialization_required
-    @marshal_with(pipeline_import_check_dependencies_fields)
-    def get(self, pipeline: Pipeline):
-        if not current_user.is_editor:
-            raise Forbidden()
-
-        with Session(db.engine) as session:
-            import_service = RagPipelineDslService(session)
-            result = import_service.check_dependencies(pipeline=pipeline)
-
-        return result.model_dump(mode="json"), 200
-
-
-class RagPipelineExportApi(Resource):
-    @setup_required
-    @login_required
-    @get_rag_pipeline
-    @account_initialization_required
-    def get(self, pipeline: Pipeline):
-        if not current_user.is_editor:
-            raise Forbidden()
-
-            # Add include_secret params
-        parser = reqparse.RequestParser()
-        parser.add_argument("include_secret", type=bool, default=False, location="args")
-        args = parser.parse_args()
-
-        with Session(db.engine) as session:
-            export_service = RagPipelineDslService(session)
-            result = export_service.export_rag_pipeline_dsl(pipeline=pipeline, include_secret=args["include_secret"])
-
-        return {"data": result}, 200
-
-
-# Import Rag Pipeline
-api.add_resource(
-    RagPipelineImportApi,
-    "/rag/pipelines/imports",
-)
-api.add_resource(
-    RagPipelineImportConfirmApi,
-    "/rag/pipelines/imports/<string:import_id>/confirm",
-)
-api.add_resource(
-    RagPipelineImportCheckDependenciesApi,
-    "/rag/pipelines/imports/<string:pipeline_id>/check-dependencies",
-)
-api.add_resource(
-    RagPipelineExportApi,
-    "/rag/pipelines/<string:pipeline_id>/exports",
-)
--- a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py
+++ b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py
--- a/api/controllers/console/datasets/wraps.py
+++ b/api/controllers/console/datasets/wraps.py
@ -1,43 +0,0 @@
-from collections.abc import Callable
-from functools import wraps
-from typing import Optional
-
-from controllers.console.datasets.error import PipelineNotFoundError
-from extensions.ext_database import db
-from libs.login import current_user
-from models.dataset import Pipeline
-
-
-def get_rag_pipeline(
-    view: Optional[Callable] = None,
-):
-    def decorator(view_func):
-        @wraps(view_func)
-        def decorated_view(*args, **kwargs):
-            if not kwargs.get("pipeline_id"):
-                raise ValueError("missing pipeline_id in path parameters")
-
-            pipeline_id = kwargs.get("pipeline_id")
-            pipeline_id = str(pipeline_id)
-
-            del kwargs["pipeline_id"]
-
-            pipeline = (
-                db.session.query(Pipeline)
-                .filter(Pipeline.id == pipeline_id, Pipeline.tenant_id == current_user.current_tenant_id)
-                .first()
-            )
-
-            if not pipeline:
-                raise PipelineNotFoundError()
-
-            kwargs["pipeline"] = pipeline
-
-            return view_func(*args, **kwargs)
-
-        return decorated_view
-
-    if view is None:
-        return decorator
-    else:
-        return decorator(view)
--- a/api/controllers/console/version.py
+++ b/api/controllers/console/version.py
@ -18,7 +18,7 @@ class VersionApi(Resource):
        check_update_url = dify_config.CHECK_UPDATE_URL

        result = {
-            "version": dify_config.project.version,
+            "version": dify_config.CURRENT_VERSION,
            "release_date": "",
            "release_notes": "",
            "can_auto_update": False,
--- a/api/controllers/service_api/dataset/document.py
+++ b/api/controllers/service_api/dataset/document.py
@ -3,7 +3,7 @@ import json
 from flask import request
 from flask_restful import marshal, reqparse
 from sqlalchemy import desc, select
-from werkzeug.exceptions import Forbidden, NotFound
+from werkzeug.exceptions import NotFound

 import services
 from controllers.common.errors import FilenameNotExistsError
@ -18,7 +18,6 @@ from controllers.service_api.app.error import (
 from controllers.service_api.dataset.error import (
    ArchivedDocumentImmutableError,
    DocumentIndexingError,
-    InvalidMetadataError,
 )
 from controllers.service_api.wraps import (
    DatasetApiResource,
@ -467,101 +466,6 @@ class DocumentIndexingStatusApi(DatasetApiResource):
        return data


-class DocumentDetailApi(DatasetApiResource):
-    METADATA_CHOICES = {"all", "only", "without"}
-
-    def get(self, tenant_id, dataset_id, document_id):
-        dataset_id = str(dataset_id)
-        document_id = str(document_id)
-
-        dataset = self.get_dataset(dataset_id, tenant_id)
-
-        document = DocumentService.get_document(dataset.id, document_id)
-
-        if not document:
-            raise NotFound("Document not found.")
-
-        if document.tenant_id != str(tenant_id):
-            raise Forbidden("No permission.")
-
-        metadata = request.args.get("metadata", "all")
-        if metadata not in self.METADATA_CHOICES:
-            raise InvalidMetadataError(f"Invalid metadata value: {metadata}")
-
-        if metadata == "only":
-            response = {"id": document.id, "doc_type": document.doc_type, "doc_metadata": document.doc_metadata_details}
-        elif metadata == "without":
-            dataset_process_rules = DatasetService.get_process_rules(dataset_id)
-            document_process_rules = document.dataset_process_rule.to_dict()
-            data_source_info = document.data_source_detail_dict
-            response = {
-                "id": document.id,
-                "position": document.position,
-                "data_source_type": document.data_source_type,
-                "data_source_info": data_source_info,
-                "dataset_process_rule_id": document.dataset_process_rule_id,
-                "dataset_process_rule": dataset_process_rules,
-                "document_process_rule": document_process_rules,
-                "name": document.name,
-                "created_from": document.created_from,
-                "created_by": document.created_by,
-                "created_at": document.created_at.timestamp(),
-                "tokens": document.tokens,
-                "indexing_status": document.indexing_status,
-                "completed_at": int(document.completed_at.timestamp()) if document.completed_at else None,
-                "updated_at": int(document.updated_at.timestamp()) if document.updated_at else None,
-                "indexing_latency": document.indexing_latency,
-                "error": document.error,
-                "enabled": document.enabled,
-                "disabled_at": int(document.disabled_at.timestamp()) if document.disabled_at else None,
-                "disabled_by": document.disabled_by,
-                "archived": document.archived,
-                "segment_count": document.segment_count,
-                "average_segment_length": document.average_segment_length,
-                "hit_count": document.hit_count,
-                "display_status": document.display_status,
-                "doc_form": document.doc_form,
-                "doc_language": document.doc_language,
-            }
-        else:
-            dataset_process_rules = DatasetService.get_process_rules(dataset_id)
-            document_process_rules = document.dataset_process_rule.to_dict()
-            data_source_info = document.data_source_detail_dict
-            response = {
-                "id": document.id,
-                "position": document.position,
-                "data_source_type": document.data_source_type,
-                "data_source_info": data_source_info,
-                "dataset_process_rule_id": document.dataset_process_rule_id,
-                "dataset_process_rule": dataset_process_rules,
-                "document_process_rule": document_process_rules,
-                "name": document.name,
-                "created_from": document.created_from,
-                "created_by": document.created_by,
-                "created_at": document.created_at.timestamp(),
-                "tokens": document.tokens,
-                "indexing_status": document.indexing_status,
-                "completed_at": int(document.completed_at.timestamp()) if document.completed_at else None,
-                "updated_at": int(document.updated_at.timestamp()) if document.updated_at else None,
-                "indexing_latency": document.indexing_latency,
-                "error": document.error,
-                "enabled": document.enabled,
-                "disabled_at": int(document.disabled_at.timestamp()) if document.disabled_at else None,
-                "disabled_by": document.disabled_by,
-                "archived": document.archived,
-                "doc_type": document.doc_type,
-                "doc_metadata": document.doc_metadata_details,
-                "segment_count": document.segment_count,
-                "average_segment_length": document.average_segment_length,
-                "hit_count": document.hit_count,
-                "display_status": document.display_status,
-                "doc_form": document.doc_form,
-                "doc_language": document.doc_language,
-            }
-
-        return response
-
-
 api.add_resource(
    DocumentAddByTextApi,
    "/datasets/<uuid:dataset_id>/document/create_by_text",
@ -585,4 +489,3 @@ api.add_resource(
 api.add_resource(DocumentDeleteApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
 api.add_resource(DocumentListApi, "/datasets/<uuid:dataset_id>/documents")
 api.add_resource(DocumentIndexingStatusApi, "/datasets/<uuid:dataset_id>/documents/<string:batch>/indexing-status")
-api.add_resource(DocumentDetailApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
--- a/api/controllers/service_api/index.py
+++ b/api/controllers/service_api/index.py
@ -9,7 +9,7 @@ class IndexApi(Resource):
        return {
            "welcome": "Dify OpenAPI",
            "api_version": "v1",
-            "server_version": dify_config.project.version,
+            "server_version": dify_config.CURRENT_VERSION,
        }


--- a/api/controllers/service_api/wraps.py
+++ b/api/controllers/service_api/wraps.py
@ -11,13 +11,13 @@ from flask_restful import Resource
 from pydantic import BaseModel
 from sqlalchemy import select, update
 from sqlalchemy.orm import Session
-from werkzeug.exceptions import Forbidden, NotFound, Unauthorized
+from werkzeug.exceptions import Forbidden, Unauthorized

 from extensions.ext_database import db
 from extensions.ext_redis import redis_client
 from libs.login import _get_user
 from models.account import Account, Tenant, TenantAccountJoin, TenantStatus
-from models.dataset import Dataset, RateLimitLog
+from models.dataset import RateLimitLog
 from models.model import ApiToken, App, EndUser
 from services.feature_service import FeatureService

@ -317,11 +317,3 @@ def create_or_update_end_user_for_user_id(app_model: App, user_id: Optional[str]

 class DatasetApiResource(Resource):
    method_decorators = [validate_dataset_token]
-
-    def get_dataset(self, dataset_id: str, tenant_id: str) -> Dataset:
-        dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id, Dataset.tenant_id == tenant_id).first()
-
-        if not dataset:
-            raise NotFound("Dataset not found.")
-
-        return dataset
--- a/api/core/app/app_config/entities.py
+++ b/api/core/app/app_config/entities.py
@ -113,9 +113,9 @@ class VariableEntity(BaseModel):
    hide: bool = False
    max_length: Optional[int] = None
    options: Sequence[str] = Field(default_factory=list)
-    allowed_file_types: Optional[Sequence[FileType]] = Field(default_factory=list)
-    allowed_file_extensions: Optional[Sequence[str]] = Field(default_factory=list)
-    allowed_file_upload_methods: Optional[Sequence[FileTransferMethod]] = Field(default_factory=list)
+    allowed_file_types: Sequence[FileType] = Field(default_factory=list)
+    allowed_file_extensions: Sequence[str] = Field(default_factory=list)
+    allowed_file_upload_methods: Sequence[FileTransferMethod] = Field(default_factory=list)

    @field_validator("description", mode="before")
    @classmethod
@ -128,16 +128,6 @@ class VariableEntity(BaseModel):
        return v or []


-class RagPipelineVariableEntity(VariableEntity):
-    """
-    Rag Pipeline Variable Entity.
-    """
-
-    tooltips: Optional[str] = None
-    placeholder: Optional[str] = None
-    belong_to_node_id: str
-
-
 class ExternalDataVariableEntity(BaseModel):
    """
    External Data Variable Entity.
@ -295,7 +285,7 @@ class AppConfig(BaseModel):
    tenant_id: str
    app_id: str
    app_mode: AppMode
-    additional_features: Optional[AppAdditionalFeatures] = None
+    additional_features: AppAdditionalFeatures
    variables: list[VariableEntity] = []
    sensitive_word_avoidance: Optional[SensitiveWordAvoidanceEntity] = None

--- a/api/core/app/app_config/workflow_ui_based_app/variables/manager.py
+++ b/api/core/app/app_config/workflow_ui_based_app/variables/manager.py
@ -1,4 +1,4 @@
-from core.app.app_config.entities import RagPipelineVariableEntity, VariableEntity
+from core.app.app_config.entities import VariableEntity
 from models.workflow import Workflow


@ -20,19 +20,3 @@ class WorkflowVariablesConfigManager:
            variables.append(VariableEntity.model_validate(variable))

        return variables
-
-    @classmethod
-    def convert_rag_pipeline_variable(cls, workflow: Workflow) -> list[RagPipelineVariableEntity]:
-        """
-        Convert workflow start variables to variables
-
-        :param workflow: workflow instance
-        """
-        variables = []
-
-        user_input_form = workflow.rag_pipeline_user_input_form()
-        # variables
-        for variable in user_input_form:
-            variables.append(RagPipelineVariableEntity.model_validate(variable))
-
-        return variables
--- a/api/core/app/apps/advanced_chat/app_generator.py
+++ b/api/core/app/apps/advanced_chat/app_generator.py
@ -27,9 +27,6 @@ from core.ops.ops_trace_manager import TraceQueueManager
 from core.prompt.utils.get_thread_messages_length import get_thread_messages_length
 from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository
 from core.repositories.sqlalchemy_workflow_execution_repository import SQLAlchemyWorkflowExecutionRepository
-from core.workflow.repositories.draft_variable_repository import (
-    DraftVariableSaverFactory,
-)
 from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
 from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository
 from core.workflow.variable_loader import DUMMY_VARIABLE_LOADER, VariableLoader
@ -39,10 +36,7 @@ from libs.flask_utils import preserve_flask_contexts
 from models import Account, App, Conversation, EndUser, Message, Workflow, WorkflowNodeExecutionTriggeredFrom
 from models.enums import WorkflowRunTriggeredFrom
 from services.conversation_service import ConversationService
-from services.workflow_draft_variable_service import (
-    DraftVarLoader,
-    WorkflowDraftVariableService,
-)
+from services.workflow_draft_variable_service import DraftVarLoader, WorkflowDraftVariableService

 logger = logging.getLogger(__name__)

@ -456,7 +450,6 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
            workflow_execution_repository=workflow_execution_repository,
            workflow_node_execution_repository=workflow_node_execution_repository,
            stream=stream,
-            draft_var_saver_factory=self._get_draft_var_saver_factory(invoke_from),
        )

        return AdvancedChatAppGenerateResponseConverter.convert(response=response, invoke_from=invoke_from)
@ -528,7 +521,6 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
        user: Union[Account, EndUser],
        workflow_execution_repository: WorkflowExecutionRepository,
        workflow_node_execution_repository: WorkflowNodeExecutionRepository,
-        draft_var_saver_factory: DraftVariableSaverFactory,
        stream: bool = False,
    ) -> Union[ChatbotAppBlockingResponse, Generator[ChatbotAppStreamResponse, None, None]]:
        """
@ -555,7 +547,6 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
            workflow_execution_repository=workflow_execution_repository,
            workflow_node_execution_repository=workflow_node_execution_repository,
            stream=stream,
-            draft_var_saver_factory=draft_var_saver_factory,
        )

        try:
--- a/api/core/app/apps/advanced_chat/generate_task_pipeline.py
+++ b/api/core/app/apps/advanced_chat/generate_task_pipeline.py
@ -64,7 +64,6 @@ from core.workflow.entities.workflow_execution import WorkflowExecutionStatus, W
 from core.workflow.enums import SystemVariableKey
 from core.workflow.graph_engine.entities.graph_runtime_state import GraphRuntimeState
 from core.workflow.nodes import NodeType
-from core.workflow.repositories.draft_variable_repository import DraftVariableSaverFactory
 from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
 from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository
 from core.workflow.workflow_cycle_manager import CycleManagerWorkflowInfo, WorkflowCycleManager
@ -95,7 +94,6 @@ class AdvancedChatAppGenerateTaskPipeline:
        dialogue_count: int,
        workflow_execution_repository: WorkflowExecutionRepository,
        workflow_node_execution_repository: WorkflowNodeExecutionRepository,
-        draft_var_saver_factory: DraftVariableSaverFactory,
    ) -> None:
        self._base_task_pipeline = BasedGenerateTaskPipeline(
            application_generate_entity=application_generate_entity,
@ -155,7 +153,6 @@ class AdvancedChatAppGenerateTaskPipeline:
        self._conversation_name_generate_thread: Thread | None = None
        self._recorded_files: list[Mapping[str, Any]] = []
        self._workflow_run_id: str = ""
-        self._draft_var_saver_factory = draft_var_saver_factory

    def process(self) -> Union[ChatbotAppBlockingResponse, Generator[ChatbotAppStreamResponse, None, None]]:
        """
@ -374,7 +371,6 @@ class AdvancedChatAppGenerateTaskPipeline:
                        workflow_node_execution=workflow_node_execution,
                    )
                    session.commit()
-                self._save_output_for_event(event, workflow_node_execution.id)

                if node_finish_resp:
                    yield node_finish_resp
@ -394,8 +390,6 @@ class AdvancedChatAppGenerateTaskPipeline:
                    task_id=self._application_generate_entity.task_id,
                    workflow_node_execution=workflow_node_execution,
                )
-                if isinstance(event, QueueNodeExceptionEvent):
-                    self._save_output_for_event(event, workflow_node_execution.id)

                if node_finish_resp:
                    yield node_finish_resp
@ -765,15 +759,3 @@ class AdvancedChatAppGenerateTaskPipeline:
        if not message:
            raise ValueError(f"Message not found: {self._message_id}")
        return message
-
-    def _save_output_for_event(self, event: QueueNodeSucceededEvent | QueueNodeExceptionEvent, node_execution_id: str):
-        with Session(db.engine) as session, session.begin():
-            saver = self._draft_var_saver_factory(
-                session=session,
-                app_id=self._application_generate_entity.app_config.app_id,
-                node_id=event.node_id,
-                node_type=event.node_type,
-                node_execution_id=node_execution_id,
-                enclosing_node_id=event.in_loop_id or event.in_iteration_id,
-            )
-            saver.save(event.process_data, event.outputs)
--- a/api/core/app/apps/base_app_generator.py
+++ b/api/core/app/apps/base_app_generator.py
@ -1,20 +1,10 @@
 import json
 from collections.abc import Generator, Mapping, Sequence
-from typing import TYPE_CHECKING, Any, Optional, Union, final
-
-from sqlalchemy.orm import Session
+from typing import TYPE_CHECKING, Any, Optional, Union

 from core.app.app_config.entities import VariableEntityType
-from core.app.entities.app_invoke_entities import InvokeFrom
 from core.file import File, FileUploadConfig
-from core.workflow.nodes.enums import NodeType
-from core.workflow.repositories.draft_variable_repository import (
-    DraftVariableSaver,
-    DraftVariableSaverFactory,
-    NoopDraftVariableSaver,
-)
 from factories import file_factory
-from services.workflow_draft_variable_service import DraftVariableSaver as DraftVariableSaverImpl

 if TYPE_CHECKING:
    from core.app.app_config.entities import VariableEntity
@ -169,38 +159,3 @@ class BaseAppGenerator:
                        yield f"event: {message}\n\n"

            return gen()
-
-    @final
-    @staticmethod
-    def _get_draft_var_saver_factory(invoke_from: InvokeFrom) -> DraftVariableSaverFactory:
-        if invoke_from == InvokeFrom.DEBUGGER:
-
-            def draft_var_saver_factory(
-                session: Session,
-                app_id: str,
-                node_id: str,
-                node_type: NodeType,
-                node_execution_id: str,
-                enclosing_node_id: str | None = None,
-            ) -> DraftVariableSaver:
-                return DraftVariableSaverImpl(
-                    session=session,
-                    app_id=app_id,
-                    node_id=node_id,
-                    node_type=node_type,
-                    node_execution_id=node_execution_id,
-                    enclosing_node_id=enclosing_node_id,
-                )
-        else:
-
-            def draft_var_saver_factory(
-                session: Session,
-                app_id: str,
-                node_id: str,
-                node_type: NodeType,
-                node_execution_id: str,
-                enclosing_node_id: str | None = None,
-            ) -> DraftVariableSaver:
-                return NoopDraftVariableSaver()
-
-        return draft_var_saver_factory
--- a/api/core/app/apps/common/workflow_response_converter.py
+++ b/api/core/app/apps/common/workflow_response_converter.py
@ -43,13 +43,10 @@ from core.app.entities.task_entities import (
    WorkflowStartStreamResponse,
 )
 from core.file import FILE_MODEL_IDENTITY, File
-from core.plugin.impl.datasource import PluginDatasourceManager
 from core.tools.tool_manager import ToolManager
-from core.variables.segments import ArrayFileSegment, FileSegment, Segment
 from core.workflow.entities.workflow_execution import WorkflowExecution
 from core.workflow.entities.workflow_node_execution import WorkflowNodeExecution, WorkflowNodeExecutionStatus
 from core.workflow.nodes import NodeType
-from core.workflow.nodes.datasource.entities import DatasourceNodeData
 from core.workflow.nodes.tool.entities import ToolNodeData
 from core.workflow.workflow_type_encoder import WorkflowRuntimeTypeConverter
 from models import (
@ -185,14 +182,6 @@ class WorkflowResponseConverter:
                provider_type=node_data.provider_type,
                provider_id=node_data.provider_id,
            )
-        elif event.node_type == NodeType.DATASOURCE:
-            node_data = cast(DatasourceNodeData, event.node_data)
-            manager = PluginDatasourceManager()
-            provider_entity = manager.fetch_datasource_provider(
-                self._application_generate_entity.app_config.tenant_id,
-                f"{node_data.plugin_id}/{node_data.provider_name}",
-            )
-            response.data.extras["icon"] = provider_entity.declaration.identity.icon

        return response

@ -517,8 +506,7 @@ class WorkflowResponseConverter:
        # Convert to tuple to match Sequence type
        return tuple(flattened_files)

-    @classmethod
-    def _fetch_files_from_variable_value(cls, value: Union[dict, list, Segment]) -> Sequence[Mapping[str, Any]]:
+    def _fetch_files_from_variable_value(self, value: Union[dict, list]) -> Sequence[Mapping[str, Any]]:
        """
        Fetch files from variable value
        :param value: variable value
@ -527,30 +515,20 @@ class WorkflowResponseConverter:
        if not value:
            return []

-        files: list[Mapping[str, Any]] = []
-        if isinstance(value, FileSegment):
-            files.append(value.value.to_dict())
-        elif isinstance(value, ArrayFileSegment):
-            files.extend([i.to_dict() for i in value.value])
-        elif isinstance(value, File):
-            files.append(value.to_dict())
-        elif isinstance(value, list):
+        files = []
+        if isinstance(value, list):
            for item in value:
-                file = cls._get_file_var_from_value(item)
+                file = self._get_file_var_from_value(item)
                if file:
                    files.append(file)
-        elif isinstance(
-            value,
-            dict,
-        ):
-            file = cls._get_file_var_from_value(value)
+        elif isinstance(value, dict):
+            file = self._get_file_var_from_value(value)
            if file:
                files.append(file)

        return files

-    @classmethod
-    def _get_file_var_from_value(cls, value: Union[dict, list]) -> Mapping[str, Any] | None:
+    def _get_file_var_from_value(self, value: Union[dict, list]) -> Mapping[str, Any] | None:
        """
        Get file var from value
        :param value: variable value
--- a/api/core/app/apps/pipeline/init.py
+++ b/api/core/app/apps/pipeline/init.py
--- a/api/core/app/apps/pipeline/generate_response_converter.py
+++ b/api/core/app/apps/pipeline/generate_response_converter.py
@ -1,95 +0,0 @@
-from collections.abc import Generator
-from typing import cast
-
-from core.app.apps.base_app_generate_response_converter import AppGenerateResponseConverter
-from core.app.entities.task_entities import (
-    AppStreamResponse,
-    ErrorStreamResponse,
-    NodeFinishStreamResponse,
-    NodeStartStreamResponse,
-    PingStreamResponse,
-    WorkflowAppBlockingResponse,
-    WorkflowAppStreamResponse,
-)
-
-
-class WorkflowAppGenerateResponseConverter(AppGenerateResponseConverter):
-    _blocking_response_type = WorkflowAppBlockingResponse
-
-    @classmethod
-    def convert_blocking_full_response(cls, blocking_response: WorkflowAppBlockingResponse) -> dict:  # type: ignore[override]
-        """
-        Convert blocking full response.
-        :param blocking_response: blocking response
-        :return:
-        """
-        return dict(blocking_response.to_dict())
-
-    @classmethod
-    def convert_blocking_simple_response(cls, blocking_response: WorkflowAppBlockingResponse) -> dict:  # type: ignore[override]
-        """
-        Convert blocking simple response.
-        :param blocking_response: blocking response
-        :return:
-        """
-        return cls.convert_blocking_full_response(blocking_response)
-
-    @classmethod
-    def convert_stream_full_response(
-        cls, stream_response: Generator[AppStreamResponse, None, None]
-    ) -> Generator[dict | str, None, None]:
-        """
-        Convert stream full response.
-        :param stream_response: stream response
-        :return:
-        """
-        for chunk in stream_response:
-            chunk = cast(WorkflowAppStreamResponse, chunk)
-            sub_stream_response = chunk.stream_response
-
-            if isinstance(sub_stream_response, PingStreamResponse):
-                yield "ping"
-                continue
-
-            response_chunk = {
-                "event": sub_stream_response.event.value,
-                "workflow_run_id": chunk.workflow_run_id,
-            }
-
-            if isinstance(sub_stream_response, ErrorStreamResponse):
-                data = cls._error_to_stream_response(sub_stream_response.err)
-                response_chunk.update(data)
-            else:
-                response_chunk.update(sub_stream_response.to_dict())
-            yield response_chunk
-
-    @classmethod
-    def convert_stream_simple_response(
-        cls, stream_response: Generator[AppStreamResponse, None, None]
-    ) -> Generator[dict | str, None, None]:
-        """
-        Convert stream simple response.
-        :param stream_response: stream response
-        :return:
-        """
-        for chunk in stream_response:
-            chunk = cast(WorkflowAppStreamResponse, chunk)
-            sub_stream_response = chunk.stream_response
-
-            if isinstance(sub_stream_response, PingStreamResponse):
-                yield "ping"
-                continue
-
-            response_chunk = {
-                "event": sub_stream_response.event.value,
-                "workflow_run_id": chunk.workflow_run_id,
-            }
-
-            if isinstance(sub_stream_response, ErrorStreamResponse):
-                data = cls._error_to_stream_response(sub_stream_response.err)
-                response_chunk.update(data)
-            elif isinstance(sub_stream_response, NodeStartStreamResponse | NodeFinishStreamResponse):
-                response_chunk.update(sub_stream_response.to_ignore_detail_dict())
-            else:
-                response_chunk.update(sub_stream_response.to_dict())
-            yield response_chunk
--- a/api/core/app/apps/pipeline/pipeline_config_manager.py
+++ b/api/core/app/apps/pipeline/pipeline_config_manager.py
@ -1,64 +0,0 @@
-from core.app.app_config.base_app_config_manager import BaseAppConfigManager
-from core.app.app_config.common.sensitive_word_avoidance.manager import SensitiveWordAvoidanceConfigManager
-from core.app.app_config.entities import RagPipelineVariableEntity, WorkflowUIBasedAppConfig
-from core.app.app_config.features.file_upload.manager import FileUploadConfigManager
-from core.app.app_config.features.text_to_speech.manager import TextToSpeechConfigManager
-from core.app.app_config.workflow_ui_based_app.variables.manager import WorkflowVariablesConfigManager
-from models.dataset import Pipeline
-from models.model import AppMode
-from models.workflow import Workflow
-
-
-class PipelineConfig(WorkflowUIBasedAppConfig):
-    """
-    Pipeline Config Entity.
-    """
-
-    rag_pipeline_variables: list[RagPipelineVariableEntity] = []
-    pass
-
-
-class PipelineConfigManager(BaseAppConfigManager):
-    @classmethod
-    def get_pipeline_config(cls, pipeline: Pipeline, workflow: Workflow) -> PipelineConfig:
-        pipeline_config = PipelineConfig(
-            tenant_id=pipeline.tenant_id,
-            app_id=pipeline.id,
-            app_mode=AppMode.RAG_PIPELINE,
-            workflow_id=workflow.id,
-            rag_pipeline_variables=WorkflowVariablesConfigManager.convert_rag_pipeline_variable(workflow=workflow),
-        )
-
-        return pipeline_config
-
-    @classmethod
-    def config_validate(cls, tenant_id: str, config: dict, only_structure_validate: bool = False) -> dict:
-        """
-        Validate for pipeline config
-
-        :param tenant_id: tenant id
-        :param config: app model config args
-        :param only_structure_validate: only validate the structure of the config
-        """
-        related_config_keys = []
-
-        # file upload validation
-        config, current_related_config_keys = FileUploadConfigManager.validate_and_set_defaults(config=config)
-        related_config_keys.extend(current_related_config_keys)
-
-        # text_to_speech
-        config, current_related_config_keys = TextToSpeechConfigManager.validate_and_set_defaults(config)
-        related_config_keys.extend(current_related_config_keys)
-
-        # moderation validation
-        config, current_related_config_keys = SensitiveWordAvoidanceConfigManager.validate_and_set_defaults(
-            tenant_id=tenant_id, config=config, only_structure_validate=only_structure_validate
-        )
-        related_config_keys.extend(current_related_config_keys)
-
-        related_config_keys = list(set(related_config_keys))
-
-        # Filter out extra parameters
-        filtered_config = {key: config.get(key) for key in related_config_keys}
-
-        return filtered_config
--- a/api/core/app/apps/pipeline/pipeline_generator.py
+++ b/api/core/app/apps/pipeline/pipeline_generator.py
@ -1,621 +0,0 @@
-import contextvars
-import datetime
-import json
-import logging
-import secrets
-import threading
-import time
-import uuid
-from collections.abc import Generator, Mapping
-from typing import Any, Literal, Optional, Union, overload
-
-from flask import Flask, current_app
-from pydantic import ValidationError
-from sqlalchemy.orm import sessionmaker
-
-import contexts
-from configs import dify_config
-from core.app.apps.base_app_generator import BaseAppGenerator
-from core.app.apps.base_app_queue_manager import AppQueueManager, GenerateTaskStoppedError, PublishFrom
-from core.app.apps.pipeline.pipeline_config_manager import PipelineConfigManager
-from core.app.apps.pipeline.pipeline_queue_manager import PipelineQueueManager
-from core.app.apps.pipeline.pipeline_runner import PipelineRunner
-from core.app.apps.workflow.generate_response_converter import WorkflowAppGenerateResponseConverter
-from core.app.apps.workflow.generate_task_pipeline import WorkflowAppGenerateTaskPipeline
-from core.app.entities.app_invoke_entities import InvokeFrom, RagPipelineGenerateEntity
-from core.app.entities.task_entities import WorkflowAppBlockingResponse, WorkflowAppStreamResponse
-from core.entities.knowledge_entities import PipelineDataset, PipelineDocument
-from core.model_runtime.errors.invoke import InvokeAuthorizationError
-from core.rag.index_processor.constant.built_in_field import BuiltInField
-from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository
-from core.repositories.sqlalchemy_workflow_execution_repository import SQLAlchemyWorkflowExecutionRepository
-from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
-from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository
-from extensions.ext_database import db
-from libs.flask_utils import preserve_flask_contexts
-from models import Account, EndUser, Workflow, WorkflowNodeExecutionTriggeredFrom
-from models.dataset import Document, DocumentPipelineExecutionLog, Pipeline
-from models.enums import WorkflowRunTriggeredFrom
-from models.model import AppMode
-from services.dataset_service import DocumentService
-
-logger = logging.getLogger(__name__)
-
-
-class PipelineGenerator(BaseAppGenerator):
-    @overload
-    def generate(
-        self,
-        *,
-        pipeline: Pipeline,
-        workflow: Workflow,
-        user: Union[Account, EndUser],
-        args: Mapping[str, Any],
-        invoke_from: InvokeFrom,
-        streaming: Literal[True],
-        call_depth: int,
-        workflow_thread_pool_id: Optional[str],
-    ) -> Mapping[str, Any] | Generator[Mapping | str, None, None] | None: ...
-
-    @overload
-    def generate(
-        self,
-        *,
-        pipeline: Pipeline,
-        workflow: Workflow,
-        user: Union[Account, EndUser],
-        args: Mapping[str, Any],
-        invoke_from: InvokeFrom,
-        streaming: Literal[False],
-        call_depth: int,
-        workflow_thread_pool_id: Optional[str],
-    ) -> Mapping[str, Any]: ...
-
-    @overload
-    def generate(
-        self,
-        *,
-        pipeline: Pipeline,
-        workflow: Workflow,
-        user: Union[Account, EndUser],
-        args: Mapping[str, Any],
-        invoke_from: InvokeFrom,
-        streaming: bool,
-        call_depth: int,
-        workflow_thread_pool_id: Optional[str],
-    ) -> Union[Mapping[str, Any], Generator[Mapping | str, None, None]]: ...
-
-    def generate(
-        self,
-        *,
-        pipeline: Pipeline,
-        workflow: Workflow,
-        user: Union[Account, EndUser],
-        args: Mapping[str, Any],
-        invoke_from: InvokeFrom,
-        streaming: bool = True,
-        call_depth: int = 0,
-        workflow_thread_pool_id: Optional[str] = None,
-    ) -> Union[Mapping[str, Any], Generator[Mapping | str, None, None], None]:
-        # convert to app config
-        pipeline_config = PipelineConfigManager.get_pipeline_config(
-            pipeline=pipeline,
-            workflow=workflow,
-        )
-        # Add null check for dataset
-        dataset = pipeline.dataset
-        if not dataset:
-            raise ValueError("Pipeline dataset is required")
-        inputs: Mapping[str, Any] = args["inputs"]
-        start_node_id: str = args["start_node_id"]
-        datasource_type: str = args["datasource_type"]
-        datasource_info_list: list[Mapping[str, Any]] = args["datasource_info_list"]
-        batch = time.strftime("%Y%m%d%H%M%S") + str(secrets.randbelow(900000) + 100000)
-        documents = []
-        if invoke_from == InvokeFrom.PUBLISHED:
-            for datasource_info in datasource_info_list:
-                position = DocumentService.get_documents_position(dataset.id)
-                document = self._build_document(
-                    tenant_id=pipeline.tenant_id,
-                    dataset_id=dataset.id,
-                    built_in_field_enabled=dataset.built_in_field_enabled,
-                    datasource_type=datasource_type,
-                    datasource_info=datasource_info,
-                    created_from="rag-pipeline",
-                    position=position,
-                    account=user,
-                    batch=batch,
-                    document_form=dataset.chunk_structure,
-                )
-                db.session.add(document)
-                documents.append(document)
-            db.session.commit()
-
-        # run in child thread
-        for i, datasource_info in enumerate(datasource_info_list):
-            workflow_run_id = str(uuid.uuid4())
-            document_id = None
-            if invoke_from == InvokeFrom.PUBLISHED:
-                document_id = documents[i].id
-                document_pipeline_execution_log = DocumentPipelineExecutionLog(
-                    document_id=document_id,
-                    datasource_type=datasource_type,
-                    datasource_info=json.dumps(datasource_info),
-                    datasource_node_id=start_node_id,
-                    input_data=inputs,
-                    pipeline_id=pipeline.id,
-                    created_by=user.id,
-                )
-                db.session.add(document_pipeline_execution_log)
-                db.session.commit()
-            application_generate_entity = RagPipelineGenerateEntity(
-                task_id=str(uuid.uuid4()),
-                app_config=pipeline_config,
-                pipeline_config=pipeline_config,
-                datasource_type=datasource_type,
-                datasource_info=datasource_info,
-                dataset_id=dataset.id,
-                start_node_id=start_node_id,
-                batch=batch,
-                document_id=document_id,
-                inputs=self._prepare_user_inputs(
-                    user_inputs=inputs,
-                    variables=pipeline_config.rag_pipeline_variables,
-                    tenant_id=pipeline.tenant_id,
-                    strict_type_validation=True if invoke_from == InvokeFrom.SERVICE_API else False,
-                ),
-                files=[],
-                user_id=user.id,
-                stream=streaming,
-                invoke_from=invoke_from,
-                call_depth=call_depth,
-                workflow_execution_id=workflow_run_id,
-            )
-
-            contexts.plugin_tool_providers.set({})
-            contexts.plugin_tool_providers_lock.set(threading.Lock())
-            if invoke_from == InvokeFrom.DEBUGGER:
-                workflow_triggered_from = WorkflowRunTriggeredFrom.RAG_PIPELINE_DEBUGGING
-            else:
-                workflow_triggered_from = WorkflowRunTriggeredFrom.RAG_PIPELINE_RUN
-            # Create workflow node execution repository
-            session_factory = sessionmaker(bind=db.engine, expire_on_commit=False)
-            workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository(
-                session_factory=session_factory,
-                user=user,
-                app_id=application_generate_entity.app_config.app_id,
-                triggered_from=workflow_triggered_from,
-            )
-
-            workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository(
-                session_factory=session_factory,
-                user=user,
-                app_id=application_generate_entity.app_config.app_id,
-                triggered_from=WorkflowNodeExecutionTriggeredFrom.RAG_PIPELINE_RUN,
-            )
-            if invoke_from == InvokeFrom.DEBUGGER:
-                return self._generate(
-                    flask_app=current_app._get_current_object(),  # type: ignore
-                    context=contextvars.copy_context(),
-                    pipeline=pipeline,
-                    workflow_id=workflow.id,
-                    user=user,
-                    application_generate_entity=application_generate_entity,
-                    invoke_from=invoke_from,
-                    workflow_execution_repository=workflow_execution_repository,
-                    workflow_node_execution_repository=workflow_node_execution_repository,
-                    streaming=streaming,
-                    workflow_thread_pool_id=workflow_thread_pool_id,
-                )
-            else:
-                # run in child thread
-                context = contextvars.copy_context()
-
-                worker_thread = threading.Thread(
-                    target=self._generate,
-                    kwargs={
-                        "flask_app": current_app._get_current_object(),  # type: ignore
-                        "context": context,
-                        "pipeline": pipeline,
-                        "workflow_id": workflow.id,
-                        "user": user,
-                        "application_generate_entity": application_generate_entity,
-                        "invoke_from": invoke_from,
-                        "workflow_execution_repository": workflow_execution_repository,
-                        "workflow_node_execution_repository": workflow_node_execution_repository,
-                        "streaming": streaming,
-                        "workflow_thread_pool_id": workflow_thread_pool_id,
-                    },
-                )
-
-                worker_thread.start()
-        # return batch, dataset, documents
-        return {
-            "batch": batch,
-            "dataset": PipelineDataset(
-                id=dataset.id,
-                name=dataset.name,
-                description=dataset.description,
-                chunk_structure=dataset.chunk_structure,
-            ).model_dump(),
-            "documents": [
-                PipelineDocument(
-                    id=document.id,
-                    position=document.position,
-                    data_source_type=document.data_source_type,
-                    data_source_info=json.loads(document.data_source_info) if document.data_source_info else None,
-                    name=document.name,
-                    indexing_status=document.indexing_status,
-                    error=document.error,
-                    enabled=document.enabled,
-                ).model_dump()
-                for document in documents
-            ],
-        }
-
-    def _generate(
-        self,
-        *,
-        flask_app: Flask,
-        context: contextvars.Context,
-        pipeline: Pipeline,
-        workflow_id: str,
-        user: Union[Account, EndUser],
-        application_generate_entity: RagPipelineGenerateEntity,
-        invoke_from: InvokeFrom,
-        workflow_execution_repository: WorkflowExecutionRepository,
-        workflow_node_execution_repository: WorkflowNodeExecutionRepository,
-        streaming: bool = True,
-        workflow_thread_pool_id: Optional[str] = None,
-    ) -> Union[Mapping[str, Any], Generator[str | Mapping[str, Any], None, None]]:
-        """
-        Generate App response.
-
-        :param pipeline: Pipeline
-        :param workflow: Workflow
-        :param user: account or end user
-        :param application_generate_entity: application generate entity
-        :param invoke_from: invoke from source
-        :param workflow_execution_repository: repository for workflow execution
-        :param workflow_node_execution_repository: repository for workflow node execution
-        :param streaming: is stream
-        :param workflow_thread_pool_id: workflow thread pool id
-        """
-        with preserve_flask_contexts(flask_app, context_vars=context):
-            # init queue manager
-            workflow = db.session.query(Workflow).filter(Workflow.id == workflow_id).first()
-            if not workflow:
-                raise ValueError(f"Workflow not found: {workflow_id}")
-            queue_manager = PipelineQueueManager(
-                task_id=application_generate_entity.task_id,
-                user_id=application_generate_entity.user_id,
-                invoke_from=application_generate_entity.invoke_from,
-                app_mode=AppMode.RAG_PIPELINE,
-            )
-            context = contextvars.copy_context()
-
-            # new thread
-            worker_thread = threading.Thread(
-                target=self._generate_worker,
-                kwargs={
-                    "flask_app": current_app._get_current_object(),  # type: ignore
-                    "context": context,
-                    "queue_manager": queue_manager,
-                    "application_generate_entity": application_generate_entity,
-                    "workflow_thread_pool_id": workflow_thread_pool_id,
-                },
-            )
-
-            worker_thread.start()
-
-            # return response or stream generator
-            response = self._handle_response(
-                application_generate_entity=application_generate_entity,
-                workflow=workflow,
-                queue_manager=queue_manager,
-                user=user,
-                workflow_execution_repository=workflow_execution_repository,
-                workflow_node_execution_repository=workflow_node_execution_repository,
-                stream=streaming,
-            )
-
-            return WorkflowAppGenerateResponseConverter.convert(response=response, invoke_from=invoke_from)
-
-    def single_iteration_generate(
-        self,
-        pipeline: Pipeline,
-        workflow: Workflow,
-        node_id: str,
-        user: Account | EndUser,
-        args: Mapping[str, Any],
-        streaming: bool = True,
-    ) -> Mapping[str, Any] | Generator[str | Mapping[str, Any], None, None]:
-        """
-        Generate App response.
-
-        :param app_model: App
-        :param workflow: Workflow
-        :param node_id: the node id
-        :param user: account or end user
-        :param args: request args
-        :param streaming: is streamed
-        """
-        if not node_id:
-            raise ValueError("node_id is required")
-
-        if args.get("inputs") is None:
-            raise ValueError("inputs is required")
-
-        # convert to app config
-        pipeline_config = PipelineConfigManager.get_pipeline_config(pipeline=pipeline, workflow=workflow)
-
-        dataset = pipeline.dataset
-        if not dataset:
-            raise ValueError("Pipeline dataset is required")
-
-        # init application generate entity - use RagPipelineGenerateEntity instead
-        application_generate_entity = RagPipelineGenerateEntity(
-            task_id=str(uuid.uuid4()),
-            app_config=pipeline_config,
-            pipeline_config=pipeline_config,
-            datasource_type=args.get("datasource_type", ""),
-            datasource_info=args.get("datasource_info", {}),
-            dataset_id=dataset.id,
-            batch=args.get("batch", ""),
-            document_id=args.get("document_id"),
-            inputs={},
-            files=[],
-            user_id=user.id,
-            stream=streaming,
-            invoke_from=InvokeFrom.DEBUGGER,
-            call_depth=0,
-            workflow_execution_id=str(uuid.uuid4()),
-        )
-        contexts.plugin_tool_providers.set({})
-        contexts.plugin_tool_providers_lock.set(threading.Lock())
-        # Create workflow node execution repository
-        session_factory = sessionmaker(bind=db.engine, expire_on_commit=False)
-
-        workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository(
-            session_factory=session_factory,
-            user=user,
-            app_id=application_generate_entity.app_config.app_id,
-            triggered_from=WorkflowRunTriggeredFrom.RAG_PIPELINE_DEBUGGING,
-        )
-
-        workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository(
-            session_factory=session_factory,
-            user=user,
-            app_id=application_generate_entity.app_config.app_id,
-            triggered_from=WorkflowNodeExecutionTriggeredFrom.SINGLE_STEP,
-        )
-
-        return self._generate(
-            flask_app=current_app._get_current_object(),  # type: ignore
-            pipeline=pipeline,
-            workflow_id=workflow.id,
-            user=user,
-            invoke_from=InvokeFrom.DEBUGGER,
-            application_generate_entity=application_generate_entity,
-            workflow_execution_repository=workflow_execution_repository,
-            workflow_node_execution_repository=workflow_node_execution_repository,
-            streaming=streaming,
-        )
-
-    def single_loop_generate(
-        self,
-        pipeline: Pipeline,
-        workflow: Workflow,
-        node_id: str,
-        user: Account | EndUser,
-        args: Mapping[str, Any],
-        streaming: bool = True,
-    ) -> Mapping[str, Any] | Generator[str | Mapping[str, Any], None, None]:
-        """
-        Generate App response.
-
-        :param app_model: App
-        :param workflow: Workflow
-        :param node_id: the node id
-        :param user: account or end user
-        :param args: request args
-        :param streaming: is streamed
-        """
-        if not node_id:
-            raise ValueError("node_id is required")
-
-        if args.get("inputs") is None:
-            raise ValueError("inputs is required")
-
-        dataset = pipeline.dataset
-        if not dataset:
-            raise ValueError("Pipeline dataset is required")
-
-        # convert to app config
-        pipeline_config = PipelineConfigManager.get_pipeline_config(pipeline=pipeline, workflow=workflow)
-
-        # init application generate entity
-        application_generate_entity = RagPipelineGenerateEntity(
-            task_id=str(uuid.uuid4()),
-            app_config=pipeline_config,
-            pipeline_config=pipeline_config,
-            datasource_type=args.get("datasource_type", ""),
-            datasource_info=args.get("datasource_info", {}),
-            batch=args.get("batch", ""),
-            document_id=args.get("document_id"),
-            dataset_id=dataset.id,
-            inputs={},
-            files=[],
-            user_id=user.id,
-            stream=streaming,
-            invoke_from=InvokeFrom.DEBUGGER,
-            extras={"auto_generate_conversation_name": False},
-            single_loop_run=RagPipelineGenerateEntity.SingleLoopRunEntity(node_id=node_id, inputs=args["inputs"]),
-            workflow_execution_id=str(uuid.uuid4()),
-        )
-        contexts.plugin_tool_providers.set({})
-        contexts.plugin_tool_providers_lock.set(threading.Lock())
-
-        # Create workflow node execution repository
-        session_factory = sessionmaker(bind=db.engine, expire_on_commit=False)
-
-        workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository(
-            session_factory=session_factory,
-            user=user,
-            app_id=application_generate_entity.app_config.app_id,
-            triggered_from=WorkflowRunTriggeredFrom.RAG_PIPELINE_DEBUGGING,
-        )
-
-        workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository(
-            session_factory=session_factory,
-            user=user,
-            app_id=application_generate_entity.app_config.app_id,
-            triggered_from=WorkflowNodeExecutionTriggeredFrom.SINGLE_STEP,
-        )
-
-        return self._generate(
-            flask_app=current_app._get_current_object(),  # type: ignore
-            pipeline=pipeline,
-            workflow=workflow,
-            user=user,
-            invoke_from=InvokeFrom.DEBUGGER,
-            application_generate_entity=application_generate_entity,
-            workflow_execution_repository=workflow_execution_repository,
-            workflow_node_execution_repository=workflow_node_execution_repository,
-            streaming=streaming,
-        )
-
-    def _generate_worker(
-        self,
-        flask_app: Flask,
-        application_generate_entity: RagPipelineGenerateEntity,
-        queue_manager: AppQueueManager,
-        context: contextvars.Context,
-        workflow_thread_pool_id: Optional[str] = None,
-    ) -> None:
-        """
-        Generate worker in a new thread.
-        :param flask_app: Flask app
-        :param application_generate_entity: application generate entity
-        :param queue_manager: queue manager
-        :param workflow_thread_pool_id: workflow thread pool id
-        :return:
-        """
-
-        with preserve_flask_contexts(flask_app, context_vars=context):
-            try:
-                # workflow app
-                runner = PipelineRunner(
-                    application_generate_entity=application_generate_entity,
-                    queue_manager=queue_manager,
-                    workflow_thread_pool_id=workflow_thread_pool_id,
-                )
-
-                runner.run()
-            except GenerateTaskStoppedError:
-                pass
-            except InvokeAuthorizationError:
-                queue_manager.publish_error(
-                    InvokeAuthorizationError("Incorrect API key provided"), PublishFrom.APPLICATION_MANAGER
-                )
-            except ValidationError as e:
-                logger.exception("Validation Error when generating")
-                queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)
-            except ValueError as e:
-                if dify_config.DEBUG:
-                    logger.exception("Error when generating")
-                queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)
-            except Exception as e:
-                logger.exception("Unknown Error when generating")
-                queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)
-            finally:
-                db.session.close()
-
-    def _handle_response(
-        self,
-        application_generate_entity: RagPipelineGenerateEntity,
-        workflow: Workflow,
-        queue_manager: AppQueueManager,
-        user: Union[Account, EndUser],
-        workflow_execution_repository: WorkflowExecutionRepository,
-        workflow_node_execution_repository: WorkflowNodeExecutionRepository,
-        stream: bool = False,
-    ) -> Union[WorkflowAppBlockingResponse, Generator[WorkflowAppStreamResponse, None, None]]:
-        """
-        Handle response.
-        :param application_generate_entity: application generate entity
-        :param workflow: workflow
-        :param queue_manager: queue manager
-        :param user: account or end user
-        :param stream: is stream
-        :param workflow_node_execution_repository: optional repository for workflow node execution
-        :return:
-        """
-        # init generate task pipeline
-        generate_task_pipeline = WorkflowAppGenerateTaskPipeline(
-            application_generate_entity=application_generate_entity,
-            workflow=workflow,
-            queue_manager=queue_manager,
-            user=user,
-            stream=stream,
-            workflow_node_execution_repository=workflow_node_execution_repository,
-            workflow_execution_repository=workflow_execution_repository,
-        )
-
-        try:
-            return generate_task_pipeline.process()
-        except ValueError as e:
-            if len(e.args) > 0 and e.args[0] == "I/O operation on closed file.":  # ignore this error
-                raise GenerateTaskStoppedError()
-            else:
-                logger.exception(
-                    f"Fails to process generate task pipeline, task_id: {application_generate_entity.task_id}"
-                )
-                raise e
-
-    def _build_document(
-        self,
-        tenant_id: str,
-        dataset_id: str,
-        built_in_field_enabled: bool,
-        datasource_type: str,
-        datasource_info: Mapping[str, Any],
-        created_from: str,
-        position: int,
-        account: Union[Account, EndUser],
-        batch: str,
-        document_form: str,
-    ):
-        if datasource_type == "local_file":
-            name = datasource_info["name"]
-        elif datasource_type == "online_document":
-            name = datasource_info["page"]["page_name"]
-        elif datasource_type == "website_crawl":
-            name = datasource_info["title"]
-        else:
-            raise ValueError(f"Unsupported datasource type: {datasource_type}")
-
-        document = Document(
-            tenant_id=tenant_id,
-            dataset_id=dataset_id,
-            position=position,
-            data_source_type=datasource_type,
-            data_source_info=json.dumps(datasource_info),
-            batch=batch,
-            name=name,
-            created_from=created_from,
-            created_by=account.id,
-            doc_form=document_form,
-        )
-        doc_metadata = {}
-        if built_in_field_enabled:
-            doc_metadata = {
-                BuiltInField.document_name: name,
-                BuiltInField.uploader: account.name,
-                BuiltInField.upload_date: datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%d %H:%M:%S"),
-                BuiltInField.last_update_date: datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%d %H:%M:%S"),
-                BuiltInField.source: datasource_type,
-            }
-        if doc_metadata:
-            document.doc_metadata = doc_metadata
-        return document
--- a/api/core/app/apps/pipeline/pipeline_queue_manager.py
+++ b/api/core/app/apps/pipeline/pipeline_queue_manager.py
@ -1,44 +0,0 @@
-from core.app.apps.base_app_queue_manager import AppQueueManager, GenerateTaskStoppedError, PublishFrom
-from core.app.entities.app_invoke_entities import InvokeFrom
-from core.app.entities.queue_entities import (
-    AppQueueEvent,
-    QueueErrorEvent,
-    QueueMessageEndEvent,
-    QueueStopEvent,
-    QueueWorkflowFailedEvent,
-    QueueWorkflowPartialSuccessEvent,
-    QueueWorkflowSucceededEvent,
-    WorkflowQueueMessage,
-)
-
-
-class PipelineQueueManager(AppQueueManager):
-    def __init__(self, task_id: str, user_id: str, invoke_from: InvokeFrom, app_mode: str) -> None:
-        super().__init__(task_id, user_id, invoke_from)
-
-        self._app_mode = app_mode
-
-    def _publish(self, event: AppQueueEvent, pub_from: PublishFrom) -> None:
-        """
-        Publish event to queue
-        :param event:
-        :param pub_from:
-        :return:
-        """
-        message = WorkflowQueueMessage(task_id=self._task_id, app_mode=self._app_mode, event=event)
-
-        self._q.put(message)
-
-        if isinstance(
-            event,
-            QueueStopEvent
-            | QueueErrorEvent
-            | QueueMessageEndEvent
-            | QueueWorkflowSucceededEvent
-            | QueueWorkflowFailedEvent
-            | QueueWorkflowPartialSuccessEvent,
-        ):
-            self.stop_listen()
-
-        if pub_from == PublishFrom.APPLICATION_MANAGER and self._is_stopped():
-            raise GenerateTaskStoppedError()
--- a/api/core/app/apps/pipeline/pipeline_runner.py
+++ b/api/core/app/apps/pipeline/pipeline_runner.py
@ -1,221 +0,0 @@
-import logging
-from collections.abc import Mapping
-from typing import Any, Optional, cast
-
-from configs import dify_config
-from core.app.apps.base_app_queue_manager import AppQueueManager
-from core.app.apps.pipeline.pipeline_config_manager import PipelineConfig
-from core.app.apps.workflow_app_runner import WorkflowBasedAppRunner
-from core.app.entities.app_invoke_entities import (
-    InvokeFrom,
-    RagPipelineGenerateEntity,
-)
-from core.variables.variables import RAGPipelineVariable, RAGPipelineVariableInput
-from core.workflow.callbacks import WorkflowCallback, WorkflowLoggingCallback
-from core.workflow.entities.variable_pool import VariablePool
-from core.workflow.enums import SystemVariableKey
-from core.workflow.graph_engine.entities.graph import Graph
-from core.workflow.workflow_entry import WorkflowEntry
-from extensions.ext_database import db
-from models.dataset import Pipeline
-from models.enums import UserFrom
-from models.model import EndUser
-from models.workflow import Workflow, WorkflowType
-
-logger = logging.getLogger(__name__)
-
-
-class PipelineRunner(WorkflowBasedAppRunner):
-    """
-    Pipeline Application Runner
-    """
-
-    def __init__(
-        self,
-        application_generate_entity: RagPipelineGenerateEntity,
-        queue_manager: AppQueueManager,
-        workflow_thread_pool_id: Optional[str] = None,
-    ) -> None:
-        """
-        :param application_generate_entity: application generate entity
-        :param queue_manager: application queue manager
-        :param workflow_thread_pool_id: workflow thread pool id
-        """
-        self.application_generate_entity = application_generate_entity
-        self.queue_manager = queue_manager
-        self.workflow_thread_pool_id = workflow_thread_pool_id
-
-    def _get_app_id(self) -> str:
-        return self.application_generate_entity.app_config.app_id
-
-    def run(self) -> None:
-        """
-        Run application
-        """
-        app_config = self.application_generate_entity.app_config
-        app_config = cast(PipelineConfig, app_config)
-
-        user_id = None
-        if self.application_generate_entity.invoke_from in {InvokeFrom.WEB_APP, InvokeFrom.SERVICE_API}:
-            end_user = db.session.query(EndUser).filter(EndUser.id == self.application_generate_entity.user_id).first()
-            if end_user:
-                user_id = end_user.session_id
-        else:
-            user_id = self.application_generate_entity.user_id
-
-        pipeline = db.session.query(Pipeline).filter(Pipeline.id == app_config.app_id).first()
-        if not pipeline:
-            raise ValueError("Pipeline not found")
-
-        workflow = self.get_workflow(pipeline=pipeline, workflow_id=app_config.workflow_id)
-        if not workflow:
-            raise ValueError("Workflow not initialized")
-
-        db.session.close()
-
-        workflow_callbacks: list[WorkflowCallback] = []
-        if dify_config.DEBUG:
-            workflow_callbacks.append(WorkflowLoggingCallback())
-
-        # if only single iteration run is requested
-        if self.application_generate_entity.single_iteration_run:
-            # if only single iteration run is requested
-            graph, variable_pool = self._get_graph_and_variable_pool_of_single_iteration(
-                workflow=workflow,
-                node_id=self.application_generate_entity.single_iteration_run.node_id,
-                user_inputs=self.application_generate_entity.single_iteration_run.inputs,
-            )
-        elif self.application_generate_entity.single_loop_run:
-            # if only single loop run is requested
-            graph, variable_pool = self._get_graph_and_variable_pool_of_single_loop(
-                workflow=workflow,
-                node_id=self.application_generate_entity.single_loop_run.node_id,
-                user_inputs=self.application_generate_entity.single_loop_run.inputs,
-            )
-        else:
-            inputs = self.application_generate_entity.inputs
-            files = self.application_generate_entity.files
-
-            # Create a variable pool.
-            system_inputs = {
-                SystemVariableKey.FILES: files,
-                SystemVariableKey.USER_ID: user_id,
-                SystemVariableKey.APP_ID: app_config.app_id,
-                SystemVariableKey.WORKFLOW_ID: app_config.workflow_id,
-                SystemVariableKey.WORKFLOW_EXECUTION_ID: self.application_generate_entity.workflow_execution_id,
-                SystemVariableKey.DOCUMENT_ID: self.application_generate_entity.document_id,
-                SystemVariableKey.BATCH: self.application_generate_entity.batch,
-                SystemVariableKey.DATASET_ID: self.application_generate_entity.dataset_id,
-                SystemVariableKey.DATASOURCE_TYPE: self.application_generate_entity.datasource_type,
-                SystemVariableKey.DATASOURCE_INFO: self.application_generate_entity.datasource_info,
-                SystemVariableKey.INVOKE_FROM: self.application_generate_entity.invoke_from.value,
-            }
-            rag_pipeline_variables = []
-            if workflow.rag_pipeline_variables:
-                for v in workflow.rag_pipeline_variables:
-                    rag_pipeline_variable = RAGPipelineVariable(**v)
-                    if (
-                        rag_pipeline_variable.belong_to_node_id
-                        in (self.application_generate_entity.start_node_id, "shared")
-                    ) and rag_pipeline_variable.variable in inputs:
-                        rag_pipeline_variables.append(
-                            RAGPipelineVariableInput(
-                                variable=rag_pipeline_variable,
-                                value=inputs[rag_pipeline_variable.variable],
-                            )
-                        )
-
-            variable_pool = VariablePool(
-                system_variables=system_inputs,
-                user_inputs=inputs,
-                environment_variables=workflow.environment_variables,
-                conversation_variables=[],
-                rag_pipeline_variables=rag_pipeline_variables,
-            )
-
-            # init graph
-            graph = self._init_rag_pipeline_graph(
-                graph_config=workflow.graph_dict,
-                start_node_id=self.application_generate_entity.start_node_id,
-            )
-
-        # RUN WORKFLOW
-        workflow_entry = WorkflowEntry(
-            tenant_id=workflow.tenant_id,
-            app_id=workflow.app_id,
-            workflow_id=workflow.id,
-            workflow_type=WorkflowType.value_of(workflow.type),
-            graph=graph,
-            graph_config=workflow.graph_dict,
-            user_id=self.application_generate_entity.user_id,
-            user_from=(
-                UserFrom.ACCOUNT
-                if self.application_generate_entity.invoke_from in {InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER}
-                else UserFrom.END_USER
-            ),
-            invoke_from=self.application_generate_entity.invoke_from,
-            call_depth=self.application_generate_entity.call_depth,
-            variable_pool=variable_pool,
-            thread_pool_id=self.workflow_thread_pool_id,
-        )
-
-        generator = workflow_entry.run(callbacks=workflow_callbacks)
-
-        for event in generator:
-            self._handle_event(workflow_entry, event)
-
-    def get_workflow(self, pipeline: Pipeline, workflow_id: str) -> Optional[Workflow]:
-        """
-        Get workflow
-        """
-        # fetch workflow by workflow_id
-        workflow = (
-            db.session.query(Workflow)
-            .filter(
-                Workflow.tenant_id == pipeline.tenant_id, Workflow.app_id == pipeline.id, Workflow.id == workflow_id
-            )
-            .first()
-        )
-
-        # return workflow
-        return workflow
-
-    def _init_rag_pipeline_graph(self, graph_config: Mapping[str, Any], start_node_id: Optional[str] = None) -> Graph:
-        """
-        Init pipeline graph
-        """
-        if "nodes" not in graph_config or "edges" not in graph_config:
-            raise ValueError("nodes or edges not found in workflow graph")
-
-        if not isinstance(graph_config.get("nodes"), list):
-            raise ValueError("nodes in workflow graph must be a list")
-
-        if not isinstance(graph_config.get("edges"), list):
-            raise ValueError("edges in workflow graph must be a list")
-        nodes = graph_config.get("nodes", [])
-        edges = graph_config.get("edges", [])
-        real_run_nodes = []
-        real_edges = []
-        exclude_node_ids = []
-        for node in nodes:
-            node_id = node.get("id")
-            node_type = node.get("data", {}).get("type", "")
-            if node_type == "datasource":
-                if start_node_id != node_id:
-                    exclude_node_ids.append(node_id)
-                    continue
-            real_run_nodes.append(node)
-        for edge in edges:
-            if edge.get("source") in exclude_node_ids:
-                continue
-            real_edges.append(edge)
-        graph_config = dict(graph_config)
-        graph_config["nodes"] = real_run_nodes
-        graph_config["edges"] = real_edges
-        # init graph
-        graph = Graph.init(graph_config=graph_config)
-
-        if not graph:
-            raise ValueError("graph not found in workflow")
-
-        return graph
--- a/api/core/app/apps/workflow/app_generator.py
+++ b/api/core/app/apps/workflow/app_generator.py
@ -25,7 +25,6 @@ from core.model_runtime.errors.invoke import InvokeAuthorizationError
 from core.ops.ops_trace_manager import TraceQueueManager
 from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository
 from core.repositories.sqlalchemy_workflow_execution_repository import SQLAlchemyWorkflowExecutionRepository
-from core.workflow.repositories.draft_variable_repository import DraftVariableSaverFactory
 from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
 from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository
 from core.workflow.variable_loader import DUMMY_VARIABLE_LOADER, VariableLoader
@ -220,9 +219,6 @@ class WorkflowAppGenerator(BaseAppGenerator):
        # new thread with request context and contextvars
        context = contextvars.copy_context()

-        # release database connection, because the following new thread operations may take a long time
-        db.session.close()
-
        worker_thread = threading.Thread(
            target=self._generate_worker,
            kwargs={
@ -237,10 +233,6 @@ class WorkflowAppGenerator(BaseAppGenerator):

        worker_thread.start()

-        draft_var_saver_factory = self._get_draft_var_saver_factory(
-            invoke_from,
-        )
-
        # return response or stream generator
        response = self._handle_response(
            application_generate_entity=application_generate_entity,
@ -249,7 +241,6 @@ class WorkflowAppGenerator(BaseAppGenerator):
            user=user,
            workflow_execution_repository=workflow_execution_repository,
            workflow_node_execution_repository=workflow_node_execution_repository,
-            draft_var_saver_factory=draft_var_saver_factory,
            stream=streaming,
        )

@ -480,7 +471,6 @@ class WorkflowAppGenerator(BaseAppGenerator):
        user: Union[Account, EndUser],
        workflow_execution_repository: WorkflowExecutionRepository,
        workflow_node_execution_repository: WorkflowNodeExecutionRepository,
-        draft_var_saver_factory: DraftVariableSaverFactory,
        stream: bool = False,
    ) -> Union[WorkflowAppBlockingResponse, Generator[WorkflowAppStreamResponse, None, None]]:
        """
@ -501,7 +491,6 @@ class WorkflowAppGenerator(BaseAppGenerator):
            user=user,
            workflow_execution_repository=workflow_execution_repository,
            workflow_node_execution_repository=workflow_node_execution_repository,
-            draft_var_saver_factory=draft_var_saver_factory,
            stream=stream,
        )

--- a/api/core/app/apps/workflow/generate_task_pipeline.py
+++ b/api/core/app/apps/workflow/generate_task_pipeline.py
@ -56,7 +56,6 @@ from core.base.tts import AppGeneratorTTSPublisher, AudioTrunk
 from core.ops.ops_trace_manager import TraceQueueManager
 from core.workflow.entities.workflow_execution import WorkflowExecution, WorkflowExecutionStatus, WorkflowType
 from core.workflow.enums import SystemVariableKey
-from core.workflow.repositories.draft_variable_repository import DraftVariableSaverFactory
 from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
 from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository
 from core.workflow.workflow_cycle_manager import CycleManagerWorkflowInfo, WorkflowCycleManager
@ -88,7 +87,6 @@ class WorkflowAppGenerateTaskPipeline:
        stream: bool,
        workflow_execution_repository: WorkflowExecutionRepository,
        workflow_node_execution_repository: WorkflowNodeExecutionRepository,
-        draft_var_saver_factory: DraftVariableSaverFactory,
    ) -> None:
        self._base_task_pipeline = BasedGenerateTaskPipeline(
            application_generate_entity=application_generate_entity,
@ -133,8 +131,6 @@ class WorkflowAppGenerateTaskPipeline:
        self._application_generate_entity = application_generate_entity
        self._workflow_features_dict = workflow.features_dict
        self._workflow_run_id = ""
-        self._invoke_from = queue_manager._invoke_from
-        self._draft_var_saver_factory = draft_var_saver_factory

    def process(self) -> Union[WorkflowAppBlockingResponse, Generator[WorkflowAppStreamResponse, None, None]]:
        """
@ -326,8 +322,6 @@ class WorkflowAppGenerateTaskPipeline:
                    workflow_node_execution=workflow_node_execution,
                )

-                self._save_output_for_event(event, workflow_node_execution.id)
-
                if node_success_response:
                    yield node_success_response
            elif isinstance(
@ -345,8 +339,6 @@ class WorkflowAppGenerateTaskPipeline:
                    task_id=self._application_generate_entity.task_id,
                    workflow_node_execution=workflow_node_execution,
                )
-                if isinstance(event, QueueNodeExceptionEvent):
-                    self._save_output_for_event(event, workflow_node_execution.id)

                if node_failed_response:
                    yield node_failed_response
@ -601,15 +593,3 @@ class WorkflowAppGenerateTaskPipeline:
        )

        return response
-
-    def _save_output_for_event(self, event: QueueNodeSucceededEvent | QueueNodeExceptionEvent, node_execution_id: str):
-        with Session(db.engine) as session, session.begin():
-            saver = self._draft_var_saver_factory(
-                session=session,
-                app_id=self._application_generate_entity.app_config.app_id,
-                node_id=event.node_id,
-                node_type=event.node_type,
-                node_execution_id=node_execution_id,
-                enclosing_node_id=event.in_loop_id or event.in_iteration_id,
-            )
-            saver.save(event.process_data, event.outputs)
--- a/api/core/app/apps/workflow_app_runner.py
+++ b/api/core/app/apps/workflow_app_runner.py
@ -1,6 +1,8 @@
 from collections.abc import Mapping
 from typing import Any, Optional, cast

+from sqlalchemy.orm import Session
+
 from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
 from core.app.apps.base_app_runner import AppRunner
 from core.app.entities.queue_entities import (
@ -33,6 +35,7 @@ from core.workflow.entities.variable_pool import VariablePool
 from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionMetadataKey
 from core.workflow.graph_engine.entities.event import (
    AgentLogEvent,
+    BaseNodeEvent,
    GraphEngineEvent,
    GraphRunFailedEvent,
    GraphRunPartialSucceededEvent,
@ -67,6 +70,9 @@ from core.workflow.workflow_entry import WorkflowEntry
 from extensions.ext_database import db
 from models.model import App
 from models.workflow import Workflow
+from services.workflow_draft_variable_service import (
+    DraftVariableSaver,
+)


 class WorkflowBasedAppRunner(AppRunner):
@ -394,6 +400,7 @@ class WorkflowBasedAppRunner(AppRunner):
                    in_loop_id=event.in_loop_id,
                )
            )
+            self._save_draft_var_for_event(event)

        elif isinstance(event, NodeRunFailedEvent):
            self._publish_event(
@ -457,6 +464,7 @@ class WorkflowBasedAppRunner(AppRunner):
                    in_loop_id=event.in_loop_id,
                )
            )
+            self._save_draft_var_for_event(event)

        elif isinstance(event, NodeInIterationFailedEvent):
            self._publish_event(
@ -710,3 +718,30 @@ class WorkflowBasedAppRunner(AppRunner):

    def _publish_event(self, event: AppQueueEvent) -> None:
        self.queue_manager.publish(event, PublishFrom.APPLICATION_MANAGER)
+
+    def _save_draft_var_for_event(self, event: BaseNodeEvent):
+        run_result = event.route_node_state.node_run_result
+        if run_result is None:
+            return
+        process_data = run_result.process_data
+        outputs = run_result.outputs
+        with Session(bind=db.engine) as session, session.begin():
+            draft_var_saver = DraftVariableSaver(
+                session=session,
+                app_id=self._get_app_id(),
+                node_id=event.node_id,
+                node_type=event.node_type,
+                # FIXME(QuantumGhost): rely on private state of queue_manager is not ideal.
+                invoke_from=self.queue_manager._invoke_from,
+                node_execution_id=event.id,
+                enclosing_node_id=event.in_loop_id or event.in_iteration_id or None,
+            )
+            draft_var_saver.save(process_data=process_data, outputs=outputs)
+
+
+def _remove_first_element_from_variable_string(key: str) -> str:
+    """
+    Remove the first element from the prefix.
+    """
+    prefix, remaining = key.split(".", maxsplit=1)
+    return remaining
--- a/api/core/app/entities/app_invoke_entities.py
+++ b/api/core/app/entities/app_invoke_entities.py
@ -36,7 +36,6 @@ class InvokeFrom(Enum):
    # DEBUGGER indicates that this invocation is from
    # the workflow (or chatflow) edit page.
    DEBUGGER = "debugger"
-    PUBLISHED = "published"

    @classmethod
    def value_of(cls, value: str):
@ -241,38 +240,3 @@ class WorkflowAppGenerateEntity(AppGenerateEntity):
        inputs: dict

    single_loop_run: Optional[SingleLoopRunEntity] = None
-
-
-class RagPipelineGenerateEntity(WorkflowAppGenerateEntity):
-    """
-    RAG Pipeline Application Generate Entity.
-    """
-
-    # pipeline config
-    pipeline_config: WorkflowUIBasedAppConfig
-    datasource_type: str
-    datasource_info: Mapping[str, Any]
-    dataset_id: str
-    batch: str
-    document_id: Optional[str] = None
-    start_node_id: Optional[str] = None
-
-    class SingleIterationRunEntity(BaseModel):
-        """
-        Single Iteration Run Entity.
-        """
-
-        node_id: str
-        inputs: dict
-
-    single_iteration_run: Optional[SingleIterationRunEntity] = None
-
-    class SingleLoopRunEntity(BaseModel):
-        """
-        Single Loop Run Entity.
-        """
-
-        node_id: str
-        inputs: dict
-
-    single_loop_run: Optional[SingleLoopRunEntity] = None
--- a/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py
+++ b/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py
@ -395,7 +395,6 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline):
        message.provider_response_latency = time.perf_counter() - self._start_at
        message.total_price = usage.total_price
        message.currency = usage.currency
-        self._task_state.llm_result.usage.latency = message.provider_response_latency
        message.message_metadata = self._task_state.metadata.model_dump_json()

        if trace_manager:
--- a/api/core/callback_handler/agent_tool_callback_handler.py
+++ b/api/core/callback_handler/agent_tool_callback_handler.py
@ -105,14 +105,6 @@ class DifyAgentCallbackHandler(BaseModel):

        self.current_loop += 1

-    def on_datasource_start(self, datasource_name: str, datasource_inputs: Mapping[str, Any]) -> None:
-        """Run on datasource start."""
-        if dify_config.DEBUG:
-            print_text(
-                "\n[on_datasource_start] DatasourceCall:" + datasource_name + "\n" + str(datasource_inputs) + "\n",
-                color=self.color,
-            )
-
    @property
    def ignore_agent(self) -> bool:
        """Whether to ignore agent callbacks."""
--- a/api/core/datasource/__base/datasource_plugin.py
+++ b/api/core/datasource/__base/datasource_plugin.py
@ -1,33 +0,0 @@
-from abc import ABC, abstractmethod
-
-from core.datasource.__base.datasource_runtime import DatasourceRuntime
-from core.datasource.entities.datasource_entities import (
-    DatasourceEntity,
-    DatasourceProviderType,
-)
-
-
-class DatasourcePlugin(ABC):
-    entity: DatasourceEntity
-    runtime: DatasourceRuntime
-
-    def __init__(
-        self,
-        entity: DatasourceEntity,
-        runtime: DatasourceRuntime,
-    ) -> None:
-        self.entity = entity
-        self.runtime = runtime
-
-    @abstractmethod
-    def datasource_provider_type(self) -> str:
-        """
-        returns the type of the datasource provider
-        """
-        return DatasourceProviderType.LOCAL_FILE
-
-    def fork_datasource_runtime(self, runtime: DatasourceRuntime) -> "DatasourcePlugin":
-        return self.__class__(
-            entity=self.entity.model_copy(),
-            runtime=runtime,
-        )
--- a/api/core/datasource/__base/datasource_provider.py
+++ b/api/core/datasource/__base/datasource_provider.py
@ -1,118 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Any
-
-from core.datasource.__base.datasource_plugin import DatasourcePlugin
-from core.datasource.entities.datasource_entities import DatasourceProviderEntityWithPlugin, DatasourceProviderType
-from core.entities.provider_entities import ProviderConfig
-from core.plugin.impl.tool import PluginToolManager
-from core.tools.errors import ToolProviderCredentialValidationError
-
-
-class DatasourcePluginProviderController(ABC):
-    entity: DatasourceProviderEntityWithPlugin
-    tenant_id: str
-
-    def __init__(self, entity: DatasourceProviderEntityWithPlugin, tenant_id: str) -> None:
-        self.entity = entity
-        self.tenant_id = tenant_id
-
-    @property
-    def need_credentials(self) -> bool:
-        """
-        returns whether the provider needs credentials
-
-        :return: whether the provider needs credentials
-        """
-        return self.entity.credentials_schema is not None and len(self.entity.credentials_schema) != 0
-
-    def _validate_credentials(self, user_id: str, credentials: dict[str, Any]) -> None:
-        """
-        validate the credentials of the provider
-        """
-        manager = PluginToolManager()
-        if not manager.validate_datasource_credentials(
-            tenant_id=self.tenant_id,
-            user_id=user_id,
-            provider=self.entity.identity.name,
-            credentials=credentials,
-        ):
-            raise ToolProviderCredentialValidationError("Invalid credentials")
-
-    @property
-    def provider_type(self) -> DatasourceProviderType:
-        """
-        returns the type of the provider
-        """
-        return DatasourceProviderType.LOCAL_FILE
-
-    @abstractmethod
-    def get_datasource(self, datasource_name: str) -> DatasourcePlugin:
-        """
-        return datasource with given name
-        """
-        pass
-
-    def validate_credentials_format(self, credentials: dict[str, Any]) -> None:
-        """
-        validate the format of the credentials of the provider and set the default value if needed
-
-        :param credentials: the credentials of the tool
-        """
-        credentials_schema = dict[str, ProviderConfig]()
-        if credentials_schema is None:
-            return
-
-        for credential in self.entity.credentials_schema:
-            credentials_schema[credential.name] = credential
-
-        credentials_need_to_validate: dict[str, ProviderConfig] = {}
-        for credential_name in credentials_schema:
-            credentials_need_to_validate[credential_name] = credentials_schema[credential_name]
-
-        for credential_name in credentials:
-            if credential_name not in credentials_need_to_validate:
-                raise ToolProviderCredentialValidationError(
-                    f"credential {credential_name} not found in provider {self.entity.identity.name}"
-                )
-
-            # check type
-            credential_schema = credentials_need_to_validate[credential_name]
-            if not credential_schema.required and credentials[credential_name] is None:
-                continue
-
-            if credential_schema.type in {ProviderConfig.Type.SECRET_INPUT, ProviderConfig.Type.TEXT_INPUT}:
-                if not isinstance(credentials[credential_name], str):
-                    raise ToolProviderCredentialValidationError(f"credential {credential_name} should be string")
-
-            elif credential_schema.type == ProviderConfig.Type.SELECT:
-                if not isinstance(credentials[credential_name], str):
-                    raise ToolProviderCredentialValidationError(f"credential {credential_name} should be string")
-
-                options = credential_schema.options
-                if not isinstance(options, list):
-                    raise ToolProviderCredentialValidationError(f"credential {credential_name} options should be list")
-
-                if credentials[credential_name] not in [x.value for x in options]:
-                    raise ToolProviderCredentialValidationError(
-                        f"credential {credential_name} should be one of {options}"
-                    )
-
-            credentials_need_to_validate.pop(credential_name)
-
-        for credential_name in credentials_need_to_validate:
-            credential_schema = credentials_need_to_validate[credential_name]
-            if credential_schema.required:
-                raise ToolProviderCredentialValidationError(f"credential {credential_name} is required")
-
-            # the credential is not set currently, set the default value if needed
-            if credential_schema.default is not None:
-                default_value = credential_schema.default
-                # parse default value into the correct type
-                if credential_schema.type in {
-                    ProviderConfig.Type.SECRET_INPUT,
-                    ProviderConfig.Type.TEXT_INPUT,
-                    ProviderConfig.Type.SELECT,
-                }:
-                    default_value = str(default_value)
-
-                credentials[credential_name] = default_value
--- a/api/core/datasource/__base/datasource_runtime.py
+++ b/api/core/datasource/__base/datasource_runtime.py
@ -1,36 +0,0 @@
-from typing import Any, Optional
-
-from openai import BaseModel
-from pydantic import Field
-
-from core.app.entities.app_invoke_entities import InvokeFrom
-from core.datasource.entities.datasource_entities import DatasourceInvokeFrom
-
-
-class DatasourceRuntime(BaseModel):
-    """
-    Meta data of a datasource call processing
-    """
-
-    tenant_id: str
-    datasource_id: Optional[str] = None
-    invoke_from: Optional[InvokeFrom] = None
-    datasource_invoke_from: Optional[DatasourceInvokeFrom] = None
-    credentials: dict[str, Any] = Field(default_factory=dict)
-    runtime_parameters: dict[str, Any] = Field(default_factory=dict)
-
-
-class FakeDatasourceRuntime(DatasourceRuntime):
-    """
-    Fake datasource runtime for testing
-    """
-
-    def __init__(self):
-        super().__init__(
-            tenant_id="fake_tenant_id",
-            datasource_id="fake_datasource_id",
-            invoke_from=InvokeFrom.DEBUGGER,
-            datasource_invoke_from=DatasourceInvokeFrom.RAG_PIPELINE,
-            credentials={},
-            runtime_parameters={},
-        )
--- a/api/core/datasource/init.py
+++ b/api/core/datasource/init.py
--- a/api/core/datasource/datasource_file_manager.py
+++ b/api/core/datasource/datasource_file_manager.py
@ -1,244 +0,0 @@
-import base64
-import hashlib
-import hmac
-import logging
-import os
-import time
-from mimetypes import guess_extension, guess_type
-from typing import Optional, Union
-from uuid import uuid4
-
-import httpx
-
-from configs import dify_config
-from core.helper import ssrf_proxy
-from extensions.ext_database import db
-from extensions.ext_storage import storage
-from models.enums import CreatorUserRole
-from models.model import MessageFile, UploadFile
-from models.tools import ToolFile
-
-logger = logging.getLogger(__name__)
-
-
-class DatasourceFileManager:
-    @staticmethod
-    def sign_file(datasource_file_id: str, extension: str) -> str:
-        """
-        sign file to get a temporary url
-        """
-        base_url = dify_config.FILES_URL
-        file_preview_url = f"{base_url}/files/datasources/{datasource_file_id}{extension}"
-
-        timestamp = str(int(time.time()))
-        nonce = os.urandom(16).hex()
-        data_to_sign = f"file-preview|{datasource_file_id}|{timestamp}|{nonce}"
-        secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
-        sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
-        encoded_sign = base64.urlsafe_b64encode(sign).decode()
-
-        return f"{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}"
-
-    @staticmethod
-    def verify_file(datasource_file_id: str, timestamp: str, nonce: str, sign: str) -> bool:
-        """
-        verify signature
-        """
-        data_to_sign = f"file-preview|{datasource_file_id}|{timestamp}|{nonce}"
-        secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
-        recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
-        recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode()
-
-        # verify signature
-        if sign != recalculated_encoded_sign:
-            return False
-
-        current_time = int(time.time())
-        return current_time - int(timestamp) <= dify_config.FILES_ACCESS_TIMEOUT
-
-    @staticmethod
-    def create_file_by_raw(
-        *,
-        user_id: str,
-        tenant_id: str,
-        conversation_id: Optional[str],
-        file_binary: bytes,
-        mimetype: str,
-        filename: Optional[str] = None,
-    ) -> UploadFile:
-        extension = guess_extension(mimetype) or ".bin"
-        unique_name = uuid4().hex
-        unique_filename = f"{unique_name}{extension}"
-        # default just as before
-        present_filename = unique_filename
-        if filename is not None:
-            has_extension = len(filename.split(".")) > 1
-            # Add extension flexibly
-            present_filename = filename if has_extension else f"{filename}{extension}"
-        filepath = f"datasources/{tenant_id}/{unique_filename}"
-        storage.save(filepath, file_binary)
-
-        upload_file = UploadFile(
-            tenant_id=tenant_id,
-            storage_type=dify_config.STORAGE_TYPE,
-            key=filepath,
-            name=present_filename,
-            size=len(file_binary),
-            extension=extension,
-            mime_type=mimetype,
-            created_by_role=CreatorUserRole.ACCOUNT,
-            created_by=user_id,
-            used=False,
-            hash=hashlib.sha3_256(file_binary).hexdigest(),
-            source_url="",
-        )
-
-        db.session.add(upload_file)
-        db.session.commit()
-        db.session.refresh(upload_file)
-
-        return upload_file
-
-    @staticmethod
-    def create_file_by_url(
-        user_id: str,
-        tenant_id: str,
-        file_url: str,
-        conversation_id: Optional[str] = None,
-    ) -> UploadFile:
-        # try to download image
-        try:
-            response = ssrf_proxy.get(file_url)
-            response.raise_for_status()
-            blob = response.content
-        except httpx.TimeoutException:
-            raise ValueError(f"timeout when downloading file from {file_url}")
-
-        mimetype = (
-            guess_type(file_url)[0]
-            or response.headers.get("Content-Type", "").split(";")[0].strip()
-            or "application/octet-stream"
-        )
-        extension = guess_extension(mimetype) or ".bin"
-        unique_name = uuid4().hex
-        filename = f"{unique_name}{extension}"
-        filepath = f"tools/{tenant_id}/{filename}"
-        storage.save(filepath, blob)
-
-        upload_file = UploadFile(
-            tenant_id=tenant_id,
-            storage_type=dify_config.STORAGE_TYPE,
-            key=filepath,
-            name=filename,
-            size=len(blob),
-            extension=extension,
-            mime_type=mimetype,
-            created_by_role=CreatorUserRole.ACCOUNT,
-            created_by=user_id,
-            used=False,
-            hash=hashlib.sha3_256(blob).hexdigest(),
-            source_url=file_url,
-        )
-
-        db.session.add(upload_file)
-        db.session.commit()
-
-        return upload_file
-
-    @staticmethod
-    def get_file_binary(id: str) -> Union[tuple[bytes, str], None]:
-        """
-        get file binary
-
-        :param id: the id of the file
-
-        :return: the binary of the file, mime type
-        """
-        upload_file: UploadFile | None = (
-            db.session.query(UploadFile)
-            .filter(
-                UploadFile.id == id,
-            )
-            .first()
-        )
-
-        if not upload_file:
-            return None
-
-        blob = storage.load_once(upload_file.key)
-
-        return blob, upload_file.mime_type
-
-    @staticmethod
-    def get_file_binary_by_message_file_id(id: str) -> Union[tuple[bytes, str], None]:
-        """
-        get file binary
-
-        :param id: the id of the file
-
-        :return: the binary of the file, mime type
-        """
-        message_file: MessageFile | None = (
-            db.session.query(MessageFile)
-            .filter(
-                MessageFile.id == id,
-            )
-            .first()
-        )
-
-        # Check if message_file is not None
-        if message_file is not None:
-            # get tool file id
-            if message_file.url is not None:
-                tool_file_id = message_file.url.split("/")[-1]
-                # trim extension
-                tool_file_id = tool_file_id.split(".")[0]
-            else:
-                tool_file_id = None
-        else:
-            tool_file_id = None
-
-        tool_file: ToolFile | None = (
-            db.session.query(ToolFile)
-            .filter(
-                ToolFile.id == tool_file_id,
-            )
-            .first()
-        )
-
-        if not tool_file:
-            return None
-
-        blob = storage.load_once(tool_file.file_key)
-
-        return blob, tool_file.mimetype
-
-    @staticmethod
-    def get_file_generator_by_upload_file_id(upload_file_id: str):
-        """
-        get file binary
-
-        :param tool_file_id: the id of the tool file
-
-        :return: the binary of the file, mime type
-        """
-        upload_file: UploadFile | None = (
-            db.session.query(UploadFile)
-            .filter(
-                UploadFile.id == upload_file_id,
-            )
-            .first()
-        )
-
-        if not upload_file:
-            return None, None
-
-        stream = storage.load_stream(upload_file.key)
-
-        return stream, upload_file.mime_type
-
-
-# init tool_file_parser
-# from core.file.datasource_file_parser import datasource_file_manager
-#
-# datasource_file_manager["manager"] = DatasourceFileManager
--- a/api/core/datasource/datasource_manager.py
+++ b/api/core/datasource/datasource_manager.py
@ -1,100 +0,0 @@
-import logging
-from threading import Lock
-from typing import Union
-
-import contexts
-from core.datasource.__base.datasource_plugin import DatasourcePlugin
-from core.datasource.__base.datasource_provider import DatasourcePluginProviderController
-from core.datasource.entities.common_entities import I18nObject
-from core.datasource.entities.datasource_entities import DatasourceProviderType
-from core.datasource.errors import DatasourceProviderNotFoundError
-from core.datasource.local_file.local_file_provider import LocalFileDatasourcePluginProviderController
-from core.datasource.online_document.online_document_provider import OnlineDocumentDatasourcePluginProviderController
-from core.datasource.website_crawl.website_crawl_provider import WebsiteCrawlDatasourcePluginProviderController
-from core.plugin.impl.datasource import PluginDatasourceManager
-
-logger = logging.getLogger(__name__)
-
-
-class DatasourceManager:
-    _builtin_provider_lock = Lock()
-    _hardcoded_providers: dict[str, DatasourcePluginProviderController] = {}
-    _builtin_providers_loaded = False
-    _builtin_tools_labels: dict[str, Union[I18nObject, None]] = {}
-
-    @classmethod
-    def get_datasource_plugin_provider(
-        cls, provider_id: str, tenant_id: str, datasource_type: DatasourceProviderType
-    ) -> DatasourcePluginProviderController:
-        """
-        get the datasource plugin provider
-        """
-        # check if context is set
-        try:
-            contexts.datasource_plugin_providers.get()
-        except LookupError:
-            contexts.datasource_plugin_providers.set({})
-            contexts.datasource_plugin_providers_lock.set(Lock())
-
-        with contexts.datasource_plugin_providers_lock.get():
-            datasource_plugin_providers = contexts.datasource_plugin_providers.get()
-            if provider_id in datasource_plugin_providers:
-                return datasource_plugin_providers[provider_id]
-
-            manager = PluginDatasourceManager()
-            provider_entity = manager.fetch_datasource_provider(tenant_id, provider_id)
-            if not provider_entity:
-                raise DatasourceProviderNotFoundError(f"plugin provider {provider_id} not found")
-
-            match datasource_type:
-                case DatasourceProviderType.ONLINE_DOCUMENT:
-                    controller = OnlineDocumentDatasourcePluginProviderController(
-                        entity=provider_entity.declaration,
-                        plugin_id=provider_entity.plugin_id,
-                        plugin_unique_identifier=provider_entity.plugin_unique_identifier,
-                        tenant_id=tenant_id,
-                    )
-                case DatasourceProviderType.WEBSITE_CRAWL:
-                    controller = WebsiteCrawlDatasourcePluginProviderController(
-                        entity=provider_entity.declaration,
-                        plugin_id=provider_entity.plugin_id,
-                        plugin_unique_identifier=provider_entity.plugin_unique_identifier,
-                        tenant_id=tenant_id,
-                    )
-                case DatasourceProviderType.LOCAL_FILE:
-                    controller = LocalFileDatasourcePluginProviderController(
-                        entity=provider_entity.declaration,
-                        plugin_id=provider_entity.plugin_id,
-                        plugin_unique_identifier=provider_entity.plugin_unique_identifier,
-                        tenant_id=tenant_id,
-                    )
-                case _:
-                    raise ValueError(f"Unsupported datasource type: {datasource_type}")
-
-            datasource_plugin_providers[provider_id] = controller
-
-        return controller
-
-    @classmethod
-    def get_datasource_runtime(
-        cls,
-        provider_id: str,
-        datasource_name: str,
-        tenant_id: str,
-        datasource_type: DatasourceProviderType,
-    ) -> DatasourcePlugin:
-        """
-        get the datasource runtime
-
-        :param provider_type: the type of the provider
-        :param provider_id: the id of the provider
-        :param datasource_name: the name of the datasource
-        :param tenant_id: the tenant id
-
-        :return: the datasource plugin
-        """
-        return cls.get_datasource_plugin_provider(
-            provider_id,
-            tenant_id,
-            datasource_type,
-        ).get_datasource(datasource_name)
--- a/api/core/datasource/entities/api_entities.py
+++ b/api/core/datasource/entities/api_entities.py
@ -1,71 +0,0 @@
-from typing import Literal, Optional
-
-from pydantic import BaseModel, Field, field_validator
-
-from core.datasource.entities.datasource_entities import DatasourceParameter
-from core.model_runtime.utils.encoders import jsonable_encoder
-from core.tools.entities.common_entities import I18nObject
-
-
-class DatasourceApiEntity(BaseModel):
-    author: str
-    name: str  # identifier
-    label: I18nObject  # label
-    description: I18nObject
-    parameters: Optional[list[DatasourceParameter]] = None
-    labels: list[str] = Field(default_factory=list)
-    output_schema: Optional[dict] = None
-
-
-ToolProviderTypeApiLiteral = Optional[Literal["builtin", "api", "workflow"]]
-
-
-class DatasourceProviderApiEntity(BaseModel):
-    id: str
-    author: str
-    name: str  # identifier
-    description: I18nObject
-    icon: str | dict
-    label: I18nObject  # label
-    type: str
-    masked_credentials: Optional[dict] = None
-    original_credentials: Optional[dict] = None
-    is_team_authorization: bool = False
-    allow_delete: bool = True
-    plugin_id: Optional[str] = Field(default="", description="The plugin id of the datasource")
-    plugin_unique_identifier: Optional[str] = Field(default="", description="The unique identifier of the datasource")
-    datasources: list[DatasourceApiEntity] = Field(default_factory=list)
-    labels: list[str] = Field(default_factory=list)
-
-    @field_validator("datasources", mode="before")
-    @classmethod
-    def convert_none_to_empty_list(cls, v):
-        return v if v is not None else []
-
-    def to_dict(self) -> dict:
-        # -------------
-        # overwrite datasource parameter types for temp fix
-        datasources = jsonable_encoder(self.datasources)
-        for datasource in datasources:
-            if datasource.get("parameters"):
-                for parameter in datasource.get("parameters"):
-                    if parameter.get("type") == DatasourceParameter.DatasourceParameterType.SYSTEM_FILES.value:
-                        parameter["type"] = "files"
-        # -------------
-
-        return {
-            "id": self.id,
-            "author": self.author,
-            "name": self.name,
-            "plugin_id": self.plugin_id,
-            "plugin_unique_identifier": self.plugin_unique_identifier,
-            "description": self.description.to_dict(),
-            "icon": self.icon,
-            "label": self.label.to_dict(),
-            "type": self.type.value,
-            "team_credentials": self.masked_credentials,
-            "is_team_authorization": self.is_team_authorization,
-            "allow_delete": self.allow_delete,
-            "datasources": datasources,
-            "labels": self.labels,
-        }
--- a/api/core/datasource/entities/common_entities.py
+++ b/api/core/datasource/entities/common_entities.py
@ -1,23 +0,0 @@
-from typing import Optional
-
-from pydantic import BaseModel, Field
-
-
-class I18nObject(BaseModel):
-    """
-    Model class for i18n object.
-    """
-
-    en_US: str
-    zh_Hans: Optional[str] = Field(default=None)
-    pt_BR: Optional[str] = Field(default=None)
-    ja_JP: Optional[str] = Field(default=None)
-
-    def __init__(self, **data):
-        super().__init__(**data)
-        self.zh_Hans = self.zh_Hans or self.en_US
-        self.pt_BR = self.pt_BR or self.en_US
-        self.ja_JP = self.ja_JP or self.en_US
-
-    def to_dict(self) -> dict:
-        return {"zh_Hans": self.zh_Hans, "en_US": self.en_US, "pt_BR": self.pt_BR, "ja_JP": self.ja_JP}
--- a/api/core/datasource/entities/datasource_entities.py
+++ b/api/core/datasource/entities/datasource_entities.py
@ -1,361 +0,0 @@
-import enum
-from enum import Enum
-from typing import Any, Optional
-
-from pydantic import BaseModel, Field, ValidationInfo, field_validator
-
-from core.entities.provider_entities import ProviderConfig
-from core.plugin.entities.oauth import OAuthSchema
-from core.plugin.entities.parameters import (
-    PluginParameter,
-    PluginParameterOption,
-    PluginParameterType,
-    as_normal_type,
-    cast_parameter_value,
-    init_frontend_parameter,
-)
-from core.tools.entities.common_entities import I18nObject
-from core.tools.entities.tool_entities import ToolInvokeMessage, ToolLabelEnum
-
-
-class DatasourceProviderType(enum.StrEnum):
-    """
-    Enum class for datasource provider
-    """
-
-    ONLINE_DOCUMENT = "online_document"
-    LOCAL_FILE = "local_file"
-    WEBSITE_CRAWL = "website_crawl"
-    ONLINE_DRIVE = "online_drive"
-
-    @classmethod
-    def value_of(cls, value: str) -> "DatasourceProviderType":
-        """
-        Get value of given mode.
-
-        :param value: mode value
-        :return: mode
-        """
-        for mode in cls:
-            if mode.value == value:
-                return mode
-        raise ValueError(f"invalid mode value {value}")
-
-
-class DatasourceParameter(PluginParameter):
-    """
-    Overrides type
-    """
-
-    class DatasourceParameterType(enum.StrEnum):
-        """
-        removes TOOLS_SELECTOR from PluginParameterType
-        """
-
-        STRING = PluginParameterType.STRING.value
-        NUMBER = PluginParameterType.NUMBER.value
-        BOOLEAN = PluginParameterType.BOOLEAN.value
-        SELECT = PluginParameterType.SELECT.value
-        SECRET_INPUT = PluginParameterType.SECRET_INPUT.value
-        FILE = PluginParameterType.FILE.value
-        FILES = PluginParameterType.FILES.value
-
-        # deprecated, should not use.
-        SYSTEM_FILES = PluginParameterType.SYSTEM_FILES.value
-
-        def as_normal_type(self):
-            return as_normal_type(self)
-
-        def cast_value(self, value: Any):
-            return cast_parameter_value(self, value)
-
-    type: DatasourceParameterType = Field(..., description="The type of the parameter")
-    description: I18nObject = Field(..., description="The description of the parameter")
-
-    @classmethod
-    def get_simple_instance(
-        cls,
-        name: str,
-        typ: DatasourceParameterType,
-        required: bool,
-        options: Optional[list[str]] = None,
-    ) -> "DatasourceParameter":
-        """
-        get a simple datasource parameter
-
-        :param name: the name of the parameter
-        :param llm_description: the description presented to the LLM
-        :param typ: the type of the parameter
-        :param required: if the parameter is required
-        :param options: the options of the parameter
-        """
-        # convert options to ToolParameterOption
-        # FIXME fix the type error
-        if options:
-            option_objs = [
-                PluginParameterOption(value=option, label=I18nObject(en_US=option, zh_Hans=option))
-                for option in options
-            ]
-        else:
-            option_objs = []
-
-        return cls(
-            name=name,
-            label=I18nObject(en_US="", zh_Hans=""),
-            placeholder=None,
-            type=typ,
-            required=required,
-            options=option_objs,
-            description=I18nObject(en_US="", zh_Hans=""),
-        )
-
-    def init_frontend_parameter(self, value: Any):
-        return init_frontend_parameter(self, self.type, value)
-
-
-class DatasourceIdentity(BaseModel):
-    author: str = Field(..., description="The author of the datasource")
-    name: str = Field(..., description="The name of the datasource")
-    label: I18nObject = Field(..., description="The label of the datasource")
-    provider: str = Field(..., description="The provider of the datasource")
-    icon: Optional[str] = None
-
-
-class DatasourceEntity(BaseModel):
-    identity: DatasourceIdentity
-    parameters: list[DatasourceParameter] = Field(default_factory=list)
-    description: I18nObject = Field(..., description="The label of the datasource")
-
-    @field_validator("parameters", mode="before")
-    @classmethod
-    def set_parameters(cls, v, validation_info: ValidationInfo) -> list[DatasourceParameter]:
-        return v or []
-
-
-class DatasourceProviderIdentity(BaseModel):
-    author: str = Field(..., description="The author of the tool")
-    name: str = Field(..., description="The name of the tool")
-    description: I18nObject = Field(..., description="The description of the tool")
-    icon: str = Field(..., description="The icon of the tool")
-    label: I18nObject = Field(..., description="The label of the tool")
-    tags: Optional[list[ToolLabelEnum]] = Field(
-        default=[],
-        description="The tags of the tool",
-    )
-
-
-class DatasourceProviderEntity(BaseModel):
-    """
-    Datasource provider entity
-    """
-
-    identity: DatasourceProviderIdentity
-    credentials_schema: list[ProviderConfig] = Field(default_factory=list)
-    oauth_schema: Optional[OAuthSchema] = None
-    provider_type: DatasourceProviderType
-
-
-class DatasourceProviderEntityWithPlugin(DatasourceProviderEntity):
-    datasources: list[DatasourceEntity] = Field(default_factory=list)
-
-
-class DatasourceInvokeMeta(BaseModel):
-    """
-    Datasource invoke meta
-    """
-
-    time_cost: float = Field(..., description="The time cost of the tool invoke")
-    error: Optional[str] = None
-    tool_config: Optional[dict] = None
-
-    @classmethod
-    def empty(cls) -> "DatasourceInvokeMeta":
-        """
-        Get an empty instance of DatasourceInvokeMeta
-        """
-        return cls(time_cost=0.0, error=None, tool_config={})
-
-    @classmethod
-    def error_instance(cls, error: str) -> "DatasourceInvokeMeta":
-        """
-        Get an instance of DatasourceInvokeMeta with error
-        """
-        return cls(time_cost=0.0, error=error, tool_config={})
-
-    def to_dict(self) -> dict:
-        return {
-            "time_cost": self.time_cost,
-            "error": self.error,
-            "tool_config": self.tool_config,
-        }
-
-
-class DatasourceLabel(BaseModel):
-    """
-    Datasource label
-    """
-
-    name: str = Field(..., description="The name of the tool")
-    label: I18nObject = Field(..., description="The label of the tool")
-    icon: str = Field(..., description="The icon of the tool")
-
-
-class DatasourceInvokeFrom(Enum):
-    """
-    Enum class for datasource invoke
-    """
-
-    RAG_PIPELINE = "rag_pipeline"
-
-
-class OnlineDocumentPage(BaseModel):
-    """
-    Online document page
-    """
-
-    page_id: str = Field(..., description="The page id")
-    page_name: str = Field(..., description="The page title")
-    page_icon: Optional[dict] = Field(None, description="The page icon")
-    type: str = Field(..., description="The type of the page")
-    last_edited_time: str = Field(..., description="The last edited time")
-    parent_id: Optional[str] = Field(None, description="The parent page id")
-
-
-class OnlineDocumentInfo(BaseModel):
-    """
-    Online document info
-    """
-
-    workspace_id: str = Field(..., description="The workspace id")
-    workspace_name: str = Field(..., description="The workspace name")
-    workspace_icon: str = Field(..., description="The workspace icon")
-    total: int = Field(..., description="The total number of documents")
-    pages: list[OnlineDocumentPage] = Field(..., description="The pages of the online document")
-
-
-class OnlineDocumentPagesMessage(BaseModel):
-    """
-    Get online document pages response
-    """
-
-    result: list[OnlineDocumentInfo]
-
-
-class GetOnlineDocumentPageContentRequest(BaseModel):
-    """
-    Get online document page content request
-    """
-
-    workspace_id: str = Field(..., description="The workspace id")
-    page_id: str = Field(..., description="The page id")
-    type: str = Field(..., description="The type of the page")
-
-
-class OnlineDocumentPageContent(BaseModel):
-    """
-    Online document page content
-    """
-
-    workspace_id: str = Field(..., description="The workspace id")
-    page_id: str = Field(..., description="The page id")
-    content: str = Field(..., description="The content of the page")
-
-
-class GetOnlineDocumentPageContentResponse(BaseModel):
-    """
-    Get online document page content response
-    """
-
-    result: OnlineDocumentPageContent
-
-
-class GetWebsiteCrawlRequest(BaseModel):
-    """
-    Get website crawl request
-    """
-
-    crawl_parameters: dict = Field(..., description="The crawl parameters")
-
-
-class WebSiteInfoDetail(BaseModel):
-    source_url: str = Field(..., description="The url of the website")
-    content: str = Field(..., description="The content of the website")
-    title: str = Field(..., description="The title of the website")
-    description: str = Field(..., description="The description of the website")
-
-
-class WebSiteInfo(BaseModel):
-    """
-    Website info
-    """
-
-    status: Optional[str] = Field(..., description="crawl job status")
-    web_info_list: Optional[list[WebSiteInfoDetail]] = []
-    total: Optional[int] = Field(default=0, description="The total number of websites")
-    completed: Optional[int] = Field(default=0, description="The number of completed websites")
-
-
-class WebsiteCrawlMessage(BaseModel):
-    """
-    Get website crawl response
-    """
-
-    result: WebSiteInfo = WebSiteInfo(status="", web_info_list=[], total=0, completed=0)
-
-
-class DatasourceMessage(ToolInvokeMessage):
-    pass
-
-
-#########################
-# Online driver file
-#########################
-
-
-class OnlineDriveFile(BaseModel):
-    """
-    Online driver file
-    """
-
-    key: str = Field(..., description="The key of the file")
-    size: int = Field(..., description="The size of the file")
-
-
-class OnlineDriveFileBucket(BaseModel):
-    """
-    Online driver file bucket
-    """
-
-    bucket: Optional[str] = Field(None, description="The bucket of the file")
-    files: list[OnlineDriveFile] = Field(..., description="The files of the bucket")
-    is_truncated: bool = Field(False, description="Whether the bucket has more files")
-
-
-class OnlineDriveBrowseFilesRequest(BaseModel):
-    """
-    Get online driver file list request
-    """
-
-    prefix: Optional[str] = Field(None, description="File path prefix for filtering eg: 'docs/dify/'")
-    bucket: Optional[str] = Field(None, description="Storage bucket name")
-    max_keys: int = Field(20, description="Maximum number of files to return")
-    start_after: Optional[str] = Field(
-        None, description="Pagination token for continuing from a specific file eg: 'docs/dify/1.txt'"
-    )
-
-
-class OnlineDriveBrowseFilesResponse(BaseModel):
-    """
-    Get online driver file list response
-    """
-
-    result: list[OnlineDriveFileBucket] = Field(..., description="The bucket of the files")
-
-
-class OnlineDriveDownloadFileRequest(BaseModel):
-    """
-    Get online driver file
-    """
-
-    key: str = Field(..., description="The name of the file")
-    bucket: Optional[str] = Field(None, description="The name of the bucket")
--- a/api/core/datasource/errors.py
+++ b/api/core/datasource/errors.py
@ -1,37 +0,0 @@
-from core.datasource.entities.datasource_entities import DatasourceInvokeMeta
-
-
-class DatasourceProviderNotFoundError(ValueError):
-    pass
-
-
-class DatasourceNotFoundError(ValueError):
-    pass
-
-
-class DatasourceParameterValidationError(ValueError):
-    pass
-
-
-class DatasourceProviderCredentialValidationError(ValueError):
-    pass
-
-
-class DatasourceNotSupportedError(ValueError):
-    pass
-
-
-class DatasourceInvokeError(ValueError):
-    pass
-
-
-class DatasourceApiSchemaError(ValueError):
-    pass
-
-
-class DatasourceEngineInvokeError(Exception):
-    meta: DatasourceInvokeMeta
-
-    def __init__(self, meta, **kwargs):
-        self.meta = meta
-        super().__init__(**kwargs)
--- a/api/core/datasource/local_file/local_file_plugin.py
+++ b/api/core/datasource/local_file/local_file_plugin.py
@ -1,28 +0,0 @@
-from core.datasource.__base.datasource_plugin import DatasourcePlugin
-from core.datasource.__base.datasource_runtime import DatasourceRuntime
-from core.datasource.entities.datasource_entities import (
-    DatasourceEntity,
-    DatasourceProviderType,
-)
-
-
-class LocalFileDatasourcePlugin(DatasourcePlugin):
-    tenant_id: str
-    icon: str
-    plugin_unique_identifier: str
-
-    def __init__(
-        self,
-        entity: DatasourceEntity,
-        runtime: DatasourceRuntime,
-        tenant_id: str,
-        icon: str,
-        plugin_unique_identifier: str,
-    ) -> None:
-        super().__init__(entity, runtime)
-        self.tenant_id = tenant_id
-        self.icon = icon
-        self.plugin_unique_identifier = plugin_unique_identifier
-
-    def datasource_provider_type(self) -> str:
-        return DatasourceProviderType.LOCAL_FILE
--- a/api/core/datasource/local_file/local_file_provider.py
+++ b/api/core/datasource/local_file/local_file_provider.py
@ -1,56 +0,0 @@
-from typing import Any
-
-from core.datasource.__base.datasource_provider import DatasourcePluginProviderController
-from core.datasource.__base.datasource_runtime import DatasourceRuntime
-from core.datasource.entities.datasource_entities import DatasourceProviderEntityWithPlugin, DatasourceProviderType
-from core.datasource.local_file.local_file_plugin import LocalFileDatasourcePlugin
-
-
-class LocalFileDatasourcePluginProviderController(DatasourcePluginProviderController):
-    entity: DatasourceProviderEntityWithPlugin
-    plugin_id: str
-    plugin_unique_identifier: str
-
-    def __init__(
-        self, entity: DatasourceProviderEntityWithPlugin, plugin_id: str, plugin_unique_identifier: str, tenant_id: str
-    ) -> None:
-        super().__init__(entity, tenant_id)
-        self.plugin_id = plugin_id
-        self.plugin_unique_identifier = plugin_unique_identifier
-
-    @property
-    def provider_type(self) -> DatasourceProviderType:
-        """
-        returns the type of the provider
-        """
-        return DatasourceProviderType.LOCAL_FILE
-
-    def _validate_credentials(self, user_id: str, credentials: dict[str, Any]) -> None:
-        """
-        validate the credentials of the provider
-        """
-        pass
-
-    def get_datasource(self, datasource_name: str) -> LocalFileDatasourcePlugin:  # type: ignore
-        """
-        return datasource with given name
-        """
-        datasource_entity = next(
-            (
-                datasource_entity
-                for datasource_entity in self.entity.datasources
-                if datasource_entity.identity.name == datasource_name
-            ),
-            None,
-        )
-
-        if not datasource_entity:
-            raise ValueError(f"Datasource with name {datasource_name} not found")
-
-        return LocalFileDatasourcePlugin(
-            entity=datasource_entity,
-            runtime=DatasourceRuntime(tenant_id=self.tenant_id),
-            tenant_id=self.tenant_id,
-            icon=self.entity.identity.icon,
-            plugin_unique_identifier=self.plugin_unique_identifier,
-        )
--- a/api/core/datasource/online_document/online_document_plugin.py
+++ b/api/core/datasource/online_document/online_document_plugin.py
@ -1,73 +0,0 @@
-from collections.abc import Generator, Mapping
-from typing import Any
-
-from core.datasource.__base.datasource_plugin import DatasourcePlugin
-from core.datasource.__base.datasource_runtime import DatasourceRuntime
-from core.datasource.entities.datasource_entities import (
-    DatasourceEntity,
-    DatasourceMessage,
-    DatasourceProviderType,
-    GetOnlineDocumentPageContentRequest,
-    OnlineDocumentPagesMessage,
-)
-from core.plugin.impl.datasource import PluginDatasourceManager
-
-
-class OnlineDocumentDatasourcePlugin(DatasourcePlugin):
-    tenant_id: str
-    icon: str
-    plugin_unique_identifier: str
-    entity: DatasourceEntity
-    runtime: DatasourceRuntime
-
-    def __init__(
-        self,
-        entity: DatasourceEntity,
-        runtime: DatasourceRuntime,
-        tenant_id: str,
-        icon: str,
-        plugin_unique_identifier: str,
-    ) -> None:
-        super().__init__(entity, runtime)
-        self.tenant_id = tenant_id
-        self.icon = icon
-        self.plugin_unique_identifier = plugin_unique_identifier
-
-    def get_online_document_pages(
-        self,
-        user_id: str,
-        datasource_parameters: Mapping[str, Any],
-        provider_type: str,
-    ) -> Generator[OnlineDocumentPagesMessage, None, None]:
-        manager = PluginDatasourceManager()
-
-        return manager.get_online_document_pages(
-            tenant_id=self.tenant_id,
-            user_id=user_id,
-            datasource_provider=self.entity.identity.provider,
-            datasource_name=self.entity.identity.name,
-            credentials=self.runtime.credentials,
-            datasource_parameters=datasource_parameters,
-            provider_type=provider_type,
-        )
-
-    def get_online_document_page_content(
-        self,
-        user_id: str,
-        datasource_parameters: GetOnlineDocumentPageContentRequest,
-        provider_type: str,
-    ) -> Generator[DatasourceMessage, None, None]:
-        manager = PluginDatasourceManager()
-
-        return manager.get_online_document_page_content(
-            tenant_id=self.tenant_id,
-            user_id=user_id,
-            datasource_provider=self.entity.identity.provider,
-            datasource_name=self.entity.identity.name,
-            credentials=self.runtime.credentials,
-            datasource_parameters=datasource_parameters,
-            provider_type=provider_type,
-        )
-
-    def datasource_provider_type(self) -> str:
-        return DatasourceProviderType.ONLINE_DOCUMENT
--- a/api/core/datasource/online_document/online_document_provider.py
+++ b/api/core/datasource/online_document/online_document_provider.py
@ -1,48 +0,0 @@
-from core.datasource.__base.datasource_provider import DatasourcePluginProviderController
-from core.datasource.__base.datasource_runtime import DatasourceRuntime
-from core.datasource.entities.datasource_entities import DatasourceProviderEntityWithPlugin, DatasourceProviderType
-from core.datasource.online_document.online_document_plugin import OnlineDocumentDatasourcePlugin
-
-
-class OnlineDocumentDatasourcePluginProviderController(DatasourcePluginProviderController):
-    entity: DatasourceProviderEntityWithPlugin
-    plugin_id: str
-    plugin_unique_identifier: str
-
-    def __init__(
-        self, entity: DatasourceProviderEntityWithPlugin, plugin_id: str, plugin_unique_identifier: str, tenant_id: str
-    ) -> None:
-        super().__init__(entity, tenant_id)
-        self.plugin_id = plugin_id
-        self.plugin_unique_identifier = plugin_unique_identifier
-
-    @property
-    def provider_type(self) -> DatasourceProviderType:
-        """
-        returns the type of the provider
-        """
-        return DatasourceProviderType.ONLINE_DOCUMENT
-
-    def get_datasource(self, datasource_name: str) -> OnlineDocumentDatasourcePlugin:  # type: ignore
-        """
-        return datasource with given name
-        """
-        datasource_entity = next(
-            (
-                datasource_entity
-                for datasource_entity in self.entity.datasources
-                if datasource_entity.identity.name == datasource_name
-            ),
-            None,
-        )
-
-        if not datasource_entity:
-            raise ValueError(f"Datasource with name {datasource_name} not found")
-
-        return OnlineDocumentDatasourcePlugin(
-            entity=datasource_entity,
-            runtime=DatasourceRuntime(tenant_id=self.tenant_id),
-            tenant_id=self.tenant_id,
-            icon=self.entity.identity.icon,
-            plugin_unique_identifier=self.plugin_unique_identifier,
-        )
--- a/api/core/datasource/online_drive/online_drive_plugin.py
+++ b/api/core/datasource/online_drive/online_drive_plugin.py
@ -1,73 +0,0 @@
-from collections.abc import Generator
-
-from core.datasource.__base.datasource_plugin import DatasourcePlugin
-from core.datasource.__base.datasource_runtime import DatasourceRuntime
-from core.datasource.entities.datasource_entities import (
-    DatasourceEntity,
-    DatasourceMessage,
-    DatasourceProviderType,
-    OnlineDriveBrowseFilesRequest,
-    OnlineDriveBrowseFilesResponse,
-    OnlineDriveDownloadFileRequest,
-)
-from core.plugin.impl.datasource import PluginDatasourceManager
-
-
-class OnlineDriveDatasourcePlugin(DatasourcePlugin):
-    tenant_id: str
-    icon: str
-    plugin_unique_identifier: str
-    entity: DatasourceEntity
-    runtime: DatasourceRuntime
-
-    def __init__(
-        self,
-        entity: DatasourceEntity,
-        runtime: DatasourceRuntime,
-        tenant_id: str,
-        icon: str,
-        plugin_unique_identifier: str,
-    ) -> None:
-        super().__init__(entity, runtime)
-        self.tenant_id = tenant_id
-        self.icon = icon
-        self.plugin_unique_identifier = plugin_unique_identifier
-
-    def online_drive_browse_files(
-        self,
-        user_id: str,
-        request: OnlineDriveBrowseFilesRequest,
-        provider_type: str,
-    ) -> Generator[OnlineDriveBrowseFilesResponse, None, None]:
-        manager = PluginDatasourceManager()
-
-        return manager.online_drive_browse_files(
-            tenant_id=self.tenant_id,
-            user_id=user_id,
-            datasource_provider=self.entity.identity.provider,
-            datasource_name=self.entity.identity.name,
-            credentials=self.runtime.credentials,
-            request=request,
-            provider_type=provider_type,
-        )
-
-    def online_drive_download_file(
-        self,
-        user_id: str,
-        request: OnlineDriveDownloadFileRequest,
-        provider_type: str,
-    ) -> Generator[DatasourceMessage, None, None]:
-        manager = PluginDatasourceManager()
-
-        return manager.online_drive_download_file(
-            tenant_id=self.tenant_id,
-            user_id=user_id,
-            datasource_provider=self.entity.identity.provider,
-            datasource_name=self.entity.identity.name,
-            credentials=self.runtime.credentials,
-            request=request,
-            provider_type=provider_type,
-        )
-
-    def datasource_provider_type(self) -> str:
-        return DatasourceProviderType.ONLINE_DRIVE
--- a/api/core/datasource/online_drive/online_drive_provider.py
+++ b/api/core/datasource/online_drive/online_drive_provider.py
@ -1,48 +0,0 @@
-from core.datasource.__base.datasource_provider import DatasourcePluginProviderController
-from core.datasource.__base.datasource_runtime import DatasourceRuntime
-from core.datasource.entities.datasource_entities import DatasourceProviderEntityWithPlugin, DatasourceProviderType
-from core.datasource.online_drive.online_drive_plugin import OnlineDriveDatasourcePlugin
-
-
-class OnlineDriveDatasourcePluginProviderController(DatasourcePluginProviderController):
-    entity: DatasourceProviderEntityWithPlugin
-    plugin_id: str
-    plugin_unique_identifier: str
-
-    def __init__(
-        self, entity: DatasourceProviderEntityWithPlugin, plugin_id: str, plugin_unique_identifier: str, tenant_id: str
-    ) -> None:
-        super().__init__(entity, tenant_id)
-        self.plugin_id = plugin_id
-        self.plugin_unique_identifier = plugin_unique_identifier
-
-    @property
-    def provider_type(self) -> DatasourceProviderType:
-        """
-        returns the type of the provider
-        """
-        return DatasourceProviderType.ONLINE_DRIVE
-
-    def get_datasource(self, datasource_name: str) -> OnlineDriveDatasourcePlugin:  # type: ignore
-        """
-        return datasource with given name
-        """
-        datasource_entity = next(
-            (
-                datasource_entity
-                for datasource_entity in self.entity.datasources
-                if datasource_entity.identity.name == datasource_name
-            ),
-            None,
-        )
-
-        if not datasource_entity:
-            raise ValueError(f"Datasource with name {datasource_name} not found")
-
-        return OnlineDriveDatasourcePlugin(
-            entity=datasource_entity,
-            runtime=DatasourceRuntime(tenant_id=self.tenant_id),
-            tenant_id=self.tenant_id,
-            icon=self.entity.identity.icon,
-            plugin_unique_identifier=self.plugin_unique_identifier,
-        )
--- a/api/core/datasource/utils/init.py
+++ b/api/core/datasource/utils/init.py
--- a/api/core/datasource/utils/configuration.py
+++ b/api/core/datasource/utils/configuration.py
@ -1,265 +0,0 @@
-from copy import deepcopy
-from typing import Any
-
-from pydantic import BaseModel
-
-from core.entities.provider_entities import BasicProviderConfig
-from core.helper import encrypter
-from core.helper.tool_parameter_cache import ToolParameterCache, ToolParameterCacheType
-from core.helper.tool_provider_cache import ToolProviderCredentialsCache, ToolProviderCredentialsCacheType
-from core.tools.__base.tool import Tool
-from core.tools.entities.tool_entities import (
-    ToolParameter,
-    ToolProviderType,
-)
-
-
-class ProviderConfigEncrypter(BaseModel):
-    tenant_id: str
-    config: list[BasicProviderConfig]
-    provider_type: str
-    provider_identity: str
-
-    def _deep_copy(self, data: dict[str, str]) -> dict[str, str]:
-        """
-        deep copy data
-        """
-        return deepcopy(data)
-
-    def encrypt(self, data: dict[str, str]) -> dict[str, str]:
-        """
-        encrypt tool credentials with tenant id
-
-        return a deep copy of credentials with encrypted values
-        """
-        data = self._deep_copy(data)
-
-        # get fields need to be decrypted
-        fields = dict[str, BasicProviderConfig]()
-        for credential in self.config:
-            fields[credential.name] = credential
-
-        for field_name, field in fields.items():
-            if field.type == BasicProviderConfig.Type.SECRET_INPUT:
-                if field_name in data:
-                    encrypted = encrypter.encrypt_token(self.tenant_id, data[field_name] or "")
-                    data[field_name] = encrypted
-
-        return data
-
-    def mask_tool_credentials(self, data: dict[str, Any]) -> dict[str, Any]:
-        """
-        mask tool credentials
-
-        return a deep copy of credentials with masked values
-        """
-        data = self._deep_copy(data)
-
-        # get fields need to be decrypted
-        fields = dict[str, BasicProviderConfig]()
-        for credential in self.config:
-            fields[credential.name] = credential
-
-        for field_name, field in fields.items():
-            if field.type == BasicProviderConfig.Type.SECRET_INPUT:
-                if field_name in data:
-                    if len(data[field_name]) > 6:
-                        data[field_name] = (
-                            data[field_name][:2] + "*" * (len(data[field_name]) - 4) + data[field_name][-2:]
-                        )
-                    else:
-                        data[field_name] = "*" * len(data[field_name])
-
-        return data
-
-    def decrypt(self, data: dict[str, str]) -> dict[str, str]:
-        """
-        decrypt tool credentials with tenant id
-
-        return a deep copy of credentials with decrypted values
-        """
-        cache = ToolProviderCredentialsCache(
-            tenant_id=self.tenant_id,
-            identity_id=f"{self.provider_type}.{self.provider_identity}",
-            cache_type=ToolProviderCredentialsCacheType.PROVIDER,
-        )
-        cached_credentials = cache.get()
-        if cached_credentials:
-            return cached_credentials
-        data = self._deep_copy(data)
-        # get fields need to be decrypted
-        fields = dict[str, BasicProviderConfig]()
-        for credential in self.config:
-            fields[credential.name] = credential
-
-        for field_name, field in fields.items():
-            if field.type == BasicProviderConfig.Type.SECRET_INPUT:
-                if field_name in data:
-                    try:
-                        # if the value is None or empty string, skip decrypt
-                        if not data[field_name]:
-                            continue
-
-                        data[field_name] = encrypter.decrypt_token(self.tenant_id, data[field_name])
-                    except Exception:
-                        pass
-
-        cache.set(data)
-        return data
-
-    def delete_tool_credentials_cache(self):
-        cache = ToolProviderCredentialsCache(
-            tenant_id=self.tenant_id,
-            identity_id=f"{self.provider_type}.{self.provider_identity}",
-            cache_type=ToolProviderCredentialsCacheType.PROVIDER,
-        )
-        cache.delete()
-
-
-class ToolParameterConfigurationManager:
-    """
-    Tool parameter configuration manager
-    """
-
-    tenant_id: str
-    tool_runtime: Tool
-    provider_name: str
-    provider_type: ToolProviderType
-    identity_id: str
-
-    def __init__(
-        self, tenant_id: str, tool_runtime: Tool, provider_name: str, provider_type: ToolProviderType, identity_id: str
-    ) -> None:
-        self.tenant_id = tenant_id
-        self.tool_runtime = tool_runtime
-        self.provider_name = provider_name
-        self.provider_type = provider_type
-        self.identity_id = identity_id
-
-    def _deep_copy(self, parameters: dict[str, Any]) -> dict[str, Any]:
-        """
-        deep copy parameters
-        """
-        return deepcopy(parameters)
-
-    def _merge_parameters(self) -> list[ToolParameter]:
-        """
-        merge parameters
-        """
-        # get tool parameters
-        tool_parameters = self.tool_runtime.entity.parameters or []
-        # get tool runtime parameters
-        runtime_parameters = self.tool_runtime.get_runtime_parameters()
-        # override parameters
-        current_parameters = tool_parameters.copy()
-        for runtime_parameter in runtime_parameters:
-            found = False
-            for index, parameter in enumerate(current_parameters):
-                if parameter.name == runtime_parameter.name and parameter.form == runtime_parameter.form:
-                    current_parameters[index] = runtime_parameter
-                    found = True
-                    break
-
-            if not found and runtime_parameter.form == ToolParameter.ToolParameterForm.FORM:
-                current_parameters.append(runtime_parameter)
-
-        return current_parameters
-
-    def mask_tool_parameters(self, parameters: dict[str, Any]) -> dict[str, Any]:
-        """
-        mask tool parameters
-
-        return a deep copy of parameters with masked values
-        """
-        parameters = self._deep_copy(parameters)
-
-        # override parameters
-        current_parameters = self._merge_parameters()
-
-        for parameter in current_parameters:
-            if (
-                parameter.form == ToolParameter.ToolParameterForm.FORM
-                and parameter.type == ToolParameter.ToolParameterType.SECRET_INPUT
-            ):
-                if parameter.name in parameters:
-                    if len(parameters[parameter.name]) > 6:
-                        parameters[parameter.name] = (
-                            parameters[parameter.name][:2]
-                            + "*" * (len(parameters[parameter.name]) - 4)
-                            + parameters[parameter.name][-2:]
-                        )
-                    else:
-                        parameters[parameter.name] = "*" * len(parameters[parameter.name])
-
-        return parameters
-
-    def encrypt_tool_parameters(self, parameters: dict[str, Any]) -> dict[str, Any]:
-        """
-        encrypt tool parameters with tenant id
-
-        return a deep copy of parameters with encrypted values
-        """
-        # override parameters
-        current_parameters = self._merge_parameters()
-
-        parameters = self._deep_copy(parameters)
-
-        for parameter in current_parameters:
-            if (
-                parameter.form == ToolParameter.ToolParameterForm.FORM
-                and parameter.type == ToolParameter.ToolParameterType.SECRET_INPUT
-            ):
-                if parameter.name in parameters:
-                    encrypted = encrypter.encrypt_token(self.tenant_id, parameters[parameter.name])
-                    parameters[parameter.name] = encrypted
-
-        return parameters
-
-    def decrypt_tool_parameters(self, parameters: dict[str, Any]) -> dict[str, Any]:
-        """
-        decrypt tool parameters with tenant id
-
-        return a deep copy of parameters with decrypted values
-        """
-
-        cache = ToolParameterCache(
-            tenant_id=self.tenant_id,
-            provider=f"{self.provider_type.value}.{self.provider_name}",
-            tool_name=self.tool_runtime.entity.identity.name,
-            cache_type=ToolParameterCacheType.PARAMETER,
-            identity_id=self.identity_id,
-        )
-        cached_parameters = cache.get()
-        if cached_parameters:
-            return cached_parameters
-
-        # override parameters
-        current_parameters = self._merge_parameters()
-        has_secret_input = False
-
-        for parameter in current_parameters:
-            if (
-                parameter.form == ToolParameter.ToolParameterForm.FORM
-                and parameter.type == ToolParameter.ToolParameterType.SECRET_INPUT
-            ):
-                if parameter.name in parameters:
-                    try:
-                        has_secret_input = True
-                        parameters[parameter.name] = encrypter.decrypt_token(self.tenant_id, parameters[parameter.name])
-                    except Exception:
-                        pass
-
-        if has_secret_input:
-            cache.set(parameters)
-
-        return parameters
-
-    def delete_tool_parameters_cache(self):
-        cache = ToolParameterCache(
-            tenant_id=self.tenant_id,
-            provider=f"{self.provider_type.value}.{self.provider_name}",
-            tool_name=self.tool_runtime.entity.identity.name,
-            cache_type=ToolParameterCacheType.PARAMETER,
-            identity_id=self.identity_id,
-        )
-        cache.delete()
--- a/api/core/datasource/utils/message_transformer.py
+++ b/api/core/datasource/utils/message_transformer.py
@ -1,121 +0,0 @@
-import logging
-from collections.abc import Generator
-from mimetypes import guess_extension
-from typing import Optional
-
-from core.datasource.datasource_file_manager import DatasourceFileManager
-from core.datasource.entities.datasource_entities import DatasourceMessage
-from core.file import File, FileTransferMethod, FileType
-
-logger = logging.getLogger(__name__)
-
-
-class DatasourceFileMessageTransformer:
-    @classmethod
-    def transform_datasource_invoke_messages(
-        cls,
-        messages: Generator[DatasourceMessage, None, None],
-        user_id: str,
-        tenant_id: str,
-        conversation_id: Optional[str] = None,
-    ) -> Generator[DatasourceMessage, None, None]:
-        """
-        Transform datasource message and handle file download
-        """
-        for message in messages:
-            if message.type in {DatasourceMessage.MessageType.TEXT, DatasourceMessage.MessageType.LINK}:
-                yield message
-            elif message.type == DatasourceMessage.MessageType.IMAGE and isinstance(
-                message.message, DatasourceMessage.TextMessage
-            ):
-                # try to download image
-                try:
-                    assert isinstance(message.message, DatasourceMessage.TextMessage)
-
-                    file = DatasourceFileManager.create_file_by_url(
-                        user_id=user_id,
-                        tenant_id=tenant_id,
-                        file_url=message.message.text,
-                        conversation_id=conversation_id,
-                    )
-
-                    url = f"/files/datasources/{file.id}{guess_extension(file.mime_type) or '.png'}"
-
-                    yield DatasourceMessage(
-                        type=DatasourceMessage.MessageType.IMAGE_LINK,
-                        message=DatasourceMessage.TextMessage(text=url),
-                        meta=message.meta.copy() if message.meta is not None else {},
-                    )
-                except Exception as e:
-                    yield DatasourceMessage(
-                        type=DatasourceMessage.MessageType.TEXT,
-                        message=DatasourceMessage.TextMessage(
-                            text=f"Failed to download image: {message.message.text}: {e}"
-                        ),
-                        meta=message.meta.copy() if message.meta is not None else {},
-                    )
-            elif message.type == DatasourceMessage.MessageType.BLOB:
-                # get mime type and save blob to storage
-                meta = message.meta or {}
-
-                mimetype = meta.get("mime_type", "application/octet-stream")
-                # get filename from meta
-                filename = meta.get("file_name", None)
-                # if message is str, encode it to bytes
-
-                if not isinstance(message.message, DatasourceMessage.BlobMessage):
-                    raise ValueError("unexpected message type")
-
-                # FIXME: should do a type check here.
-                assert isinstance(message.message.blob, bytes)
-                file = DatasourceFileManager.create_file_by_raw(
-                    user_id=user_id,
-                    tenant_id=tenant_id,
-                    conversation_id=conversation_id,
-                    file_binary=message.message.blob,
-                    mimetype=mimetype,
-                    filename=filename,
-                )
-
-                url = cls.get_datasource_file_url(datasource_file_id=file.id, extension=guess_extension(file.mime_type))
-
-                # check if file is image
-                if "image" in mimetype:
-                    yield DatasourceMessage(
-                        type=DatasourceMessage.MessageType.IMAGE_LINK,
-                        message=DatasourceMessage.TextMessage(text=url),
-                        meta=meta.copy() if meta is not None else {},
-                    )
-                else:
-                    yield DatasourceMessage(
-                        type=DatasourceMessage.MessageType.BINARY_LINK,
-                        message=DatasourceMessage.TextMessage(text=url),
-                        meta=meta.copy() if meta is not None else {},
-                    )
-            elif message.type == DatasourceMessage.MessageType.FILE:
-                meta = message.meta or {}
-                file = meta.get("file", None)
-                if isinstance(file, File):
-                    if file.transfer_method == FileTransferMethod.TOOL_FILE:
-                        assert file.related_id is not None
-                        url = cls.get_datasource_file_url(datasource_file_id=file.related_id, extension=file.extension)
-                        if file.type == FileType.IMAGE:
-                            yield DatasourceMessage(
-                                type=DatasourceMessage.MessageType.IMAGE_LINK,
-                                message=DatasourceMessage.TextMessage(text=url),
-                                meta=meta.copy() if meta is not None else {},
-                            )
-                        else:
-                            yield DatasourceMessage(
-                                type=DatasourceMessage.MessageType.LINK,
-                                message=DatasourceMessage.TextMessage(text=url),
-                                meta=meta.copy() if meta is not None else {},
-                            )
-                    else:
-                        yield message
-            else:
-                yield message
-
-    @classmethod
-    def get_datasource_file_url(cls, datasource_file_id: str, extension: Optional[str]) -> str:
-        return f"/files/datasources/{datasource_file_id}{extension or '.bin'}"
--- a/api/core/datasource/utils/parser.py
+++ b/api/core/datasource/utils/parser.py
@ -1,389 +0,0 @@
-import re
-import uuid
-from json import dumps as json_dumps
-from json import loads as json_loads
-from json.decoder import JSONDecodeError
-from typing import Optional
-
-from flask import request
-from requests import get
-from yaml import YAMLError, safe_load  # type: ignore
-
-from core.tools.entities.common_entities import I18nObject
-from core.tools.entities.tool_bundle import ApiToolBundle
-from core.tools.entities.tool_entities import ApiProviderSchemaType, ToolParameter
-from core.tools.errors import ToolApiSchemaError, ToolNotSupportedError, ToolProviderNotFoundError
-
-
-class ApiBasedToolSchemaParser:
-    @staticmethod
-    def parse_openapi_to_tool_bundle(
-        openapi: dict, extra_info: dict | None = None, warning: dict | None = None
-    ) -> list[ApiToolBundle]:
-        warning = warning if warning is not None else {}
-        extra_info = extra_info if extra_info is not None else {}
-
-        # set description to extra_info
-        extra_info["description"] = openapi["info"].get("description", "")
-
-        if len(openapi["servers"]) == 0:
-            raise ToolProviderNotFoundError("No server found in the openapi yaml.")
-
-        server_url = openapi["servers"][0]["url"]
-        request_env = request.headers.get("X-Request-Env")
-        if request_env:
-            matched_servers = [server["url"] for server in openapi["servers"] if server["env"] == request_env]
-            server_url = matched_servers[0] if matched_servers else server_url
-
-        # list all interfaces
-        interfaces = []
-        for path, path_item in openapi["paths"].items():
-            methods = ["get", "post", "put", "delete", "patch", "head", "options", "trace"]
-            for method in methods:
-                if method in path_item:
-                    interfaces.append(
-                        {
-                            "path": path,
-                            "method": method,
-                            "operation": path_item[method],
-                        }
-                    )
-
-        # get all parameters
-        bundles = []
-        for interface in interfaces:
-            # convert parameters
-            parameters = []
-            if "parameters" in interface["operation"]:
-                for parameter in interface["operation"]["parameters"]:
-                    tool_parameter = ToolParameter(
-                        name=parameter["name"],
-                        label=I18nObject(en_US=parameter["name"], zh_Hans=parameter["name"]),
-                        human_description=I18nObject(
-                            en_US=parameter.get("description", ""), zh_Hans=parameter.get("description", "")
-                        ),
-                        type=ToolParameter.ToolParameterType.STRING,
-                        required=parameter.get("required", False),
-                        form=ToolParameter.ToolParameterForm.LLM,
-                        llm_description=parameter.get("description"),
-                        default=parameter["schema"]["default"]
-                        if "schema" in parameter and "default" in parameter["schema"]
-                        else None,
-                        placeholder=I18nObject(
-                            en_US=parameter.get("description", ""), zh_Hans=parameter.get("description", "")
-                        ),
-                    )
-
-                    # check if there is a type
-                    typ = ApiBasedToolSchemaParser._get_tool_parameter_type(parameter)
-                    if typ:
-                        tool_parameter.type = typ
-
-                    parameters.append(tool_parameter)
-            # create tool bundle
-            # check if there is a request body
-            if "requestBody" in interface["operation"]:
-                request_body = interface["operation"]["requestBody"]
-                if "content" in request_body:
-                    for content_type, content in request_body["content"].items():
-                        # if there is a reference, get the reference and overwrite the content
-                        if "schema" not in content:
-                            continue
-
-                        if "$ref" in content["schema"]:
-                            # get the reference
-                            root = openapi
-                            reference = content["schema"]["$ref"].split("/")[1:]
-                            for ref in reference:
-                                root = root[ref]
-                            # overwrite the content
-                            interface["operation"]["requestBody"]["content"][content_type]["schema"] = root
-
-                    # parse body parameters
-                    if "schema" in interface["operation"]["requestBody"]["content"][content_type]:
-                        body_schema = interface["operation"]["requestBody"]["content"][content_type]["schema"]
-                        required = body_schema.get("required", [])
-                        properties = body_schema.get("properties", {})
-                        for name, property in properties.items():
-                            tool = ToolParameter(
-                                name=name,
-                                label=I18nObject(en_US=name, zh_Hans=name),
-                                human_description=I18nObject(
-                                    en_US=property.get("description", ""), zh_Hans=property.get("description", "")
-                                ),
-                                type=ToolParameter.ToolParameterType.STRING,
-                                required=name in required,
-                                form=ToolParameter.ToolParameterForm.LLM,
-                                llm_description=property.get("description", ""),
-                                default=property.get("default", None),
-                                placeholder=I18nObject(
-                                    en_US=property.get("description", ""), zh_Hans=property.get("description", "")
-                                ),
-                            )
-
-                            # check if there is a type
-                            typ = ApiBasedToolSchemaParser._get_tool_parameter_type(property)
-                            if typ:
-                                tool.type = typ
-
-                            parameters.append(tool)
-
-            # check if parameters is duplicated
-            parameters_count = {}
-            for parameter in parameters:
-                if parameter.name not in parameters_count:
-                    parameters_count[parameter.name] = 0
-                parameters_count[parameter.name] += 1
-            for name, count in parameters_count.items():
-                if count > 1:
-                    warning["duplicated_parameter"] = f"Parameter {name} is duplicated."
-
-            # check if there is a operation id, use $path_$method as operation id if not
-            if "operationId" not in interface["operation"]:
-                # remove special characters like / to ensure the operation id is valid ^[a-zA-Z0-9_-]{1,64}$
-                path = interface["path"]
-                if interface["path"].startswith("/"):
-                    path = interface["path"][1:]
-                # remove special characters like / to ensure the operation id is valid ^[a-zA-Z0-9_-]{1,64}$
-                path = re.sub(r"[^a-zA-Z0-9_-]", "", path)
-                if not path:
-                    path = str(uuid.uuid4())
-
-                interface["operation"]["operationId"] = f"{path}_{interface['method']}"
-
-            bundles.append(
-                ApiToolBundle(
-                    server_url=server_url + interface["path"],
-                    method=interface["method"],
-                    summary=interface["operation"]["description"]
-                    if "description" in interface["operation"]
-                    else interface["operation"].get("summary", None),
-                    operation_id=interface["operation"]["operationId"],
-                    parameters=parameters,
-                    author="",
-                    icon=None,
-                    openapi=interface["operation"],
-                )
-            )
-
-        return bundles
-
-    @staticmethod
-    def _get_tool_parameter_type(parameter: dict) -> Optional[ToolParameter.ToolParameterType]:
-        parameter = parameter or {}
-        typ: Optional[str] = None
-        if parameter.get("format") == "binary":
-            return ToolParameter.ToolParameterType.FILE
-
-        if "type" in parameter:
-            typ = parameter["type"]
-        elif "schema" in parameter and "type" in parameter["schema"]:
-            typ = parameter["schema"]["type"]
-
-        if typ in {"integer", "number"}:
-            return ToolParameter.ToolParameterType.NUMBER
-        elif typ == "boolean":
-            return ToolParameter.ToolParameterType.BOOLEAN
-        elif typ == "string":
-            return ToolParameter.ToolParameterType.STRING
-        elif typ == "array":
-            items = parameter.get("items") or parameter.get("schema", {}).get("items")
-            return ToolParameter.ToolParameterType.FILES if items and items.get("format") == "binary" else None
-        else:
-            return None
-
-    @staticmethod
-    def parse_openapi_yaml_to_tool_bundle(
-        yaml: str, extra_info: dict | None = None, warning: dict | None = None
-    ) -> list[ApiToolBundle]:
-        """
-        parse openapi yaml to tool bundle
-
-        :param yaml: the yaml string
-        :param extra_info: the extra info
-        :param warning: the warning message
-        :return: the tool bundle
-        """
-        warning = warning if warning is not None else {}
-        extra_info = extra_info if extra_info is not None else {}
-
-        openapi: dict = safe_load(yaml)
-        if openapi is None:
-            raise ToolApiSchemaError("Invalid openapi yaml.")
-        return ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(openapi, extra_info=extra_info, warning=warning)
-
-    @staticmethod
-    def parse_swagger_to_openapi(swagger: dict, extra_info: dict | None = None, warning: dict | None = None) -> dict:
-        warning = warning or {}
-        """
-        parse swagger to openapi
-
-        :param swagger: the swagger dict
-        :return: the openapi dict
-        """
-        # convert swagger to openapi
-        info = swagger.get("info", {"title": "Swagger", "description": "Swagger", "version": "1.0.0"})
-
-        servers = swagger.get("servers", [])
-
-        if len(servers) == 0:
-            raise ToolApiSchemaError("No server found in the swagger yaml.")
-
-        openapi = {
-            "openapi": "3.0.0",
-            "info": {
-                "title": info.get("title", "Swagger"),
-                "description": info.get("description", "Swagger"),
-                "version": info.get("version", "1.0.0"),
-            },
-            "servers": swagger["servers"],
-            "paths": {},
-            "components": {"schemas": {}},
-        }
-
-        # check paths
-        if "paths" not in swagger or len(swagger["paths"]) == 0:
-            raise ToolApiSchemaError("No paths found in the swagger yaml.")
-
-        # convert paths
-        for path, path_item in swagger["paths"].items():
-            openapi["paths"][path] = {}
-            for method, operation in path_item.items():
-                if "operationId" not in operation:
-                    raise ToolApiSchemaError(f"No operationId found in operation {method} {path}.")
-
-                if ("summary" not in operation or len(operation["summary"]) == 0) and (
-                    "description" not in operation or len(operation["description"]) == 0
-                ):
-                    if warning is not None:
-                        warning["missing_summary"] = f"No summary or description found in operation {method} {path}."
-
-                openapi["paths"][path][method] = {
-                    "operationId": operation["operationId"],
-                    "summary": operation.get("summary", ""),
-                    "description": operation.get("description", ""),
-                    "parameters": operation.get("parameters", []),
-                    "responses": operation.get("responses", {}),
-                }
-
-                if "requestBody" in operation:
-                    openapi["paths"][path][method]["requestBody"] = operation["requestBody"]
-
-        # convert definitions
-        for name, definition in swagger["definitions"].items():
-            openapi["components"]["schemas"][name] = definition
-
-        return openapi
-
-    @staticmethod
-    def parse_openai_plugin_json_to_tool_bundle(
-        json: str, extra_info: dict | None = None, warning: dict | None = None
-    ) -> list[ApiToolBundle]:
-        """
-        parse openapi plugin yaml to tool bundle
-
-        :param json: the json string
-        :param extra_info: the extra info
-        :param warning: the warning message
-        :return: the tool bundle
-        """
-        warning = warning if warning is not None else {}
-        extra_info = extra_info if extra_info is not None else {}
-
-        try:
-            openai_plugin = json_loads(json)
-            api = openai_plugin["api"]
-            api_url = api["url"]
-            api_type = api["type"]
-        except JSONDecodeError:
-            raise ToolProviderNotFoundError("Invalid openai plugin json.")
-
-        if api_type != "openapi":
-            raise ToolNotSupportedError("Only openapi is supported now.")
-
-        # get openapi yaml
-        response = get(api_url, headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "}, timeout=5)
-
-        if response.status_code != 200:
-            raise ToolProviderNotFoundError("cannot get openapi yaml from url.")
-
-        return ApiBasedToolSchemaParser.parse_openapi_yaml_to_tool_bundle(
-            response.text, extra_info=extra_info, warning=warning
-        )
-
-    @staticmethod
-    def auto_parse_to_tool_bundle(
-        content: str, extra_info: dict | None = None, warning: dict | None = None
-    ) -> tuple[list[ApiToolBundle], str]:
-        """
-        auto parse to tool bundle
-
-        :param content: the content
-        :param extra_info: the extra info
-        :param warning: the warning message
-        :return: tools bundle, schema_type
-        """
-        warning = warning if warning is not None else {}
-        extra_info = extra_info if extra_info is not None else {}
-
-        content = content.strip()
-        loaded_content = None
-        json_error = None
-        yaml_error = None
-
-        try:
-            loaded_content = json_loads(content)
-        except JSONDecodeError as e:
-            json_error = e
-
-        if loaded_content is None:
-            try:
-                loaded_content = safe_load(content)
-            except YAMLError as e:
-                yaml_error = e
-        if loaded_content is None:
-            raise ToolApiSchemaError(
-                f"Invalid api schema, schema is neither json nor yaml. json error: {str(json_error)},"
-                f" yaml error: {str(yaml_error)}"
-            )
-
-        swagger_error = None
-        openapi_error = None
-        openapi_plugin_error = None
-        schema_type = None
-
-        try:
-            openapi = ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(
-                loaded_content, extra_info=extra_info, warning=warning
-            )
-            schema_type = ApiProviderSchemaType.OPENAPI.value
-            return openapi, schema_type
-        except ToolApiSchemaError as e:
-            openapi_error = e
-
-        # openai parse error, fallback to swagger
-        try:
-            converted_swagger = ApiBasedToolSchemaParser.parse_swagger_to_openapi(
-                loaded_content, extra_info=extra_info, warning=warning
-            )
-            schema_type = ApiProviderSchemaType.SWAGGER.value
-            return ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(
-                converted_swagger, extra_info=extra_info, warning=warning
-            ), schema_type
-        except ToolApiSchemaError as e:
-            swagger_error = e
-
-        # swagger parse error, fallback to openai plugin
-        try:
-            openapi_plugin = ApiBasedToolSchemaParser.parse_openai_plugin_json_to_tool_bundle(
-                json_dumps(loaded_content), extra_info=extra_info, warning=warning
-            )
-            return openapi_plugin, ApiProviderSchemaType.OPENAI_PLUGIN.value
-        except ToolNotSupportedError as e:
-            # maybe it's not plugin at all
-            openapi_plugin_error = e
-
-        raise ToolApiSchemaError(
-            f"Invalid api schema, openapi error: {str(openapi_error)}, swagger error: {str(swagger_error)},"
-            f" openapi plugin error: {str(openapi_plugin_error)}"
-        )
--- a/api/core/datasource/utils/text_processing_utils.py
+++ b/api/core/datasource/utils/text_processing_utils.py
@ -1,17 +0,0 @@
-import re
-
-
-def remove_leading_symbols(text: str) -> str:
-    """
-    Remove leading punctuation or symbols from the given text.
-
-    Args:
-        text (str): The input text to process.
-
-    Returns:
-        str: The text with leading punctuation or symbols removed.
-    """
-    # Match Unicode ranges for punctuation and symbols
-    # FIXME this pattern is confused quick fix for #11868 maybe refactor it later
-    pattern = r"^[\u2000-\u206F\u2E00-\u2E7F\u3000-\u303F!\"#$%&'()*+,./:;<=>?@^_`~]+"
-    return re.sub(pattern, "", text)
--- a/api/core/datasource/utils/uuid_utils.py
+++ b/api/core/datasource/utils/uuid_utils.py
@ -1,9 +0,0 @@
-import uuid
-
-
-def is_valid_uuid(uuid_str: str) -> bool:
-    try:
-        uuid.UUID(uuid_str)
-        return True
-    except Exception:
-        return False
--- a/api/core/datasource/utils/workflow_configuration_sync.py
+++ b/api/core/datasource/utils/workflow_configuration_sync.py
@ -1,43 +0,0 @@
-from collections.abc import Mapping, Sequence
-from typing import Any
-
-from core.app.app_config.entities import VariableEntity
-from core.tools.entities.tool_entities import WorkflowToolParameterConfiguration
-
-
-class WorkflowToolConfigurationUtils:
-    @classmethod
-    def check_parameter_configurations(cls, configurations: list[Mapping[str, Any]]):
-        for configuration in configurations:
-            WorkflowToolParameterConfiguration.model_validate(configuration)
-
-    @classmethod
-    def get_workflow_graph_variables(cls, graph: Mapping[str, Any]) -> Sequence[VariableEntity]:
-        """
-        get workflow graph variables
-        """
-        nodes = graph.get("nodes", [])
-        start_node = next(filter(lambda x: x.get("data", {}).get("type") == "start", nodes), None)
-
-        if not start_node:
-            return []
-
-        return [VariableEntity.model_validate(variable) for variable in start_node.get("data", {}).get("variables", [])]
-
-    @classmethod
-    def check_is_synced(
-        cls, variables: list[VariableEntity], tool_configurations: list[WorkflowToolParameterConfiguration]
-    ):
-        """
-        check is synced
-
-        raise ValueError if not synced
-        """
-        variable_names = [variable.variable for variable in variables]
-
-        if len(tool_configurations) != len(variables):
-            raise ValueError("parameter configuration mismatch, please republish the tool to update")
-
-        for parameter in tool_configurations:
-            if parameter.name not in variable_names:
-                raise ValueError("parameter configuration mismatch, please republish the tool to update")
--- a/api/core/datasource/utils/yaml_utils.py
+++ b/api/core/datasource/utils/yaml_utils.py
@ -1,35 +0,0 @@
-import logging
-from pathlib import Path
-from typing import Any
-
-import yaml  # type: ignore
-from yaml import YAMLError
-
-logger = logging.getLogger(__name__)
-
-
-def load_yaml_file(file_path: str, ignore_error: bool = True, default_value: Any = {}) -> Any:
-    """
-    Safe loading a YAML file
-    :param file_path: the path of the YAML file
-    :param ignore_error:
-        if True, return default_value if error occurs and the error will be logged in debug level
-        if False, raise error if error occurs
-    :param default_value: the value returned when errors ignored
-    :return: an object of the YAML content
-    """
-    if not file_path or not Path(file_path).exists():
-        if ignore_error:
-            return default_value
-        else:
-            raise FileNotFoundError(f"File not found: {file_path}")
-
-    with open(file_path, encoding="utf-8") as yaml_file:
-        try:
-            yaml_content = yaml.safe_load(yaml_file)
-            return yaml_content or default_value
-        except Exception as e:
-            if ignore_error:
-                return default_value
-            else:
-                raise YAMLError(f"Failed to load YAML file {file_path}: {e}") from e
--- a/api/core/datasource/website_crawl/website_crawl_plugin.py
+++ b/api/core/datasource/website_crawl/website_crawl_plugin.py
@ -1,53 +0,0 @@
-from collections.abc import Generator, Mapping
-from typing import Any
-
-from core.datasource.__base.datasource_plugin import DatasourcePlugin
-from core.datasource.__base.datasource_runtime import DatasourceRuntime
-from core.datasource.entities.datasource_entities import (
-    DatasourceEntity,
-    DatasourceProviderType,
-    WebsiteCrawlMessage,
-)
-from core.plugin.impl.datasource import PluginDatasourceManager
-
-
-class WebsiteCrawlDatasourcePlugin(DatasourcePlugin):
-    tenant_id: str
-    icon: str
-    plugin_unique_identifier: str
-    entity: DatasourceEntity
-    runtime: DatasourceRuntime
-
-    def __init__(
-        self,
-        entity: DatasourceEntity,
-        runtime: DatasourceRuntime,
-        tenant_id: str,
-        icon: str,
-        plugin_unique_identifier: str,
-    ) -> None:
-        super().__init__(entity, runtime)
-        self.tenant_id = tenant_id
-        self.icon = icon
-        self.plugin_unique_identifier = plugin_unique_identifier
-
-    def get_website_crawl(
-        self,
-        user_id: str,
-        datasource_parameters: Mapping[str, Any],
-        provider_type: str,
-    ) -> Generator[WebsiteCrawlMessage, None, None]:
-        manager = PluginDatasourceManager()
-
-        return manager.get_website_crawl(
-            tenant_id=self.tenant_id,
-            user_id=user_id,
-            datasource_provider=self.entity.identity.provider,
-            datasource_name=self.entity.identity.name,
-            credentials=self.runtime.credentials,
-            datasource_parameters=datasource_parameters,
-            provider_type=provider_type,
-        )
-
-    def datasource_provider_type(self) -> str:
-        return DatasourceProviderType.WEBSITE_CRAWL
--- a/api/core/datasource/website_crawl/website_crawl_provider.py
+++ b/api/core/datasource/website_crawl/website_crawl_provider.py
@ -1,52 +0,0 @@
-from core.datasource.__base.datasource_provider import DatasourcePluginProviderController
-from core.datasource.__base.datasource_runtime import DatasourceRuntime
-from core.datasource.entities.datasource_entities import DatasourceProviderEntityWithPlugin, DatasourceProviderType
-from core.datasource.website_crawl.website_crawl_plugin import WebsiteCrawlDatasourcePlugin
-
-
-class WebsiteCrawlDatasourcePluginProviderController(DatasourcePluginProviderController):
-    entity: DatasourceProviderEntityWithPlugin
-    plugin_id: str
-    plugin_unique_identifier: str
-
-    def __init__(
-        self,
-        entity: DatasourceProviderEntityWithPlugin,
-        plugin_id: str,
-        plugin_unique_identifier: str,
-        tenant_id: str,
-    ) -> None:
-        super().__init__(entity, tenant_id)
-        self.plugin_id = plugin_id
-        self.plugin_unique_identifier = plugin_unique_identifier
-
-    @property
-    def provider_type(self) -> DatasourceProviderType:
-        """
-        returns the type of the provider
-        """
-        return DatasourceProviderType.WEBSITE_CRAWL
-
-    def get_datasource(self, datasource_name: str) -> WebsiteCrawlDatasourcePlugin:  # type: ignore
-        """
-        return datasource with given name
-        """
-        datasource_entity = next(
-            (
-                datasource_entity
-                for datasource_entity in self.entity.datasources
-                if datasource_entity.identity.name == datasource_name
-            ),
-            None,
-        )
-
-        if not datasource_entity:
-            raise ValueError(f"Datasource with name {datasource_name} not found")
-
-        return WebsiteCrawlDatasourcePlugin(
-            entity=datasource_entity,
-            runtime=DatasourceRuntime(tenant_id=self.tenant_id),
-            tenant_id=self.tenant_id,
-            icon=self.entity.identity.icon,
-            plugin_unique_identifier=self.plugin_unique_identifier,
-        )
--- a/api/core/entities/knowledge_entities.py
+++ b/api/core/entities/knowledge_entities.py
@ -17,27 +17,3 @@ class IndexingEstimate(BaseModel):
    total_segments: int
    preview: list[PreviewDetail]
    qa_preview: Optional[list[QAPreviewDetail]] = None
-
-
-class PipelineDataset(BaseModel):
-    id: str
-    name: str
-    description: str
-    chunk_structure: str
-
-
-class PipelineDocument(BaseModel):
-    id: str
-    position: int
-    data_source_type: str
-    data_source_info: Optional[dict] = None
-    name: str
-    indexing_status: str
-    error: Optional[str] = None
-    enabled: bool
-
-
-class PipelineGenerateResponse(BaseModel):
-    batch: str
-    dataset: PipelineDataset
-    documents: list[PipelineDocument]
--- a/api/core/file/datasource_file_parser.py
+++ b/api/core/file/datasource_file_parser.py
@ -1,15 +0,0 @@
-from typing import TYPE_CHECKING, Any, cast
-
-from core.datasource import datasource_file_manager
-from core.datasource.datasource_file_manager import DatasourceFileManager
-
-if TYPE_CHECKING:
-    from core.datasource.datasource_file_manager import DatasourceFileManager
-
-tool_file_manager: dict[str, Any] = {"manager": None}
-
-
-class DatasourceFileParser:
-    @staticmethod
-    def get_datasource_file_manager() -> "DatasourceFileManager":
-        return cast("DatasourceFileManager", datasource_file_manager["manager"])
--- a/api/core/file/enums.py
+++ b/api/core/file/enums.py
@ -20,7 +20,6 @@ class FileTransferMethod(StrEnum):
    REMOTE_URL = "remote_url"
    LOCAL_FILE = "local_file"
    TOOL_FILE = "tool_file"
-    DATASOURCE_FILE = "datasource_file"

    @staticmethod
    def value_of(value):
--- a/api/core/ops/entities/trace_entity.py
+++ b/api/core/ops/entities/trace_entity.py
@ -135,4 +135,3 @@ class TraceTaskName(StrEnum):
    DATASET_RETRIEVAL_TRACE = "dataset_retrieval"
    TOOL_TRACE = "tool"
    GENERATE_NAME_TRACE = "generate_conversation_name"
-    DATASOURCE_TRACE = "datasource"
--- a/api/core/ops/langfuse_trace/langfuse_trace.py
+++ b/api/core/ops/langfuse_trace/langfuse_trace.py
@ -83,7 +83,6 @@ class LangFuseDataTrace(BaseTraceInstance):
                metadata=metadata,
                session_id=trace_info.conversation_id,
                tags=["message", "workflow"],
-                version=trace_info.workflow_run_version,
            )
            self.add_trace(langfuse_trace_data=trace_data)
            workflow_span_data = LangfuseSpan(
@ -109,7 +108,6 @@ class LangFuseDataTrace(BaseTraceInstance):
                metadata=metadata,
                session_id=trace_info.conversation_id,
                tags=["workflow"],
-                version=trace_info.workflow_run_version,
            )
            self.add_trace(langfuse_trace_data=trace_data)

@ -174,7 +172,37 @@ class LangFuseDataTrace(BaseTraceInstance):
                    }
                )

-            # add generation span
+            # add span
+            if trace_info.message_id:
+                span_data = LangfuseSpan(
+                    id=node_execution_id,
+                    name=node_type,
+                    input=inputs,
+                    output=outputs,
+                    trace_id=trace_id,
+                    start_time=created_at,
+                    end_time=finished_at,
+                    metadata=metadata,
+                    level=(LevelEnum.DEFAULT if status == "succeeded" else LevelEnum.ERROR),
+                    status_message=trace_info.error or "",
+                    parent_observation_id=trace_info.workflow_run_id,
+                )
+            else:
+                span_data = LangfuseSpan(
+                    id=node_execution_id,
+                    name=node_type,
+                    input=inputs,
+                    output=outputs,
+                    trace_id=trace_id,
+                    start_time=created_at,
+                    end_time=finished_at,
+                    metadata=metadata,
+                    level=(LevelEnum.DEFAULT if status == "succeeded" else LevelEnum.ERROR),
+                    status_message=trace_info.error or "",
+                )
+
+            self.add_span(langfuse_span_data=span_data)
+
            if process_data and process_data.get("model_mode") == "chat":
                total_token = metadata.get("total_tokens", 0)
                prompt_tokens = 0
@ -198,10 +226,10 @@ class LangFuseDataTrace(BaseTraceInstance):
                )

                node_generation_data = LangfuseGeneration(
-                    id=node_execution_id,
-                    name=node_name,
+                    name="llm",
                    trace_id=trace_id,
                    model=process_data.get("model_name"),
+                    parent_observation_id=node_execution_id,
                    start_time=created_at,
                    end_time=finished_at,
                    input=inputs,
@ -209,30 +237,11 @@ class LangFuseDataTrace(BaseTraceInstance):
                    metadata=metadata,
                    level=(LevelEnum.DEFAULT if status == "succeeded" else LevelEnum.ERROR),
                    status_message=trace_info.error or "",
-                    parent_observation_id=trace_info.workflow_run_id if trace_info.message_id else None,
                    usage=generation_usage,
                )

                self.add_generation(langfuse_generation_data=node_generation_data)

-            # add normal span
-            else:
-                span_data = LangfuseSpan(
-                    id=node_execution_id,
-                    name=node_name,
-                    input=inputs,
-                    output=outputs,
-                    trace_id=trace_id,
-                    start_time=created_at,
-                    end_time=finished_at,
-                    metadata=metadata,
-                    level=(LevelEnum.DEFAULT if status == "succeeded" else LevelEnum.ERROR),
-                    status_message=trace_info.error or "",
-                    parent_observation_id=trace_info.workflow_run_id if trace_info.message_id else None,
-                )
-
-                self.add_span(langfuse_span_data=span_data)
-
    def message_trace(self, trace_info: MessageTraceInfo, **kwargs):
        # get message file data
        file_list = trace_info.file_list
@ -275,7 +284,7 @@ class LangFuseDataTrace(BaseTraceInstance):
        )
        self.add_trace(langfuse_trace_data=trace_data)

-        # add generation
+        # start add span
        generation_usage = GenerationUsage(
            input=trace_info.message_tokens,
            output=trace_info.answer_tokens,
--- a/api/core/plugin/entities/oauth.py
+++ b/api/core/plugin/entities/oauth.py
@ -1,21 +0,0 @@
-from collections.abc import Sequence
-
-from pydantic import BaseModel, Field
-
-from core.entities.provider_entities import ProviderConfig
-
-
-class OAuthSchema(BaseModel):
-    """
-    OAuth schema
-    """
-
-    client_schema: Sequence[ProviderConfig] = Field(
-        default_factory=list,
-        description="client schema like client_id, client_secret, etc.",
-    )
-
-    credentials_schema: Sequence[ProviderConfig] = Field(
-        default_factory=list,
-        description="credentials schema like access_token, refresh_token, etc.",
-    )
--- a/api/core/plugin/entities/plugin.py
+++ b/api/core/plugin/entities/plugin.py
@ -8,7 +8,6 @@ from pydantic import BaseModel, Field, model_validator
 from werkzeug.exceptions import NotFound

 from core.agent.plugin_entities import AgentStrategyProviderEntity
-from core.datasource.entities.datasource_entities import DatasourceProviderEntity
 from core.model_runtime.entities.provider_entities import ProviderEntity
 from core.plugin.entities.base import BasePluginEntity
 from core.plugin.entities.endpoint import EndpointProviderDeclaration
@ -63,7 +62,6 @@ class PluginCategory(enum.StrEnum):
    Model = "model"
    Extension = "extension"
    AgentStrategy = "agent-strategy"
-    Datasource = "datasource"


 class PluginDeclaration(BaseModel):
@ -71,7 +69,6 @@ class PluginDeclaration(BaseModel):
        tools: Optional[list[str]] = Field(default_factory=list[str])
        models: Optional[list[str]] = Field(default_factory=list[str])
        endpoints: Optional[list[str]] = Field(default_factory=list[str])
-        datasources: Optional[list[str]] = Field(default_factory=list[str])

    class Meta(BaseModel):
        minimum_dify_version: Optional[str] = Field(default=None, pattern=r"^\d{1,4}(\.\d{1,4}){1,3}(-\w{1,16})?$")
@ -93,7 +90,6 @@ class PluginDeclaration(BaseModel):
    model: Optional[ProviderEntity] = None
    endpoint: Optional[EndpointProviderDeclaration] = None
    agent_strategy: Optional[AgentStrategyProviderEntity] = None
-    datasource: Optional[DatasourceProviderEntity] = None
    meta: Meta

    @model_validator(mode="before")
@ -104,8 +100,6 @@ class PluginDeclaration(BaseModel):
            values["category"] = PluginCategory.Tool
        elif values.get("model"):
            values["category"] = PluginCategory.Model
-        elif values.get("datasource"):
-            values["category"] = PluginCategory.Datasource
        elif values.get("agent_strategy"):
            values["category"] = PluginCategory.AgentStrategy
        else:
@ -199,11 +193,6 @@ class ToolProviderID(GenericProviderID):
                self.plugin_name = f"{self.provider_name}_tool"


-class DatasourceProviderID(GenericProviderID):
-    def __init__(self, value: str, is_hardcoded: bool = False) -> None:
-        super().__init__(value, is_hardcoded)
-
-
 class PluginDependency(BaseModel):
    class Type(enum.StrEnum):
        Github = PluginInstallationSource.Github.value
--- a/api/core/plugin/entities/plugin_daemon.py
+++ b/api/core/plugin/entities/plugin_daemon.py
@ -6,7 +6,6 @@ from typing import Any, Generic, Optional, TypeVar
 from pydantic import BaseModel, ConfigDict, Field

 from core.agent.plugin_entities import AgentProviderEntityWithPlugin
-from core.datasource.entities.datasource_entities import DatasourceProviderEntityWithPlugin
 from core.model_runtime.entities.model_entities import AIModelEntity
 from core.model_runtime.entities.provider_entities import ProviderEntity
 from core.plugin.entities.base import BasePluginEntity
@ -49,14 +48,6 @@ class PluginToolProviderEntity(BaseModel):
    declaration: ToolProviderEntityWithPlugin


-class PluginDatasourceProviderEntity(BaseModel):
-    provider: str
-    plugin_unique_identifier: str
-    plugin_id: str
-    is_authorized: bool = False
-    declaration: DatasourceProviderEntityWithPlugin
-
-
 class PluginAgentProviderEntity(BaseModel):
    provider: str
    plugin_unique_identifier: str
--- a/api/core/plugin/impl/datasource.py
+++ b/api/core/plugin/impl/datasource.py
@ -1,329 +0,0 @@
-from collections.abc import Generator, Mapping
-from typing import Any
-
-from core.datasource.entities.datasource_entities import (
-    DatasourceMessage,
-    GetOnlineDocumentPageContentRequest,
-    OnlineDocumentPagesMessage,
-    OnlineDriveBrowseFilesRequest,
-    OnlineDriveBrowseFilesResponse,
-    OnlineDriveDownloadFileRequest,
-    WebsiteCrawlMessage,
-)
-from core.plugin.entities.plugin import DatasourceProviderID, GenericProviderID
-from core.plugin.entities.plugin_daemon import (
-    PluginBasicBooleanResponse,
-    PluginDatasourceProviderEntity,
-)
-from core.plugin.impl.base import BasePluginClient
-from services.tools.tools_transform_service import ToolTransformService
-
-
-class PluginDatasourceManager(BasePluginClient):
-    def fetch_datasource_providers(self, tenant_id: str) -> list[PluginDatasourceProviderEntity]:
-        """
-        Fetch datasource providers for the given tenant.
-        """
-
-        def transformer(json_response: dict[str, Any]) -> dict:
-            if json_response.get("data"):
-                for provider in json_response.get("data", []):
-                    declaration = provider.get("declaration", {}) or {}
-                    provider_name = declaration.get("identity", {}).get("name")
-                    for datasource in declaration.get("datasources", []):
-                        datasource["identity"]["provider"] = provider_name
-
-            return json_response
-
-        response = self._request_with_plugin_daemon_response(
-            "GET",
-            f"plugin/{tenant_id}/management/datasources",
-            list[PluginDatasourceProviderEntity],
-            params={"page": 1, "page_size": 256},
-            transformer=transformer,
-        )
-        local_file_datasource_provider = PluginDatasourceProviderEntity(**self._get_local_file_datasource_provider())
-
-        for provider in response:
-            ToolTransformService.repack_provider(tenant_id=tenant_id, provider=provider)
-        all_response = [local_file_datasource_provider] + response
-
-        for provider in all_response:
-            provider.declaration.identity.name = f"{provider.plugin_id}/{provider.declaration.identity.name}"
-
-            # override the provider name for each tool to plugin_id/provider_name
-            for tool in provider.declaration.datasources:
-                tool.identity.provider = provider.declaration.identity.name
-
-        return all_response
-
-    def fetch_datasource_provider(self, tenant_id: str, provider_id: str) -> PluginDatasourceProviderEntity:
-        """
-        Fetch datasource provider for the given tenant and plugin.
-        """
-        if provider_id == "langgenius/file/file":
-            return PluginDatasourceProviderEntity(**self._get_local_file_datasource_provider())
-
-        tool_provider_id = DatasourceProviderID(provider_id)
-
-        def transformer(json_response: dict[str, Any]) -> dict:
-            data = json_response.get("data")
-            if data:
-                for datasource in data.get("declaration", {}).get("datasources", []):
-                    datasource["identity"]["provider"] = tool_provider_id.provider_name
-
-            return json_response
-
-        response = self._request_with_plugin_daemon_response(
-            "GET",
-            f"plugin/{tenant_id}/management/datasource",
-            PluginDatasourceProviderEntity,
-            params={"provider": tool_provider_id.provider_name, "plugin_id": tool_provider_id.plugin_id},
-            transformer=transformer,
-        )
-
-        response.declaration.identity.name = f"{response.plugin_id}/{response.declaration.identity.name}"
-
-        # override the provider name for each tool to plugin_id/provider_name
-        for datasource in response.declaration.datasources:
-            datasource.identity.provider = response.declaration.identity.name
-
-        return response
-
-    def get_website_crawl(
-        self,
-        tenant_id: str,
-        user_id: str,
-        datasource_provider: str,
-        datasource_name: str,
-        credentials: dict[str, Any],
-        datasource_parameters: Mapping[str, Any],
-        provider_type: str,
-    ) -> Generator[WebsiteCrawlMessage, None, None]:
-        """
-        Invoke the datasource with the given tenant, user, plugin, provider, name, credentials and parameters.
-        """
-
-        datasource_provider_id = GenericProviderID(datasource_provider)
-
-        return self._request_with_plugin_daemon_response_stream(
-            "POST",
-            f"plugin/{tenant_id}/dispatch/datasource/get_website_crawl",
-            WebsiteCrawlMessage,
-            data={
-                "user_id": user_id,
-                "data": {
-                    "provider": datasource_provider_id.provider_name,
-                    "datasource": datasource_name,
-                    "credentials": credentials,
-                    "datasource_parameters": datasource_parameters,
-                },
-            },
-            headers={
-                "X-Plugin-ID": datasource_provider_id.plugin_id,
-                "Content-Type": "application/json",
-            },
-        )
-
-    def get_online_document_pages(
-        self,
-        tenant_id: str,
-        user_id: str,
-        datasource_provider: str,
-        datasource_name: str,
-        credentials: dict[str, Any],
-        datasource_parameters: Mapping[str, Any],
-        provider_type: str,
-    ) -> Generator[OnlineDocumentPagesMessage, None, None]:
-        """
-        Invoke the datasource with the given tenant, user, plugin, provider, name, credentials and parameters.
-        """
-
-        datasource_provider_id = GenericProviderID(datasource_provider)
-
-        return self._request_with_plugin_daemon_response_stream(
-            "POST",
-            f"plugin/{tenant_id}/dispatch/datasource/get_online_document_pages",
-            OnlineDocumentPagesMessage,
-            data={
-                "user_id": user_id,
-                "data": {
-                    "provider": datasource_provider_id.provider_name,
-                    "datasource": datasource_name,
-                    "credentials": credentials,
-                    "datasource_parameters": datasource_parameters,
-                },
-            },
-            headers={
-                "X-Plugin-ID": datasource_provider_id.plugin_id,
-                "Content-Type": "application/json",
-            },
-        )
-
-    def get_online_document_page_content(
-        self,
-        tenant_id: str,
-        user_id: str,
-        datasource_provider: str,
-        datasource_name: str,
-        credentials: dict[str, Any],
-        datasource_parameters: GetOnlineDocumentPageContentRequest,
-        provider_type: str,
-    ) -> Generator[DatasourceMessage, None, None]:
-        """
-        Invoke the datasource with the given tenant, user, plugin, provider, name, credentials and parameters.
-        """
-
-        datasource_provider_id = GenericProviderID(datasource_provider)
-
-        return self._request_with_plugin_daemon_response_stream(
-            "POST",
-            f"plugin/{tenant_id}/dispatch/datasource/get_online_document_page_content",
-            DatasourceMessage,
-            data={
-                "user_id": user_id,
-                "data": {
-                    "provider": datasource_provider_id.provider_name,
-                    "datasource": datasource_name,
-                    "credentials": credentials,
-                    "page": datasource_parameters.model_dump(),
-                },
-            },
-            headers={
-                "X-Plugin-ID": datasource_provider_id.plugin_id,
-                "Content-Type": "application/json",
-            },
-        )
-
-    def online_drive_browse_files(
-        self,
-        tenant_id: str,
-        user_id: str,
-        datasource_provider: str,
-        datasource_name: str,
-        credentials: dict[str, Any],
-        request: OnlineDriveBrowseFilesRequest,
-        provider_type: str,
-    ) -> Generator[OnlineDriveBrowseFilesResponse, None, None]:
-        """
-        Invoke the datasource with the given tenant, user, plugin, provider, name, credentials and parameters.
-        """
-
-        datasource_provider_id = GenericProviderID(datasource_provider)
-
-        response = self._request_with_plugin_daemon_response_stream(
-            "POST",
-            f"plugin/{tenant_id}/dispatch/datasource/online_drive_browse_files",
-            OnlineDriveBrowseFilesResponse,
-            data={
-                "user_id": user_id,
-                "data": {
-                    "provider": datasource_provider_id.provider_name,
-                    "datasource": datasource_name,
-                    "credentials": credentials,
-                    "request": request.model_dump(),
-                },
-            },
-            headers={
-                "X-Plugin-ID": datasource_provider_id.plugin_id,
-                "Content-Type": "application/json",
-            },
-        )
-        yield from response
-
-    def online_drive_download_file(
-        self,
-        tenant_id: str,
-        user_id: str,
-        datasource_provider: str,
-        datasource_name: str,
-        credentials: dict[str, Any],
-        request: OnlineDriveDownloadFileRequest,
-        provider_type: str,
-    ) -> Generator[DatasourceMessage, None, None]:
-        """
-        Invoke the datasource with the given tenant, user, plugin, provider, name, credentials and parameters.
-        """
-
-        datasource_provider_id = GenericProviderID(datasource_provider)
-
-        response = self._request_with_plugin_daemon_response_stream(
-            "POST",
-            f"plugin/{tenant_id}/dispatch/datasource/online_drive_download_file",
-            DatasourceMessage,
-            data={
-                "user_id": user_id,
-                "data": {
-                    "provider": datasource_provider_id.provider_name,
-                    "datasource": datasource_name,
-                    "credentials": credentials,
-                    "request": request.model_dump(),
-                },
-            },
-            headers={
-                "X-Plugin-ID": datasource_provider_id.plugin_id,
-                "Content-Type": "application/json",
-            },
-        )
-        yield from response
-
-    def validate_provider_credentials(
-        self, tenant_id: str, user_id: str, provider: str, plugin_id: str, credentials: dict[str, Any]
-    ) -> bool:
-        """
-        validate the credentials of the provider
-        """
-        # datasource_provider_id = GenericProviderID(provider_id)
-
-        response = self._request_with_plugin_daemon_response_stream(
-            "POST",
-            f"plugin/{tenant_id}/dispatch/datasource/validate_credentials",
-            PluginBasicBooleanResponse,
-            data={
-                "user_id": user_id,
-                "data": {
-                    "provider": provider,
-                    "credentials": credentials,
-                },
-            },
-            headers={
-                "X-Plugin-ID": plugin_id,
-                "Content-Type": "application/json",
-            },
-        )
-
-        for resp in response:
-            return resp.result
-
-        return False
-
-    def _get_local_file_datasource_provider(self) -> dict[str, Any]:
-        return {
-            "id": "langgenius/file/file",
-            "plugin_id": "langgenius/file",
-            "provider": "file",
-            "plugin_unique_identifier": "langgenius/file:0.0.1@dify",
-            "declaration": {
-                "identity": {
-                    "author": "langgenius",
-                    "name": "file",
-                    "label": {"zh_Hans": "File", "en_US": "File", "pt_BR": "File", "ja_JP": "File"},
-                    "icon": "https://assets.dify.ai/images/File%20Upload.svg",
-                    "description": {"zh_Hans": "File", "en_US": "File", "pt_BR": "File", "ja_JP": "File"},
-                },
-                "credentials_schema": [],
-                "provider_type": "local_file",
-                "datasources": [
-                    {
-                        "identity": {
-                            "author": "langgenius",
-                            "name": "upload-file",
-                            "provider": "file",
-                            "label": {"zh_Hans": "File", "en_US": "File", "pt_BR": "File", "ja_JP": "File"},
-                        },
-                        "parameters": [],
-                        "description": {"zh_Hans": "File", "en_US": "File", "pt_BR": "File", "ja_JP": "File"},
-                    }
-                ],
-            },
-        }
--- a/api/core/plugin/impl/dynamic_select.py
+++ b/api/core/plugin/impl/dynamic_select.py
@ -42,4 +42,4 @@ class DynamicSelectClient(BasePluginClient):
        for options in response:
            return options

-        raise ValueError(f"Plugin service returned no options for parameter '{parameter}' in provider '{provider}'")
+        raise ValueError("Plugin service returned no options")
--- a/api/core/plugin/impl/tool.py
+++ b/api/core/plugin/impl/tool.py
@ -4,10 +4,7 @@ from typing import Any, Optional
 from pydantic import BaseModel

 from core.plugin.entities.plugin import GenericProviderID, ToolProviderID
-from core.plugin.entities.plugin_daemon import (
-    PluginBasicBooleanResponse,
-    PluginToolProviderEntity,
-)
+from core.plugin.entities.plugin_daemon import PluginBasicBooleanResponse, PluginToolProviderEntity
 from core.plugin.impl.base import BasePluginClient
 from core.tools.entities.tool_entities import ToolInvokeMessage, ToolParameter

@ -200,36 +197,6 @@ class PluginToolManager(BasePluginClient):

        return False

-    def validate_datasource_credentials(
-        self, tenant_id: str, user_id: str, provider: str, credentials: dict[str, Any]
-    ) -> bool:
-        """
-        validate the credentials of the datasource
-        """
-        tool_provider_id = GenericProviderID(provider)
-
-        response = self._request_with_plugin_daemon_response_stream(
-            "POST",
-            f"plugin/{tenant_id}/dispatch/datasource/validate_credentials",
-            PluginBasicBooleanResponse,
-            data={
-                "user_id": user_id,
-                "data": {
-                    "provider": tool_provider_id.provider_name,
-                    "credentials": credentials,
-                },
-            },
-            headers={
-                "X-Plugin-ID": tool_provider_id.plugin_id,
-                "Content-Type": "application/json",
-            },
-        )
-
-        for resp in response:
-            return resp.result
-
-        return False
-
    def get_runtime_parameters(
        self,
        tenant_id: str,
--- a/api/core/rag/datasource/keyword/jieba/jieba.py
+++ b/api/core/rag/datasource/keyword/jieba/jieba.py
@ -28,12 +28,10 @@ class Jieba(BaseKeyword):
        with redis_client.lock(lock_name, timeout=600):
            keyword_table_handler = JiebaKeywordTableHandler()
            keyword_table = self._get_dataset_keyword_table()
-            keyword_number = (
-                self.dataset.keyword_number if self.dataset.keyword_number else self._config.max_keywords_per_chunk
-            )
-
            for text in texts:
-                keywords = keyword_table_handler.extract_keywords(text.page_content, keyword_number)
+                keywords = keyword_table_handler.extract_keywords(
+                    text.page_content, self._config.max_keywords_per_chunk
+                )
                if text.metadata is not None:
                    self._update_segment_keywords(self.dataset.id, text.metadata["doc_id"], list(keywords))
                    keyword_table = self._add_text_to_keyword_table(
@ -51,17 +49,18 @@ class Jieba(BaseKeyword):

            keyword_table = self._get_dataset_keyword_table()
            keywords_list = kwargs.get("keywords_list")
-            keyword_number = (
-                self.dataset.keyword_number if self.dataset.keyword_number else self._config.max_keywords_per_chunk
-            )
            for i in range(len(texts)):
                text = texts[i]
                if keywords_list:
                    keywords = keywords_list[i]
                    if not keywords:
-                        keywords = keyword_table_handler.extract_keywords(text.page_content, keyword_number)
+                        keywords = keyword_table_handler.extract_keywords(
+                            text.page_content, self._config.max_keywords_per_chunk
+                        )
                else:
-                    keywords = keyword_table_handler.extract_keywords(text.page_content, keyword_number)
+                    keywords = keyword_table_handler.extract_keywords(
+                        text.page_content, self._config.max_keywords_per_chunk
+                    )
                if text.metadata is not None:
                    self._update_segment_keywords(self.dataset.id, text.metadata["doc_id"], list(keywords))
                    keyword_table = self._add_text_to_keyword_table(
@ -240,11 +239,7 @@ class Jieba(BaseKeyword):
                    keyword_table or {}, segment.index_node_id, pre_segment_data["keywords"]
                )
            else:
-                keyword_number = (
-                    self.dataset.keyword_number if self.dataset.keyword_number else self._config.max_keywords_per_chunk
-                )
-
-                keywords = keyword_table_handler.extract_keywords(segment.content, keyword_number)
+                keywords = keyword_table_handler.extract_keywords(segment.content, self._config.max_keywords_per_chunk)
                segment.keywords = list(keywords)
                keyword_table = self._add_text_to_keyword_table(
                    keyword_table or {}, segment.index_node_id, list(keywords)
--- a/api/core/rag/entities/event.py
+++ b/api/core/rag/entities/event.py
@ -1,38 +0,0 @@
-from collections.abc import Mapping
-from enum import Enum
-from typing import Any, Optional
-
-from pydantic import BaseModel, Field
-
-
-class DatasourceStreamEvent(Enum):
-    """
-    Datasource Stream event
-    """
-
-    PROCESSING = "datasource_processing"
-    COMPLETED = "datasource_completed"
-    ERROR = "datasource_error"
-
-
-class BaseDatasourceEvent(BaseModel):
-    pass
-
-
-class DatasourceErrorEvent(BaseDatasourceEvent):
-    event: str = DatasourceStreamEvent.ERROR.value
-    error: str = Field(..., description="error message")
-
-
-class DatasourceCompletedEvent(BaseDatasourceEvent):
-    event: str = DatasourceStreamEvent.COMPLETED.value
-    data: Mapping[str, Any] | list = Field(..., description="result")
-    total: Optional[int] = Field(default=0, description="total")
-    completed: Optional[int] = Field(default=0, description="completed")
-    time_consuming: Optional[float] = Field(default=0.0, description="time consuming")
-
-
-class DatasourceProcessingEvent(BaseDatasourceEvent):
-    event: str = DatasourceStreamEvent.PROCESSING.value
-    total: Optional[int] = Field(..., description="total")
-    completed: Optional[int] = Field(..., description="completed")
--- a/api/core/rag/index_processor/constant/built_in_field.py
+++ b/api/core/rag/index_processor/constant/built_in_field.py
@ -13,5 +13,3 @@ class MetadataDataSource(Enum):
    upload_file = "file_upload"
    website_crawl = "website"
    notion_import = "notion"
-    local_file = "file_upload"
-    online_document = "online_document"
--- a/api/core/rag/index_processor/index_processor_base.py
+++ b/api/core/rag/index_processor/index_processor_base.py
@ -1,8 +1,7 @@
 """Abstract interface for document loader implementations."""

 from abc import ABC, abstractmethod
-from collections.abc import Mapping
-from typing import Any, Optional
+from typing import Optional

 from configs import dify_config
 from core.model_manager import ModelInstance
@ -14,7 +13,6 @@ from core.rag.splitter.fixed_text_splitter import (
 )
 from core.rag.splitter.text_splitter import TextSplitter
 from models.dataset import Dataset, DatasetProcessRule
-from models.dataset import Document as DatasetDocument


 class BaseIndexProcessor(ABC):
@ -35,14 +33,6 @@ class BaseIndexProcessor(ABC):
    def clean(self, dataset: Dataset, node_ids: Optional[list[str]], with_keywords: bool = True, **kwargs):
        raise NotImplementedError

-    @abstractmethod
-    def index(self, dataset: Dataset, document: DatasetDocument, chunks: Mapping[str, Any]):
-        raise NotImplementedError
-
-    @abstractmethod
-    def format_preview(self, chunks: Mapping[str, Any]) -> Mapping[str, Any]:
-        raise NotImplementedError
-
    @abstractmethod
    def retrieve(
        self,
--- a/api/core/rag/index_processor/processor/paragraph_index_processor.py
+++ b/api/core/rag/index_processor/processor/paragraph_index_processor.py
@ -1,22 +1,19 @@
 """Paragraph index processor."""

 import uuid
-from collections.abc import Mapping
-from typing import Any, Optional
+from typing import Optional

 from core.rag.cleaner.clean_processor import CleanProcessor
 from core.rag.datasource.keyword.keyword_factory import Keyword
 from core.rag.datasource.retrieval_service import RetrievalService
 from core.rag.datasource.vdb.vector_factory import Vector
-from core.rag.docstore.dataset_docstore import DatasetDocumentStore
 from core.rag.extractor.entity.extract_setting import ExtractSetting
 from core.rag.extractor.extract_processor import ExtractProcessor
 from core.rag.index_processor.index_processor_base import BaseIndexProcessor
-from core.rag.models.document import Document, GeneralStructureChunk
+from core.rag.models.document import Document
 from core.tools.utils.text_processing_utils import remove_leading_symbols
 from libs import helper
 from models.dataset import Dataset, DatasetProcessRule
-from models.dataset import Document as DatasetDocument
 from services.entities.knowledge_entities.knowledge_entities import Rule


@ -130,34 +127,3 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
                doc = Document(page_content=result.page_content, metadata=metadata)
                docs.append(doc)
        return docs
-
-    def index(self, dataset: Dataset, document: DatasetDocument, chunks: Mapping[str, Any]):
-        paragraph = GeneralStructureChunk(**chunks)
-        documents = []
-        for content in paragraph.general_chunks:
-            metadata = {
-                "dataset_id": dataset.id,
-                "document_id": document.id,
-                "doc_id": str(uuid.uuid4()),
-                "doc_hash": helper.generate_text_hash(content),
-            }
-            doc = Document(page_content=content, metadata=metadata)
-            documents.append(doc)
-        if documents:
-            # save node to document segment
-            doc_store = DatasetDocumentStore(dataset=dataset, user_id=document.created_by, document_id=document.id)
-            # add document segments
-            doc_store.add_documents(docs=documents, save_child=False)
-            if dataset.indexing_technique == "high_quality":
-                vector = Vector(dataset)
-                vector.create(documents)
-            elif dataset.indexing_technique == "economy":
-                keyword = Keyword(dataset)
-                keyword.add_texts(documents)
-
-    def format_preview(self, chunks: Mapping[str, Any]) -> Mapping[str, Any]:
-        paragraph = GeneralStructureChunk(**chunks)
-        preview = []
-        for content in paragraph.general_chunks:
-            preview.append({"content": content})
-        return {"preview": preview, "total_segments": len(paragraph.general_chunks)}
--- a/api/core/rag/index_processor/processor/parent_child_index_processor.py
+++ b/api/core/rag/index_processor/processor/parent_child_index_processor.py
@ -1,23 +1,20 @@
 """Paragraph index processor."""

 import uuid
-from collections.abc import Mapping
-from typing import Any, Optional
+from typing import Optional

 from configs import dify_config
 from core.model_manager import ModelInstance
 from core.rag.cleaner.clean_processor import CleanProcessor
 from core.rag.datasource.retrieval_service import RetrievalService
 from core.rag.datasource.vdb.vector_factory import Vector
-from core.rag.docstore.dataset_docstore import DatasetDocumentStore
 from core.rag.extractor.entity.extract_setting import ExtractSetting
 from core.rag.extractor.extract_processor import ExtractProcessor
 from core.rag.index_processor.index_processor_base import BaseIndexProcessor
-from core.rag.models.document import ChildDocument, Document, ParentChildStructureChunk
+from core.rag.models.document import ChildDocument, Document
 from extensions.ext_database import db
 from libs import helper
 from models.dataset import ChildChunk, Dataset, DocumentSegment
-from models.dataset import Document as DatasetDocument
 from services.entities.knowledge_entities.knowledge_entities import ParentMode, Rule


@ -205,40 +202,3 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
                    child_document.page_content = child_page_content
                    child_nodes.append(child_document)
        return child_nodes
-
-    def index(self, dataset: Dataset, document: DatasetDocument, chunks: Mapping[str, Any]):
-        parent_childs = ParentChildStructureChunk(**chunks)
-        documents = []
-        for parent_child in parent_childs.parent_child_chunks:
-            metadata = {
-                "dataset_id": dataset.id,
-                "document_id": document.id,
-                "doc_id": str(uuid.uuid4()),
-                "doc_hash": helper.generate_text_hash(parent_child.parent_content),
-            }
-            child_documents = []
-            for child in parent_child.child_contents:
-                child_metadata = {
-                    "dataset_id": dataset.id,
-                    "document_id": document.id,
-                    "doc_id": str(uuid.uuid4()),
-                    "doc_hash": helper.generate_text_hash(child),
-                }
-                child_documents.append(ChildDocument(page_content=child, metadata=child_metadata))
-            doc = Document(page_content=parent_child.parent_content, metadata=metadata, children=child_documents)
-            documents.append(doc)
-        if documents:
-            # save node to document segment
-            doc_store = DatasetDocumentStore(dataset=dataset, user_id=document.created_by, document_id=document.id)
-            # add document segments
-            doc_store.add_documents(docs=documents, save_child=True)
-            if dataset.indexing_technique == "high_quality":
-                vector = Vector(dataset)
-                vector.create(documents)
-
-    def format_preview(self, chunks: Mapping[str, Any]) -> Mapping[str, Any]:
-        parent_childs = ParentChildStructureChunk(**chunks)
-        preview = []
-        for parent_child in parent_childs.parent_child_chunks:
-            preview.append({"content": parent_child.parent_content, "child_chunks": parent_child.child_contents})
-        return {"preview": preview, "total_segments": len(parent_childs.parent_child_chunks)}
--- a/api/core/rag/index_processor/processor/qa_index_processor.py
+++ b/api/core/rag/index_processor/processor/qa_index_processor.py
@ -4,8 +4,7 @@ import logging
 import re
 import threading
 import uuid
-from collections.abc import Mapping
-from typing import Any, Optional
+from typing import Optional

 import pandas as pd
 from flask import Flask, current_app
@ -15,15 +14,13 @@ from core.llm_generator.llm_generator import LLMGenerator
 from core.rag.cleaner.clean_processor import CleanProcessor
 from core.rag.datasource.retrieval_service import RetrievalService
 from core.rag.datasource.vdb.vector_factory import Vector
-from core.rag.docstore.dataset_docstore import DatasetDocumentStore
 from core.rag.extractor.entity.extract_setting import ExtractSetting
 from core.rag.extractor.extract_processor import ExtractProcessor
 from core.rag.index_processor.index_processor_base import BaseIndexProcessor
-from core.rag.models.document import Document, QAStructureChunk
+from core.rag.models.document import Document
 from core.tools.utils.text_processing_utils import remove_leading_symbols
 from libs import helper
 from models.dataset import Dataset
-from models.dataset import Document as DatasetDocument
 from services.entities.knowledge_entities.knowledge_entities import Rule


@ -164,36 +161,6 @@ class QAIndexProcessor(BaseIndexProcessor):
                docs.append(doc)
        return docs

-    def index(self, dataset: Dataset, document: DatasetDocument, chunks: Mapping[str, Any]):
-        qa_chunks = QAStructureChunk(**chunks)
-        documents = []
-        for qa_chunk in qa_chunks.qa_chunks:
-            metadata = {
-                "dataset_id": dataset.id,
-                "document_id": document.id,
-                "doc_id": str(uuid.uuid4()),
-                "doc_hash": helper.generate_text_hash(qa_chunk.question),
-                "answer": qa_chunk.answer,
-            }
-            doc = Document(page_content=qa_chunk.question, metadata=metadata)
-            documents.append(doc)
-        if documents:
-            # save node to document segment
-            doc_store = DatasetDocumentStore(dataset=dataset, user_id=document.created_by, document_id=document.id)
-            doc_store.add_documents(docs=documents, save_child=False)
-            if dataset.indexing_technique == "high_quality":
-                vector = Vector(dataset)
-                vector.create(documents)
-            else:
-                raise ValueError("Indexing technique must be high quality.")
-
-    def format_preview(self, chunks: Mapping[str, Any]) -> Mapping[str, Any]:
-        qa_chunks = QAStructureChunk(**chunks)
-        preview = []
-        for qa_chunk in qa_chunks.qa_chunks:
-            preview.append({"question": qa_chunk.question, "answer": qa_chunk.answer})
-        return {"qa_preview": preview, "total_segments": len(qa_chunks.qa_chunks)}
-
    def _format_qa_document(self, flask_app: Flask, tenant_id: str, document_node, all_qa_documents, document_language):
        format_documents = []
        if document_node.page_content is None or not document_node.page_content.strip():
--- a/api/core/rag/models/document.py
+++ b/api/core/rag/models/document.py
@ -35,48 +35,6 @@ class Document(BaseModel):
    children: Optional[list[ChildDocument]] = None


-class GeneralStructureChunk(BaseModel):
-    """
-    General Structure Chunk.
-    """
-
-    general_chunks: list[str]
-
-
-class ParentChildChunk(BaseModel):
-    """
-    Parent Child Chunk.
-    """
-
-    parent_content: str
-    child_contents: list[str]
-
-
-class ParentChildStructureChunk(BaseModel):
-    """
-    Parent Child Structure Chunk.
-    """
-
-    parent_child_chunks: list[ParentChildChunk]
-
-
-class QAChunk(BaseModel):
-    """
-    QA Chunk.
-    """
-
-    question: str
-    answer: str
-
-
-class QAStructureChunk(BaseModel):
-    """
-    QAStructureChunk.
-    """
-
-    qa_chunks: list[QAChunk]
-
-
 class BaseDocumentTransformer(ABC):
    """Abstract base class for document transformation systems.

--- a/api/core/rag/retrieval/dataset_retrieval.py
+++ b/api/core/rag/retrieval/dataset_retrieval.py
@ -1010,9 +1010,6 @@ class DatasetRetrieval:
    def _process_metadata_filter_func(
        self, sequence: int, condition: str, metadata_name: str, value: Optional[Any], filters: list
    ):
-        if value is None:
-            return
-
        key = f"{metadata_name}_{sequence}"
        key_value = f"{metadata_name}_{sequence}_value"
        match condition:
--- a/api/core/rag/retrieval/retrieval_methods.py
+++ b/api/core/rag/retrieval/retrieval_methods.py
@ -5,7 +5,6 @@ class RetrievalMethod(Enum):
    SEMANTIC_SEARCH = "semantic_search"
    FULL_TEXT_SEARCH = "full_text_search"
    HYBRID_SEARCH = "hybrid_search"
-    KEYWORD_SEARCH = "keyword_search"

    @staticmethod
    def is_support_semantic_search(retrieval_method: str) -> bool:
--- a/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py
+++ b/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py
@ -262,7 +262,6 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository)
        self,
        workflow_run_id: str,
        order_config: Optional[OrderConfig] = None,
-        triggered_from: WorkflowNodeExecutionTriggeredFrom = WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN,
    ) -> Sequence[WorkflowNodeExecutionModel]:
        """
        Retrieve all WorkflowNodeExecution database models for a specific workflow run.
@ -284,7 +283,7 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository)
            stmt = select(WorkflowNodeExecutionModel).where(
                WorkflowNodeExecutionModel.workflow_run_id == workflow_run_id,
                WorkflowNodeExecutionModel.tenant_id == self._tenant_id,
-                WorkflowNodeExecutionModel.triggered_from == triggered_from,
+                WorkflowNodeExecutionModel.triggered_from == WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN,
            )

            if self._app_id:
@ -318,7 +317,6 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository)
        self,
        workflow_run_id: str,
        order_config: Optional[OrderConfig] = None,
-        triggered_from: WorkflowNodeExecutionTriggeredFrom = WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN,
    ) -> Sequence[WorkflowNodeExecution]:
        """
        Retrieve all NodeExecution instances for a specific workflow run.
@ -336,7 +334,7 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository)
            A list of NodeExecution instances
        """
        # Get the database models using the new method
-        db_models = self.get_db_models_by_workflow_run(workflow_run_id, order_config, triggered_from)
+        db_models = self.get_db_models_by_workflow_run(workflow_run_id, order_config)

        # Convert database models to domain models
        domain_models = []
--- a/api/core/tools/builtin_tool/providers/code/tools/simple_code.py
+++ b/api/core/tools/builtin_tool/providers/code/tools/simple_code.py
@ -4,7 +4,6 @@ from typing import Any, Optional
 from core.helper.code_executor.code_executor import CodeExecutor, CodeLanguage
 from core.tools.builtin_tool.tool import BuiltinTool
 from core.tools.entities.tool_entities import ToolInvokeMessage
-from core.tools.errors import ToolInvokeError


 class SimpleCode(BuiltinTool):
@ -26,8 +25,6 @@ class SimpleCode(BuiltinTool):
        if language not in {CodeLanguage.PYTHON3, CodeLanguage.JAVASCRIPT}:
            raise ValueError(f"Only python3 and javascript are supported, not {language}")

-        try:
-            result = CodeExecutor.execute_code(language, "", code)
-            yield self.create_text_message(result)
-        except Exception as e:
-            raise ToolInvokeError(str(e))
+        result = CodeExecutor.execute_code(language, "", code)
+
+        yield self.create_text_message(result)
--- a/api/core/variables/variables.py
+++ b/api/core/variables/variables.py
@ -1,8 +1,8 @@
 from collections.abc import Sequence
-from typing import Any, cast
+from typing import cast
 from uuid import uuid4

-from pydantic import BaseModel, Field
+from pydantic import Field

 from core.helper import encrypter

@ -93,32 +93,3 @@ class FileVariable(FileSegment, Variable):

 class ArrayFileVariable(ArrayFileSegment, ArrayVariable):
    pass
-
-
-class RAGPipelineVariable(BaseModel):
-    belong_to_node_id: str = Field(description="belong to which node id, shared means public")
-    type: str = Field(description="variable type, text-input, paragraph, select, number,  file, file-list")
-    label: str = Field(description="label")
-    description: str | None = Field(description="description", default="")
-    variable: str = Field(description="variable key", default="")
-    max_length: int | None = Field(
-        description="max length, applicable to text-input, paragraph, and file-list", default=0
-    )
-    default_value: Any = Field(description="default value", default="")
-    placeholder: str | None = Field(description="placeholder", default="")
-    unit: str | None = Field(description="unit, applicable to Number", default="")
-    tooltips: str | None = Field(description="helpful text", default="")
-    allowed_file_types: list[str] | None = Field(
-        description="image, document, audio, video, custom.", default_factory=list
-    )
-    allowed_file_extensions: list[str] | None = Field(description="e.g. ['.jpg', '.mp3']", default_factory=list)
-    allowed_file_upload_methods: list[str] | None = Field(
-        description="remote_url, local_file, tool_file.", default_factory=list
-    )
-    required: bool = Field(description="optional, default false", default=False)
-    options: list[str] | None = Field(default_factory=list)
-
-
-class RAGPipelineVariableInput(BaseModel):
-    variable: RAGPipelineVariable
-    value: Any
--- a/api/core/workflow/constants.py
+++ b/api/core/workflow/constants.py
@ -1,4 +1,3 @@
 SYSTEM_VARIABLE_NODE_ID = "sys"
 ENVIRONMENT_VARIABLE_NODE_ID = "env"
 CONVERSATION_VARIABLE_NODE_ID = "conversation"
-RAG_PIPELINE_VARIABLE_NODE_ID = "rag"
--- a/api/core/workflow/entities/variable_pool.py
+++ b/api/core/workflow/entities/variable_pool.py
@ -9,13 +9,7 @@ from core.file import File, FileAttribute, file_manager
 from core.variables import Segment, SegmentGroup, Variable
 from core.variables.consts import MIN_SELECTORS_LENGTH
 from core.variables.segments import FileSegment, NoneSegment
-from core.variables.variables import RAGPipelineVariableInput
-from core.workflow.constants import (
-    CONVERSATION_VARIABLE_NODE_ID,
-    ENVIRONMENT_VARIABLE_NODE_ID,
-    RAG_PIPELINE_VARIABLE_NODE_ID,
-    SYSTEM_VARIABLE_NODE_ID,
-)
+from core.workflow.constants import CONVERSATION_VARIABLE_NODE_ID, ENVIRONMENT_VARIABLE_NODE_ID, SYSTEM_VARIABLE_NODE_ID
 from core.workflow.enums import SystemVariableKey
 from factories import variable_factory

@ -50,10 +44,6 @@ class VariablePool(BaseModel):
        description="Conversation variables.",
        default_factory=list,
    )
-    rag_pipeline_variables: list[RAGPipelineVariableInput] = Field(
-        description="RAG pipeline variables.",
-        default_factory=list,
-    )

    def model_post_init(self, context: Any, /) -> None:
        for key, value in self.system_variables.items():
@ -64,9 +54,6 @@ class VariablePool(BaseModel):
        # Add conversation variables to the variable pool
        for var in self.conversation_variables:
            self.add((CONVERSATION_VARIABLE_NODE_ID, var.name), var)
-        # Add rag pipeline variables to the variable pool
-        for var in self.rag_pipeline_variables:
-            self.add((RAG_PIPELINE_VARIABLE_NODE_ID, var.variable.belong_to_node_id, var.variable.variable), var.value)

    def add(self, selector: Sequence[str], value: Any, /) -> None:
        """
--- a/api/core/workflow/entities/workflow_execution.py
+++ b/api/core/workflow/entities/workflow_execution.py
@ -20,7 +20,6 @@ class WorkflowType(StrEnum):

    WORKFLOW = "workflow"
    CHAT = "chat"
-    RAG_PIPELINE = "rag-pipeline"


 class WorkflowExecutionStatus(StrEnum):
--- a/api/core/workflow/entities/workflow_node_execution.py
+++ b/api/core/workflow/entities/workflow_node_execution.py
@ -28,7 +28,6 @@ class WorkflowNodeExecutionMetadataKey(StrEnum):
    AGENT_LOG = "agent_log"
    ITERATION_ID = "iteration_id"
    ITERATION_INDEX = "iteration_index"
-    DATASOURCE_INFO = "datasource_info"
    LOOP_ID = "loop_id"
    LOOP_INDEX = "loop_index"
    PARALLEL_ID = "parallel_id"
@ -67,21 +66,11 @@ class WorkflowNodeExecution(BaseModel):
    but they are not stored in the model.
    """

-    # --------- Core identification fields ---------
-
-    # Unique identifier for this execution record, used when persisting to storage.
-    # Value is a UUID string (e.g., '09b3e04c-f9ae-404c-ad82-290b8d7bd382').
-    id: str
-
-    # Optional secondary ID for cross-referencing purposes.
-    #
-    # NOTE: For referencing the persisted record, use `id` rather than `node_execution_id`.
-    # While `node_execution_id` may sometimes be a UUID string, this is not guaranteed.
-    # In most scenarios, `id` should be used as the primary identifier.
-    node_execution_id: Optional[str] = None
+    # Core identification fields
+    id: str  # Unique identifier for this execution record
+    node_execution_id: Optional[str] = None  # Optional secondary ID for cross-referencing
    workflow_id: str  # ID of the workflow this node belongs to
    workflow_execution_id: Optional[str] = None  # ID of the specific workflow run (null for single-step debugging)
-    # --------- Core identification fields ends ---------

    # Execution positioning and flow
    index: int  # Sequence number for ordering in trace visualization
--- a/api/core/workflow/enums.py
+++ b/api/core/workflow/enums.py
@ -14,10 +14,3 @@ class SystemVariableKey(StrEnum):
    APP_ID = "app_id"
    WORKFLOW_ID = "workflow_id"
    WORKFLOW_EXECUTION_ID = "workflow_run_id"
-    # RAG Pipeline
-    DOCUMENT_ID = "document_id"
-    BATCH = "batch"
-    DATASET_ID = "dataset_id"
-    DATASOURCE_TYPE = "datasource_type"
-    DATASOURCE_INFO = "datasource_info"
-    INVOKE_FROM = "invoke_from"
--- a/api/core/workflow/graph_engine/entities/graph.py
+++ b/api/core/workflow/graph_engine/entities/graph.py
@ -121,7 +121,6 @@ class Graph(BaseModel):
        # fetch nodes that have no predecessor node
        root_node_configs = []
        all_node_id_config_mapping: dict[str, dict] = {}
-
        for node_config in node_configs:
            node_id = node_config.get("id")
            if not node_id:
@ -142,7 +141,6 @@ class Graph(BaseModel):
                    node_config.get("id")
                    for node_config in root_node_configs
                    if node_config.get("data", {}).get("type", "") == NodeType.START.value
-                    or node_config.get("data", {}).get("type", "") == NodeType.DATASOURCE.value
                ),
                None,
            )
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
NFish	459f017de5	add build task for release/e-3.0.2	2025-07-11 16:11:41 +08:00
NFish	1458368047	fix: allow update plugin install settings (#22111 )	2025-07-11 16:07:39 +08:00
NFish	41f4eb044d	Merge branch 'fix/explore-tabs-change-failed' into fix/e-300	2025-06-30 17:45:59 +08:00
NFish	e979768949	fix: user cannot select 'Customer Service & Operations' category	2025-06-30 17:45:23 +08:00
NFish	745d67989b	fix: check user permission to show 'open in explore' menu item at app list popover	2025-06-30 17:34:13 +08:00
NFish	a4aed673c3	fix: update i18n 'Web application' to 'web app'	2025-06-30 15:31:02 +08:00
NFish	47e5e1bc69	add build task	2025-06-30 14:08:20 +08:00
NFish	c2ec9d7b9a	Merge branch 'fix/marketplace-source-link' into fix/e-300	2025-06-30 14:06:41 +08:00
NFish	384570ebfa	fix: Get marketplace URL with specific function, preserving all search params.	2025-06-30 14:05:41 +08:00