r2

Merge branch 'main' into feat/r2
2026-01-21 12:35:21 +08:00 · 2025-07-02 18:46:36 +08:00 · 2025-07-02 18:20:41 +08:00 · 2025-07-02 18:20:05 +08:00 · 2025-07-02 18:15:23 +08:00 · 2025-07-02 14:01:59 +08:00
1446 changed files with 44218 additions and 36097 deletions
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@ -8,13 +8,13 @@ body:
      label: Self Checks
      description: "To make sure we get to you in time, please check the following :)"
      options:
-        - label: I have read the [Contributing Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) and [Language Policy](https://github.com/langgenius/dify/issues/1542).
-          required: true
        - label: This is only for bug report, if you would like to ask a question, please head to [Discussions](https://github.com/langgenius/dify/discussions/categories/general).
          required: true
        - label: I have searched for existing issues [search for existing issues](https://github.com/langgenius/dify/issues), including closed ones.
          required: true
-        - label: I confirm that I am using English to submit this report, otherwise it will be closed.
+        - label: I confirm that I am using English to submit this report (我已阅读并同意 [Language Policy](https://github.com/langgenius/dify/issues/1542)).
+          required: true
+        - label: "[FOR CHINESE USERS] 请务必使用英文提交 Issue，否则会被关闭。谢谢！:)"
          required: true
        - label: "Please do not modify this template :) and fill in all the required fields."
          required: true
@ -42,22 +42,20 @@ body:
    attributes:
      label: Steps to reproduce
      description: We highly suggest including screenshots and a bug report log. Please use the right markdown syntax for code blocks.
-      placeholder: Having detailed steps helps us reproduce the bug. If you have logs, please use fenced code blocks (triple backticks ```) to format them.
+      placeholder: Having detailed steps helps us reproduce the bug.
    validations:
      required: true

  - type: textarea
    attributes:
      label: ✔️ Expected Behavior
-      description: Describe what you expected to happen.
-      placeholder: What were you expecting? Please do not copy and paste the steps to reproduce here.
+      placeholder: What were you expecting?
    validations:
-      required: true
+      required: false

  - type: textarea
    attributes:
      label: ❌ Actual Behavior
-      description: Describe what actually happened.
-      placeholder: What happened instead? Please do not copy and paste the steps to reproduce here.
+      placeholder: What happened instead?
    validations:
      required: false
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@ -1,11 +1,5 @@
 blank_issues_enabled: false
 contact_links:
-  - name: "\U0001F4A1 Model Providers & Plugins"
-    url: "https://github.com/langgenius/dify-official-plugins/issues/new/choose"
-    about: Report issues with official plugins or model providers, you will need to provide the plugin version and other relevant details.
-  - name: "\U0001F4AC Documentation Issues"
-    url: "https://github.com/langgenius/dify-docs/issues/new"
-    about: Report issues with the documentation, such as typos, outdated information, or missing content. Please provide the specific section and details of the issue.
  - name: "\U0001F4E7 Discussions"
    url: https://github.com/langgenius/dify/discussions/categories/general
-    about: General discussions and seek help from the community
+    about: General discussions and request help from the community
--- a/.github/ISSUE_TEMPLATE/document_issue.yml
+++ b/.github/ISSUE_TEMPLATE/document_issue.yml
@ -0,0 +1,24 @@
+name: "📚 Documentation Issue"
+description: Report issues in our documentation
+labels:
+  - documentation
+body:
+  - type: checkboxes
+    attributes:
+      label: Self Checks
+      description: "To make sure we get to you in time, please check the following :)"
+      options:
+        - label: I have searched for existing issues [search for existing issues](https://github.com/langgenius/dify/issues), including closed ones.
+          required: true
+        - label: I confirm that I am using English to submit this report (我已阅读并同意 [Language Policy](https://github.com/langgenius/dify/issues/1542)).
+          required: true
+        - label: "[FOR CHINESE USERS] 请务必使用英文提交 Issue，否则会被关闭。谢谢！:)"
+          required: true
+        - label: "Please do not modify this template :) and fill in all the required fields."
+          required: true
+  - type: textarea
+    attributes:
+      label: Provide a description of requested docs changes
+      placeholder: Briefly describe which document needs to be corrected and why.
+    validations:
+      required: true
--- a/.github/ISSUE_TEMPLATE/feature_request.yml
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@ -8,11 +8,11 @@ body:
      label: Self Checks
      description: "To make sure we get to you in time, please check the following :)"
      options:
-        - label: I have read the [Contributing Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) and [Language Policy](https://github.com/langgenius/dify/issues/1542).
-          required: true
        - label: I have searched for existing issues [search for existing issues](https://github.com/langgenius/dify/issues), including closed ones.
          required: true
-        - label: I confirm that I am using English to submit this report, otherwise it will be closed.
+        - label: I confirm that I am using English to submit this report (我已阅读并同意 [Language Policy](https://github.com/langgenius/dify/issues/1542)).
+          required: true
+        - label: "[FOR CHINESE USERS] 请务必使用英文提交 Issue，否则会被关闭。谢谢！:)"
          required: true
        - label: "Please do not modify this template :) and fill in all the required fields."
          required: true
--- a/.github/ISSUE_TEMPLATE/translation_issue.yml
+++ b/.github/ISSUE_TEMPLATE/translation_issue.yml
@ -0,0 +1,55 @@
+name: "🌐 Localization/Translation issue"
+description: Report incorrect translations. [please use English :)]
+labels:
+  - translation
+body:
+  - type: checkboxes
+    attributes:
+      label: Self Checks
+      description: "To make sure we get to you in time, please check the following :)"
+      options:
+        - label: I have searched for existing issues [search for existing issues](https://github.com/langgenius/dify/issues), including closed ones.
+          required: true
+        - label: I confirm that I am using English to submit this report (我已阅读并同意 [Language Policy](https://github.com/langgenius/dify/issues/1542)).
+          required: true
+        - label: "[FOR CHINESE USERS] 请务必使用英文提交 Issue，否则会被关闭。谢谢！:)"
+          required: true
+        - label: "Please do not modify this template :) and fill in all the required fields."
+          required: true
+  - type: input
+    attributes:
+      label: Dify version
+      description: Hover over system tray icon or look at Settings
+    validations:
+      required: true
+  - type: input
+    attributes:
+      label: Utility with translation issue
+      placeholder: Some area
+      description: Please input here the utility with the translation issue
+    validations:
+      required: true
+  - type: input
+    attributes:
+      label: 🌐 Language affected
+      placeholder: "German"
+    validations:
+      required: true
+  - type: textarea
+    attributes:
+      label: ❌ Actual phrase(s)
+      placeholder: What is there? Please include a screenshot as that is extremely helpful.
+    validations:
+      required: true
+  - type: textarea
+    attributes:
+      label: ✔️ Expected phrase(s)
+      placeholder: What was expected?
+    validations:
+      required: true
+  - type: textarea
+    attributes:
+      label: ℹ Why is the current translation wrong
+      placeholder: Why do you feel this is incorrect?
+    validations:
+      required: true
--- a/.github/workflows/build-push.yml
+++ b/.github/workflows/build-push.yml
@ -6,6 +6,7 @@ on:
      - "main"
      - "deploy/dev"
      - "deploy/enterprise"
+      - "deploy/rag-dev"
    tags:
      - "*"

--- a/.github/workflows/deploy-dev.yml
+++ b/.github/workflows/deploy-dev.yml
@ -4,7 +4,7 @@ on:
  workflow_run:
    workflows: ["Build and Push API & Web"]
    branches:
-      - "deploy/dev"
+      - "deploy/rag-dev"
    types:
      - completed

@ -12,12 +12,13 @@ jobs:
  deploy:
    runs-on: ubuntu-latest
    if: |
-      github.event.workflow_run.conclusion == 'success'
+      github.event.workflow_run.conclusion == 'success' &&
+      github.event.workflow_run.head_branch == 'deploy/rag-dev'
    steps:
      - name: Deploy to server
        uses: appleboy/ssh-action@v0.1.8
        with:
-          host: ${{ secrets.SSH_HOST }}
+          host: ${{ secrets.RAG_SSH_HOST }}
          username: ${{ secrets.SSH_USER }}
          key: ${{ secrets.SSH_PRIVATE_KEY }}
          script: |
--- a/README.md
+++ b/README.md
@ -54,7 +54,7 @@
  <a href="./README_BN.md"><img alt="README in বাংলা" src="https://img.shields.io/badge/বাংলা-d9d9d9"></a>
 </p>

-Dify is an open-source platform for developing LLM applications. Its intuitive interface combines agentic AI workflows, RAG pipelines, agent capabilities, model management, observability features, and more—allowing you to quickly move from prototype to production.
+Dify is an open-source LLM app development platform. Its intuitive interface combines agentic AI workflow, RAG pipeline, agent capabilities, model management, observability features, and more, allowing you to quickly move from prototype to production.

 ## Quick start

@ -65,7 +65,7 @@ Dify is an open-source platform for developing LLM applications. Its intuitive i

 </br>

-The easiest way to start the Dify server is through [Docker Compose](docker/docker-compose.yaml). Before running Dify with the following commands, make sure that [Docker](https://docs.docker.com/get-docker/) and [Docker Compose](https://docs.docker.com/compose/install/) are installed on your machine:
+The easiest way to start the Dify server is through [docker compose](docker/docker-compose.yaml). Before running Dify with the following commands, make sure that [Docker](https://docs.docker.com/get-docker/) and [Docker Compose](https://docs.docker.com/compose/install/) are installed on your machine:

 ```bash
 cd dify
@ -205,7 +205,6 @@ If you'd like to configure a highly-available setup, there are community-contrib
 - [Helm Chart by @magicsong](https://github.com/magicsong/ai-charts)
 - [YAML file by @Winson-030](https://github.com/Winson-030/dify-kubernetes)
 - [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s)
- [🚀 NEW! YAML files (Supports Dify v1.6.0) by @Zhoneym](https://github.com/Zhoneym/DifyAI-Kubernetes)

 #### Using Terraform for Deployment

@ -262,8 +261,8 @@ At the same time, please consider supporting Dify by sharing it on social media

 ## Security disclosure

-To protect your privacy, please avoid posting security issues on GitHub. Instead, report issues to security@dify.ai, and our team will respond with detailed answer.
+To protect your privacy, please avoid posting security issues on GitHub. Instead, send your questions to security@dify.ai and we will provide you with a more detailed answer.

 ## License

-This repository is licensed under the [Dify Open Source License](LICENSE), based on Apache 2.0 with additional conditions.
+This repository is available under the [Dify Open Source License](LICENSE), which is essentially Apache 2.0 with a few additional restrictions.
--- a/README_AR.md
+++ b/README_AR.md
@ -188,7 +188,6 @@ docker compose up -d
 - [رسم بياني Helm من قبل @magicsong](https://github.com/magicsong/ai-charts)
 - [ملف YAML من قبل @Winson-030](https://github.com/Winson-030/dify-kubernetes)
 - [ملف YAML من قبل @wyy-holding](https://github.com/wyy-holding/dify-k8s)
- [🚀 جديد! ملفات YAML (تدعم Dify v1.6.0) بواسطة @Zhoneym](https://github.com/Zhoneym/DifyAI-Kubernetes)

 #### استخدام Terraform للتوزيع

--- a/README_BN.md
+++ b/README_BN.md
@ -204,8 +204,6 @@ GitHub-এ ডিফাইকে স্টার দিয়ে রাখুন
 - [Helm Chart by @magicsong](https://github.com/magicsong/ai-charts)
 - [YAML file by @Winson-030](https://github.com/Winson-030/dify-kubernetes)
 - [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s)
- [🚀 নতুন! YAML ফাইলসমূহ (Dify v1.6.0 সমর্থিত) তৈরি করেছেন @Zhoneym](https://github.com/Zhoneym/DifyAI-Kubernetes)
-

 #### টেরাফর্ম ব্যবহার করে ডিপ্লয়

--- a/README_CN.md
+++ b/README_CN.md
@ -194,9 +194,9 @@ docker compose up -d

 如果您需要自定义配置，请参考 [.env.example](docker/.env.example) 文件中的注释，并更新 `.env` 文件中对应的值。此外，您可能需要根据您的具体部署环境和需求对 `docker-compose.yaml` 文件本身进行调整，例如更改镜像版本、端口映射或卷挂载。完成任何更改后，请重新运行 `docker-compose up -d`。您可以在[此处](https://docs.dify.ai/getting-started/install-self-hosted/environments)找到可用环境变量的完整列表。

-#### 使用 Helm Chart 或 Kubernetes 资源清单（YAML）部署
+#### 使用 Helm Chart 部署

-使用 [Helm Chart](https://helm.sh/) 版本或者 Kubernetes 资源清单（YAML），可以在 Kubernetes 上部署 Dify。
+使用 [Helm Chart](https://helm.sh/) 版本或者 YAML 文件，可以在 Kubernetes 上部署 Dify。

 - [Helm Chart by @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify)
 - [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
@ -204,10 +204,6 @@ docker compose up -d
 - [YAML 文件 by @Winson-030](https://github.com/Winson-030/dify-kubernetes)
 - [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s)

- [🚀 NEW! YAML 文件 (支持 Dify v1.6.0) by @Zhoneym](https://github.com/Zhoneym/DifyAI-Kubernetes)
-
-
-
 #### 使用 Terraform 部署

 使用 [terraform](https://www.terraform.io/) 一键将 Dify 部署到云平台
--- a/README_DE.md
+++ b/README_DE.md
@ -203,7 +203,6 @@ Falls Sie eine hochverfügbare Konfiguration einrichten möchten, gibt es von de
 - [Helm Chart by @magicsong](https://github.com/magicsong/ai-charts)
 - [YAML file by @Winson-030](https://github.com/Winson-030/dify-kubernetes)
 - [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s)
- [🚀 NEW! YAML files (Supports Dify v1.6.0) by @Zhoneym](https://github.com/Zhoneym/DifyAI-Kubernetes)

 #### Terraform für die Bereitstellung verwenden

--- a/README_ES.md
+++ b/README_ES.md
@ -203,7 +203,6 @@ Si desea configurar una configuración de alta disponibilidad, la comunidad prop
 - [Gráfico Helm por @magicsong](https://github.com/magicsong/ai-charts)
 - [Ficheros YAML por @Winson-030](https://github.com/Winson-030/dify-kubernetes)
 - [Ficheros YAML por @wyy-holding](https://github.com/wyy-holding/dify-k8s)
- [🚀 ¡NUEVO! Archivos YAML (compatible con Dify v1.6.0) por @Zhoneym](https://github.com/Zhoneym/DifyAI-Kubernetes)

 #### Uso de Terraform para el despliegue

--- a/README_FR.md
+++ b/README_FR.md
@ -201,7 +201,6 @@ Si vous souhaitez configurer une configuration haute disponibilité, la communau
 - [Helm Chart par @magicsong](https://github.com/magicsong/ai-charts)
 - [Fichier YAML par @Winson-030](https://github.com/Winson-030/dify-kubernetes)
 - [Fichier YAML par @wyy-holding](https://github.com/wyy-holding/dify-k8s)
- [🚀 NOUVEAU ! Fichiers YAML (compatible avec Dify v1.6.0) par @Zhoneym](https://github.com/Zhoneym/DifyAI-Kubernetes)

 #### Utilisation de Terraform pour le déploiement

--- a/README_JA.md
+++ b/README_JA.md
@ -202,7 +202,6 @@ docker compose up -d
 - [Helm Chart by @magicsong](https://github.com/magicsong/ai-charts)
 - [YAML file by @Winson-030](https://github.com/Winson-030/dify-kubernetes)
 - [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s)
- [🚀 新着！YAML ファイル（Dify v1.6.0 対応）by @Zhoneym](https://github.com/Zhoneym/DifyAI-Kubernetes)

 #### Terraformを使用したデプロイ

--- a/README_KL.md
+++ b/README_KL.md
@ -201,7 +201,6 @@ If you'd like to configure a highly-available setup, there are community-contrib
 - [Helm Chart by @magicsong](https://github.com/magicsong/ai-charts)
 - [YAML file by @Winson-030](https://github.com/Winson-030/dify-kubernetes)
 - [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s)
- [🚀 NEW! YAML files (Supports Dify v1.6.0) by @Zhoneym](https://github.com/Zhoneym/DifyAI-Kubernetes)

 #### Terraform atorlugu pilersitsineq

--- a/README_KR.md
+++ b/README_KR.md
@ -195,7 +195,6 @@ Dify를 Kubernetes에 배포하고 프리미엄 스케일링 설정을 구성했
 - [Helm Chart by @magicsong](https://github.com/magicsong/ai-charts)
 - [YAML file by @Winson-030](https://github.com/Winson-030/dify-kubernetes)
 - [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s)
- [🚀 NEW! YAML files (Supports Dify v1.6.0) by @Zhoneym](https://github.com/Zhoneym/DifyAI-Kubernetes)

 #### Terraform을 사용한 배포

--- a/README_PT.md
+++ b/README_PT.md
@ -200,7 +200,6 @@ Se deseja configurar uma instalação de alta disponibilidade, há [Helm Charts]
 - [Helm Chart de @magicsong](https://github.com/magicsong/ai-charts)
 - [Arquivo YAML por @Winson-030](https://github.com/Winson-030/dify-kubernetes)
 - [Arquivo YAML por @wyy-holding](https://github.com/wyy-holding/dify-k8s)
- [🚀 NOVO! Arquivos YAML (Compatível com Dify v1.6.0) por @Zhoneym](https://github.com/Zhoneym/DifyAI-Kubernetes)

 #### Usando o Terraform para Implantação

--- a/README_SI.md
+++ b/README_SI.md
@ -201,7 +201,6 @@ Star Dify on GitHub and be instantly notified of new releases.
 - [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
 - [YAML file by @Winson-030](https://github.com/Winson-030/dify-kubernetes)
 - [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s)
- [🚀 NEW! YAML files (Supports Dify v1.6.0) by @Zhoneym](https://github.com/Zhoneym/DifyAI-Kubernetes)

 #### Uporaba Terraform za uvajanje

--- a/README_TR.md
+++ b/README_TR.md
@ -194,7 +194,6 @@ Yüksek kullanılabilirliğe sahip bir kurulum yapılandırmak isterseniz, Dify'
 - [@BorisPolonsky tarafından Helm Chart](https://github.com/BorisPolonsky/dify-helm)
 - [@Winson-030 tarafından YAML dosyası](https://github.com/Winson-030/dify-kubernetes)
 - [@wyy-holding tarafından YAML dosyası](https://github.com/wyy-holding/dify-k8s)
- [🚀 YENİ! YAML dosyaları (Dify v1.6.0 destekli) @Zhoneym tarafından](https://github.com/Zhoneym/DifyAI-Kubernetes)

 #### Dağıtım için Terraform Kullanımı

--- a/README_TW.md
+++ b/README_TW.md
@ -197,13 +197,12 @@ Dify 的所有功能都提供相應的 API，因此您可以輕鬆地將 Dify

 如果您需要自定義配置，請參考我們的 [.env.example](docker/.env.example) 文件中的註釋，並在您的 `.env` 文件中更新相應的值。此外，根據您特定的部署環境和需求，您可能需要調整 `docker-compose.yaml` 文件本身，例如更改映像版本、端口映射或卷掛載。進行任何更改後，請重新運行 `docker-compose up -d`。您可以在[這裡](https://docs.dify.ai/getting-started/install-self-hosted/environments)找到可用環境變數的完整列表。

-如果您想配置高可用性設置，社區貢獻的 [Helm Charts](https://helm.sh/) 和 Kubernetes 資源清單（YAML）允許在 Kubernetes 上部署 Dify。
+如果您想配置高可用性設置，社區貢獻的 [Helm Charts](https://helm.sh/) 和 YAML 文件允許在 Kubernetes 上部署 Dify。

 - [由 @LeoQuote 提供的 Helm Chart](https://github.com/douban/charts/tree/master/charts/dify)
 - [由 @BorisPolonsky 提供的 Helm Chart](https://github.com/BorisPolonsky/dify-helm)
 - [由 @Winson-030 提供的 YAML 文件](https://github.com/Winson-030/dify-kubernetes)
 - [由 @wyy-holding 提供的 YAML 文件](https://github.com/wyy-holding/dify-k8s)
- [🚀 NEW! YAML 檔案（支援 Dify v1.6.0）by @Zhoneym](https://github.com/Zhoneym/DifyAI-Kubernetes)

 ### 使用 Terraform 進行部署

--- a/README_VI.md
+++ b/README_VI.md
@ -196,7 +196,6 @@ Nếu bạn muốn cấu hình một cài đặt có độ sẵn sàng cao, có
 - [Helm Chart bởi @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
 - [Tệp YAML bởi @Winson-030](https://github.com/Winson-030/dify-kubernetes)
 - [Tệp YAML bởi @wyy-holding](https://github.com/wyy-holding/dify-k8s)
- [🚀 MỚI! Tệp YAML (Hỗ trợ Dify v1.6.0) bởi @Zhoneym](https://github.com/Zhoneym/DifyAI-Kubernetes)

 #### Sử dụng Terraform để Triển khai

--- a/api/.env.example
+++ b/api/.env.example
@ -17,11 +17,6 @@ APP_WEB_URL=http://127.0.0.1:3000
 # Files URL
 FILES_URL=http://127.0.0.1:5001

-# INTERNAL_FILES_URL is used for plugin daemon communication within Docker network.
-# Set this to the internal Docker service URL for proper plugin file access.
-# Example: INTERNAL_FILES_URL=http://api:5001
-INTERNAL_FILES_URL=http://127.0.0.1:5001
-
 # The time in seconds after the signature is rejected
 FILES_ACCESS_TIMEOUT=300

@ -449,19 +444,6 @@ MAX_VARIABLE_SIZE=204800
 # hybrid: Save new data to object storage, read from both object storage and RDBMS
 WORKFLOW_NODE_EXECUTION_STORAGE=rdbms

-# Repository configuration
-# Core workflow execution repository implementation
-CORE_WORKFLOW_EXECUTION_REPOSITORY=core.repositories.sqlalchemy_workflow_execution_repository.SQLAlchemyWorkflowExecutionRepository
-
-# Core workflow node execution repository implementation
-CORE_WORKFLOW_NODE_EXECUTION_REPOSITORY=core.repositories.sqlalchemy_workflow_node_execution_repository.SQLAlchemyWorkflowNodeExecutionRepository
-
-# API workflow node execution repository implementation
-API_WORKFLOW_NODE_EXECUTION_REPOSITORY=repositories.sqlalchemy_api_workflow_node_execution_repository.DifyAPISQLAlchemyWorkflowNodeExecutionRepository
-
-# API workflow run repository implementation
-API_WORKFLOW_RUN_REPOSITORY=repositories.sqlalchemy_api_workflow_run_repository.DifyAPISQLAlchemyWorkflowRunRepository
-
 # App configuration
 APP_MAX_EXECUTION_TIME=1200
 APP_MAX_ACTIVE_REQUESTS=0
--- a/api/app.py
+++ b/api/app.py
@ -1,4 +1,3 @@
-import os
 import sys


@ -17,20 +16,20 @@ else:
    # It seems that JetBrains Python debugger does not work well with gevent,
    # so we need to disable gevent in debug mode.
    # If you are using debugpy and set GEVENT_SUPPORT=True, you can debug with gevent.
-    if (flask_debug := os.environ.get("FLASK_DEBUG", "0")) and flask_debug.lower() in {"false", "0", "no"}:
-        from gevent import monkey
+    # if (flask_debug := os.environ.get("FLASK_DEBUG", "0")) and flask_debug.lower() in {"false", "0", "no"}:
+    # from gevent import monkey
+    #
+    # # gevent
+    # monkey.patch_all()
+    #
+    # from grpc.experimental import gevent as grpc_gevent  # type: ignore
+    #
+    # # grpc gevent
+    # grpc_gevent.init_gevent()

-        # gevent
-        monkey.patch_all()
-
-        from grpc.experimental import gevent as grpc_gevent  # type: ignore
-
-        # grpc gevent
-        grpc_gevent.init_gevent()
-
-        import psycogreen.gevent  # type: ignore
-
-        psycogreen.gevent.patch_psycopg()
+    # import psycogreen.gevent  # type: ignore
+    #
+    # psycogreen.gevent.patch_psycopg()

    from app_factory import create_app

--- a/api/configs/feature/__init__.py
+++ b/api/configs/feature/__init__.py
@ -237,13 +237,6 @@ class FileAccessConfig(BaseSettings):
        default="",
    )

-    INTERNAL_FILES_URL: str = Field(
-        description="Internal base URL for file access within Docker network,"
-        " used for plugin daemon and internal service communication."
-        " Falls back to FILES_URL if not specified.",
-        default="",
-    )
-
    FILES_ACCESS_TIMEOUT: int = Field(
        description="Expiration time in seconds for file access URLs",
        default=300,
@ -537,33 +530,6 @@ class WorkflowNodeExecutionConfig(BaseSettings):
    )


-class RepositoryConfig(BaseSettings):
-    """
-    Configuration for repository implementations
-    """
-
-    CORE_WORKFLOW_EXECUTION_REPOSITORY: str = Field(
-        description="Repository implementation for WorkflowExecution. Specify as a module path",
-        default="core.repositories.sqlalchemy_workflow_execution_repository.SQLAlchemyWorkflowExecutionRepository",
-    )
-
-    CORE_WORKFLOW_NODE_EXECUTION_REPOSITORY: str = Field(
-        description="Repository implementation for WorkflowNodeExecution. Specify as a module path",
-        default="core.repositories.sqlalchemy_workflow_node_execution_repository.SQLAlchemyWorkflowNodeExecutionRepository",
-    )
-
-    API_WORKFLOW_NODE_EXECUTION_REPOSITORY: str = Field(
-        description="Service-layer repository implementation for WorkflowNodeExecutionModel operations. "
-        "Specify as a module path",
-        default="repositories.sqlalchemy_api_workflow_node_execution_repository.DifyAPISQLAlchemyWorkflowNodeExecutionRepository",
-    )
-
-    API_WORKFLOW_RUN_REPOSITORY: str = Field(
-        description="Service-layer repository implementation for WorkflowRun operations. Specify as a module path",
-        default="repositories.sqlalchemy_api_workflow_run_repository.DifyAPISQLAlchemyWorkflowRunRepository",
-    )
-
-
 class AuthConfig(BaseSettings):
    """
    Configuration for authentication and OAuth
@ -930,7 +896,6 @@ class FeatureConfig(
    MultiModalTransferConfig,
    PositionConfig,
    RagEtlConfig,
-    RepositoryConfig,
    SecurityConfig,
    ToolConfig,
    UpdateConfig,
--- a/api/configs/feature/hosted_service/__init__.py
+++ b/api/configs/feature/hosted_service/__init__.py
@ -222,11 +222,28 @@ class HostedFetchAppTemplateConfig(BaseSettings):
    )


+class HostedFetchPipelineTemplateConfig(BaseSettings):
+    """
+    Configuration for fetching pipeline templates
+    """
+
+    HOSTED_FETCH_PIPELINE_TEMPLATES_MODE: str = Field(
+        description="Mode for fetching pipeline templates: remote, db, or builtin default to remote,",
+        default="database",
+    )
+
+    HOSTED_FETCH_PIPELINE_TEMPLATES_REMOTE_DOMAIN: str = Field(
+        description="Domain for fetching remote pipeline templates",
+        default="https://tmpl.dify.ai",
+    )
+
+
 class HostedServiceConfig(
    # place the configs in alphabet order
    HostedAnthropicConfig,
    HostedAzureOpenAiConfig,
    HostedFetchAppTemplateConfig,
+    HostedFetchPipelineTemplateConfig,
    HostedMinmaxConfig,
    HostedOpenAiConfig,
    HostedSparkConfig,
--- a/api/configs/middleware/__init__.py
+++ b/api/configs/middleware/__init__.py
@ -162,11 +162,6 @@ class DatabaseConfig(BaseSettings):
        default=3600,
    )

-    SQLALCHEMY_POOL_USE_LIFO: bool = Field(
-        description="If True, SQLAlchemy will use last-in-first-out way to retrieve connections from pool.",
-        default=False,
-    )
-
    SQLALCHEMY_POOL_PRE_PING: bool = Field(
        description="If True, enables connection pool pre-ping feature to check connections.",
        default=False,
@ -204,7 +199,6 @@ class DatabaseConfig(BaseSettings):
            "pool_recycle": self.SQLALCHEMY_POOL_RECYCLE,
            "pool_pre_ping": self.SQLALCHEMY_POOL_PRE_PING,
            "connect_args": connect_args,
-            "pool_use_lifo": self.SQLALCHEMY_POOL_USE_LIFO,
        }


--- a/api/contexts/__init__.py
+++ b/api/contexts/__init__.py
@ -3,6 +3,7 @@ from threading import Lock
 from typing import TYPE_CHECKING

 from contexts.wrapper import RecyclableContextVar
+from core.datasource.__base.datasource_provider import DatasourcePluginProviderController

 if TYPE_CHECKING:
    from core.model_runtime.entities.model_entities import AIModelEntity
@ -33,3 +34,11 @@ plugin_model_schema_lock: RecyclableContextVar[Lock] = RecyclableContextVar(Cont
 plugin_model_schemas: RecyclableContextVar[dict[str, "AIModelEntity"]] = RecyclableContextVar(
    ContextVar("plugin_model_schemas")
 )
+
+datasource_plugin_providers: RecyclableContextVar[dict[str, "DatasourcePluginProviderController"]] = (
+    RecyclableContextVar(ContextVar("datasource_plugin_providers"))
+)
+
+datasource_plugin_providers_lock: RecyclableContextVar[Lock] = RecyclableContextVar(
+    ContextVar("datasource_plugin_providers_lock")
+)
--- a/api/controllers/console/__init__.py
+++ b/api/controllers/console/__init__.py
@ -56,7 +56,6 @@ from .app import (
    conversation,
    conversation_variables,
    generator,
-    mcp_server,
    message,
    model_config,
    ops_trace,
@ -77,7 +76,6 @@ from .billing import billing, compliance

 # Import datasets controllers
 from .datasets import (
-    data_source,
    datasets,
    datasets_document,
    datasets_segments,
@ -86,6 +84,14 @@ from .datasets import (
    metadata,
    website,
 )
+from .datasets.rag_pipeline import (
+    datasource_auth,
+    datasource_content_preview,
+    rag_pipeline,
+    rag_pipeline_datasets,
+    rag_pipeline_import,
+    rag_pipeline_workflow,
+)

 # Import explore controllers
 from .explore import (
--- a/api/controllers/console/app/app.py
+++ b/api/controllers/console/app/app.py
@ -151,7 +151,6 @@ class AppApi(Resource):
        parser.add_argument("icon", type=str, location="json")
        parser.add_argument("icon_background", type=str, location="json")
        parser.add_argument("use_icon_as_answer_icon", type=bool, location="json")
-        parser.add_argument("max_active_requests", type=int, location="json")
        args = parser.parse_args()

        app_service = AppService()
--- a/api/controllers/console/app/audio.py
+++ b/api/controllers/console/app/audio.py
@ -90,11 +90,23 @@ class ChatMessageTextApi(Resource):

            message_id = args.get("message_id", None)
            text = args.get("text", None)
-            voice = args.get("voice", None)
-
-            response = AudioService.transcript_tts(
-                app_model=app_model, text=text, voice=voice, message_id=message_id, is_draft=True
-            )
+            if (
+                app_model.mode in {AppMode.ADVANCED_CHAT.value, AppMode.WORKFLOW.value}
+                and app_model.workflow
+                and app_model.workflow.features_dict
+            ):
+                text_to_speech = app_model.workflow.features_dict.get("text_to_speech")
+                if text_to_speech is None:
+                    raise ValueError("TTS is not enabled")
+                voice = args.get("voice") or text_to_speech.get("voice")
+            else:
+                try:
+                    if app_model.app_model_config is None:
+                        raise ValueError("AppModelConfig not found")
+                    voice = args.get("voice") or app_model.app_model_config.text_to_speech_dict.get("voice")
+                except Exception:
+                    voice = None
+            response = AudioService.transcript_tts(app_model=app_model, text=text, message_id=message_id, voice=voice)
            return response
        except services.errors.app_model_config.AppModelConfigBrokenError:
            logging.exception("App model config broken.")
--- a/api/controllers/console/app/mcp_server.py
+++ b/api/controllers/console/app/mcp_server.py
@ -1,119 +0,0 @@
-import json
-from enum import StrEnum
-
-from flask_login import current_user
-from flask_restful import Resource, marshal_with, reqparse
-from werkzeug.exceptions import NotFound
-
-from controllers.console import api
-from controllers.console.app.wraps import get_app_model
-from controllers.console.wraps import account_initialization_required, setup_required
-from extensions.ext_database import db
-from fields.app_fields import app_server_fields
-from libs.login import login_required
-from models.model import AppMCPServer
-
-
-class AppMCPServerStatus(StrEnum):
-    ACTIVE = "active"
-    INACTIVE = "inactive"
-
-
-class AppMCPServerController(Resource):
-    @setup_required
-    @login_required
-    @account_initialization_required
-    @get_app_model
-    @marshal_with(app_server_fields)
-    def get(self, app_model):
-        server = db.session.query(AppMCPServer).filter(AppMCPServer.app_id == app_model.id).first()
-        return server
-
-    @setup_required
-    @login_required
-    @account_initialization_required
-    @get_app_model
-    @marshal_with(app_server_fields)
-    def post(self, app_model):
-        if not current_user.is_editor:
-            raise NotFound()
-        parser = reqparse.RequestParser()
-        parser.add_argument("description", type=str, required=False, location="json")
-        parser.add_argument("parameters", type=dict, required=True, location="json")
-        args = parser.parse_args()
-
-        description = args.get("description")
-        if not description:
-            description = app_model.description or ""
-
-        server = AppMCPServer(
-            name=app_model.name,
-            description=description,
-            parameters=json.dumps(args["parameters"], ensure_ascii=False),
-            status=AppMCPServerStatus.ACTIVE,
-            app_id=app_model.id,
-            tenant_id=current_user.current_tenant_id,
-            server_code=AppMCPServer.generate_server_code(16),
-        )
-        db.session.add(server)
-        db.session.commit()
-        return server
-
-    @setup_required
-    @login_required
-    @account_initialization_required
-    @get_app_model
-    @marshal_with(app_server_fields)
-    def put(self, app_model):
-        if not current_user.is_editor:
-            raise NotFound()
-        parser = reqparse.RequestParser()
-        parser.add_argument("id", type=str, required=True, location="json")
-        parser.add_argument("description", type=str, required=False, location="json")
-        parser.add_argument("parameters", type=dict, required=True, location="json")
-        parser.add_argument("status", type=str, required=False, location="json")
-        args = parser.parse_args()
-        server = db.session.query(AppMCPServer).filter(AppMCPServer.id == args["id"]).first()
-        if not server:
-            raise NotFound()
-
-        description = args.get("description")
-        if description is None:
-            pass
-        elif not description:
-            server.description = app_model.description or ""
-        else:
-            server.description = description
-
-        server.parameters = json.dumps(args["parameters"], ensure_ascii=False)
-        if args["status"]:
-            if args["status"] not in [status.value for status in AppMCPServerStatus]:
-                raise ValueError("Invalid status")
-            server.status = args["status"]
-        db.session.commit()
-        return server
-
-
-class AppMCPServerRefreshController(Resource):
-    @setup_required
-    @login_required
-    @account_initialization_required
-    @marshal_with(app_server_fields)
-    def get(self, server_id):
-        if not current_user.is_editor:
-            raise NotFound()
-        server = (
-            db.session.query(AppMCPServer)
-            .filter(AppMCPServer.id == server_id)
-            .filter(AppMCPServer.tenant_id == current_user.current_tenant_id)
-            .first()
-        )
-        if not server:
-            raise NotFound()
-        server.server_code = AppMCPServer.generate_server_code(16)
-        db.session.commit()
-        return server
-
-
-api.add_resource(AppMCPServerController, "/apps/<uuid:app_id>/server")
-api.add_resource(AppMCPServerRefreshController, "/apps/<uuid:server_id>/server/refresh")
--- a/api/controllers/console/app/statistic.py
+++ b/api/controllers/console/app/statistic.py
@ -2,7 +2,6 @@ from datetime import datetime
 from decimal import Decimal

 import pytz
-import sqlalchemy as sa
 from flask import jsonify
 from flask_login import current_user
 from flask_restful import Resource, reqparse
@ -10,11 +9,10 @@ from flask_restful import Resource, reqparse
 from controllers.console import api
 from controllers.console.app.wraps import get_app_model
 from controllers.console.wraps import account_initialization_required, setup_required
-from core.app.entities.app_invoke_entities import InvokeFrom
 from extensions.ext_database import db
 from libs.helper import DatetimeString
 from libs.login import login_required
-from models import AppMode, Message
+from models.model import AppMode


 class DailyMessageStatistic(Resource):
@ -87,41 +85,46 @@ class DailyConversationStatistic(Resource):
        parser.add_argument("end", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
        args = parser.parse_args()

+        sql_query = """SELECT
+    DATE(DATE_TRUNC('day', created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date,
+    COUNT(DISTINCT messages.conversation_id) AS conversation_count
+FROM
+    messages
+WHERE
+    app_id = :app_id"""
+        arg_dict = {"tz": account.timezone, "app_id": app_model.id}
+
        timezone = pytz.timezone(account.timezone)
        utc_timezone = pytz.utc

-        stmt = (
-            sa.select(
-                sa.func.date(
-                    sa.func.date_trunc("day", sa.text("created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz"))
-                ).label("date"),
-                sa.func.count(sa.distinct(Message.conversation_id)).label("conversation_count"),
-            )
-            .select_from(Message)
-            .where(Message.app_id == app_model.id, Message.invoke_from != InvokeFrom.DEBUGGER.value)
-        )
-
        if args["start"]:
            start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M")
            start_datetime = start_datetime.replace(second=0)
+
            start_datetime_timezone = timezone.localize(start_datetime)
            start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone)
-            stmt = stmt.where(Message.created_at >= start_datetime_utc)
+
+            sql_query += " AND created_at >= :start"
+            arg_dict["start"] = start_datetime_utc

        if args["end"]:
            end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M")
            end_datetime = end_datetime.replace(second=0)
+
            end_datetime_timezone = timezone.localize(end_datetime)
            end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone)
-            stmt = stmt.where(Message.created_at < end_datetime_utc)

-        stmt = stmt.group_by("date").order_by("date")
+            sql_query += " AND created_at < :end"
+            arg_dict["end"] = end_datetime_utc
+
+        sql_query += " GROUP BY date ORDER BY date"

        response_data = []
+
        with db.engine.begin() as conn:
-            rs = conn.execute(stmt, {"tz": account.timezone})
-            for row in rs:
-                response_data.append({"date": str(row.date), "conversation_count": row.conversation_count})
+            rs = conn.execute(db.text(sql_query), arg_dict)
+            for i in rs:
+                response_data.append({"date": str(i.date), "conversation_count": i.conversation_count})

        return jsonify({"data": response_data})

--- a/api/controllers/console/app/workflow_draft_variable.py
+++ b/api/controllers/console/app/workflow_draft_variable.py
@ -68,18 +68,13 @@ def _create_pagination_parser():
    return parser


-def _serialize_variable_type(workflow_draft_var: WorkflowDraftVariable) -> str:
-    value_type = workflow_draft_var.value_type
-    return value_type.exposed_type().value
-
-
 _WORKFLOW_DRAFT_VARIABLE_WITHOUT_VALUE_FIELDS = {
    "id": fields.String,
    "type": fields.String(attribute=lambda model: model.get_variable_type()),
    "name": fields.String,
    "description": fields.String,
    "selector": fields.List(fields.String, attribute=lambda model: model.get_selector()),
-    "value_type": fields.String(attribute=_serialize_variable_type),
+    "value_type": fields.String,
    "edited": fields.Boolean(attribute=lambda model: model.edited),
    "visible": fields.Boolean,
 }
@ -95,7 +90,7 @@ _WORKFLOW_DRAFT_ENV_VARIABLE_FIELDS = {
    "name": fields.String,
    "description": fields.String,
    "selector": fields.List(fields.String, attribute=lambda model: model.get_selector()),
-    "value_type": fields.String(attribute=_serialize_variable_type),
+    "value_type": fields.String,
    "edited": fields.Boolean(attribute=lambda model: model.edited),
    "visible": fields.Boolean,
 }
@ -401,7 +396,7 @@ class EnvironmentVariableCollectionApi(Resource):
                    "name": v.name,
                    "description": v.description,
                    "selector": v.selector,
-                    "value_type": v.value_type.exposed_type().value,
+                    "value_type": v.value_type.value,
                    "value": v.value,
                    # Do not track edited for env vars.
                    "edited": False,
--- a/api/controllers/console/app/wraps.py
+++ b/api/controllers/console/app/wraps.py
@ -35,6 +35,8 @@ def get_app_model(view: Optional[Callable] = None, *, mode: Union[AppMode, list[
                raise AppNotFoundError()

            app_mode = AppMode.value_of(app_model.mode)
+            if app_mode == AppMode.CHANNEL:
+                raise AppNotFoundError()

            if mode is not None:
                if isinstance(mode, list):
--- a/api/controllers/console/datasets/datasets.py
+++ b/api/controllers/console/datasets/datasets.py
@ -283,6 +283,15 @@ class DatasetApi(Resource):
            location="json",
            help="Invalid external knowledge api id.",
        )
+
+        parser.add_argument(
+            "icon_info",
+            type=dict,
+            required=False,
+            nullable=True,
+            location="json",
+            help="Invalid icon info.",
+        )
        args = parser.parse_args()
        data = request.get_json()

--- a/api/controllers/console/datasets/datasets_document.py
+++ b/api/controllers/console/datasets/datasets_document.py
@ -1,3 +1,4 @@
+import json
 import logging
 from argparse import ArgumentTypeError
 from datetime import UTC, datetime
@ -51,6 +52,7 @@ from fields.document_fields import (
 )
 from libs.login import login_required
 from models import Dataset, DatasetProcessRule, Document, DocumentSegment, UploadFile
+from models.dataset import DocumentPipelineExecutionLog
 from services.dataset_service import DatasetService, DocumentService
 from services.entities.knowledge_entities.knowledge_entities import KnowledgeConfig

@ -661,7 +663,7 @@ class DocumentDetailApi(DocumentResource):
            response = {"id": document.id, "doc_type": document.doc_type, "doc_metadata": document.doc_metadata_details}
        elif metadata == "without":
            dataset_process_rules = DatasetService.get_process_rules(dataset_id)
-            document_process_rules = document.dataset_process_rule.to_dict()
+            document_process_rules = document.dataset_process_rule.to_dict() if document.dataset_process_rule else {}
            data_source_info = document.data_source_detail_dict
            response = {
                "id": document.id,
@ -1028,6 +1030,41 @@ class WebsiteDocumentSyncApi(DocumentResource):
        return {"result": "success"}, 200


+class DocumentPipelineExecutionLogApi(DocumentResource):
+    """Expose the most recent pipeline execution log recorded for a document."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    def get(self, dataset_id, document_id):
+        """Return datasource details from the latest execution log of a document.
+
+        Raises:
+            NotFound: when the dataset or the document cannot be found.
+
+        Returns:
+            A ``(payload, 200)`` tuple. Every payload field is ``None`` when no
+            execution log exists for the document.
+        """
+        dataset_id = str(dataset_id)
+        document_id = str(document_id)
+
+        dataset = DatasetService.get_dataset(dataset_id)
+        if not dataset:
+            raise NotFound("Dataset not found.")
+        document = DocumentService.get_document(dataset.id, document_id)
+        if not document:
+            raise NotFound("Document not found.")
+        # Newest log first; only the latest entry is reported.
+        log = (
+            db.session.query(DocumentPipelineExecutionLog)
+            .filter_by(document_id=document_id)
+            .order_by(DocumentPipelineExecutionLog.created_at.desc())
+            .first()
+        )
+        if not log:
+            return {
+                "datasource_info": None,
+                "datasource_type": None,
+                "input_data": None,
+                "datasource_node_id": None,
+            }, 200
+        return {
+            # datasource_info is decoded with json.loads before being returned.
+            "datasource_info": json.loads(log.datasource_info),
+            "datasource_type": log.datasource_type,
+            "input_data": log.input_data,
+            "datasource_node_id": log.datasource_node_id,
+        }, 200
+
+
 api.add_resource(GetProcessRuleApi, "/datasets/process-rule")
 api.add_resource(DatasetDocumentListApi, "/datasets/<uuid:dataset_id>/documents")
 api.add_resource(DatasetInitApi, "/datasets/init")
@ -1050,3 +1087,6 @@ api.add_resource(DocumentRetryApi, "/datasets/<uuid:dataset_id>/retry")
 api.add_resource(DocumentRenameApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/rename")

 api.add_resource(WebsiteDocumentSyncApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/website-sync")
+api.add_resource(
+    DocumentPipelineExecutionLogApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/pipeline-execution-log"
+)
--- a/api/controllers/console/datasets/error.py
+++ b/api/controllers/console/datasets/error.py
@ -101,3 +101,9 @@ class ChildChunkDeleteIndexError(BaseHTTPException):
    error_code = "child_chunk_delete_index_error"
    description = "Delete child chunk index failed: {message}"
    code = 500
+
+
+class PipelineNotFoundError(BaseHTTPException):
+    """HTTP 404 error raised when a requested pipeline does not exist."""
+
+    error_code = "pipeline_not_found"
+    description = "Pipeline not found."
+    code = 404
--- a/api/controllers/console/datasets/rag_pipeline/datasource_auth.py
+++ b/api/controllers/console/datasets/rag_pipeline/datasource_auth.py
@ -0,0 +1,197 @@
+from flask import redirect, request
+from flask_login import current_user  # type: ignore
+from flask_restful import (  # type: ignore
+    Resource,  # type: ignore
+    reqparse,
+)
+from werkzeug.exceptions import Forbidden, NotFound
+
+from configs import dify_config
+from controllers.console import api
+from controllers.console.wraps import (
+    account_initialization_required,
+    setup_required,
+)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.plugin.impl.oauth import OAuthHandler
+from extensions.ext_database import db
+from libs.login import login_required
+from models.oauth import DatasourceOauthParamConfig, DatasourceProvider
+from services.datasource_provider_service import DatasourceProviderService
+
+
+class DatasourcePluginOauthApi(Resource):
+    """Start the OAuth authorization flow for a datasource plugin."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    def get(self):
+        """Return the authorization URL payload for a provider/plugin pair.
+
+        Query args:
+            provider: datasource provider name (required).
+            plugin_id: plugin identifier (required).
+
+        Raises:
+            Forbidden: when the current user is not an editor.
+            NotFound: when no OAuth parameter config exists for the pair.
+        """
+        parser = reqparse.RequestParser()
+        parser.add_argument("provider", type=str, required=True, nullable=False, location="args")
+        parser.add_argument("plugin_id", type=str, required=True, nullable=False, location="args")
+        args = parser.parse_args()
+        provider = args["provider"]
+        plugin_id = args["plugin_id"]
+        # Only editors may start an OAuth flow
+        if not current_user.is_editor:
+            raise Forbidden()
+        # Look up the OAuth parameter config for this provider/plugin pair
+        plugin_oauth_config = (
+            db.session.query(DatasourceOauthParamConfig).filter_by(provider=provider, plugin_id=plugin_id).first()
+        )
+        if not plugin_oauth_config:
+            raise NotFound()
+        oauth_handler = OAuthHandler()
+        redirect_url = (
+            f"{dify_config.CONSOLE_WEB_URL}/oauth/datasource/callback?provider={provider}&plugin_id={plugin_id}"
+        )
+        system_credentials = plugin_oauth_config.system_credentials
+        if system_credentials:
+            # Build a copy so the per-request redirect URL is never written
+            # into the dict that belongs to the loaded config row.
+            system_credentials = {**system_credentials, "redirect_url": redirect_url}
+        response = oauth_handler.get_authorization_url(
+            current_user.current_tenant.id, current_user.id, plugin_id, provider, system_credentials=system_credentials
+        )
+        return response.model_dump()
+
+
+class DatasourceOauthCallback(Resource):
+    """Finish the datasource OAuth flow and persist the obtained credentials."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    def get(self):
+        """Exchange the OAuth callback request for credentials and store them.
+
+        Query args:
+            provider: datasource provider name (required).
+            plugin_id: plugin identifier (required).
+
+        Raises:
+            Forbidden: when the current user is not an editor.
+            NotFound: when no OAuth parameter config exists for the pair.
+
+        Returns:
+            A redirect to the console web URL.
+        """
+        parser = reqparse.RequestParser()
+        parser.add_argument("provider", type=str, required=True, nullable=False, location="args")
+        parser.add_argument("plugin_id", type=str, required=True, nullable=False, location="args")
+        args = parser.parse_args()
+        provider = args["provider"]
+        plugin_id = args["plugin_id"]
+        # Same role requirement as the endpoint that starts the flow: this
+        # handler writes credentials, so non-editors must not reach it.
+        if not current_user.is_editor:
+            raise Forbidden()
+        oauth_handler = OAuthHandler()
+        plugin_oauth_config = (
+            db.session.query(DatasourceOauthParamConfig).filter_by(provider=provider, plugin_id=plugin_id).first()
+        )
+        if not plugin_oauth_config:
+            raise NotFound()
+        credentials = oauth_handler.get_credentials(
+            current_user.current_tenant.id,
+            current_user.id,
+            plugin_id,
+            provider,
+            system_credentials=plugin_oauth_config.system_credentials,
+            request=request,
+        )
+        # NOTE(review): the row is created without a tenant reference and the
+        # value stored in encrypted_credentials comes straight from the OAuth
+        # handler - confirm against the DatasourceProvider model whether
+        # tenant scoping and encryption are applied elsewhere.
+        datasource_provider = DatasourceProvider(
+            plugin_id=plugin_id, provider=provider, auth_type="oauth", encrypted_credentials=credentials
+        )
+        db.session.add(datasource_provider)
+        db.session.commit()
+        return redirect(dify_config.CONSOLE_WEB_URL)
+
+
+class DatasourceAuth(Resource):
+    """Validate and list credentials of a datasource plugin provider."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    def post(self):
+        """Validate the submitted credentials for a provider/plugin pair.
+
+        JSON body: provider, plugin_id, credentials (required) and an optional name.
+
+        Raises:
+            Forbidden: when the current user is not an editor.
+            ValueError: when credential validation fails; carries the
+                validation error message.
+        """
+        if not current_user.is_editor:
+            raise Forbidden()
+
+        parser = reqparse.RequestParser()
+        parser.add_argument("provider", type=str, required=True, nullable=False, location="json")
+        # NOTE(review): the default name "test" looks like a placeholder - confirm it is intended.
+        parser.add_argument("name", type=str, required=False, nullable=False, location="json", default="test")
+        parser.add_argument("plugin_id", type=str, required=True, nullable=False, location="json")
+        parser.add_argument("credentials", type=dict, required=True, nullable=False, location="json")
+        args = parser.parse_args()
+
+        datasource_provider_service = DatasourceProviderService()
+
+        try:
+            datasource_provider_service.datasource_provider_credentials_validate(
+                tenant_id=current_user.current_tenant_id,
+                provider=args["provider"],
+                plugin_id=args["plugin_id"],
+                credentials=args["credentials"],
+                name=args["name"],
+            )
+        except CredentialsValidateFailedError as ex:
+            # Re-raised as ValueError with the original message text.
+            raise ValueError(str(ex))
+
+        return {"result": "success"}, 201
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    def get(self):
+        """Return the credentials the service reports for a provider/plugin pair.
+
+        Query args: provider and plugin_id (both required). No editor check is
+        performed in this handler.
+        """
+        parser = reqparse.RequestParser()
+        parser.add_argument("provider", type=str, required=True, nullable=False, location="args")
+        parser.add_argument("plugin_id", type=str, required=True, nullable=False, location="args")
+        args = parser.parse_args()
+        datasource_provider_service = DatasourceProviderService()
+        datasources = datasource_provider_service.get_datasource_credentials(
+            tenant_id=current_user.current_tenant_id, provider=args["provider"], plugin_id=args["plugin_id"]
+        )
+        return {"result": datasources}, 200
+
+
+class DatasourceAuthUpdateDeleteApi(Resource):
+    """Update or remove one stored datasource credential, addressed by auth_id."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    def delete(self, auth_id: str):
+        """Remove the credential identified by ``auth_id``.
+
+        Query args: provider and plugin_id (both required).
+
+        Raises:
+            Forbidden: when the current user is not an editor.
+        """
+        parser = reqparse.RequestParser()
+        parser.add_argument("provider", type=str, required=True, nullable=False, location="args")
+        parser.add_argument("plugin_id", type=str, required=True, nullable=False, location="args")
+        args = parser.parse_args()
+        # The role check runs after argument parsing, so malformed requests
+        # are rejected by the parser before the editor check is reached.
+        if not current_user.is_editor:
+            raise Forbidden()
+        datasource_provider_service = DatasourceProviderService()
+        datasource_provider_service.remove_datasource_credentials(
+            tenant_id=current_user.current_tenant_id,
+            auth_id=auth_id,
+            provider=args["provider"],
+            plugin_id=args["plugin_id"],
+        )
+        return {"result": "success"}, 200
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    def patch(self, auth_id: str):
+        """Replace the credentials stored under ``auth_id``.
+
+        Query args: provider and plugin_id (required). JSON body: credentials (required).
+
+        Raises:
+            Forbidden: when the current user is not an editor.
+            ValueError: when the service rejects the credentials; carries the
+                validation error message.
+        """
+        parser = reqparse.RequestParser()
+        parser.add_argument("provider", type=str, required=True, nullable=False, location="args")
+        parser.add_argument("plugin_id", type=str, required=True, nullable=False, location="args")
+        parser.add_argument("credentials", type=dict, required=True, nullable=False, location="json")
+        args = parser.parse_args()
+        if not current_user.is_editor:
+            raise Forbidden()
+        try:
+            datasource_provider_service = DatasourceProviderService()
+            datasource_provider_service.update_datasource_credentials(
+                tenant_id=current_user.current_tenant_id,
+                auth_id=auth_id,
+                provider=args["provider"],
+                plugin_id=args["plugin_id"],
+                credentials=args["credentials"],
+            )
+        except CredentialsValidateFailedError as ex:
+            # Re-raised as ValueError with the original message text.
+            raise ValueError(str(ex))
+
+        # NOTE(review): an update answers with 201 rather than 200 - confirm this is intended.
+        return {"result": "success"}, 201
+
+
+# Datasource plugin OAuth and credential routes
+api.add_resource(
+    DatasourcePluginOauthApi,
+    "/oauth/plugin/datasource",
+)
+api.add_resource(
+    DatasourceOauthCallback,
+    "/oauth/plugin/datasource/callback",
+)
+api.add_resource(
+    DatasourceAuth,
+    "/auth/plugin/datasource",
+)
+
+api.add_resource(
+    DatasourceAuthUpdateDeleteApi,
+    "/auth/plugin/datasource/<string:auth_id>",
+)
--- a/api/controllers/console/datasets/rag_pipeline/datasource_content_preview.py
+++ b/api/controllers/console/datasets/rag_pipeline/datasource_content_preview.py
@ -0,0 +1,55 @@
+from flask_restful import (  # type: ignore
+    Resource,  # type: ignore
+    reqparse,
+)
+from werkzeug.exceptions import Forbidden
+
+from controllers.console import api
+from controllers.console.datasets.wraps import get_rag_pipeline
+from controllers.console.wraps import account_initialization_required, setup_required
+from libs.login import current_user, login_required
+from models import Account
+from models.dataset import Pipeline
+from services.rag_pipeline.rag_pipeline import RagPipelineService
+
+
+class DataSourceContentPreviewApi(Resource):
+    """Preview the content produced by a datasource node of a published pipeline."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @get_rag_pipeline
+    def post(self, pipeline: Pipeline, node_id: str):
+        """
+        Run the datasource node preview and return its result.
+
+        JSON body: ``inputs`` (dict, required) and ``datasource_type`` (str, required).
+        Raises Forbidden when the current user is not an Account and ValueError
+        when either body field resolves to None.
+        """
+        if not isinstance(current_user, Account):
+            raise Forbidden()
+
+        body_parser = reqparse.RequestParser()
+        body_parser.add_argument("inputs", type=dict, required=True, nullable=False, location="json")
+        body_parser.add_argument("datasource_type", type=str, required=True, location="json")
+        payload = body_parser.parse_args()
+
+        user_inputs = payload.get("inputs")
+        if user_inputs is None:
+            raise ValueError("missing inputs")
+
+        source_type = payload.get("datasource_type")
+        if source_type is None:
+            raise ValueError("missing datasource_type")
+
+        # The preview always runs against the published workflow.
+        result = RagPipelineService().run_datasource_node_preview(
+            pipeline=pipeline,
+            node_id=node_id,
+            user_inputs=user_inputs,
+            account=current_user,
+            datasource_type=source_type,
+            is_published=True,
+        )
+        return result, 200
+
+
+api.add_resource(
+    DataSourceContentPreviewApi,
+    "/rag/pipelines/<uuid:pipeline_id>/workflows/published/datasource/nodes/<string:node_id>/preview",
+)
--- a/api/controllers/console/datasets/rag_pipeline/rag_pipeline.py
+++ b/api/controllers/console/datasets/rag_pipeline/rag_pipeline.py
@ -0,0 +1,162 @@
+import logging
+
+from flask import request
+from flask_restful import Resource, reqparse
+from sqlalchemy.orm import Session
+
+from controllers.console import api
+from controllers.console.wraps import (
+    account_initialization_required,
+    enterprise_license_required,
+    setup_required,
+)
+from extensions.ext_database import db
+from libs.login import login_required
+from models.dataset import PipelineCustomizedTemplate
+from services.entities.knowledge_entities.rag_pipeline_entities import PipelineTemplateInfoEntity
+from services.rag_pipeline.rag_pipeline import RagPipelineService
+
+logger = logging.getLogger(__name__)
+
+
+def _validate_name(name):
+    """Return ``name`` when it is non-empty and at most 40 characters long; raise ValueError otherwise."""
+    if name and 1 <= len(name) <= 40:
+        return name
+    raise ValueError("Name must be between 1 to 40 characters.")
+
+
+def _validate_description_length(description):
+    """Return ``description`` unchanged unless it is longer than 400 characters, in which case raise ValueError."""
+    if len(description) <= 400:
+        return description
+    raise ValueError("Description cannot exceed 400 characters.")
+
+
+class PipelineTemplateListApi(Resource):
+    """List pipeline templates for a template type and language."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @enterprise_license_required
+    def get(self):
+        """Return the templates selected by the ``type`` (default "built-in") and ``language`` (default "en-US") query args."""
+        query = request.args
+        template_type = query.get("type", default="built-in", type=str)
+        template_language = query.get("language", default="en-US", type=str)
+        return RagPipelineService.get_pipeline_templates(template_type, template_language), 200
+
+
+class PipelineTemplateDetailApi(Resource):
+    """Fetch the detail of a single pipeline template."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @enterprise_license_required
+    def get(self, template_id: str):
+        """Return the detail of ``template_id`` for the ``type`` query arg (default "built-in")."""
+        template_type = request.args.get("type", default="built-in", type=str)
+        detail = RagPipelineService().get_pipeline_template_detail(template_id, template_type)
+        return detail, 200
+
+
+class CustomizedPipelineTemplateApi(Resource):
+    """Update, delete or export a customized pipeline template."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @enterprise_license_required
+    def patch(self, template_id: str):
+        """Update name, description and icon info of a customized template.
+
+        The name must be 1-40 characters and the description at most 400
+        characters; both limits are enforced by the request parser.
+        """
+        parser = reqparse.RequestParser()
+        parser.add_argument(
+            "name",
+            nullable=False,
+            required=True,
+            help="Name must be between 1 to 40 characters.",
+            type=_validate_name,
+        )
+        parser.add_argument(
+            "description",
+            # Enforce the 400-character limit; a plain ``str`` type accepted
+            # descriptions of any length and left the validator unused.
+            type=_validate_description_length,
+            nullable=True,
+            required=False,
+            default="",
+        )
+        parser.add_argument(
+            "icon_info",
+            type=dict,
+            location="json",
+            nullable=True,
+        )
+        args = parser.parse_args()
+        pipeline_template_info = PipelineTemplateInfoEntity(**args)
+        RagPipelineService.update_customized_pipeline_template(template_id, pipeline_template_info)
+        return 200
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @enterprise_license_required
+    def delete(self, template_id: str):
+        """Delete the customized template identified by ``template_id``."""
+        RagPipelineService.delete_customized_pipeline_template(template_id)
+        return 200
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @enterprise_license_required
+    def post(self, template_id: str):
+        """Return the stored YAML content of a customized template.
+
+        Raises:
+            ValueError: when no template with ``template_id`` exists.
+        """
+        # NOTE(review): the lookup is by id only - confirm whether it should
+        # also be scoped to the current tenant.
+        with Session(db.engine) as session:
+            template = (
+                session.query(PipelineCustomizedTemplate).filter(PipelineCustomizedTemplate.id == template_id).first()
+            )
+            if not template:
+                raise ValueError("Customized pipeline template not found.")
+            # Read the column while the session is still open so the value
+            # never depends on the state of a detached instance.
+            yaml_content = template.yaml_content
+
+        return {"data": yaml_content}, 200
+
+
+class PublishCustomizedPipelineTemplateApi(Resource):
+    """Publish a pipeline as a customized pipeline template."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @enterprise_license_required
+    def post(self, pipeline_id: str):
+        """Publish ``pipeline_id`` as a customized template.
+
+        The name must be 1-40 characters and the description at most 400
+        characters; both limits are enforced by the request parser.
+        """
+        parser = reqparse.RequestParser()
+        parser.add_argument(
+            "name",
+            nullable=False,
+            required=True,
+            help="Name must be between 1 to 40 characters.",
+            type=_validate_name,
+        )
+        parser.add_argument(
+            "description",
+            # Enforce the 400-character limit; a plain ``str`` type accepted
+            # descriptions of any length and left the validator unused.
+            type=_validate_description_length,
+            nullable=True,
+            required=False,
+            default="",
+        )
+        parser.add_argument(
+            "icon_info",
+            type=dict,
+            location="json",
+            nullable=True,
+        )
+        args = parser.parse_args()
+        rag_pipeline_service = RagPipelineService()
+        rag_pipeline_service.publish_customized_pipeline_template(pipeline_id, args)
+        return {"result": "success"}
+
+
+api.add_resource(
+    PipelineTemplateListApi,
+    "/rag/pipeline/templates",
+)
+api.add_resource(
+    PipelineTemplateDetailApi,
+    "/rag/pipeline/templates/<string:template_id>",
+)
+api.add_resource(
+    CustomizedPipelineTemplateApi,
+    "/rag/pipeline/customized/templates/<string:template_id>",
+)
+api.add_resource(
+    PublishCustomizedPipelineTemplateApi,
+    "/rag/pipelines/<string:pipeline_id>/customized/publish",
+)
--- a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_datasets.py
+++ b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_datasets.py
@ -0,0 +1,171 @@
+from flask_login import current_user  # type: ignore  # type: ignore
+from flask_restful import Resource, marshal, reqparse  # type: ignore
+from werkzeug.exceptions import Forbidden
+
+import services
+from controllers.console import api
+from controllers.console.datasets.error import DatasetNameDuplicateError
+from controllers.console.wraps import (
+    account_initialization_required,
+    cloud_edition_billing_rate_limit_check,
+    setup_required,
+)
+from fields.dataset_fields import dataset_detail_fields
+from libs.login import login_required
+from models.dataset import DatasetPermissionEnum
+from services.dataset_service import DatasetPermissionService, DatasetService
+from services.entities.knowledge_entities.rag_pipeline_entities import RagPipelineDatasetCreateEntity
+from services.rag_pipeline.rag_pipeline_dsl_service import RagPipelineDslService
+
+
+def _validate_name(name):
+    """Return ``name`` unchanged when it is non-empty and at most 40 characters long.
+
+    Raises:
+        ValueError: if ``name`` is falsy or longer than 40 characters.
+    """
+    if name and 1 <= len(name) <= 40:
+        return name
+    raise ValueError("Name must be between 1 to 40 characters.")
+
+
+def _validate_description_length(description):
+    """Return ``description`` unchanged when it is at most 400 characters long.
+
+    ``None`` and the empty string are accepted as-is: the ``description``
+    request arguments in this module are nullable, and calling ``len()`` on
+    ``None`` would raise ``TypeError`` instead of a validation error.
+
+    Raises:
+        ValueError: if ``description`` is longer than 400 characters.
+    """
+    if description and len(description) > 400:
+        raise ValueError("Description cannot exceed 400 characters.")
+    return description
+
+
+class CreateRagPipelineDatasetApi(Resource):
+    """Console endpoint that creates a dataset from a RAG pipeline DSL document."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @cloud_edition_billing_rate_limit_check("knowledge")
+    def post(self):
+        """Create a dataset from the ``yaml_content`` supplied in the request.
+
+        Request arguments:
+            name: required; validated by ``_validate_name``.
+            description: optional string, defaults to ``""``.
+            icon_info: optional dict, defaults to ``{}``.
+            permission: optional; one of the ``DatasetPermissionEnum`` choices,
+                defaults to ``DatasetPermissionEnum.ONLY_ME``.
+            partial_member_list: optional list, defaults to ``[]``.
+            yaml_content: required string.
+
+        Returns:
+            The import info returned by
+            ``RagPipelineDslService.create_rag_pipeline_dataset`` with HTTP 201.
+
+        Raises:
+            Forbidden: if the current user is not a dataset editor.
+            DatasetNameDuplicateError: if the service reports a duplicate dataset name.
+        """
+        # The role of the current user in the ta table must be admin, owner, or editor, or dataset_operator.
+        # Checked before parsing so unauthorized callers get 403 rather than payload validation errors,
+        # matching CreateEmptyRagPipelineDatasetApi.
+        if not current_user.is_dataset_editor:
+            raise Forbidden()
+
+        parser = reqparse.RequestParser()
+        parser.add_argument(
+            "name",
+            nullable=False,
+            required=True,
+            help="Name must be between 1 to 40 characters.",
+            type=_validate_name,
+        )
+        parser.add_argument(
+            "description",
+            type=str,
+            nullable=True,
+            required=False,
+            default="",
+        )
+
+        parser.add_argument(
+            "icon_info",
+            type=dict,
+            nullable=True,
+            required=False,
+            default={},
+        )
+
+        parser.add_argument(
+            "permission",
+            type=str,
+            choices=(DatasetPermissionEnum.ONLY_ME, DatasetPermissionEnum.ALL_TEAM, DatasetPermissionEnum.PARTIAL_TEAM),
+            nullable=True,
+            required=False,
+            default=DatasetPermissionEnum.ONLY_ME,
+        )
+
+        parser.add_argument(
+            "partial_member_list",
+            type=list,
+            nullable=True,
+            required=False,
+            default=[],
+        )
+
+        parser.add_argument(
+            "yaml_content",
+            type=str,
+            nullable=False,
+            required=True,
+            help="yaml_content is required.",
+        )
+
+        args = parser.parse_args()
+
+        rag_pipeline_dataset_create_entity = RagPipelineDatasetCreateEntity(**args)
+        try:
+            import_info = RagPipelineDslService.create_rag_pipeline_dataset(
+                tenant_id=current_user.current_tenant_id,
+                rag_pipeline_dataset_create_entity=rag_pipeline_dataset_create_entity,
+            )
+            # NOTE(review): "partial_members" presumably equals DatasetPermissionEnum.PARTIAL_TEAM - confirm
+            # and prefer the enum member if so.
+            if rag_pipeline_dataset_create_entity.permission == "partial_members":
+                DatasetPermissionService.update_partial_member_list(
+                    current_user.current_tenant_id,
+                    import_info["dataset_id"],
+                    rag_pipeline_dataset_create_entity.partial_member_list,
+                )
+        except services.errors.dataset.DatasetNameDuplicateError:
+            # Translate the service-level error into the controller-level API error.
+            raise DatasetNameDuplicateError()
+
+        return import_info, 201
+
+
+class CreateEmptyRagPipelineDatasetApi(Resource):
+    """Console endpoint that creates an empty RAG pipeline dataset."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @cloud_edition_billing_rate_limit_check("knowledge")
+    def post(self):
+        """Create an empty dataset from the request arguments.
+
+        Request arguments:
+            name: required; validated by ``_validate_name``.
+            description: optional string, defaults to ``""``.
+            icon_info: optional dict, defaults to ``{}``.
+            permission: optional; one of the ``DatasetPermissionEnum`` choices,
+                defaults to ``DatasetPermissionEnum.ONLY_ME``.
+            partial_member_list: optional list, defaults to ``[]``.
+
+        Returns:
+            The created dataset marshalled with ``dataset_detail_fields`` and HTTP 201.
+
+        Raises:
+            Forbidden: if the current user is not a dataset editor.
+        """
+        # The role of the current user in the ta table must be admin, owner, or editor, or dataset_operator
+        if not current_user.is_dataset_editor:
+            raise Forbidden()
+
+        parser = reqparse.RequestParser()
+        parser.add_argument(
+            "name",
+            nullable=False,
+            required=True,
+            # The message describes the "name" argument only; the earlier
+            # "type is required." prefix referred to an argument that does not exist here.
+            help="Name must be between 1 to 40 characters.",
+            type=_validate_name,
+        )
+        parser.add_argument(
+            "description",
+            type=str,
+            nullable=True,
+            required=False,
+            default="",
+        )
+
+        parser.add_argument(
+            "icon_info",
+            type=dict,
+            nullable=True,
+            required=False,
+            default={},
+        )
+
+        parser.add_argument(
+            "permission",
+            type=str,
+            choices=(DatasetPermissionEnum.ONLY_ME, DatasetPermissionEnum.ALL_TEAM, DatasetPermissionEnum.PARTIAL_TEAM),
+            nullable=True,
+            required=False,
+            default=DatasetPermissionEnum.ONLY_ME,
+        )
+
+        parser.add_argument(
+            "partial_member_list",
+            type=list,
+            nullable=True,
+            required=False,
+            default=[],
+        )
+
+        args = parser.parse_args()
+        dataset = DatasetService.create_empty_rag_pipeline_dataset(
+            tenant_id=current_user.current_tenant_id,
+            rag_pipeline_dataset_create_entity=RagPipelineDatasetCreateEntity(**args),
+        )
+        return marshal(dataset, dataset_detail_fields), 201
+
+
+# Route registrations: dataset created from a pipeline DSL document, and empty pipeline dataset.
+api.add_resource(CreateRagPipelineDatasetApi, "/rag/pipeline/dataset")
+api.add_resource(CreateEmptyRagPipelineDatasetApi, "/rag/pipeline/empty-dataset")
--- a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_import.py
+++ b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_import.py
@ -0,0 +1,146 @@
+from typing import cast
+
+from flask_login import current_user  # type: ignore
+from flask_restful import Resource, marshal_with, reqparse  # type: ignore
+from sqlalchemy.orm import Session
+from werkzeug.exceptions import Forbidden
+
+from controllers.console import api
+from controllers.console.datasets.wraps import get_rag_pipeline
+from controllers.console.wraps import (
+    account_initialization_required,
+    setup_required,
+)
+from extensions.ext_database import db
+from fields.rag_pipeline_fields import pipeline_import_check_dependencies_fields, pipeline_import_fields
+from libs.login import login_required
+from models import Account
+from models.dataset import Pipeline
+from services.app_dsl_service import ImportStatus
+from services.rag_pipeline.rag_pipeline_dsl_service import RagPipelineDslService
+
+
+class RagPipelineImportApi(Resource):
+    """Console endpoint that imports a RAG pipeline from DSL content or a URL."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @marshal_with(pipeline_import_fields)
+    def post(self):
+        """Run ``RagPipelineDslService.import_rag_pipeline`` and map its status to an HTTP code.
+
+        Returns:
+            The dumped import result with 400 when the status is FAILED,
+            202 when it is PENDING, and 200 otherwise.
+
+        Raises:
+            Forbidden: if the current user is not an editor.
+        """
+        # Only editors may import.
+        if not current_user.is_editor:
+            raise Forbidden()
+
+        parser = reqparse.RequestParser()
+        parser.add_argument("mode", type=str, required=True, location="json")
+        # All remaining arguments are optional JSON strings.
+        for optional_field in (
+            "yaml_content",
+            "yaml_url",
+            "name",
+            "description",
+            "icon_type",
+            "icon",
+            "icon_background",
+            "pipeline_id",
+        ):
+            parser.add_argument(optional_field, type=str, location="json")
+        args = parser.parse_args()
+
+        # The service shares this session; commit once the import call returns.
+        with Session(db.engine) as session:
+            dsl_service = RagPipelineDslService(session)
+            result = dsl_service.import_rag_pipeline(
+                account=cast(Account, current_user),
+                import_mode=args["mode"],
+                yaml_content=args.get("yaml_content"),
+                yaml_url=args.get("yaml_url"),
+                pipeline_id=args.get("pipeline_id"),
+            )
+            session.commit()
+
+        # Pick the HTTP status code from the import status.
+        import_status = result.status
+        if import_status == ImportStatus.FAILED.value:
+            http_code = 400
+        elif import_status == ImportStatus.PENDING.value:
+            http_code = 202
+        else:
+            http_code = 200
+        return result.model_dump(mode="json"), http_code
+
+
+class RagPipelineImportConfirmApi(Resource):
+    """Console endpoint that confirms a previously started pipeline import."""
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @marshal_with(pipeline_import_fields)
+    def post(self, import_id):
+        """Confirm the import identified by ``import_id``.
+
+        Returns:
+            The dumped result with 400 when the status is FAILED, otherwise 200.
+
+        Raises:
+            Forbidden: if the current user is not an editor.
+        """
+        # Only editors may confirm an import.
+        if not current_user.is_editor:
+            raise Forbidden()
+
+        # The service shares this session; commit once the confirmation returns.
+        with Session(db.engine) as session:
+            dsl_service = RagPipelineDslService(session)
+            result = dsl_service.confirm_import(import_id=import_id, account=cast(Account, current_user))
+            session.commit()
+
+        http_code = 400 if result.status == ImportStatus.FAILED.value else 200
+        return result.model_dump(mode="json"), http_code
+
+
+class RagPipelineImportCheckDependenciesApi(Resource):
+    """Console endpoint that checks the dependencies of a pipeline."""
+
+    @setup_required
+    @login_required
+    @get_rag_pipeline
+    @account_initialization_required
+    @marshal_with(pipeline_import_check_dependencies_fields)
+    def get(self, pipeline: Pipeline):
+        """Return the result of ``RagPipelineDslService.check_dependencies`` for ``pipeline``.
+
+        Raises:
+            Forbidden: if the current user is not an editor.
+        """
+        if not current_user.is_editor:
+            raise Forbidden()
+
+        with Session(db.engine) as session:
+            dependency_check = RagPipelineDslService(session).check_dependencies(pipeline=pipeline)
+
+        return dependency_check.model_dump(mode="json"), 200
+
+
+class RagPipelineExportApi(Resource):
+    """Console endpoint that exports a RAG pipeline as DSL."""
+
+    @setup_required
+    @login_required
+    @get_rag_pipeline
+    @account_initialization_required
+    def get(self, pipeline: Pipeline):
+        """Export ``pipeline`` through ``RagPipelineDslService.export_rag_pipeline_dsl``.
+
+        Query parameters:
+            include_secret: ``true`` or ``1`` (case-insensitive) is forwarded as
+                ``include_secret=True``; any other value, or omitting the
+                parameter, is forwarded as ``False``.
+
+        Returns:
+            ``({"data": <export result>}, 200)``.
+
+        Raises:
+            Forbidden: if the current user is not an editor.
+        """
+        if not current_user.is_editor:
+            raise Forbidden()
+
+        # Parse the flag as text: ``type=bool`` applies ``bool()`` to the raw query string,
+        # which turns every non-empty value (even "false" or "0") into True.
+        parser = reqparse.RequestParser()
+        parser.add_argument("include_secret", type=str, default="false", location="args")
+        args = parser.parse_args()
+        include_secret = (args["include_secret"] or "").strip().lower() in {"true", "1"}
+
+        with Session(db.engine) as session:
+            export_service = RagPipelineDslService(session)
+            result = export_service.export_rag_pipeline_dsl(pipeline=pipeline, include_secret=include_secret)
+
+        return {"data": result}, 200
+
+
+# Route registrations for RAG pipeline import, import confirmation, dependency check and export.
+api.add_resource(RagPipelineImportApi, "/rag/pipelines/imports")
+api.add_resource(RagPipelineImportConfirmApi, "/rag/pipelines/imports/<string:import_id>/confirm")
+api.add_resource(
+    RagPipelineImportCheckDependenciesApi,
+    "/rag/pipelines/imports/<string:pipeline_id>/check-dependencies",
+)
+api.add_resource(RagPipelineExportApi, "/rag/pipelines/<string:pipeline_id>/exports")
--- a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py
+++ b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py
--- a/api/controllers/console/datasets/wraps.py
+++ b/api/controllers/console/datasets/wraps.py
@ -0,0 +1,43 @@
+from collections.abc import Callable
+from functools import wraps
+from typing import Optional
+
+from controllers.console.datasets.error import PipelineNotFoundError
+from extensions.ext_database import db
+from libs.login import current_user
+from models.dataset import Pipeline
+
+
+def get_rag_pipeline(
+    view: Optional[Callable] = None,
+):
+    """Decorator that replaces the ``pipeline_id`` keyword with the loaded ``Pipeline``.
+
+    Works both bare (``@get_rag_pipeline``) and called (``@get_rag_pipeline()``).
+    The wrapped view is invoked with ``pipeline=<Pipeline>`` and without
+    ``pipeline_id``.
+
+    Raises (when the decorated view is called):
+        ValueError: if ``pipeline_id`` is missing or falsy in the keyword arguments.
+        PipelineNotFoundError: if no pipeline with that id exists for the
+            current user's tenant.
+    """
+
+    def decorator(view_func):
+        @wraps(view_func)
+        def decorated_view(*args, **kwargs):
+            # Take the id out of kwargs so the view never receives it.
+            raw_pipeline_id = kwargs.pop("pipeline_id", None)
+            if not raw_pipeline_id:
+                raise ValueError("missing pipeline_id in path parameters")
+
+            # Restrict the lookup to the current user's tenant.
+            tenant_scoped_query = db.session.query(Pipeline).filter(
+                Pipeline.id == str(raw_pipeline_id),
+                Pipeline.tenant_id == current_user.current_tenant_id,
+            )
+            pipeline = tenant_scoped_query.first()
+            if not pipeline:
+                raise PipelineNotFoundError()
+
+            kwargs["pipeline"] = pipeline
+            return view_func(*args, **kwargs)
+
+        return decorated_view
+
+    return decorator if view is None else decorator(view)
--- a/api/controllers/console/explore/audio.py
+++ b/api/controllers/console/explore/audio.py
@ -18,6 +18,7 @@ from controllers.console.app.error import (
 from controllers.console.explore.wraps import InstalledAppResource
 from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError
 from core.model_runtime.errors.invoke import InvokeError
+from models.model import AppMode
 from services.audio_service import AudioService
 from services.errors.audio import (
    AudioTooLargeServiceError,
@ -78,9 +79,19 @@ class ChatTextApi(InstalledAppResource):

            message_id = args.get("message_id", None)
            text = args.get("text", None)
-            voice = args.get("voice", None)
-
-            response = AudioService.transcript_tts(app_model=app_model, text=text, voice=voice, message_id=message_id)
+            if (
+                app_model.mode in {AppMode.ADVANCED_CHAT.value, AppMode.WORKFLOW.value}
+                and app_model.workflow
+                and app_model.workflow.features_dict
+            ):
+                # "text_to_speech" may be absent (or null) in the workflow features; fall back to an
+                # empty dict so the voice lookup below cannot fail with AttributeError on None.
+                text_to_speech = app_model.workflow.features_dict.get("text_to_speech") or {}
+                voice = args.get("voice") or text_to_speech.get("voice")
+            else:
+                # Best effort: use the voice from the app model config, or no voice if it cannot be read.
+                try:
+                    voice = args.get("voice") or app_model.app_model_config.text_to_speech_dict.get("voice")
+                except Exception:
+                    voice = None
+            response = AudioService.transcript_tts(app_model=app_model, message_id=message_id, voice=voice, text=text)
            return response
        except services.errors.app_model_config.AppModelConfigBrokenError:
            logging.exception("App model config broken.")
--- a/api/controllers/console/workspace/tool_providers.py
+++ b/api/controllers/console/workspace/tool_providers.py
@ -1,7 +1,6 @@
 import io
-from urllib.parse import urlparse

-from flask import redirect, send_file
+from flask import send_file
 from flask_login import current_user
 from flask_restful import Resource, reqparse
 from sqlalchemy.orm import Session
@ -10,34 +9,17 @@ from werkzeug.exceptions import Forbidden
 from configs import dify_config
 from controllers.console import api
 from controllers.console.wraps import account_initialization_required, enterprise_license_required, setup_required
-from core.mcp.auth.auth_flow import auth, handle_callback
-from core.mcp.auth.auth_provider import OAuthClientProvider
-from core.mcp.error import MCPAuthError, MCPError
-from core.mcp.mcp_client import MCPClient
 from core.model_runtime.utils.encoders import jsonable_encoder
 from extensions.ext_database import db
 from libs.helper import alphanumeric, uuid_value
 from libs.login import login_required
 from services.tools.api_tools_manage_service import ApiToolManageService
 from services.tools.builtin_tools_manage_service import BuiltinToolManageService
-from services.tools.mcp_tools_mange_service import MCPToolManageService
 from services.tools.tool_labels_service import ToolLabelsService
 from services.tools.tools_manage_service import ToolCommonService
-from services.tools.tools_transform_service import ToolTransformService
 from services.tools.workflow_tools_manage_service import WorkflowToolManageService


-def is_valid_url(url: str) -> bool:
-    if not url:
-        return False
-
-    try:
-        parsed = urlparse(url)
-        return all([parsed.scheme, parsed.netloc]) and parsed.scheme in ["http", "https"]
-    except Exception:
-        return False
-
-
 class ToolProviderListApi(Resource):
    @setup_required
    @login_required
@ -52,7 +34,7 @@ class ToolProviderListApi(Resource):
        req.add_argument(
            "type",
            type=str,
-            choices=["builtin", "model", "api", "workflow", "mcp"],
+            choices=["builtin", "model", "api", "workflow"],
            required=False,
            nullable=True,
            location="args",
@ -631,166 +613,6 @@ class ToolLabelsApi(Resource):
        return jsonable_encoder(ToolLabelsService.list_tool_labels())


-class ToolProviderMCPApi(Resource):
-    @setup_required
-    @login_required
-    @account_initialization_required
-    def post(self):
-        parser = reqparse.RequestParser()
-        parser.add_argument("server_url", type=str, required=True, nullable=False, location="json")
-        parser.add_argument("name", type=str, required=True, nullable=False, location="json")
-        parser.add_argument("icon", type=str, required=True, nullable=False, location="json")
-        parser.add_argument("icon_type", type=str, required=True, nullable=False, location="json")
-        parser.add_argument("icon_background", type=str, required=False, nullable=True, location="json", default="")
-        parser.add_argument("server_identifier", type=str, required=True, nullable=False, location="json")
-        args = parser.parse_args()
-        user = current_user
-        if not is_valid_url(args["server_url"]):
-            raise ValueError("Server URL is not valid.")
-        return jsonable_encoder(
-            MCPToolManageService.create_mcp_provider(
-                tenant_id=user.current_tenant_id,
-                server_url=args["server_url"],
-                name=args["name"],
-                icon=args["icon"],
-                icon_type=args["icon_type"],
-                icon_background=args["icon_background"],
-                user_id=user.id,
-                server_identifier=args["server_identifier"],
-            )
-        )
-
-    @setup_required
-    @login_required
-    @account_initialization_required
-    def put(self):
-        parser = reqparse.RequestParser()
-        parser.add_argument("server_url", type=str, required=True, nullable=False, location="json")
-        parser.add_argument("name", type=str, required=True, nullable=False, location="json")
-        parser.add_argument("icon", type=str, required=True, nullable=False, location="json")
-        parser.add_argument("icon_type", type=str, required=True, nullable=False, location="json")
-        parser.add_argument("icon_background", type=str, required=False, nullable=True, location="json")
-        parser.add_argument("provider_id", type=str, required=True, nullable=False, location="json")
-        parser.add_argument("server_identifier", type=str, required=True, nullable=False, location="json")
-        args = parser.parse_args()
-        if not is_valid_url(args["server_url"]):
-            if "[__HIDDEN__]" in args["server_url"]:
-                pass
-            else:
-                raise ValueError("Server URL is not valid.")
-        MCPToolManageService.update_mcp_provider(
-            tenant_id=current_user.current_tenant_id,
-            provider_id=args["provider_id"],
-            server_url=args["server_url"],
-            name=args["name"],
-            icon=args["icon"],
-            icon_type=args["icon_type"],
-            icon_background=args["icon_background"],
-            server_identifier=args["server_identifier"],
-        )
-        return {"result": "success"}
-
-    @setup_required
-    @login_required
-    @account_initialization_required
-    def delete(self):
-        parser = reqparse.RequestParser()
-        parser.add_argument("provider_id", type=str, required=True, nullable=False, location="json")
-        args = parser.parse_args()
-        MCPToolManageService.delete_mcp_tool(tenant_id=current_user.current_tenant_id, provider_id=args["provider_id"])
-        return {"result": "success"}
-
-
-class ToolMCPAuthApi(Resource):
-    @setup_required
-    @login_required
-    @account_initialization_required
-    def post(self):
-        parser = reqparse.RequestParser()
-        parser.add_argument("provider_id", type=str, required=True, nullable=False, location="json")
-        parser.add_argument("authorization_code", type=str, required=False, nullable=True, location="json")
-        args = parser.parse_args()
-        provider_id = args["provider_id"]
-        tenant_id = current_user.current_tenant_id
-        provider = MCPToolManageService.get_mcp_provider_by_provider_id(provider_id, tenant_id)
-        if not provider:
-            raise ValueError("provider not found")
-        try:
-            with MCPClient(
-                provider.decrypted_server_url,
-                provider_id,
-                tenant_id,
-                authed=False,
-                authorization_code=args["authorization_code"],
-                for_list=True,
-            ):
-                MCPToolManageService.update_mcp_provider_credentials(
-                    mcp_provider=provider,
-                    credentials=provider.decrypted_credentials,
-                    authed=True,
-                )
-                return {"result": "success"}
-
-        except MCPAuthError:
-            auth_provider = OAuthClientProvider(provider_id, tenant_id, for_list=True)
-            return auth(auth_provider, provider.decrypted_server_url, args["authorization_code"])
-        except MCPError as e:
-            MCPToolManageService.update_mcp_provider_credentials(
-                mcp_provider=provider,
-                credentials={},
-                authed=False,
-            )
-            raise ValueError(f"Failed to connect to MCP server: {e}") from e
-
-
-class ToolMCPDetailApi(Resource):
-    @setup_required
-    @login_required
-    @account_initialization_required
-    def get(self, provider_id):
-        user = current_user
-        provider = MCPToolManageService.get_mcp_provider_by_provider_id(provider_id, user.current_tenant_id)
-        return jsonable_encoder(ToolTransformService.mcp_provider_to_user_provider(provider, for_list=True))
-
-
-class ToolMCPListAllApi(Resource):
-    @setup_required
-    @login_required
-    @account_initialization_required
-    def get(self):
-        user = current_user
-        tenant_id = user.current_tenant_id
-
-        tools = MCPToolManageService.retrieve_mcp_tools(tenant_id=tenant_id)
-
-        return [tool.to_dict() for tool in tools]
-
-
-class ToolMCPUpdateApi(Resource):
-    @setup_required
-    @login_required
-    @account_initialization_required
-    def get(self, provider_id):
-        tenant_id = current_user.current_tenant_id
-        tools = MCPToolManageService.list_mcp_tool_from_remote_server(
-            tenant_id=tenant_id,
-            provider_id=provider_id,
-        )
-        return jsonable_encoder(tools)
-
-
-class ToolMCPCallbackApi(Resource):
-    def get(self):
-        parser = reqparse.RequestParser()
-        parser.add_argument("code", type=str, required=True, nullable=False, location="args")
-        parser.add_argument("state", type=str, required=True, nullable=False, location="args")
-        args = parser.parse_args()
-        state_key = args["state"]
-        authorization_code = args["code"]
-        handle_callback(state_key, authorization_code)
-        return redirect(f"{dify_config.CONSOLE_WEB_URL}/oauth-callback")
-
-
 # tool provider
 api.add_resource(ToolProviderListApi, "/workspaces/current/tool-providers")

@ -825,15 +647,8 @@ api.add_resource(ToolWorkflowProviderDeleteApi, "/workspaces/current/tool-provid
 api.add_resource(ToolWorkflowProviderGetApi, "/workspaces/current/tool-provider/workflow/get")
 api.add_resource(ToolWorkflowProviderListToolApi, "/workspaces/current/tool-provider/workflow/tools")

-# mcp tool provider
-api.add_resource(ToolMCPDetailApi, "/workspaces/current/tool-provider/mcp/tools/<path:provider_id>")
-api.add_resource(ToolProviderMCPApi, "/workspaces/current/tool-provider/mcp")
-api.add_resource(ToolMCPUpdateApi, "/workspaces/current/tool-provider/mcp/update/<path:provider_id>")
-api.add_resource(ToolMCPAuthApi, "/workspaces/current/tool-provider/mcp/auth")
-api.add_resource(ToolMCPCallbackApi, "/mcp/oauth/callback")
-
 api.add_resource(ToolBuiltinListApi, "/workspaces/current/tools/builtin")
 api.add_resource(ToolApiListApi, "/workspaces/current/tools/api")
-api.add_resource(ToolMCPListAllApi, "/workspaces/current/tools/mcp")
 api.add_resource(ToolWorkflowListApi, "/workspaces/current/tools/workflow")
+
 api.add_resource(ToolLabelsApi, "/workspaces/current/tool-labels")
--- a/api/controllers/files/upload.py
+++ b/api/controllers/files/upload.py
@ -87,5 +87,7 @@ class PluginUploadFileApi(Resource):
        except services.errors.file.UnsupportedFileTypeError:
            raise UnsupportedFileTypeError()

+        return tool_file, 201
+

 api.add_resource(PluginUploadFileApi, "/files/upload/for-plugin")
--- a/api/controllers/mcp/init.py
+++ b/api/controllers/mcp/init.py
@ -1,8 +0,0 @@
-from flask import Blueprint
-
-from libs.external_api import ExternalApi
-
-bp = Blueprint("mcp", __name__, url_prefix="/mcp")
-api = ExternalApi(bp)
-
-from . import mcp
--- a/api/controllers/mcp/mcp.py
+++ b/api/controllers/mcp/mcp.py
@ -1,104 +0,0 @@
-from flask_restful import Resource, reqparse
-from pydantic import ValidationError
-
-from controllers.console.app.mcp_server import AppMCPServerStatus
-from controllers.mcp import api
-from core.app.app_config.entities import VariableEntity
-from core.mcp import types
-from core.mcp.server.streamable_http import MCPServerStreamableHTTPRequestHandler
-from core.mcp.types import ClientNotification, ClientRequest
-from core.mcp.utils import create_mcp_error_response
-from extensions.ext_database import db
-from libs import helper
-from models.model import App, AppMCPServer, AppMode
-
-
-class MCPAppApi(Resource):
-    def post(self, server_code):
-        def int_or_str(value):
-            if isinstance(value, (int, str)):
-                return value
-            else:
-                return None
-
-        parser = reqparse.RequestParser()
-        parser.add_argument("jsonrpc", type=str, required=True, location="json")
-        parser.add_argument("method", type=str, required=True, location="json")
-        parser.add_argument("params", type=dict, required=False, location="json")
-        parser.add_argument("id", type=int_or_str, required=False, location="json")
-        args = parser.parse_args()
-
-        request_id = args.get("id")
-
-        server = db.session.query(AppMCPServer).filter(AppMCPServer.server_code == server_code).first()
-        if not server:
-            return helper.compact_generate_response(
-                create_mcp_error_response(request_id, types.INVALID_REQUEST, "Server Not Found")
-            )
-
-        if server.status != AppMCPServerStatus.ACTIVE:
-            return helper.compact_generate_response(
-                create_mcp_error_response(request_id, types.INVALID_REQUEST, "Server is not active")
-            )
-
-        app = db.session.query(App).filter(App.id == server.app_id).first()
-        if not app:
-            return helper.compact_generate_response(
-                create_mcp_error_response(request_id, types.INVALID_REQUEST, "App Not Found")
-            )
-
-        if app.mode in {AppMode.ADVANCED_CHAT.value, AppMode.WORKFLOW.value}:
-            workflow = app.workflow
-            if workflow is None:
-                return helper.compact_generate_response(
-                    create_mcp_error_response(request_id, types.INVALID_REQUEST, "App is unavailable")
-                )
-
-            user_input_form = workflow.user_input_form(to_old_structure=True)
-        else:
-            app_model_config = app.app_model_config
-            if app_model_config is None:
-                return helper.compact_generate_response(
-                    create_mcp_error_response(request_id, types.INVALID_REQUEST, "App is unavailable")
-                )
-
-            features_dict = app_model_config.to_dict()
-            user_input_form = features_dict.get("user_input_form", [])
-        converted_user_input_form: list[VariableEntity] = []
-        try:
-            for item in user_input_form:
-                variable_type = item.get("type", "") or list(item.keys())[0]
-                variable = item[variable_type]
-                converted_user_input_form.append(
-                    VariableEntity(
-                        type=variable_type,
-                        variable=variable.get("variable"),
-                        description=variable.get("description") or "",
-                        label=variable.get("label"),
-                        required=variable.get("required", False),
-                        max_length=variable.get("max_length"),
-                        options=variable.get("options") or [],
-                    )
-                )
-        except ValidationError as e:
-            return helper.compact_generate_response(
-                create_mcp_error_response(request_id, types.INVALID_PARAMS, f"Invalid user_input_form: {str(e)}")
-            )
-
-        try:
-            request: ClientRequest | ClientNotification = ClientRequest.model_validate(args)
-        except ValidationError as e:
-            try:
-                notification = ClientNotification.model_validate(args)
-                request = notification
-            except ValidationError as e:
-                return helper.compact_generate_response(
-                    create_mcp_error_response(request_id, types.INVALID_PARAMS, f"Invalid MCP request: {str(e)}")
-                )
-
-        mcp_server_handler = MCPServerStreamableHTTPRequestHandler(app, request, converted_user_input_form)
-        response = mcp_server_handler.handle()
-        return helper.compact_generate_response(response)
-
-
-api.add_resource(MCPAppApi, "/server/<string:server_code>/mcp")
--- a/api/controllers/service_api/app/audio.py
+++ b/api/controllers/service_api/app/audio.py
@ -20,7 +20,7 @@ from controllers.service_api.app.error import (
 from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token
 from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError
 from core.model_runtime.errors.invoke import InvokeError
-from models.model import App, EndUser
+from models.model import App, AppMode, EndUser
 from services.audio_service import AudioService
 from services.errors.audio import (
    AudioTooLargeServiceError,
@ -78,9 +78,20 @@ class TextApi(Resource):

            message_id = args.get("message_id", None)
            text = args.get("text", None)
-            voice = args.get("voice", None)
+            if (
+                app_model.mode in {AppMode.ADVANCED_CHAT.value, AppMode.WORKFLOW.value}
+                and app_model.workflow
+                and app_model.workflow.features_dict
+            ):
+                text_to_speech = app_model.workflow.features_dict.get("text_to_speech", {})
+                voice = args.get("voice") or text_to_speech.get("voice")
+            else:
+                try:
+                    voice = args.get("voice") or app_model.app_model_config.text_to_speech_dict.get("voice")
+                except Exception:
+                    voice = None
            response = AudioService.transcript_tts(
-                app_model=app_model, text=text, voice=voice, end_user=end_user.external_user_id, message_id=message_id
+                app_model=app_model, message_id=message_id, end_user=end_user.external_user_id, voice=voice, text=text
            )

            return response
--- a/api/controllers/service_api/app/workflow.py
+++ b/api/controllers/service_api/app/workflow.py
@ -3,7 +3,7 @@ import logging
 from dateutil.parser import isoparse
 from flask_restful import Resource, fields, marshal_with, reqparse
 from flask_restful.inputs import int_range
-from sqlalchemy.orm import Session, sessionmaker
+from sqlalchemy.orm import Session
 from werkzeug.exceptions import InternalServerError

 from controllers.service_api import api
@ -30,7 +30,7 @@ from fields.workflow_app_log_fields import workflow_app_log_pagination_fields
 from libs import helper
 from libs.helper import TimestampField
 from models.model import App, AppMode, EndUser
-from repositories.factory import DifyAPIRepositoryFactory
+from models.workflow import WorkflowRun
 from services.app_generate_service import AppGenerateService
 from services.errors.llm import InvokeRateLimitError
 from services.workflow_app_service import WorkflowAppService
@ -63,15 +63,17 @ class WorkflowRunDetailApi(Resource):
         if app_mode not in [AppMode.WORKFLOW, AppMode.ADVANCED_CHAT]:
             raise NotWorkflowAppError()
 
-        # Use repository to get workflow run
-        session_maker = sessionmaker(bind=db.engine, expire_on_commit=False)
-        workflow_run_repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker)
-
-        workflow_run = workflow_run_repo.get_workflow_run_by_id(
-            tenant_id=app_model.tenant_id,
-            app_id=app_model.id,
-            run_id=workflow_run_id,
-        )
+        # Scope the lookup to the calling app's tenant and app: filtering on the run id alone
+        # would return a workflow run that belongs to any other app or tenant.
+        workflow_run = (
+            db.session.query(WorkflowRun)
+            .filter(
+                WorkflowRun.tenant_id == app_model.tenant_id,
+                WorkflowRun.app_id == app_model.id,
+                WorkflowRun.id == workflow_run_id,
+            )
+            .first()
+        )
         return workflow_run


--- a/api/controllers/service_api/dataset/document.py
+++ b/api/controllers/service_api/dataset/document.py
@ -211,9 +211,6 @@ class DocumentAddByFileApi(DatasetApiResource):
        if not dataset:
            raise ValueError("Dataset does not exist.")

-        if dataset.provider == "external":
-            raise ValueError("External datasets are not supported.")
-
        indexing_technique = args.get("indexing_technique") or dataset.indexing_technique
        if not indexing_technique:
            raise ValueError("indexing_technique is required.")
@ -304,9 +301,6 @@ class DocumentUpdateByFileApi(DatasetApiResource):
        if not dataset:
            raise ValueError("Dataset does not exist.")

-        if dataset.provider == "external":
-            raise ValueError("External datasets are not supported.")
-
        # indexing_technique is already set in dataset since this is an update
        args["indexing_technique"] = dataset.indexing_technique

--- a/api/controllers/web/audio.py
+++ b/api/controllers/web/audio.py
@ -19,7 +19,7 @@ from controllers.web.error import (
 from controllers.web.wraps import WebApiResource
 from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError
 from core.model_runtime.errors.invoke import InvokeError
-from models.model import App
+from models.model import App, AppMode
 from services.audio_service import AudioService
 from services.errors.audio import (
    AudioTooLargeServiceError,
@ -77,9 +77,21 @@ class TextApi(WebApiResource):

            message_id = args.get("message_id", None)
            text = args.get("text", None)
-            voice = args.get("voice", None)
+            if (
+                app_model.mode in {AppMode.ADVANCED_CHAT.value, AppMode.WORKFLOW.value}
+                and app_model.workflow
+                and app_model.workflow.features_dict
+            ):
+                text_to_speech = app_model.workflow.features_dict.get("text_to_speech", {})
+                voice = args.get("voice") or text_to_speech.get("voice")
+            else:
+                try:
+                    voice = args.get("voice") or app_model.app_model_config.text_to_speech_dict.get("voice")
+                except Exception:
+                    voice = None
+
            response = AudioService.transcript_tts(
-                app_model=app_model, text=text, voice=voice, end_user=end_user.external_user_id, message_id=message_id
+                app_model=app_model, message_id=message_id, end_user=end_user.external_user_id, voice=voice, text=text
            )

            return response
--- a/api/core/agent/base_agent_runner.py
+++ b/api/core/agent/base_agent_runner.py
@ -3,8 +3,6 @@ import logging
 import uuid
 from typing import Optional, Union, cast

-from sqlalchemy import select
-
 from core.agent.entities import AgentEntity, AgentToolEntity
 from core.app.app_config.features.file_upload.manager import FileUploadConfigManager
 from core.app.apps.agent_chat.app_config_manager import AgentChatAppConfig
@ -163,14 +161,10 @@ class BaseAgentRunner(AppRunner):
            if parameter.type == ToolParameter.ToolParameterType.SELECT:
                enum = [option.value for option in parameter.options] if parameter.options else []

-            message_tool.parameters["properties"][parameter.name] = (
-                {
-                    "type": parameter_type,
-                    "description": parameter.llm_description or "",
-                }
-                if parameter.input_schema is None
-                else parameter.input_schema
-            )
+            message_tool.parameters["properties"][parameter.name] = {
+                "type": parameter_type,
+                "description": parameter.llm_description or "",
+            }

            if len(enum) > 0:
                message_tool.parameters["properties"][parameter.name]["enum"] = enum
@ -260,14 +254,10 @@ class BaseAgentRunner(AppRunner):
            if parameter.type == ToolParameter.ToolParameterType.SELECT:
                enum = [option.value for option in parameter.options] if parameter.options else []

-            prompt_tool.parameters["properties"][parameter.name] = (
-                {
-                    "type": parameter_type,
-                    "description": parameter.llm_description or "",
-                }
-                if parameter.input_schema is None
-                else parameter.input_schema
-            )
+            prompt_tool.parameters["properties"][parameter.name] = {
+                "type": parameter_type,
+                "description": parameter.llm_description or "",
+            }

            if len(enum) > 0:
                prompt_tool.parameters["properties"][parameter.name]["enum"] = enum
@ -419,15 +409,12 @@ class BaseAgentRunner(AppRunner):
            if isinstance(prompt_message, SystemPromptMessage):
                result.append(prompt_message)

-        messages = (
-            (
-                db.session.execute(
-                    select(Message)
-                    .where(Message.conversation_id == self.message.conversation_id)
-                    .order_by(Message.created_at.desc())
-                )
+        messages: list[Message] = (
+            db.session.query(Message)
+            .filter(
+                Message.conversation_id == self.message.conversation_id,
            )
-            .scalars()
+            .order_by(Message.created_at.desc())
            .all()
        )

--- a/api/core/agent/plugin_entities.py
+++ b/api/core/agent/plugin_entities.py
@ -85,7 +85,7 @@ class AgentStrategyEntity(BaseModel):
    description: I18nObject = Field(..., description="The description of the agent strategy")
    output_schema: Optional[dict] = None
    features: Optional[list[AgentFeature]] = None
-    meta_version: Optional[str] = None
+
    # pydantic configs
    model_config = ConfigDict(protected_namespaces=())

--- a/api/core/agent/strategy/plugin.py
+++ b/api/core/agent/strategy/plugin.py
@ -15,12 +15,10 @@ class PluginAgentStrategy(BaseAgentStrategy):

    tenant_id: str
    declaration: AgentStrategyEntity
-    meta_version: str | None = None

-    def __init__(self, tenant_id: str, declaration: AgentStrategyEntity, meta_version: str | None):
+    def __init__(self, tenant_id: str, declaration: AgentStrategyEntity):
        self.tenant_id = tenant_id
        self.declaration = declaration
-        self.meta_version = meta_version

    def get_parameters(self) -> Sequence[AgentStrategyParameter]:
        return self.declaration.parameters
--- a/api/core/app/app_config/entities.py
+++ b/api/core/app/app_config/entities.py
@ -113,9 +113,9 @@ class VariableEntity(BaseModel):
    hide: bool = False
    max_length: Optional[int] = None
    options: Sequence[str] = Field(default_factory=list)
-    allowed_file_types: Sequence[FileType] = Field(default_factory=list)
-    allowed_file_extensions: Sequence[str] = Field(default_factory=list)
-    allowed_file_upload_methods: Sequence[FileTransferMethod] = Field(default_factory=list)
+    allowed_file_types: Optional[Sequence[FileType]] = Field(default_factory=list)
+    allowed_file_extensions: Optional[Sequence[str]] = Field(default_factory=list)
+    allowed_file_upload_methods: Optional[Sequence[FileTransferMethod]] = Field(default_factory=list)

    @field_validator("description", mode="before")
    @classmethod
@ -128,6 +128,16 @@ class VariableEntity(BaseModel):
        return v or []


+class RagPipelineVariableEntity(VariableEntity):
+    """
+    Variable entity for one RAG pipeline input; adds the three fields below to VariableEntity.
+    """
+
+    tooltips: Optional[str] = None  # optional, None when not provided
+    placeholder: Optional[str] = None  # optional, None when not provided
+    belong_to_node_id: str  # required; presumably the id of the node this variable belongs to (confirm)
+
+
 class ExternalDataVariableEntity(BaseModel):
    """
    External Data Variable Entity.
@ -285,7 +295,7 @@ class AppConfig(BaseModel):
    tenant_id: str
    app_id: str
    app_mode: AppMode
-    additional_features: AppAdditionalFeatures
+    additional_features: Optional[AppAdditionalFeatures] = None
    variables: list[VariableEntity] = []
    sensitive_word_avoidance: Optional[SensitiveWordAvoidanceEntity] = None

--- a/api/core/app/app_config/workflow_ui_based_app/variables/manager.py
+++ b/api/core/app/app_config/workflow_ui_based_app/variables/manager.py
@ -1,4 +1,4 @@
-from core.app.app_config.entities import VariableEntity
+from core.app.app_config.entities import RagPipelineVariableEntity, VariableEntity
 from models.workflow import Workflow


@ -20,3 +20,19 @@ class WorkflowVariablesConfigManager:
            variables.append(VariableEntity.model_validate(variable))

        return variables
+
+    @classmethod
+    def convert_rag_pipeline_variable(cls, workflow: Workflow) -> list[RagPipelineVariableEntity]:
+        """
+        Convert the workflow's RAG pipeline user input form to variable entities.
+
+        :param workflow: workflow instance
+        :return: one RagPipelineVariableEntity per form entry, in form order
+        """
+        # Validate each form entry into an entity; a validation error on any
+        # entry propagates to the caller.
+        form_entries = workflow.rag_pipeline_user_input_form()
+        return [
+            RagPipelineVariableEntity.model_validate(form_entry)
+            for form_entry in form_entries
+        ]
--- a/api/core/app/apps/advanced_chat/app_generator.py
+++ b/api/core/app/apps/advanced_chat/app_generator.py
@ -25,7 +25,8 @@ from core.app.entities.task_entities import ChatbotAppBlockingResponse, ChatbotA
 from core.model_runtime.errors.invoke import InvokeAuthorizationError
 from core.ops.ops_trace_manager import TraceQueueManager
 from core.prompt.utils.get_thread_messages_length import get_thread_messages_length
-from core.repositories import DifyCoreRepositoryFactory
+from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository
+from core.repositories.sqlalchemy_workflow_execution_repository import SQLAlchemyWorkflowExecutionRepository
 from core.workflow.repositories.draft_variable_repository import (
    DraftVariableSaverFactory,
 )
@ -182,14 +183,14 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
            workflow_triggered_from = WorkflowRunTriggeredFrom.DEBUGGING
        else:
            workflow_triggered_from = WorkflowRunTriggeredFrom.APP_RUN
-        workflow_execution_repository = DifyCoreRepositoryFactory.create_workflow_execution_repository(
+        workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository(
            session_factory=session_factory,
            user=user,
            app_id=application_generate_entity.app_config.app_id,
            triggered_from=workflow_triggered_from,
        )
        # Create workflow node execution repository
-        workflow_node_execution_repository = DifyCoreRepositoryFactory.create_workflow_node_execution_repository(
+        workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository(
            session_factory=session_factory,
            user=user,
            app_id=application_generate_entity.app_config.app_id,
@ -259,14 +260,14 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
        # Create session factory
        session_factory = sessionmaker(bind=db.engine, expire_on_commit=False)
        # Create workflow execution(aka workflow run) repository
-        workflow_execution_repository = DifyCoreRepositoryFactory.create_workflow_execution_repository(
+        workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository(
            session_factory=session_factory,
            user=user,
            app_id=application_generate_entity.app_config.app_id,
            triggered_from=WorkflowRunTriggeredFrom.DEBUGGING,
        )
        # Create workflow node execution repository
-        workflow_node_execution_repository = DifyCoreRepositoryFactory.create_workflow_node_execution_repository(
+        workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository(
            session_factory=session_factory,
            user=user,
            app_id=application_generate_entity.app_config.app_id,
@ -342,14 +343,14 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
        # Create session factory
        session_factory = sessionmaker(bind=db.engine, expire_on_commit=False)
        # Create workflow execution(aka workflow run) repository
-        workflow_execution_repository = DifyCoreRepositoryFactory.create_workflow_execution_repository(
+        workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository(
            session_factory=session_factory,
            user=user,
            app_id=application_generate_entity.app_config.app_id,
            triggered_from=WorkflowRunTriggeredFrom.DEBUGGING,
        )
        # Create workflow node execution repository
-        workflow_node_execution_repository = DifyCoreRepositoryFactory.create_workflow_node_execution_repository(
+        workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository(
            session_factory=session_factory,
            user=user,
            app_id=application_generate_entity.app_config.app_id,
--- a/api/core/app/apps/advanced_chat/app_runner.py
+++ b/api/core/app/apps/advanced_chat/app_runner.py
@ -16,10 +16,9 @@ from core.app.entities.queue_entities import (
    QueueTextChunkEvent,
 )
 from core.moderation.base import ModerationError
-from core.variables.variables import VariableUnion
 from core.workflow.callbacks import WorkflowCallback, WorkflowLoggingCallback
 from core.workflow.entities.variable_pool import VariablePool
-from core.workflow.system_variable import SystemVariable
+from core.workflow.enums import SystemVariableKey
 from core.workflow.variable_loader import VariableLoader
 from core.workflow.workflow_entry import WorkflowEntry
 from extensions.ext_database import db
@ -65,7 +64,7 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner):
        if not workflow:
            raise ValueError("Workflow not initialized")

-        user_id: str | None = None
+        user_id = None
        if self.application_generate_entity.invoke_from in {InvokeFrom.WEB_APP, InvokeFrom.SERVICE_API}:
            end_user = db.session.query(EndUser).filter(EndUser.id == self.application_generate_entity.user_id).first()
            if end_user:
@ -137,25 +136,23 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner):
                session.commit()

            # Create a variable pool.
-            system_inputs = SystemVariable(
-                query=query,
-                files=files,
-                conversation_id=self.conversation.id,
-                user_id=user_id,
-                dialogue_count=self._dialogue_count,
-                app_id=app_config.app_id,
-                workflow_id=app_config.workflow_id,
-                workflow_execution_id=self.application_generate_entity.workflow_run_id,
-            )
+            system_inputs = {
+                SystemVariableKey.QUERY: query,
+                SystemVariableKey.FILES: files,
+                SystemVariableKey.CONVERSATION_ID: self.conversation.id,
+                SystemVariableKey.USER_ID: user_id,
+                SystemVariableKey.DIALOGUE_COUNT: self._dialogue_count,
+                SystemVariableKey.APP_ID: app_config.app_id,
+                SystemVariableKey.WORKFLOW_ID: app_config.workflow_id,
+                SystemVariableKey.WORKFLOW_EXECUTION_ID: self.application_generate_entity.workflow_run_id,
+            }

            # init variable pool
            variable_pool = VariablePool(
                system_variables=system_inputs,
                user_inputs=inputs,
                environment_variables=workflow.environment_variables,
-                # Based on the definition of `VariableUnion`,
-                # `list[Variable]` can be safely used as `list[VariableUnion]` since they are compatible.
-                conversation_variables=cast(list[VariableUnion], conversation_variables),
+                conversation_variables=conversation_variables,
            )

            # init graph
--- a/api/core/app/apps/advanced_chat/generate_task_pipeline.py
+++ b/api/core/app/apps/advanced_chat/generate_task_pipeline.py
@ -61,12 +61,12 @@ from core.base.tts import AppGeneratorTTSPublisher, AudioTrunk
 from core.model_runtime.entities.llm_entities import LLMUsage
 from core.ops.ops_trace_manager import TraceQueueManager
 from core.workflow.entities.workflow_execution import WorkflowExecutionStatus, WorkflowType
+from core.workflow.enums import SystemVariableKey
 from core.workflow.graph_engine.entities.graph_runtime_state import GraphRuntimeState
 from core.workflow.nodes import NodeType
 from core.workflow.repositories.draft_variable_repository import DraftVariableSaverFactory
 from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
 from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository
-from core.workflow.system_variable import SystemVariable
 from core.workflow.workflow_cycle_manager import CycleManagerWorkflowInfo, WorkflowCycleManager
 from events.message_event import message_was_created
 from extensions.ext_database import db
@ -116,16 +116,16 @@ class AdvancedChatAppGenerateTaskPipeline:

        self._workflow_cycle_manager = WorkflowCycleManager(
            application_generate_entity=application_generate_entity,
-            workflow_system_variables=SystemVariable(
-                query=message.query,
-                files=application_generate_entity.files,
-                conversation_id=conversation.id,
-                user_id=user_session_id,
-                dialogue_count=dialogue_count,
-                app_id=application_generate_entity.app_config.app_id,
-                workflow_id=workflow.id,
-                workflow_execution_id=application_generate_entity.workflow_run_id,
-            ),
+            workflow_system_variables={
+                SystemVariableKey.QUERY: message.query,
+                SystemVariableKey.FILES: application_generate_entity.files,
+                SystemVariableKey.CONVERSATION_ID: conversation.id,
+                SystemVariableKey.USER_ID: user_session_id,
+                SystemVariableKey.DIALOGUE_COUNT: dialogue_count,
+                SystemVariableKey.APP_ID: application_generate_entity.app_config.app_id,
+                SystemVariableKey.WORKFLOW_ID: workflow.id,
+                SystemVariableKey.WORKFLOW_EXECUTION_ID: application_generate_entity.workflow_run_id,
+            },
            workflow_info=CycleManagerWorkflowInfo(
                workflow_id=workflow.id,
                workflow_type=WorkflowType(workflow.type),
--- a/api/core/app/apps/common/workflow_response_converter.py
+++ b/api/core/app/apps/common/workflow_response_converter.py
@ -43,11 +43,13 @@ from core.app.entities.task_entities import (
    WorkflowStartStreamResponse,
 )
 from core.file import FILE_MODEL_IDENTITY, File
+from core.plugin.impl.datasource import PluginDatasourceManager
 from core.tools.tool_manager import ToolManager
 from core.variables.segments import ArrayFileSegment, FileSegment, Segment
 from core.workflow.entities.workflow_execution import WorkflowExecution
 from core.workflow.entities.workflow_node_execution import WorkflowNodeExecution, WorkflowNodeExecutionStatus
 from core.workflow.nodes import NodeType
+from core.workflow.nodes.datasource.entities import DatasourceNodeData
 from core.workflow.nodes.tool.entities import ToolNodeData
 from core.workflow.workflow_type_encoder import WorkflowRuntimeTypeConverter
 from models import (
@ -183,6 +185,14 @@ class WorkflowResponseConverter:
                provider_type=node_data.provider_type,
                provider_id=node_data.provider_id,
            )
+        elif event.node_type == NodeType.DATASOURCE:
+            node_data = cast(DatasourceNodeData, event.node_data)
+            manager = PluginDatasourceManager()
+            provider_entity = manager.fetch_datasource_provider(
+                self._application_generate_entity.app_config.tenant_id,
+                f"{node_data.plugin_id}/{node_data.provider_name}",
+            )
+            response.data.extras["icon"] = provider_entity.declaration.identity.icon

        return response

--- a/api/core/app/apps/pipeline/__init__.py
+++ b/api/core/app/apps/pipeline/__init__.py
--- a/api/core/app/apps/pipeline/generate_response_converter.py
+++ b/api/core/app/apps/pipeline/generate_response_converter.py
@ -0,0 +1,95 @@
+from collections.abc import Generator
+from typing import cast
+
+from core.app.apps.base_app_generate_response_converter import AppGenerateResponseConverter
+from core.app.entities.task_entities import (
+    AppStreamResponse,
+    ErrorStreamResponse,
+    NodeFinishStreamResponse,
+    NodeStartStreamResponse,
+    PingStreamResponse,
+    WorkflowAppBlockingResponse,
+    WorkflowAppStreamResponse,
+)
+
+
+class WorkflowAppGenerateResponseConverter(AppGenerateResponseConverter):
+    _blocking_response_type = WorkflowAppBlockingResponse
+
+    @classmethod
+    def convert_blocking_full_response(cls, blocking_response: WorkflowAppBlockingResponse) -> dict:  # type: ignore[override]
+        """
+        Convert a blocking response to its full dict form.
+        :param blocking_response: blocking response
+        :return: a new dict built from ``blocking_response.to_dict()``
+        """
+        return dict(blocking_response.to_dict())
+
+    @classmethod
+    def convert_blocking_simple_response(cls, blocking_response: WorkflowAppBlockingResponse) -> dict:  # type: ignore[override]
+        """
+        Convert a blocking response to its simple form (same output as the full form).
+        :param blocking_response: blocking response
+        :return: the result of ``convert_blocking_full_response``
+        """
+        return cls.convert_blocking_full_response(blocking_response)
+
+    @classmethod
+    def convert_stream_full_response(
+        cls, stream_response: Generator[AppStreamResponse, None, None]
+    ) -> Generator[dict | str, None, None]:
+        """
+        Convert a stream of responses to full chunks, yielding "ping" for ping events.
+        :param stream_response: stream response
+        :return: generator of "ping" strings and dicts holding event, workflow_run_id and the event data
+        """
+        for chunk in stream_response:
+            chunk = cast(WorkflowAppStreamResponse, chunk)
+            sub_stream_response = chunk.stream_response
+
+            if isinstance(sub_stream_response, PingStreamResponse):
+                yield "ping"  # ping events are emitted as a bare string, not a dict
+                continue
+
+            response_chunk = {
+                "event": sub_stream_response.event.value,
+                "workflow_run_id": chunk.workflow_run_id,
+            }
+
+            if isinstance(sub_stream_response, ErrorStreamResponse):
+                data = cls._error_to_stream_response(sub_stream_response.err)
+                response_chunk.update(data)
+            else:
+                response_chunk.update(sub_stream_response.to_dict())
+            yield response_chunk
+
+    @classmethod
+    def convert_stream_simple_response(
+        cls, stream_response: Generator[AppStreamResponse, None, None]
+    ) -> Generator[dict | str, None, None]:
+        """
+        Convert a stream of responses to simple chunks; node start/finish events use to_ignore_detail_dict().
+        :param stream_response: stream response
+        :return: generator of "ping" strings and dicts holding event, workflow_run_id and the event data
+        """
+        for chunk in stream_response:
+            chunk = cast(WorkflowAppStreamResponse, chunk)
+            sub_stream_response = chunk.stream_response
+
+            if isinstance(sub_stream_response, PingStreamResponse):
+                yield "ping"  # ping events are emitted as a bare string, not a dict
+                continue
+
+            response_chunk = {
+                "event": sub_stream_response.event.value,
+                "workflow_run_id": chunk.workflow_run_id,
+            }
+
+            if isinstance(sub_stream_response, ErrorStreamResponse):
+                data = cls._error_to_stream_response(sub_stream_response.err)
+                response_chunk.update(data)
+            elif isinstance(sub_stream_response, NodeStartStreamResponse | NodeFinishStreamResponse):
+                response_chunk.update(sub_stream_response.to_ignore_detail_dict())
+            else:
+                response_chunk.update(sub_stream_response.to_dict())
+            yield response_chunk
--- a/api/core/app/apps/pipeline/pipeline_config_manager.py
+++ b/api/core/app/apps/pipeline/pipeline_config_manager.py
@ -0,0 +1,64 @@
+from core.app.app_config.base_app_config_manager import BaseAppConfigManager
+from core.app.app_config.common.sensitive_word_avoidance.manager import SensitiveWordAvoidanceConfigManager
+from core.app.app_config.entities import RagPipelineVariableEntity, WorkflowUIBasedAppConfig
+from core.app.app_config.features.file_upload.manager import FileUploadConfigManager
+from core.app.app_config.features.text_to_speech.manager import TextToSpeechConfigManager
+from core.app.app_config.workflow_ui_based_app.variables.manager import WorkflowVariablesConfigManager
+from models.dataset import Pipeline
+from models.model import AppMode
+from models.workflow import Workflow
+
+
+class PipelineConfig(WorkflowUIBasedAppConfig):
+    """
+    Pipeline config entity: a WorkflowUIBasedAppConfig that additionally
+    carries the RAG pipeline variables (empty list by default).
+    """
+
+    rag_pipeline_variables: list[RagPipelineVariableEntity] = []
+
+
+class PipelineConfigManager(BaseAppConfigManager):
+    @classmethod
+    def get_pipeline_config(cls, pipeline: Pipeline, workflow: Workflow) -> PipelineConfig:
+        """
+        Build the PipelineConfig for a pipeline and its workflow.
+
+        :param pipeline: pipeline whose tenant_id and id (as app_id) go into the config
+        :param workflow: workflow whose id and RAG pipeline variables go into the config
+        :return: PipelineConfig with app_mode set to AppMode.RAG_PIPELINE
+        """
+        return PipelineConfig(
+            tenant_id=pipeline.tenant_id,
+            app_id=pipeline.id,
+            app_mode=AppMode.RAG_PIPELINE,
+            workflow_id=workflow.id,
+            rag_pipeline_variables=WorkflowVariablesConfigManager.convert_rag_pipeline_variable(workflow=workflow),
+        )
+
+    @classmethod
+    def config_validate(cls, tenant_id: str, config: dict, only_structure_validate: bool = False) -> dict:
+        """
+        Validate a pipeline config and keep only the keys reported by the validators.
+
+        :param tenant_id: tenant id
+        :param config: app model config args
+        :param only_structure_validate: only validate the structure of the config
+        :return: dict holding only the keys reported by the feature validators
+        """
+        # file upload validation
+        config, file_upload_keys = FileUploadConfigManager.validate_and_set_defaults(config=config)
+
+        # text_to_speech
+        config, text_to_speech_keys = TextToSpeechConfigManager.validate_and_set_defaults(config)
+
+        # moderation validation
+        config, moderation_keys = SensitiveWordAvoidanceConfigManager.validate_and_set_defaults(
+            tenant_id=tenant_id, config=config, only_structure_validate=only_structure_validate
+        )
+
+        # Collect the keys of all validators in call order, then de-duplicate them
+        related_config_keys = list(set([*file_upload_keys, *text_to_speech_keys, *moderation_keys]))
+
+        # Filter out extra parameters
+        return {key: config.get(key) for key in related_config_keys}
--- a/api/core/app/apps/pipeline/pipeline_generator.py
+++ b/api/core/app/apps/pipeline/pipeline_generator.py
@ -0,0 +1,621 @@
+import contextvars
+import datetime
+import json
+import logging
+import secrets
+import threading
+import time
+import uuid
+from collections.abc import Generator, Mapping
+from typing import Any, Literal, Optional, Union, overload
+
+from flask import Flask, current_app
+from pydantic import ValidationError
+from sqlalchemy.orm import sessionmaker
+
+import contexts
+from configs import dify_config
+from core.app.apps.base_app_generator import BaseAppGenerator
+from core.app.apps.base_app_queue_manager import AppQueueManager, GenerateTaskStoppedError, PublishFrom
+from core.app.apps.pipeline.pipeline_config_manager import PipelineConfigManager
+from core.app.apps.pipeline.pipeline_queue_manager import PipelineQueueManager
+from core.app.apps.pipeline.pipeline_runner import PipelineRunner
+from core.app.apps.workflow.generate_response_converter import WorkflowAppGenerateResponseConverter
+from core.app.apps.workflow.generate_task_pipeline import WorkflowAppGenerateTaskPipeline
+from core.app.entities.app_invoke_entities import InvokeFrom, RagPipelineGenerateEntity
+from core.app.entities.task_entities import WorkflowAppBlockingResponse, WorkflowAppStreamResponse
+from core.entities.knowledge_entities import PipelineDataset, PipelineDocument
+from core.model_runtime.errors.invoke import InvokeAuthorizationError
+from core.rag.index_processor.constant.built_in_field import BuiltInField
+from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository
+from core.repositories.sqlalchemy_workflow_execution_repository import SQLAlchemyWorkflowExecutionRepository
+from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
+from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository
+from extensions.ext_database import db
+from libs.flask_utils import preserve_flask_contexts
+from models import Account, EndUser, Workflow, WorkflowNodeExecutionTriggeredFrom
+from models.dataset import Document, DocumentPipelineExecutionLog, Pipeline
+from models.enums import WorkflowRunTriggeredFrom
+from models.model import AppMode
+from services.dataset_service import DocumentService
+
+logger = logging.getLogger(__name__)
+
+
+class PipelineGenerator(BaseAppGenerator):
+    # Typing overload: streaming is literally True.
+    @overload
+    def generate(
+        self,
+        *,
+        pipeline: Pipeline,
+        workflow: Workflow,
+        user: Union[Account, EndUser],
+        args: Mapping[str, Any],
+        invoke_from: InvokeFrom,
+        streaming: Literal[True],
+        call_depth: int,
+        workflow_thread_pool_id: Optional[str],
+    ) -> Mapping[str, Any] | Generator[Mapping | str, None, None] | None: ...
+
+    # Typing overload: streaming is literally False.
+    @overload
+    def generate(
+        self,
+        *,
+        pipeline: Pipeline,
+        workflow: Workflow,
+        user: Union[Account, EndUser],
+        args: Mapping[str, Any],
+        invoke_from: InvokeFrom,
+        streaming: Literal[False],
+        call_depth: int,
+        workflow_thread_pool_id: Optional[str],
+    ) -> Mapping[str, Any]: ...
+
+    # Typing overload: streaming is only known to be a bool.
+    @overload
+    def generate(
+        self,
+        *,
+        pipeline: Pipeline,
+        workflow: Workflow,
+        user: Union[Account, EndUser],
+        args: Mapping[str, Any],
+        invoke_from: InvokeFrom,
+        streaming: bool,
+        call_depth: int,
+        workflow_thread_pool_id: Optional[str],
+    ) -> Union[Mapping[str, Any], Generator[Mapping | str, None, None]]: ...
+
+    def generate(
+        self,
+        *,
+        pipeline: Pipeline,
+        workflow: Workflow,
+        user: Union[Account, EndUser],
+        args: Mapping[str, Any],
+        invoke_from: InvokeFrom,
+        streaming: bool = True,
+        call_depth: int = 0,
+        workflow_thread_pool_id: Optional[str] = None,
+    ) -> Union[Mapping[str, Any], Generator[Mapping | str, None, None], None]:
+        """
+        Run the pipeline workflow once per entry of ``args["datasource_info_list"]``.
+
+        For ``InvokeFrom.PUBLISHED`` a Document row and a DocumentPipelineExecutionLog
+        row are created and committed for every datasource entry. For
+        ``InvokeFrom.DEBUGGER`` the result of ``_generate`` for the first datasource
+        entry is returned directly. For every other source each entry is handed to
+        ``_generate`` on its own thread and a summary dict is returned.
+
+        :param pipeline: pipeline to run; its dataset must be set
+        :param workflow: workflow whose id is passed on to ``_generate``
+        :param user: account or end user
+        :param args: request args; read keys are "inputs", "start_node_id",
+            "datasource_type" and "datasource_info_list"
+        :param invoke_from: invoke from source
+        :param streaming: is stream
+        :param call_depth: call depth stored on the generate entity
+        :param workflow_thread_pool_id: workflow thread pool id
+        :return: the ``_generate`` result for debugger runs, otherwise a dict with
+            the keys "batch", "dataset" and "documents"
+        :raises ValueError: if the pipeline has no dataset
+        """
+        # convert to app config
+        pipeline_config = PipelineConfigManager.get_pipeline_config(
+            pipeline=pipeline,
+            workflow=workflow,
+        )
+        # Add null check for dataset
+        dataset = pipeline.dataset
+        if not dataset:
+            raise ValueError("Pipeline dataset is required")
+        inputs: Mapping[str, Any] = args["inputs"]
+        start_node_id: str = args["start_node_id"]
+        datasource_type: str = args["datasource_type"]
+        datasource_info_list: list[Mapping[str, Any]] = args["datasource_info_list"]
+        # batch id: local timestamp followed by a random number in 100000..999999
+        batch = time.strftime("%Y%m%d%H%M%S") + str(secrets.randbelow(900000) + 100000)
+        documents = []
+        if invoke_from == InvokeFrom.PUBLISHED:
+            # one Document per datasource entry; all of them are committed together after the loop
+            for datasource_info in datasource_info_list:
+                # NOTE(review): the position is queried once per entry before the commit below;
+                # presumably this relies on session autoflush to see the rows just added - confirm
+                position = DocumentService.get_documents_position(dataset.id)
+                document = self._build_document(
+                    tenant_id=pipeline.tenant_id,
+                    dataset_id=dataset.id,
+                    built_in_field_enabled=dataset.built_in_field_enabled,
+                    datasource_type=datasource_type,
+                    datasource_info=datasource_info,
+                    created_from="rag-pipeline",
+                    position=position,
+                    account=user,
+                    batch=batch,
+                    document_form=dataset.chunk_structure,
+                )
+                db.session.add(document)
+                documents.append(document)
+            db.session.commit()
+
+        # build one generate entity per datasource entry and run it
+        # (inline for debugger runs, in a child thread otherwise)
+        for i, datasource_info in enumerate(datasource_info_list):
+            workflow_run_id = str(uuid.uuid4())
+            document_id = None
+            if invoke_from == InvokeFrom.PUBLISHED:
+                # documents was filled in the same order as datasource_info_list
+                document_id = documents[i].id
+                document_pipeline_execution_log = DocumentPipelineExecutionLog(
+                    document_id=document_id,
+                    datasource_type=datasource_type,
+                    datasource_info=json.dumps(datasource_info),
+                    datasource_node_id=start_node_id,
+                    input_data=inputs,
+                    pipeline_id=pipeline.id,
+                    created_by=user.id,
+                )
+                db.session.add(document_pipeline_execution_log)
+                db.session.commit()
+            application_generate_entity = RagPipelineGenerateEntity(
+                task_id=str(uuid.uuid4()),
+                app_config=pipeline_config,
+                pipeline_config=pipeline_config,
+                datasource_type=datasource_type,
+                datasource_info=datasource_info,
+                dataset_id=dataset.id,
+                start_node_id=start_node_id,
+                batch=batch,
+                document_id=document_id,
+                inputs=self._prepare_user_inputs(
+                    user_inputs=inputs,
+                    variables=pipeline_config.rag_pipeline_variables,
+                    tenant_id=pipeline.tenant_id,
+                    strict_type_validation=True if invoke_from == InvokeFrom.SERVICE_API else False,
+                ),
+                files=[],
+                user_id=user.id,
+                stream=streaming,
+                invoke_from=invoke_from,
+                call_depth=call_depth,
+                workflow_execution_id=workflow_run_id,
+            )
+
+            # reset the plugin tool provider context before the context is copied below
+            contexts.plugin_tool_providers.set({})
+            contexts.plugin_tool_providers_lock.set(threading.Lock())
+            if invoke_from == InvokeFrom.DEBUGGER:
+                workflow_triggered_from = WorkflowRunTriggeredFrom.RAG_PIPELINE_DEBUGGING
+            else:
+                workflow_triggered_from = WorkflowRunTriggeredFrom.RAG_PIPELINE_RUN
+            # Create workflow execution and workflow node execution repositories
+            session_factory = sessionmaker(bind=db.engine, expire_on_commit=False)
+            workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository(
+                session_factory=session_factory,
+                user=user,
+                app_id=application_generate_entity.app_config.app_id,
+                triggered_from=workflow_triggered_from,
+            )
+
+            workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository(
+                session_factory=session_factory,
+                user=user,
+                app_id=application_generate_entity.app_config.app_id,
+                triggered_from=WorkflowNodeExecutionTriggeredFrom.RAG_PIPELINE_RUN,
+            )
+            if invoke_from == InvokeFrom.DEBUGGER:
+                # debugger runs return on the first datasource entry; later entries are never reached
+                return self._generate(
+                    flask_app=current_app._get_current_object(),  # type: ignore
+                    context=contextvars.copy_context(),
+                    pipeline=pipeline,
+                    workflow_id=workflow.id,
+                    user=user,
+                    application_generate_entity=application_generate_entity,
+                    invoke_from=invoke_from,
+                    workflow_execution_repository=workflow_execution_repository,
+                    workflow_node_execution_repository=workflow_node_execution_repository,
+                    streaming=streaming,
+                    workflow_thread_pool_id=workflow_thread_pool_id,
+                )
+            else:
+                # run in child thread
+                # NOTE(review): threading.Thread discards the return value of its target, so the
+                # response returned by _generate is never read here - confirm that a streaming
+                # (generator) response does not need to be consumed for the run to complete
+                context = contextvars.copy_context()
+
+                worker_thread = threading.Thread(
+                    target=self._generate,
+                    kwargs={
+                        "flask_app": current_app._get_current_object(),  # type: ignore
+                        "context": context,
+                        "pipeline": pipeline,
+                        "workflow_id": workflow.id,
+                        "user": user,
+                        "application_generate_entity": application_generate_entity,
+                        "invoke_from": invoke_from,
+                        "workflow_execution_repository": workflow_execution_repository,
+                        "workflow_node_execution_repository": workflow_node_execution_repository,
+                        "streaming": streaming,
+                        "workflow_thread_pool_id": workflow_thread_pool_id,
+                    },
+                )
+
+                worker_thread.start()
+        # return batch, dataset, documents (documents is empty unless invoke_from is PUBLISHED)
+        return {
+            "batch": batch,
+            "dataset": PipelineDataset(
+                id=dataset.id,
+                name=dataset.name,
+                description=dataset.description,
+                chunk_structure=dataset.chunk_structure,
+            ).model_dump(),
+            "documents": [
+                PipelineDocument(
+                    id=document.id,
+                    position=document.position,
+                    data_source_type=document.data_source_type,
+                    data_source_info=json.loads(document.data_source_info) if document.data_source_info else None,
+                    name=document.name,
+                    indexing_status=document.indexing_status,
+                    error=document.error,
+                    enabled=document.enabled,
+                ).model_dump()
+                for document in documents
+            ],
+        }
+
+    def _generate(
+        self,
+        *,
+        flask_app: Flask,
+        context: contextvars.Context,
+        pipeline: Pipeline,
+        workflow_id: str,
+        user: Union[Account, EndUser],
+        application_generate_entity: RagPipelineGenerateEntity,
+        invoke_from: InvokeFrom,
+        workflow_execution_repository: WorkflowExecutionRepository,
+        workflow_node_execution_repository: WorkflowNodeExecutionRepository,
+        streaming: bool = True,
+        workflow_thread_pool_id: Optional[str] = None,
+    ) -> Union[Mapping[str, Any], Generator[str | Mapping[str, Any], None, None]]:
+        """
+        Start the pipeline worker thread and return the converted response.
+
+        :param flask_app: Flask app passed to preserve_flask_contexts
+        :param context: context variables passed to preserve_flask_contexts
+        :param pipeline: Pipeline (not read by this method)
+        :param workflow_id: id of the workflow to load from the database
+        :param user: account or end user
+        :param application_generate_entity: application generate entity
+        :param invoke_from: invoke from source
+        :param workflow_execution_repository: repository for workflow execution
+        :param workflow_node_execution_repository: repository for workflow node execution
+        :param streaming: is stream
+        :param workflow_thread_pool_id: workflow thread pool id
+        :raises ValueError: if no workflow with the given id exists
+        """
+        with preserve_flask_contexts(flask_app, context_vars=context):
+            workflow = db.session.query(Workflow).filter(Workflow.id == workflow_id).first()
+            if not workflow:
+                raise ValueError(f"Workflow not found: {workflow_id}")
+
+            # init queue manager; the same instance is given to the worker and to the response handler
+            queue_manager = PipelineQueueManager(
+                task_id=application_generate_entity.task_id,
+                user_id=application_generate_entity.user_id,
+                invoke_from=application_generate_entity.invoke_from,
+                app_mode=AppMode.RAG_PIPELINE,
+            )
+
+            # fresh copy of the current context for the worker thread
+            worker_context = contextvars.copy_context()
+            worker_kwargs = {
+                "flask_app": current_app._get_current_object(),  # type: ignore
+                "context": worker_context,
+                "queue_manager": queue_manager,
+                "application_generate_entity": application_generate_entity,
+                "workflow_thread_pool_id": workflow_thread_pool_id,
+            }
+
+            # new thread
+            threading.Thread(target=self._generate_worker, kwargs=worker_kwargs).start()
+
+            # return response or stream generator
+            return WorkflowAppGenerateResponseConverter.convert(
+                response=self._handle_response(
+                    application_generate_entity=application_generate_entity,
+                    workflow=workflow,
+                    queue_manager=queue_manager,
+                    user=user,
+                    workflow_execution_repository=workflow_execution_repository,
+                    workflow_node_execution_repository=workflow_node_execution_repository,
+                    stream=streaming,
+                ),
+                invoke_from=invoke_from,
+            )
+
+    def single_iteration_generate(
+        self,
+        pipeline: Pipeline,
+        workflow: Workflow,
+        node_id: str,
+        user: Account | EndUser,
+        args: Mapping[str, Any],
+        streaming: bool = True,
+    ) -> Mapping[str, Any] | Generator[str | Mapping[str, Any], None, None]:
+        """
+        Run the pipeline in debugger mode for a single-iteration request.
+
+        :param pipeline: Pipeline; its dataset must be set
+        :param workflow: Workflow
+        :param node_id: the node id
+        :param user: account or end user
+        :param args: request args; "inputs" is required, "datasource_type",
+            "datasource_info", "batch" and "document_id" are optional
+        :param streaming: is streamed
+        :return: the response produced by ``_generate``
+        :raises ValueError: if node_id is empty, "inputs" is missing or the pipeline has no dataset
+        """
+        if not node_id:
+            raise ValueError("node_id is required")
+
+        if args.get("inputs") is None:
+            raise ValueError("inputs is required")
+
+        # convert to app config
+        pipeline_config = PipelineConfigManager.get_pipeline_config(pipeline=pipeline, workflow=workflow)
+
+        dataset = pipeline.dataset
+        if not dataset:
+            raise ValueError("Pipeline dataset is required")
+
+        # init application generate entity - use RagPipelineGenerateEntity instead
+        # NOTE(review): node_id and args["inputs"] are validated above but are not passed to the
+        # entity, whereas single_loop_generate sets single_loop_run - confirm whether an
+        # equivalent single-iteration field should be set here.
+        application_generate_entity = RagPipelineGenerateEntity(
+            task_id=str(uuid.uuid4()),
+            app_config=pipeline_config,
+            pipeline_config=pipeline_config,
+            datasource_type=args.get("datasource_type", ""),
+            datasource_info=args.get("datasource_info", {}),
+            dataset_id=dataset.id,
+            batch=args.get("batch", ""),
+            document_id=args.get("document_id"),
+            inputs={},
+            files=[],
+            user_id=user.id,
+            stream=streaming,
+            invoke_from=InvokeFrom.DEBUGGER,
+            call_depth=0,
+            workflow_execution_id=str(uuid.uuid4()),
+        )
+        contexts.plugin_tool_providers.set({})
+        contexts.plugin_tool_providers_lock.set(threading.Lock())
+        # Create workflow execution and workflow node execution repositories
+        session_factory = sessionmaker(bind=db.engine, expire_on_commit=False)
+
+        workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository(
+            session_factory=session_factory,
+            user=user,
+            app_id=application_generate_entity.app_config.app_id,
+            triggered_from=WorkflowRunTriggeredFrom.RAG_PIPELINE_DEBUGGING,
+        )
+
+        workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository(
+            session_factory=session_factory,
+            user=user,
+            app_id=application_generate_entity.app_config.app_id,
+            triggered_from=WorkflowNodeExecutionTriggeredFrom.SINGLE_STEP,
+        )
+
+        return self._generate(
+            flask_app=current_app._get_current_object(),  # type: ignore
+            # _generate requires the context keyword; copy it after the plugin context vars are set
+            context=contextvars.copy_context(),
+            pipeline=pipeline,
+            workflow_id=workflow.id,
+            user=user,
+            invoke_from=InvokeFrom.DEBUGGER,
+            application_generate_entity=application_generate_entity,
+            workflow_execution_repository=workflow_execution_repository,
+            workflow_node_execution_repository=workflow_node_execution_repository,
+            streaming=streaming,
+        )
+
+    def single_loop_generate(
+        self,
+        pipeline: Pipeline,
+        workflow: Workflow,
+        node_id: str,
+        user: Account | EndUser,
+        args: Mapping[str, Any],
+        streaming: bool = True,
+    ) -> Mapping[str, Any] | Generator[str | Mapping[str, Any], None, None]:
+        """
+        Run a single loop node of the pipeline in debugger mode.
+
+        :param pipeline: Pipeline; its dataset must be set
+        :param workflow: Workflow
+        :param node_id: the node id of the loop to run
+        :param user: account or end user
+        :param args: request args; "inputs" is required, "datasource_type",
+            "datasource_info", "batch" and "document_id" are optional
+        :param streaming: is streamed
+        :return: the response produced by ``_generate``
+        :raises ValueError: if node_id is empty, "inputs" is missing or the pipeline has no dataset
+        """
+        if not node_id:
+            raise ValueError("node_id is required")
+
+        if args.get("inputs") is None:
+            raise ValueError("inputs is required")
+
+        dataset = pipeline.dataset
+        if not dataset:
+            raise ValueError("Pipeline dataset is required")
+
+        # convert to app config
+        pipeline_config = PipelineConfigManager.get_pipeline_config(pipeline=pipeline, workflow=workflow)
+
+        # init application generate entity
+        application_generate_entity = RagPipelineGenerateEntity(
+            task_id=str(uuid.uuid4()),
+            app_config=pipeline_config,
+            pipeline_config=pipeline_config,
+            datasource_type=args.get("datasource_type", ""),
+            datasource_info=args.get("datasource_info", {}),
+            batch=args.get("batch", ""),
+            document_id=args.get("document_id"),
+            dataset_id=dataset.id,
+            inputs={},
+            files=[],
+            user_id=user.id,
+            stream=streaming,
+            invoke_from=InvokeFrom.DEBUGGER,
+            extras={"auto_generate_conversation_name": False},
+            single_loop_run=RagPipelineGenerateEntity.SingleLoopRunEntity(node_id=node_id, inputs=args["inputs"]),
+            workflow_execution_id=str(uuid.uuid4()),
+        )
+        contexts.plugin_tool_providers.set({})
+        contexts.plugin_tool_providers_lock.set(threading.Lock())
+
+        # Create workflow execution and workflow node execution repositories
+        session_factory = sessionmaker(bind=db.engine, expire_on_commit=False)
+
+        workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository(
+            session_factory=session_factory,
+            user=user,
+            app_id=application_generate_entity.app_config.app_id,
+            triggered_from=WorkflowRunTriggeredFrom.RAG_PIPELINE_DEBUGGING,
+        )
+
+        workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository(
+            session_factory=session_factory,
+            user=user,
+            app_id=application_generate_entity.app_config.app_id,
+            triggered_from=WorkflowNodeExecutionTriggeredFrom.SINGLE_STEP,
+        )
+
+        return self._generate(
+            flask_app=current_app._get_current_object(),  # type: ignore
+            # _generate requires the context keyword; copy it after the plugin context vars are set
+            context=contextvars.copy_context(),
+            pipeline=pipeline,
+            # _generate takes the workflow id and reloads the workflow itself
+            workflow_id=workflow.id,
+            user=user,
+            invoke_from=InvokeFrom.DEBUGGER,
+            application_generate_entity=application_generate_entity,
+            workflow_execution_repository=workflow_execution_repository,
+            workflow_node_execution_repository=workflow_node_execution_repository,
+            streaming=streaming,
+        )
+
+    def _generate_worker(
+        self,
+        flask_app: Flask,
+        application_generate_entity: RagPipelineGenerateEntity,
+        queue_manager: AppQueueManager,
+        context: contextvars.Context,
+        workflow_thread_pool_id: Optional[str] = None,
+    ) -> None:
+        """
+        Thread entry point: run the pipeline and report failures through the queue.
+
+        :param flask_app: Flask app
+        :param application_generate_entity: application generate entity
+        :param queue_manager: queue manager that receives published errors
+        :param context: context variables passed to preserve_flask_contexts
+        :param workflow_thread_pool_id: workflow thread pool id
+        :return:
+        """
+
+        with preserve_flask_contexts(flask_app, context_vars=context):
+            try:
+                # workflow app
+                PipelineRunner(
+                    application_generate_entity=application_generate_entity,
+                    queue_manager=queue_manager,
+                    workflow_thread_pool_id=workflow_thread_pool_id,
+                ).run()
+            except GenerateTaskStoppedError:
+                # a stopped task is not published as an error
+                pass
+            except InvokeAuthorizationError:
+                # publish a fresh error with a fixed message instead of the caught one
+                auth_error = InvokeAuthorizationError("Incorrect API key provided")
+                queue_manager.publish_error(auth_error, PublishFrom.APPLICATION_MANAGER)
+            except ValidationError as exc:
+                logger.exception("Validation Error when generating")
+                queue_manager.publish_error(exc, PublishFrom.APPLICATION_MANAGER)
+            except ValueError as exc:
+                # value errors are only logged in debug mode but always published
+                if dify_config.DEBUG:
+                    logger.exception("Error when generating")
+                queue_manager.publish_error(exc, PublishFrom.APPLICATION_MANAGER)
+            except Exception as exc:
+                logger.exception("Unknown Error when generating")
+                queue_manager.publish_error(exc, PublishFrom.APPLICATION_MANAGER)
+            finally:
+                db.session.close()
+
+    def _handle_response(
+        self,
+        application_generate_entity: RagPipelineGenerateEntity,
+        workflow: Workflow,
+        queue_manager: AppQueueManager,
+        user: Union[Account, EndUser],
+        workflow_execution_repository: WorkflowExecutionRepository,
+        workflow_node_execution_repository: WorkflowNodeExecutionRepository,
+        stream: bool = False,
+    ) -> Union[WorkflowAppBlockingResponse, Generator[WorkflowAppStreamResponse, None, None]]:
+        """
+        Build the generate task pipeline and return what its process() call yields.
+
+        :param application_generate_entity: application generate entity
+        :param workflow: workflow
+        :param queue_manager: queue manager
+        :param user: account or end user
+        :param workflow_execution_repository: repository for workflow execution
+        :param workflow_node_execution_repository: repository for workflow node execution
+        :param stream: is stream
+        :return: blocking response or stream response generator
+        :raises GenerateTaskStoppedError: if processing fails with the closed-file ValueError
+        """
+        # init generate task pipeline
+        task_pipeline = WorkflowAppGenerateTaskPipeline(
+            application_generate_entity=application_generate_entity,
+            workflow=workflow,
+            queue_manager=queue_manager,
+            user=user,
+            stream=stream,
+            workflow_node_execution_repository=workflow_node_execution_repository,
+            workflow_execution_repository=workflow_execution_repository,
+        )
+
+        try:
+            return task_pipeline.process()
+        except ValueError as exc:
+            # the closed-file error is reported as a stopped task instead of a failure
+            if exc.args and exc.args[0] == "I/O operation on closed file.":  # ignore this error
+                raise GenerateTaskStoppedError()
+
+            logger.exception(
+                f"Fails to process generate task pipeline, task_id: {application_generate_entity.task_id}"
+            )
+            raise exc
+    def _build_document(
+        self,
+        tenant_id: str,
+        dataset_id: str,
+        built_in_field_enabled: bool,
+        datasource_type: str,
+        datasource_info: Mapping[str, Any],
+        created_from: str,
+        position: int,
+        account: Union[Account, EndUser],
+        batch: str,
+        document_form: str,
+    ) -> Document:
+        """
+        Build an unsaved Document for one datasource entry.
+
+        The document name is read from the datasource info: "name" for
+        "local_file", "page" -> "page_name" for "online_document" and "title"
+        for "website_crawl".
+
+        :param tenant_id: tenant id
+        :param dataset_id: dataset id
+        :param built_in_field_enabled: whether built-in metadata fields are filled in
+        :param datasource_type: one of "local_file", "online_document", "website_crawl"
+        :param datasource_info: datasource description, stored as JSON on the document
+        :param created_from: value stored in the document's created_from field
+        :param position: position of the document
+        :param account: creator; its id and, for built-in metadata, its name are used
+        :param batch: batch identifier
+        :param document_form: value stored in the document's doc_form field
+        :return: the new Document; it is not added to any session here
+        :raises ValueError: if the datasource type is not supported
+        """
+        if datasource_type == "local_file":
+            name = datasource_info["name"]
+        elif datasource_type == "online_document":
+            name = datasource_info["page"]["page_name"]
+        elif datasource_type == "website_crawl":
+            name = datasource_info["title"]
+        else:
+            raise ValueError(f"Unsupported datasource type: {datasource_type}")
+
+        document = Document(
+            tenant_id=tenant_id,
+            dataset_id=dataset_id,
+            position=position,
+            data_source_type=datasource_type,
+            data_source_info=json.dumps(datasource_info),
+            batch=batch,
+            name=name,
+            created_from=created_from,
+            created_by=account.id,
+            doc_form=document_form,
+        )
+        if built_in_field_enabled:
+            # take the timestamp once so upload_date and last_update_date cannot
+            # differ when the clock crosses a second boundary between two calls
+            created_at = datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%d %H:%M:%S")
+            document.doc_metadata = {
+                BuiltInField.document_name: name,
+                BuiltInField.uploader: account.name,
+                BuiltInField.upload_date: created_at,
+                BuiltInField.last_update_date: created_at,
+                BuiltInField.source: datasource_type,
+            }
+        return document
--- a/api/core/app/apps/pipeline/pipeline_queue_manager.py
+++ b/api/core/app/apps/pipeline/pipeline_queue_manager.py
@ -0,0 +1,44 @@
+from core.app.apps.base_app_queue_manager import AppQueueManager, GenerateTaskStoppedError, PublishFrom
+from core.app.entities.app_invoke_entities import InvokeFrom
+from core.app.entities.queue_entities import (
+    AppQueueEvent,
+    QueueErrorEvent,
+    QueueMessageEndEvent,
+    QueueStopEvent,
+    QueueWorkflowFailedEvent,
+    QueueWorkflowPartialSuccessEvent,
+    QueueWorkflowSucceededEvent,
+    WorkflowQueueMessage,
+)
+
+
+class PipelineQueueManager(AppQueueManager):
+    """Queue manager that publishes pipeline events as workflow queue messages."""
+
+    def __init__(self, task_id: str, user_id: str, invoke_from: InvokeFrom, app_mode: str) -> None:
+        super().__init__(task_id, user_id, invoke_from)
+
+        # app mode stamped on every published message
+        self._app_mode = app_mode
+
+    def _publish(self, event: AppQueueEvent, pub_from: PublishFrom) -> None:
+        """
+        Put the event on the queue and stop listening after a closing event.
+
+        :param event: event to publish
+        :param pub_from: publisher of the event
+        :raises GenerateTaskStoppedError: if the application manager publishes while stopped
+        :return:
+        """
+        # event types after which listening is stopped
+        closing_event_types = (
+            QueueStopEvent,
+            QueueErrorEvent,
+            QueueMessageEndEvent,
+            QueueWorkflowSucceededEvent,
+            QueueWorkflowFailedEvent,
+            QueueWorkflowPartialSuccessEvent,
+        )
+
+        self._q.put(WorkflowQueueMessage(task_id=self._task_id, app_mode=self._app_mode, event=event))
+
+        if isinstance(event, closing_event_types):
+            self.stop_listen()
+
+        if pub_from == PublishFrom.APPLICATION_MANAGER and self._is_stopped():
+            raise GenerateTaskStoppedError()
--- a/api/core/app/apps/pipeline/pipeline_runner.py
+++ b/api/core/app/apps/pipeline/pipeline_runner.py
@ -0,0 +1,221 @@
+import logging
+from collections.abc import Mapping
+from typing import Any, Optional, cast
+
+from configs import dify_config
+from core.app.apps.base_app_queue_manager import AppQueueManager
+from core.app.apps.pipeline.pipeline_config_manager import PipelineConfig
+from core.app.apps.workflow_app_runner import WorkflowBasedAppRunner
+from core.app.entities.app_invoke_entities import (
+    InvokeFrom,
+    RagPipelineGenerateEntity,
+)
+from core.variables.variables import RAGPipelineVariable, RAGPipelineVariableInput
+from core.workflow.callbacks import WorkflowCallback, WorkflowLoggingCallback
+from core.workflow.entities.variable_pool import VariablePool
+from core.workflow.enums import SystemVariableKey
+from core.workflow.graph_engine.entities.graph import Graph
+from core.workflow.workflow_entry import WorkflowEntry
+from extensions.ext_database import db
+from models.dataset import Pipeline
+from models.enums import UserFrom
+from models.model import EndUser
+from models.workflow import Workflow, WorkflowType
+
+logger = logging.getLogger(__name__)
+
+
+class PipelineRunner(WorkflowBasedAppRunner):
+    """
+    Pipeline Application Runner
+
+    Builds the graph and variable pool for a RAG pipeline run and drives it through ``WorkflowEntry``,
+    passing every emitted event to ``self._handle_event``.
+    """
+
+    def __init__(
+        self,
+        application_generate_entity: RagPipelineGenerateEntity,
+        queue_manager: AppQueueManager,
+        workflow_thread_pool_id: Optional[str] = None,
+    ) -> None:
+        """
+        :param application_generate_entity: application generate entity
+        :param queue_manager: application queue manager
+        :param workflow_thread_pool_id: workflow thread pool id
+        """
+        self.application_generate_entity = application_generate_entity
+        self.queue_manager = queue_manager
+        self.workflow_thread_pool_id = workflow_thread_pool_id
+
+    def _get_app_id(self) -> str:
+        """Return the app id stored on the generate entity's app config."""
+        return self.application_generate_entity.app_config.app_id
+
+    def run(self) -> None:
+        """
+        Run application
+
+        Loads the pipeline and its workflow, builds the graph and variable pool (single-iteration run,
+        single-loop run, or a full pipeline run), then executes the workflow and handles each event.
+
+        :raises ValueError: if the pipeline or the workflow cannot be found
+        """
+        app_config = self.application_generate_entity.app_config
+        app_config = cast(PipelineConfig, app_config)
+
+        # For WEB_APP / SERVICE_API invocations the entity's user_id is looked up as an EndUser id and
+        # the end user's session_id is used instead; user_id stays None when no such EndUser exists.
+        user_id = None
+        if self.application_generate_entity.invoke_from in {InvokeFrom.WEB_APP, InvokeFrom.SERVICE_API}:
+            end_user = db.session.query(EndUser).filter(EndUser.id == self.application_generate_entity.user_id).first()
+            if end_user:
+                user_id = end_user.session_id
+        else:
+            user_id = self.application_generate_entity.user_id
+
+        pipeline = db.session.query(Pipeline).filter(Pipeline.id == app_config.app_id).first()
+        if not pipeline:
+            raise ValueError("Pipeline not found")
+
+        workflow = self.get_workflow(pipeline=pipeline, workflow_id=app_config.workflow_id)
+        if not workflow:
+            raise ValueError("Workflow not initialized")
+
+        # All lookups above are done; the session is closed before the workflow is executed.
+        # NOTE(review): presumably to avoid holding a connection during the run -- confirm.
+        db.session.close()
+
+        workflow_callbacks: list[WorkflowCallback] = []
+        if dify_config.DEBUG:
+            workflow_callbacks.append(WorkflowLoggingCallback())
+
+        # choose how the graph and variable pool are built, depending on the requested run mode
+        if self.application_generate_entity.single_iteration_run:
+            # if only single iteration run is requested
+            graph, variable_pool = self._get_graph_and_variable_pool_of_single_iteration(
+                workflow=workflow,
+                node_id=self.application_generate_entity.single_iteration_run.node_id,
+                user_inputs=self.application_generate_entity.single_iteration_run.inputs,
+            )
+        elif self.application_generate_entity.single_loop_run:
+            # if only single loop run is requested
+            graph, variable_pool = self._get_graph_and_variable_pool_of_single_loop(
+                workflow=workflow,
+                node_id=self.application_generate_entity.single_loop_run.node_id,
+                user_inputs=self.application_generate_entity.single_loop_run.inputs,
+            )
+        else:
+            inputs = self.application_generate_entity.inputs
+            files = self.application_generate_entity.files
+
+            # Create a variable pool.
+            system_inputs = {
+                SystemVariableKey.FILES: files,
+                SystemVariableKey.USER_ID: user_id,
+                SystemVariableKey.APP_ID: app_config.app_id,
+                SystemVariableKey.WORKFLOW_ID: app_config.workflow_id,
+                SystemVariableKey.WORKFLOW_EXECUTION_ID: self.application_generate_entity.workflow_execution_id,
+                SystemVariableKey.DOCUMENT_ID: self.application_generate_entity.document_id,
+                SystemVariableKey.BATCH: self.application_generate_entity.batch,
+                SystemVariableKey.DATASET_ID: self.application_generate_entity.dataset_id,
+                SystemVariableKey.DATASOURCE_TYPE: self.application_generate_entity.datasource_type,
+                SystemVariableKey.DATASOURCE_INFO: self.application_generate_entity.datasource_info,
+                SystemVariableKey.INVOKE_FROM: self.application_generate_entity.invoke_from.value,
+            }
+            # Keep only the pipeline variables that belong to the selected start node or to "shared",
+            # and for which the caller actually supplied an input value.
+            rag_pipeline_variables = []
+            if workflow.rag_pipeline_variables:
+                for v in workflow.rag_pipeline_variables:
+                    rag_pipeline_variable = RAGPipelineVariable(**v)
+                    if (
+                        rag_pipeline_variable.belong_to_node_id
+                        in (self.application_generate_entity.start_node_id, "shared")
+                    ) and rag_pipeline_variable.variable in inputs:
+                        rag_pipeline_variables.append(
+                            RAGPipelineVariableInput(
+                                variable=rag_pipeline_variable,
+                                value=inputs[rag_pipeline_variable.variable],
+                            )
+                        )
+
+            variable_pool = VariablePool(
+                system_variables=system_inputs,
+                user_inputs=inputs,
+                environment_variables=workflow.environment_variables,
+                conversation_variables=[],
+                rag_pipeline_variables=rag_pipeline_variables,
+            )
+
+            # init graph
+            graph = self._init_rag_pipeline_graph(
+                graph_config=workflow.graph_dict,
+                start_node_id=self.application_generate_entity.start_node_id,
+            )
+
+        # RUN WORKFLOW
+        # Note: the entity's raw user_id is passed below, not the session id resolved into `user_id` above.
+        workflow_entry = WorkflowEntry(
+            tenant_id=workflow.tenant_id,
+            app_id=workflow.app_id,
+            workflow_id=workflow.id,
+            workflow_type=WorkflowType.value_of(workflow.type),
+            graph=graph,
+            graph_config=workflow.graph_dict,
+            user_id=self.application_generate_entity.user_id,
+            user_from=(
+                UserFrom.ACCOUNT
+                if self.application_generate_entity.invoke_from in {InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER}
+                else UserFrom.END_USER
+            ),
+            invoke_from=self.application_generate_entity.invoke_from,
+            call_depth=self.application_generate_entity.call_depth,
+            variable_pool=variable_pool,
+            thread_pool_id=self.workflow_thread_pool_id,
+        )
+
+        generator = workflow_entry.run(callbacks=workflow_callbacks)
+
+        # forward every event produced by the run to the event handler
+        for event in generator:
+            self._handle_event(workflow_entry, event)
+
+    def get_workflow(self, pipeline: Pipeline, workflow_id: str) -> Optional[Workflow]:
+        """
+        Get workflow
+
+        :param pipeline: pipeline whose tenant id and id the workflow must match
+        :param workflow_id: id of the workflow to load
+        :return: the matching workflow, or None when no row matches
+        """
+        # fetch workflow by workflow_id
+        workflow = (
+            db.session.query(Workflow)
+            .filter(
+                Workflow.tenant_id == pipeline.tenant_id, Workflow.app_id == pipeline.id, Workflow.id == workflow_id
+            )
+            .first()
+        )
+
+        # return workflow
+        return workflow
+
+    def _init_rag_pipeline_graph(self, graph_config: Mapping[str, Any], start_node_id: Optional[str] = None) -> Graph:
+        """
+        Init pipeline graph
+
+        Datasource nodes other than ``start_node_id`` are removed from the graph config, together with
+        the edges whose source is one of the removed nodes, before the graph is initialised.
+
+        :param graph_config: workflow graph config; must contain list-valued "nodes" and "edges"
+        :param start_node_id: id of the datasource node to keep
+        :return: the initialised graph
+        :raises ValueError: if nodes/edges are missing or not lists, or no graph could be built
+        """
+        if "nodes" not in graph_config or "edges" not in graph_config:
+            raise ValueError("nodes or edges not found in workflow graph")
+
+        if not isinstance(graph_config.get("nodes"), list):
+            raise ValueError("nodes in workflow graph must be a list")
+
+        if not isinstance(graph_config.get("edges"), list):
+            raise ValueError("edges in workflow graph must be a list")
+        nodes = graph_config.get("nodes", [])
+        edges = graph_config.get("edges", [])
+        real_run_nodes = []
+        real_edges = []
+        exclude_node_ids = []
+        # keep every non-datasource node, plus the datasource node selected as the start node
+        for node in nodes:
+            node_id = node.get("id")
+            node_type = node.get("data", {}).get("type", "")
+            if node_type == "datasource":
+                if start_node_id != node_id:
+                    exclude_node_ids.append(node_id)
+                    continue
+            real_run_nodes.append(node)
+        # drop edges that originate from an excluded datasource node
+        # (edges that only *target* an excluded node are kept as-is)
+        for edge in edges:
+            if edge.get("source") in exclude_node_ids:
+                continue
+            real_edges.append(edge)
+        # copy into a new dict so the caller's mapping is not modified by the assignments below
+        graph_config = dict(graph_config)
+        graph_config["nodes"] = real_run_nodes
+        graph_config["edges"] = real_edges
+        # init graph
+        graph = Graph.init(graph_config=graph_config)
+
+        if not graph:
+            raise ValueError("graph not found in workflow")
+
+        return graph
--- a/api/core/app/apps/workflow/app_generator.py
+++ b/api/core/app/apps/workflow/app_generator.py
@ -23,7 +23,8 @@ from core.app.entities.app_invoke_entities import InvokeFrom, WorkflowAppGenerat
 from core.app.entities.task_entities import WorkflowAppBlockingResponse, WorkflowAppStreamResponse
 from core.model_runtime.errors.invoke import InvokeAuthorizationError
 from core.ops.ops_trace_manager import TraceQueueManager
-from core.repositories import DifyCoreRepositoryFactory
+from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository
+from core.repositories.sqlalchemy_workflow_execution_repository import SQLAlchemyWorkflowExecutionRepository
 from core.workflow.repositories.draft_variable_repository import DraftVariableSaverFactory
 from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
 from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository
@ -155,14 +156,14 @@ class WorkflowAppGenerator(BaseAppGenerator):
            workflow_triggered_from = WorkflowRunTriggeredFrom.DEBUGGING
        else:
            workflow_triggered_from = WorkflowRunTriggeredFrom.APP_RUN
-        workflow_execution_repository = DifyCoreRepositoryFactory.create_workflow_execution_repository(
+        workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository(
            session_factory=session_factory,
            user=user,
            app_id=application_generate_entity.app_config.app_id,
            triggered_from=workflow_triggered_from,
        )
        # Create workflow node execution repository
-        workflow_node_execution_repository = DifyCoreRepositoryFactory.create_workflow_node_execution_repository(
+        workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository(
            session_factory=session_factory,
            user=user,
            app_id=application_generate_entity.app_config.app_id,
@ -305,14 +306,16 @@ class WorkflowAppGenerator(BaseAppGenerator):
        # Create session factory
        session_factory = sessionmaker(bind=db.engine, expire_on_commit=False)
        # Create workflow execution(aka workflow run) repository
-        workflow_execution_repository = DifyCoreRepositoryFactory.create_workflow_execution_repository(
+        workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository(
            session_factory=session_factory,
            user=user,
            app_id=application_generate_entity.app_config.app_id,
            triggered_from=WorkflowRunTriggeredFrom.DEBUGGING,
        )
        # Create workflow node execution repository
-        workflow_node_execution_repository = DifyCoreRepositoryFactory.create_workflow_node_execution_repository(
+        session_factory = sessionmaker(bind=db.engine, expire_on_commit=False)
+
+        workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository(
            session_factory=session_factory,
            user=user,
            app_id=application_generate_entity.app_config.app_id,
@ -387,14 +390,16 @@ class WorkflowAppGenerator(BaseAppGenerator):
        # Create session factory
        session_factory = sessionmaker(bind=db.engine, expire_on_commit=False)
        # Create workflow execution(aka workflow run) repository
-        workflow_execution_repository = DifyCoreRepositoryFactory.create_workflow_execution_repository(
+        workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository(
            session_factory=session_factory,
            user=user,
            app_id=application_generate_entity.app_config.app_id,
            triggered_from=WorkflowRunTriggeredFrom.DEBUGGING,
        )
        # Create workflow node execution repository
-        workflow_node_execution_repository = DifyCoreRepositoryFactory.create_workflow_node_execution_repository(
+        session_factory = sessionmaker(bind=db.engine, expire_on_commit=False)
+
+        workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository(
            session_factory=session_factory,
            user=user,
            app_id=application_generate_entity.app_config.app_id,
--- a/api/core/app/apps/workflow/app_runner.py
+++ b/api/core/app/apps/workflow/app_runner.py
@ -11,7 +11,7 @@ from core.app.entities.app_invoke_entities import (
 )
 from core.workflow.callbacks import WorkflowCallback, WorkflowLoggingCallback
 from core.workflow.entities.variable_pool import VariablePool
-from core.workflow.system_variable import SystemVariable
+from core.workflow.enums import SystemVariableKey
 from core.workflow.variable_loader import VariableLoader
 from core.workflow.workflow_entry import WorkflowEntry
 from extensions.ext_database import db
@ -95,14 +95,13 @@ class WorkflowAppRunner(WorkflowBasedAppRunner):
            files = self.application_generate_entity.files

            # Create a variable pool.
-
-            system_inputs = SystemVariable(
-                files=files,
-                user_id=user_id,
-                app_id=app_config.app_id,
-                workflow_id=app_config.workflow_id,
-                workflow_execution_id=self.application_generate_entity.workflow_execution_id,
-            )
+            system_inputs = {
+                SystemVariableKey.FILES: files,
+                SystemVariableKey.USER_ID: user_id,
+                SystemVariableKey.APP_ID: app_config.app_id,
+                SystemVariableKey.WORKFLOW_ID: app_config.workflow_id,
+                SystemVariableKey.WORKFLOW_EXECUTION_ID: self.application_generate_entity.workflow_execution_id,
+            }

            variable_pool = VariablePool(
                system_variables=system_inputs,
--- a/api/core/app/apps/workflow/generate_task_pipeline.py
+++ b/api/core/app/apps/workflow/generate_task_pipeline.py
@ -3,6 +3,7 @@ import time
 from collections.abc import Generator
 from typing import Optional, Union

+from sqlalchemy import select
 from sqlalchemy.orm import Session

 from constants.tts_auto_play_timeout import TTS_AUTO_PLAY_TIMEOUT, TTS_AUTO_PLAY_YIELD_CPU_TIME
@ -54,10 +55,10 @@ from core.app.task_pipeline.based_generate_task_pipeline import BasedGenerateTas
 from core.base.tts import AppGeneratorTTSPublisher, AudioTrunk
 from core.ops.ops_trace_manager import TraceQueueManager
 from core.workflow.entities.workflow_execution import WorkflowExecution, WorkflowExecutionStatus, WorkflowType
+from core.workflow.enums import SystemVariableKey
 from core.workflow.repositories.draft_variable_repository import DraftVariableSaverFactory
 from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
 from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository
-from core.workflow.system_variable import SystemVariable
 from core.workflow.workflow_cycle_manager import CycleManagerWorkflowInfo, WorkflowCycleManager
 from extensions.ext_database import db
 from models.account import Account
@ -67,6 +68,7 @@ from models.workflow import (
    Workflow,
    WorkflowAppLog,
    WorkflowAppLogCreatedFrom,
+    WorkflowRun,
 )

 logger = logging.getLogger(__name__)
@ -107,13 +109,13 @@ class WorkflowAppGenerateTaskPipeline:

        self._workflow_cycle_manager = WorkflowCycleManager(
            application_generate_entity=application_generate_entity,
-            workflow_system_variables=SystemVariable(
-                files=application_generate_entity.files,
-                user_id=user_session_id,
-                app_id=application_generate_entity.app_config.app_id,
-                workflow_id=workflow.id,
-                workflow_execution_id=application_generate_entity.workflow_execution_id,
-            ),
+            workflow_system_variables={
+                SystemVariableKey.FILES: application_generate_entity.files,
+                SystemVariableKey.USER_ID: user_session_id,
+                SystemVariableKey.APP_ID: application_generate_entity.app_config.app_id,
+                SystemVariableKey.WORKFLOW_ID: workflow.id,
+                SystemVariableKey.WORKFLOW_EXECUTION_ID: application_generate_entity.workflow_execution_id,
+            },
            workflow_info=CycleManagerWorkflowInfo(
                workflow_id=workflow.id,
                workflow_type=WorkflowType(workflow.type),
@ -560,6 +562,8 @@ class WorkflowAppGenerateTaskPipeline:
            tts_publisher.publish(None)

    def _save_workflow_app_log(self, *, session: Session, workflow_execution: WorkflowExecution) -> None:
+        workflow_run = session.scalar(select(WorkflowRun).where(WorkflowRun.id == workflow_execution.id_))
+        assert workflow_run is not None
        invoke_from = self._application_generate_entity.invoke_from
        if invoke_from == InvokeFrom.SERVICE_API:
            created_from = WorkflowAppLogCreatedFrom.SERVICE_API
@ -572,10 +576,10 @@ class WorkflowAppGenerateTaskPipeline:
            return

        workflow_app_log = WorkflowAppLog()
-        workflow_app_log.tenant_id = self._application_generate_entity.app_config.tenant_id
-        workflow_app_log.app_id = self._application_generate_entity.app_config.app_id
-        workflow_app_log.workflow_id = workflow_execution.workflow_id
-        workflow_app_log.workflow_run_id = workflow_execution.id_
+        workflow_app_log.tenant_id = workflow_run.tenant_id
+        workflow_app_log.app_id = workflow_run.app_id
+        workflow_app_log.workflow_id = workflow_run.workflow_id
+        workflow_app_log.workflow_run_id = workflow_run.id
        workflow_app_log.created_from = created_from.value
        workflow_app_log.created_by_role = self._created_by_role
        workflow_app_log.created_by = self._user_id
--- a/api/core/app/apps/workflow_app_runner.py
+++ b/api/core/app/apps/workflow_app_runner.py
@ -62,7 +62,6 @@ from core.workflow.graph_engine.entities.event import (
 from core.workflow.graph_engine.entities.graph import Graph
 from core.workflow.nodes import NodeType
 from core.workflow.nodes.node_mapping import NODE_TYPE_CLASSES_MAPPING
-from core.workflow.system_variable import SystemVariable
 from core.workflow.variable_loader import DUMMY_VARIABLE_LOADER, VariableLoader, load_into_variable_pool
 from core.workflow.workflow_entry import WorkflowEntry
 from extensions.ext_database import db
@ -167,7 +166,7 @@ class WorkflowBasedAppRunner(AppRunner):

        # init variable pool
        variable_pool = VariablePool(
-            system_variables=SystemVariable.empty(),
+            system_variables={},
            user_inputs={},
            environment_variables=workflow.environment_variables,
        )
@ -264,7 +263,7 @@ class WorkflowBasedAppRunner(AppRunner):

        # init variable pool
        variable_pool = VariablePool(
-            system_variables=SystemVariable.empty(),
+            system_variables={},
            user_inputs={},
            environment_variables=workflow.environment_variables,
        )
--- a/api/core/app/entities/app_invoke_entities.py
+++ b/api/core/app/entities/app_invoke_entities.py
@ -36,6 +36,7 @@ class InvokeFrom(Enum):
    # DEBUGGER indicates that this invocation is from
    # the workflow (or chatflow) edit page.
    DEBUGGER = "debugger"
+    PUBLISHED = "published"

    @classmethod
    def value_of(cls, value: str):
@ -240,3 +241,38 @@ class WorkflowAppGenerateEntity(AppGenerateEntity):
        inputs: dict

    single_loop_run: Optional[SingleLoopRunEntity] = None
+
+
+class RagPipelineGenerateEntity(WorkflowAppGenerateEntity):
+    """
+    RAG Pipeline Application Generate Entity.
+
+    Extends the workflow generate entity with the datasource, dataset and batch fields of a pipeline run.
+    """
+
+    # pipeline config
+    pipeline_config: WorkflowUIBasedAppConfig
+    # datasource type and datasource payload for this run (both required)
+    datasource_type: str
+    datasource_info: Mapping[str, Any]
+    # dataset id and batch value for this run (both required)
+    dataset_id: str
+    batch: str
+    # optional; both default to None
+    document_id: Optional[str] = None
+    start_node_id: Optional[str] = None
+
+    # NOTE(review): the parent class also declares `single_loop_run` (visible just above this class);
+    # confirm the nested re-declarations below are needed rather than inherited.
+    class SingleIterationRunEntity(BaseModel):
+        """
+        Single Iteration Run Entity.
+        """
+
+        node_id: str
+        inputs: dict
+
+    single_iteration_run: Optional[SingleIterationRunEntity] = None
+
+    class SingleLoopRunEntity(BaseModel):
+        """
+        Single Loop Run Entity.
+        """
+
+        node_id: str
+        inputs: dict
+
+    single_loop_run: Optional[SingleLoopRunEntity] = None
--- a/api/core/app/task_pipeline/based_generate_task_pipeline.py
+++ b/api/core/app/task_pipeline/based_generate_task_pipeline.py
@ -19,7 +19,6 @@ from core.app.entities.task_entities import (
 from core.errors.error import QuotaExceededError
 from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeError
 from core.moderation.output_moderation import ModerationRule, OutputModeration
-from models.enums import MessageStatus
 from models.model import Message

 logger = logging.getLogger(__name__)
@ -63,7 +62,7 @@ class BasedGenerateTaskPipeline:
            return err

        err_desc = self._error_to_desc(err)
-        message.status = MessageStatus.ERROR
+        message.status = "error"
        message.error = err_desc
        return err

--- a/api/core/callback_handler/agent_tool_callback_handler.py
+++ b/api/core/callback_handler/agent_tool_callback_handler.py
@ -105,6 +105,14 @@ class DifyAgentCallbackHandler(BaseModel):

        self.current_loop += 1

+    def on_datasource_start(self, datasource_name: str, datasource_inputs: Mapping[str, Any]) -> None:
+        """Run on datasource start."""
+        if dify_config.DEBUG:
+            print_text(
+                "\n[on_datasource_start] DatasourceCall:" + datasource_name + "\n" + str(datasource_inputs) + "\n",
+                color=self.color,
+            )
+
    @property
    def ignore_agent(self) -> bool:
        """Whether to ignore agent callbacks."""
--- a/api/core/datasource/__base/datasource_plugin.py
+++ b/api/core/datasource/__base/datasource_plugin.py
@ -0,0 +1,33 @@
+from abc import ABC, abstractmethod
+
+from core.datasource.__base.datasource_runtime import DatasourceRuntime
+from core.datasource.entities.datasource_entities import (
+    DatasourceEntity,
+    DatasourceProviderType,
+)
+
+
+class DatasourcePlugin(ABC):
+    entity: DatasourceEntity
+    runtime: DatasourceRuntime
+
+    def __init__(
+        self,
+        entity: DatasourceEntity,
+        runtime: DatasourceRuntime,
+    ) -> None:
+        self.entity = entity
+        self.runtime = runtime
+
+    @abstractmethod
+    def datasource_provider_type(self) -> str:
+        """
+        returns the type of the datasource provider
+        """
+        return DatasourceProviderType.LOCAL_FILE
+
+    def fork_datasource_runtime(self, runtime: DatasourceRuntime) -> "DatasourcePlugin":
+        return self.__class__(
+            entity=self.entity.model_copy(),
+            runtime=runtime,
+        )
--- a/api/core/datasource/__base/datasource_provider.py
+++ b/api/core/datasource/__base/datasource_provider.py
@ -0,0 +1,118 @@
+from abc import ABC, abstractmethod
+from typing import Any
+
+from core.datasource.__base.datasource_plugin import DatasourcePlugin
+from core.datasource.entities.datasource_entities import DatasourceProviderEntityWithPlugin, DatasourceProviderType
+from core.entities.provider_entities import ProviderConfig
+from core.plugin.impl.tool import PluginToolManager
+from core.tools.errors import ToolProviderCredentialValidationError
+
+
+class DatasourcePluginProviderController(ABC):
+    entity: DatasourceProviderEntityWithPlugin
+    tenant_id: str
+
+    def __init__(self, entity: DatasourceProviderEntityWithPlugin, tenant_id: str) -> None:
+        self.entity = entity
+        self.tenant_id = tenant_id
+
+    @property
+    def need_credentials(self) -> bool:
+        """
+        returns whether the provider needs credentials
+
+        :return: whether the provider needs credentials
+        """
+        return self.entity.credentials_schema is not None and len(self.entity.credentials_schema) != 0
+
+    def _validate_credentials(self, user_id: str, credentials: dict[str, Any]) -> None:
+        """
+        validate the credentials of the provider
+        """
+        manager = PluginToolManager()
+        if not manager.validate_datasource_credentials(
+            tenant_id=self.tenant_id,
+            user_id=user_id,
+            provider=self.entity.identity.name,
+            credentials=credentials,
+        ):
+            raise ToolProviderCredentialValidationError("Invalid credentials")
+
+    @property
+    def provider_type(self) -> DatasourceProviderType:
+        """
+        returns the type of the provider
+        """
+        return DatasourceProviderType.LOCAL_FILE
+
+    @abstractmethod
+    def get_datasource(self, datasource_name: str) -> DatasourcePlugin:
+        """
+        return datasource with given name
+        """
+        pass
+
+    def validate_credentials_format(self, credentials: dict[str, Any]) -> None:
+        """
+        validate the format of the credentials of the provider and set the default value if needed
+
+        :param credentials: the credentials of the tool
+        """
+        credentials_schema = dict[str, ProviderConfig]()
+        if credentials_schema is None:
+            return
+
+        for credential in self.entity.credentials_schema:
+            credentials_schema[credential.name] = credential
+
+        credentials_need_to_validate: dict[str, ProviderConfig] = {}
+        for credential_name in credentials_schema:
+            credentials_need_to_validate[credential_name] = credentials_schema[credential_name]
+
+        for credential_name in credentials:
+            if credential_name not in credentials_need_to_validate:
+                raise ToolProviderCredentialValidationError(
+                    f"credential {credential_name} not found in provider {self.entity.identity.name}"
+                )
+
+            # check type
+            credential_schema = credentials_need_to_validate[credential_name]
+            if not credential_schema.required and credentials[credential_name] is None:
+                continue
+
+            if credential_schema.type in {ProviderConfig.Type.SECRET_INPUT, ProviderConfig.Type.TEXT_INPUT}:
+                if not isinstance(credentials[credential_name], str):
+                    raise ToolProviderCredentialValidationError(f"credential {credential_name} should be string")
+
+            elif credential_schema.type == ProviderConfig.Type.SELECT:
+                if not isinstance(credentials[credential_name], str):
+                    raise ToolProviderCredentialValidationError(f"credential {credential_name} should be string")
+
+                options = credential_schema.options
+                if not isinstance(options, list):
+                    raise ToolProviderCredentialValidationError(f"credential {credential_name} options should be list")
+
+                if credentials[credential_name] not in [x.value for x in options]:
+                    raise ToolProviderCredentialValidationError(
+                        f"credential {credential_name} should be one of {options}"
+                    )
+
+            credentials_need_to_validate.pop(credential_name)
+
+        for credential_name in credentials_need_to_validate:
+            credential_schema = credentials_need_to_validate[credential_name]
+            if credential_schema.required:
+                raise ToolProviderCredentialValidationError(f"credential {credential_name} is required")
+
+            # the credential is not set currently, set the default value if needed
+            if credential_schema.default is not None:
+                default_value = credential_schema.default
+                # parse default value into the correct type
+                if credential_schema.type in {
+                    ProviderConfig.Type.SECRET_INPUT,
+                    ProviderConfig.Type.TEXT_INPUT,
+                    ProviderConfig.Type.SELECT,
+                }:
+                    default_value = str(default_value)
+
+                credentials[credential_name] = default_value
--- a/api/core/datasource/__base/datasource_runtime.py
+++ b/api/core/datasource/__base/datasource_runtime.py
@ -0,0 +1,36 @@
+from typing import Any, Optional
+
+from openai import BaseModel
+from pydantic import Field
+
+from core.app.entities.app_invoke_entities import InvokeFrom
+from core.datasource.entities.datasource_entities import DatasourceInvokeFrom
+
+
+class DatasourceRuntime(BaseModel):
+    """
+    Meta data of a datasource call processing
+
+    NOTE(review): ``BaseModel`` in this module is imported from ``openai`` rather than ``pydantic``
+    (``Field`` does come from ``pydantic``) -- confirm this is intentional.
+    """
+
+    # required
+    tenant_id: str
+    # optional; each defaults to None
+    datasource_id: Optional[str] = None
+    invoke_from: Optional[InvokeFrom] = None
+    datasource_invoke_from: Optional[DatasourceInvokeFrom] = None
+    # each instance gets its own fresh empty dict by default
+    credentials: dict[str, Any] = Field(default_factory=dict)
+    runtime_parameters: dict[str, Any] = Field(default_factory=dict)
+
+
+class FakeDatasourceRuntime(DatasourceRuntime):
+    """
+    Fake datasource runtime for testing
+    """
+
+    def __init__(self):
+        super().__init__(
+            tenant_id="fake_tenant_id",
+            datasource_id="fake_datasource_id",
+            invoke_from=InvokeFrom.DEBUGGER,
+            datasource_invoke_from=DatasourceInvokeFrom.RAG_PIPELINE,
+            credentials={},
+            runtime_parameters={},
+        )
--- a/api/core/ops/aliyun_trace/init.py
+++ b/api/core/ops/aliyun_trace/init.py
--- a/api/core/datasource/datasource_file_manager.py
+++ b/api/core/datasource/datasource_file_manager.py
@ -0,0 +1,244 @@
+import base64
+import hashlib
+import hmac
+import logging
+import os
+import time
+from mimetypes import guess_extension, guess_type
+from typing import Optional, Union
+from uuid import uuid4
+
+import httpx
+
+from configs import dify_config
+from core.helper import ssrf_proxy
+from extensions.ext_database import db
+from extensions.ext_storage import storage
+from models.enums import CreatorUserRole
+from models.model import MessageFile, UploadFile
+from models.tools import ToolFile
+
+logger = logging.getLogger(__name__)
+
+
+class DatasourceFileManager:
+    @staticmethod
+    def sign_file(datasource_file_id: str, extension: str) -> str:
+        """
+        Build a signed, time-stamped preview URL for a datasource file.
+
+        :param datasource_file_id: the id of the file
+        :param extension: appended verbatim to the file id in the URL path
+
+        :return: the preview URL carrying ``timestamp``, ``nonce`` and ``sign`` query parameters
+        """
+        issued_at = str(int(time.time()))
+        nonce = os.urandom(16).hex()
+
+        # HMAC-SHA256 over "file-preview|<id>|<timestamp>|<nonce>", keyed with
+        # SECRET_KEY (an empty key is used when SECRET_KEY is unset).
+        key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
+        payload = f"file-preview|{datasource_file_id}|{issued_at}|{nonce}".encode()
+        digest = hmac.new(key, payload, hashlib.sha256).digest()
+        signature = base64.urlsafe_b64encode(digest).decode()
+
+        preview_url = f"{dify_config.FILES_URL}/files/datasources/{datasource_file_id}{extension}"
+        return f"{preview_url}?timestamp={issued_at}&nonce={nonce}&sign={signature}"
+
+    @staticmethod
+    def verify_file(datasource_file_id: str, timestamp: str, nonce: str, sign: str) -> bool:
+        """
+        Verify a signature produced by ``sign_file`` and check that it has not expired.
+
+        :param datasource_file_id: the id of the file the URL points to
+        :param timestamp: the ``timestamp`` query parameter (seconds since epoch, as text)
+        :param nonce: the ``nonce`` query parameter
+        :param sign: the ``sign`` query parameter (URL-safe base64 of an HMAC-SHA256 digest)
+
+        :return: True only if the signature matches and the timestamp is at most
+            ``FILES_ACCESS_TIMEOUT`` seconds old; False otherwise, including when
+            the timestamp is not an integer
+        """
+        data_to_sign = f"file-preview|{datasource_file_id}|{timestamp}|{nonce}"
+        secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
+        recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
+        recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode()
+
+        # Compare in constant time so the check does not reveal how much of a
+        # forged signature was correct. Both sides are encoded to bytes because
+        # hmac.compare_digest rejects str arguments containing non-ASCII characters.
+        if not hmac.compare_digest(sign.encode(), recalculated_encoded_sign.encode()):
+            return False
+
+        # A malformed timestamp is a failed verification, not a server error.
+        try:
+            issued_at = int(timestamp)
+        except ValueError:
+            return False
+
+        return int(time.time()) - issued_at <= dify_config.FILES_ACCESS_TIMEOUT
+
+    @staticmethod
+    def create_file_by_raw(
+        *,
+        user_id: str,
+        tenant_id: str,
+        conversation_id: Optional[str],
+        file_binary: bytes,
+        mimetype: str,
+        filename: Optional[str] = None,
+    ) -> UploadFile:
+        """
+        Save raw bytes to storage and record them as an UploadFile.
+
+        :param user_id: stored as ``created_by``
+        :param tenant_id: stored on the record and used in the storage path
+        :param conversation_id: accepted but not used by this method
+        :param file_binary: the file content
+        :param mimetype: mime type, also used to guess the file extension (".bin" if unknown)
+        :param filename: optional display name; the guessed extension is appended when it contains no "."
+
+        :return: the committed and refreshed UploadFile record
+        """
+        extension = guess_extension(mimetype) or ".bin"
+        stored_name = f"{uuid4().hex}{extension}"
+        storage_key = f"datasources/{tenant_id}/{stored_name}"
+
+        # Display name: the caller's filename (with an extension added if it has
+        # none), otherwise the generated storage name.
+        if filename is None:
+            display_name = stored_name
+        elif "." in filename:
+            display_name = filename
+        else:
+            display_name = f"{filename}{extension}"
+
+        storage.save(storage_key, file_binary)
+
+        record = UploadFile(
+            tenant_id=tenant_id,
+            storage_type=dify_config.STORAGE_TYPE,
+            key=storage_key,
+            name=display_name,
+            size=len(file_binary),
+            extension=extension,
+            mime_type=mimetype,
+            created_by_role=CreatorUserRole.ACCOUNT,
+            created_by=user_id,
+            used=False,
+            hash=hashlib.sha3_256(file_binary).hexdigest(),
+            source_url="",
+        )
+
+        db.session.add(record)
+        db.session.commit()
+        db.session.refresh(record)
+
+        return record
+
+    @staticmethod
+    def create_file_by_url(
+        user_id: str,
+        tenant_id: str,
+        file_url: str,
+        conversation_id: Optional[str] = None,
+    ) -> UploadFile:
+        """
+        Download a file through the SSRF proxy, save it to storage and record
+        it as an UploadFile.
+
+        :param user_id: stored as ``created_by``
+        :param tenant_id: stored on the record and used in the storage path
+        :param file_url: the URL to download; also stored as ``source_url``
+        :param conversation_id: accepted but not used by this method
+
+        :return: the committed UploadFile record
+        :raises ValueError: if the download times out
+        """
+        # download the file (any type, not only images)
+        try:
+            response = ssrf_proxy.get(file_url)
+            response.raise_for_status()
+            blob = response.content
+        except httpx.TimeoutException as e:
+            raise ValueError(f"timeout when downloading file from {file_url}") from e
+
+        # Mime type: guessed from the URL first, then the Content-Type header
+        # (parameters such as charset stripped), then a generic binary type.
+        mimetype = (
+            guess_type(file_url)[0]
+            or response.headers.get("Content-Type", "").split(";")[0].strip()
+            or "application/octet-stream"
+        )
+        extension = guess_extension(mimetype) or ".bin"
+        unique_name = uuid4().hex
+        filename = f"{unique_name}{extension}"
+        # Store under "datasources/" like create_file_by_raw; the previous
+        # "tools/" prefix put datasource files among tool files. Lookups go
+        # through UploadFile.key, so existing records are unaffected.
+        filepath = f"datasources/{tenant_id}/{filename}"
+        storage.save(filepath, blob)
+
+        upload_file = UploadFile(
+            tenant_id=tenant_id,
+            storage_type=dify_config.STORAGE_TYPE,
+            key=filepath,
+            name=filename,
+            size=len(blob),
+            extension=extension,
+            mime_type=mimetype,
+            created_by_role=CreatorUserRole.ACCOUNT,
+            created_by=user_id,
+            used=False,
+            hash=hashlib.sha3_256(blob).hexdigest(),
+            source_url=file_url,
+        )
+
+        db.session.add(upload_file)
+        db.session.commit()
+
+        return upload_file
+
+    @staticmethod
+    def get_file_binary(id: str) -> Union[tuple[bytes, str], None]:
+        """
+        Load the content of an upload file.
+
+        :param id: the id of the upload file
+
+        :return: the binary of the file and its mime type, or None if no such upload file exists
+        """
+        record: UploadFile | None = db.session.query(UploadFile).filter(UploadFile.id == id).first()
+
+        if record:
+            return storage.load_once(record.key), record.mime_type
+
+        return None
+
+    @staticmethod
+    def get_file_binary_by_message_file_id(id: str) -> Union[tuple[bytes, str], None]:
+        """
+        Load the content of the tool file referenced by a message file.
+
+        The tool file id is the last path segment of the message file's
+        ``url``, with everything from the first "." onwards removed.
+
+        :param id: the id of the message file
+
+        :return: the binary of the file and its mime type, or None if the
+            message file, its url, or the tool file is missing
+        """
+        message_file: MessageFile | None = (
+            db.session.query(MessageFile)
+            .filter(
+                MessageFile.id == id,
+            )
+            .first()
+        )
+
+        # Without a message file or a url there is no tool file id to look up,
+        # so return early instead of querying ToolFile with a None id.
+        if message_file is None or message_file.url is None:
+            return None
+
+        # last path segment, extension trimmed
+        tool_file_id = message_file.url.split("/")[-1].split(".")[0]
+
+        tool_file: ToolFile | None = (
+            db.session.query(ToolFile)
+            .filter(
+                ToolFile.id == tool_file_id,
+            )
+            .first()
+        )
+
+        if not tool_file:
+            return None
+
+        blob = storage.load_once(tool_file.file_key)
+
+        return blob, tool_file.mimetype
+
+    @staticmethod
+    def get_file_generator_by_upload_file_id(upload_file_id: str):
+        """
+        Open a stream over the content of an upload file.
+
+        :param upload_file_id: the id of the upload file
+
+        :return: the stream returned by storage and the file's mime type,
+            or ``(None, None)`` if no such upload file exists
+        """
+        record: UploadFile | None = (
+            db.session.query(UploadFile).filter(UploadFile.id == upload_file_id).first()
+        )
+
+        if record:
+            return storage.load_stream(record.key), record.mime_type
+
+        return None, None
+
+
+# init tool_file_parser
+# from core.file.datasource_file_parser import datasource_file_manager
+#
+# datasource_file_manager["manager"] = DatasourceFileManager
--- a/api/core/datasource/datasource_manager.py
+++ b/api/core/datasource/datasource_manager.py
@ -0,0 +1,100 @@
+import logging
+from threading import Lock
+from typing import Union
+
+import contexts
+from core.datasource.__base.datasource_plugin import DatasourcePlugin
+from core.datasource.__base.datasource_provider import DatasourcePluginProviderController
+from core.datasource.entities.common_entities import I18nObject
+from core.datasource.entities.datasource_entities import DatasourceProviderType
+from core.datasource.errors import DatasourceProviderNotFoundError
+from core.datasource.local_file.local_file_provider import LocalFileDatasourcePluginProviderController
+from core.datasource.online_document.online_document_provider import OnlineDocumentDatasourcePluginProviderController
+from core.datasource.website_crawl.website_crawl_provider import WebsiteCrawlDatasourcePluginProviderController
+from core.plugin.impl.datasource import PluginDatasourceManager
+
+logger = logging.getLogger(__name__)
+
+
+class DatasourceManager:
+    """
+    Resolves datasource plugin providers and the datasources they expose,
+    caching provider controllers in context variables.
+    """
+
+    _builtin_provider_lock = Lock()
+    _hardcoded_providers: dict[str, DatasourcePluginProviderController] = {}
+    _builtin_providers_loaded = False
+    _builtin_tools_labels: dict[str, Union[I18nObject, None]] = {}
+
+    @classmethod
+    def get_datasource_plugin_provider(
+        cls, provider_id: str, tenant_id: str, datasource_type: DatasourceProviderType
+    ) -> DatasourcePluginProviderController:
+        """
+        Return the provider controller for ``provider_id``, building and caching it on first use.
+
+        :param provider_id: the id of the provider
+        :param tenant_id: the tenant id
+        :param datasource_type: selects which controller class is built
+
+        :return: the cached or newly built provider controller
+        :raises DatasourceProviderNotFoundError: if the plugin manager returns no provider
+        :raises ValueError: if ``datasource_type`` has no controller here
+            (ONLINE_DRIVE currently falls into this case)
+        """
+        # Lazily create the cache and its lock if the context has none yet.
+        try:
+            contexts.datasource_plugin_providers.get()
+        except LookupError:
+            contexts.datasource_plugin_providers.set({})
+            contexts.datasource_plugin_providers_lock.set(Lock())
+
+        with contexts.datasource_plugin_providers_lock.get():
+            cached_providers = contexts.datasource_plugin_providers.get()
+            if provider_id in cached_providers:
+                return cached_providers[provider_id]
+
+            provider_entity = PluginDatasourceManager().fetch_datasource_provider(tenant_id, provider_id)
+            if not provider_entity:
+                raise DatasourceProviderNotFoundError(f"plugin provider {provider_id} not found")
+
+            # All supported controllers take the same constructor arguments,
+            # so only the class differs per datasource type.
+            if datasource_type == DatasourceProviderType.ONLINE_DOCUMENT:
+                controller_cls = OnlineDocumentDatasourcePluginProviderController
+            elif datasource_type == DatasourceProviderType.WEBSITE_CRAWL:
+                controller_cls = WebsiteCrawlDatasourcePluginProviderController
+            elif datasource_type == DatasourceProviderType.LOCAL_FILE:
+                controller_cls = LocalFileDatasourcePluginProviderController
+            else:
+                raise ValueError(f"Unsupported datasource type: {datasource_type}")
+
+            controller = controller_cls(
+                entity=provider_entity.declaration,
+                plugin_id=provider_entity.plugin_id,
+                plugin_unique_identifier=provider_entity.plugin_unique_identifier,
+                tenant_id=tenant_id,
+            )
+            cached_providers[provider_id] = controller
+            return controller
+
+    @classmethod
+    def get_datasource_runtime(
+        cls,
+        provider_id: str,
+        datasource_name: str,
+        tenant_id: str,
+        datasource_type: DatasourceProviderType,
+    ) -> DatasourcePlugin:
+        """
+        Look up a datasource by name on the given provider.
+
+        :param provider_id: the id of the provider
+        :param datasource_name: the name of the datasource
+        :param tenant_id: the tenant id
+        :param datasource_type: the type of the provider
+
+        :return: the datasource plugin
+        """
+        provider = cls.get_datasource_plugin_provider(provider_id, tenant_id, datasource_type)
+        return provider.get_datasource(datasource_name)
--- a/api/core/datasource/entities/api_entities.py
+++ b/api/core/datasource/entities/api_entities.py
@ -0,0 +1,71 @@
+from typing import Literal, Optional
+
+from pydantic import BaseModel, Field, field_validator
+
+from core.datasource.entities.datasource_entities import DatasourceParameter
+from core.model_runtime.utils.encoders import jsonable_encoder
+from core.tools.entities.common_entities import I18nObject
+
+
+class DatasourceApiEntity(BaseModel):
+    """
+    API-facing description of a single datasource.
+    """
+
+    author: str
+    name: str  # identifier
+    label: I18nObject  # label
+    description: I18nObject
+    parameters: Optional[list[DatasourceParameter]] = None
+    labels: list[str] = Field(default_factory=list)
+    output_schema: Optional[dict] = None
+
+ToolProviderTypeApiLiteral = Optional[Literal["builtin", "api", "workflow"]]
+
+
+class DatasourceProviderApiEntity(BaseModel):
+    """
+    API-facing description of a datasource provider and the datasources it exposes.
+    """
+
+    id: str
+    author: str
+    name: str  # identifier
+    description: I18nObject
+    icon: str | dict
+    label: I18nObject  # label
+    type: str
+    masked_credentials: Optional[dict] = None
+    original_credentials: Optional[dict] = None
+    is_team_authorization: bool = False
+    allow_delete: bool = True
+    plugin_id: Optional[str] = Field(default="", description="The plugin id of the datasource")
+    plugin_unique_identifier: Optional[str] = Field(default="", description="The unique identifier of the datasource")
+    datasources: list[DatasourceApiEntity] = Field(default_factory=list)
+    labels: list[str] = Field(default_factory=list)
+
+    @field_validator("datasources", mode="before")
+    @classmethod
+    def convert_none_to_empty_list(cls, v):
+        """Treat an explicit ``None`` for ``datasources`` as an empty list."""
+        return v if v is not None else []
+
+    def to_dict(self) -> dict:
+        """
+        Serialize the provider for API responses.
+
+        ``masked_credentials`` is emitted under the key ``team_credentials``;
+        ``original_credentials`` is not included in the output.
+
+        :return: a dict of the provider fields with datasources JSON-encoded
+        """
+        # -------------
+        # overwrite datasource parameter types for temp fix
+        datasources = jsonable_encoder(self.datasources)
+        for datasource in datasources:
+            if datasource.get("parameters"):
+                for parameter in datasource.get("parameters"):
+                    if parameter.get("type") == DatasourceParameter.DatasourceParameterType.SYSTEM_FILES.value:
+                        parameter["type"] = "files"
+        # -------------
+
+        return {
+            "id": self.id,
+            "author": self.author,
+            "name": self.name,
+            "plugin_id": self.plugin_id,
+            "plugin_unique_identifier": self.plugin_unique_identifier,
+            "description": self.description.to_dict(),
+            "icon": self.icon,
+            "label": self.label.to_dict(),
+            # ``type`` is declared as ``str``, and a plain string has no
+            # ``.value``; fall back to the string itself so this works for
+            # both plain strings and enum members.
+            "type": getattr(self.type, "value", self.type),
+            "team_credentials": self.masked_credentials,
+            "is_team_authorization": self.is_team_authorization,
+            "allow_delete": self.allow_delete,
+            "datasources": datasources,
+            "labels": self.labels,
+        }
--- a/api/core/datasource/entities/common_entities.py
+++ b/api/core/datasource/entities/common_entities.py
@ -0,0 +1,23 @@
+from typing import Optional
+
+from pydantic import BaseModel, Field
+
+
+class I18nObject(BaseModel):
+    """
+    Localized text; locales that are missing or empty fall back to ``en_US``.
+    """
+
+    en_US: str
+    zh_Hans: Optional[str] = Field(default=None)
+    pt_BR: Optional[str] = Field(default=None)
+    ja_JP: Optional[str] = Field(default=None)
+
+    def __init__(self, **data):
+        super().__init__(**data)
+        # Every optional locale is (re)assigned, keeping its own text when
+        # present and borrowing the English text otherwise.
+        for locale in ("zh_Hans", "pt_BR", "ja_JP"):
+            setattr(self, locale, getattr(self, locale) or self.en_US)
+
+    def to_dict(self) -> dict:
+        """Return all four locales as a plain dict."""
+        return {locale: getattr(self, locale) for locale in ("zh_Hans", "en_US", "pt_BR", "ja_JP")}
--- a/api/core/datasource/entities/datasource_entities.py
+++ b/api/core/datasource/entities/datasource_entities.py
@ -0,0 +1,361 @@
+import enum
+from enum import Enum
+from typing import Any, Optional
+
+from pydantic import BaseModel, Field, ValidationInfo, field_validator
+
+from core.entities.provider_entities import ProviderConfig
+from core.plugin.entities.oauth import OAuthSchema
+from core.plugin.entities.parameters import (
+    PluginParameter,
+    PluginParameterOption,
+    PluginParameterType,
+    as_normal_type,
+    cast_parameter_value,
+    init_frontend_parameter,
+)
+from core.tools.entities.common_entities import I18nObject
+from core.tools.entities.tool_entities import ToolInvokeMessage, ToolLabelEnum
+
+
+class DatasourceProviderType(enum.StrEnum):
+    """
+    Enum class for datasource provider
+    """
+
+    ONLINE_DOCUMENT = "online_document"
+    LOCAL_FILE = "local_file"
+    WEBSITE_CRAWL = "website_crawl"
+    ONLINE_DRIVE = "online_drive"
+
+    @classmethod
+    def value_of(cls, value: str) -> "DatasourceProviderType":
+        """
+        Look up the member whose value equals ``value``.
+
+        :param value: mode value
+        :return: the matching member
+        :raises ValueError: if no member has that value
+        """
+        matched = next((member for member in cls if member.value == value), None)
+        if matched is None:
+            raise ValueError(f"invalid mode value {value}")
+        return matched
+
+
+class DatasourceParameter(PluginParameter):
+    """
+    Plugin parameter for datasources; overrides ``type`` with
+    ``DatasourceParameterType`` and redeclares ``description``.
+    """
+
+    class DatasourceParameterType(enum.StrEnum):
+        """
+        PluginParameterType without TOOLS_SELECTOR
+        """
+
+        STRING = PluginParameterType.STRING.value
+        NUMBER = PluginParameterType.NUMBER.value
+        BOOLEAN = PluginParameterType.BOOLEAN.value
+        SELECT = PluginParameterType.SELECT.value
+        SECRET_INPUT = PluginParameterType.SECRET_INPUT.value
+        FILE = PluginParameterType.FILE.value
+        FILES = PluginParameterType.FILES.value
+
+        # deprecated, should not use.
+        SYSTEM_FILES = PluginParameterType.SYSTEM_FILES.value
+
+        def as_normal_type(self):
+            """Delegate to the module-level ``as_normal_type`` helper."""
+            return as_normal_type(self)
+
+        def cast_value(self, value: Any):
+            """Delegate to the module-level ``cast_parameter_value`` helper."""
+            return cast_parameter_value(self, value)
+
+    type: DatasourceParameterType = Field(..., description="The type of the parameter")
+    description: I18nObject = Field(..., description="The description of the parameter")
+
+    @classmethod
+    def get_simple_instance(
+        cls,
+        name: str,
+        typ: DatasourceParameterType,
+        required: bool,
+        options: Optional[list[str]] = None,
+    ) -> "DatasourceParameter":
+        """
+        Build a datasource parameter with empty label and description.
+
+        :param name: the name of the parameter
+        :param typ: the type of the parameter
+        :param required: if the parameter is required
+        :param options: plain string options; each becomes a PluginParameterOption
+        """
+        # Wrap each plain string in a PluginParameterOption whose label reuses
+        # the option text for both locales; no options yields an empty list.
+        # FIXME fix the type error
+        option_objs = [
+            PluginParameterOption(value=text, label=I18nObject(en_US=text, zh_Hans=text))
+            for text in (options or [])
+        ]
+
+        return cls(
+            name=name,
+            label=I18nObject(en_US="", zh_Hans=""),
+            placeholder=None,
+            type=typ,
+            required=required,
+            options=option_objs,
+            description=I18nObject(en_US="", zh_Hans=""),
+        )
+
+    def init_frontend_parameter(self, value: Any):
+        """Delegate to the module-level ``init_frontend_parameter`` helper with this parameter's type."""
+        return init_frontend_parameter(self, self.type, value)
+
+
+class DatasourceIdentity(BaseModel):
+    """
+    Identifying information of a single datasource; only ``icon`` is optional.
+    """
+
+    author: str = Field(..., description="The author of the datasource")
+    name: str = Field(..., description="The name of the datasource")
+    label: I18nObject = Field(..., description="The label of the datasource")
+    provider: str = Field(..., description="The provider of the datasource")
+    icon: Optional[str] = None
+
+
+class DatasourceEntity(BaseModel):
+    """
+    A datasource declaration: its identity, parameters and description.
+    """
+
+    identity: DatasourceIdentity
+    parameters: list[DatasourceParameter] = Field(default_factory=list)
+    # NOTE(review): the Field description below says "label" although the
+    # field is ``description`` -- looks like a copy-paste slip; confirm.
+    description: I18nObject = Field(..., description="The label of the datasource")
+
+    @field_validator("parameters", mode="before")
+    @classmethod
+    def set_parameters(cls, v, validation_info: ValidationInfo) -> list[DatasourceParameter]:
+        # Runs before validation: None or any other falsy input becomes [].
+        return v or []
+
+
+class DatasourceProviderIdentity(BaseModel):
+    """
+    Identifying information of a datasource provider.
+    """
+
+    # NOTE(review): the Field descriptions below say "tool" although this model
+    # describes a datasource provider -- presumably carried over from the tool
+    # entities; confirm before rewording (they are runtime strings).
+    author: str = Field(..., description="The author of the tool")
+    name: str = Field(..., description="The name of the tool")
+    description: I18nObject = Field(..., description="The description of the tool")
+    icon: str = Field(..., description="The icon of the tool")
+    label: I18nObject = Field(..., description="The label of the tool")
+    tags: Optional[list[ToolLabelEnum]] = Field(
+        default=[],
+        description="The tags of the tool",
+    )
+
+
+class DatasourceProviderEntity(BaseModel):
+    """
+    Datasource provider entity: identity, credential/OAuth schemas and provider type.
+    """
+
+    identity: DatasourceProviderIdentity
+    credentials_schema: list[ProviderConfig] = Field(default_factory=list)
+    oauth_schema: Optional[OAuthSchema] = None
+    provider_type: DatasourceProviderType
+
+
+class DatasourceProviderEntityWithPlugin(DatasourceProviderEntity):
+    """
+    Datasource provider entity extended with the list of its datasources.
+    """
+
+    datasources: list[DatasourceEntity] = Field(default_factory=list)
+
+
+class DatasourceInvokeMeta(BaseModel):
+    """
+    Metadata recorded for a datasource invocation
+    """
+
+    time_cost: float = Field(..., description="The time cost of the tool invoke")
+    error: Optional[str] = None
+    tool_config: Optional[dict] = None
+
+    @classmethod
+    def empty(cls) -> "DatasourceInvokeMeta":
+        """
+        Build an instance with zero time cost, no error and an empty tool config
+        """
+        blank_fields = {"time_cost": 0.0, "error": None, "tool_config": {}}
+        return cls(**blank_fields)
+
+    @classmethod
+    def error_instance(cls, error: str) -> "DatasourceInvokeMeta":
+        """
+        Build an instance carrying ``error``, with zero time cost and an empty tool config
+        """
+        error_fields = {"time_cost": 0.0, "error": error, "tool_config": {}}
+        return cls(**error_fields)
+
+    def to_dict(self) -> dict:
+        """Return the three fields as a plain dict."""
+        return {field: getattr(self, field) for field in ("time_cost", "error", "tool_config")}
+
+
+class DatasourceLabel(BaseModel):
+    """
+    Datasource label: a name, a localized label and an icon.
+    """
+
+    # NOTE(review): the Field descriptions say "tool" -- presumably carried
+    # over from the tool entities; confirm.
+    name: str = Field(..., description="The name of the tool")
+    label: I18nObject = Field(..., description="The label of the tool")
+    icon: str = Field(..., description="The icon of the tool")
+
+
+class DatasourceInvokeFrom(Enum):
+    """
+    Enum class for the origin of a datasource invocation.
+
+    ``RAG_PIPELINE`` is currently the only member.
+    """
+
+    RAG_PIPELINE = "rag_pipeline"
+
+
+class OnlineDocumentPage(BaseModel):
+    """
+    A single page of an online document.
+
+    ``page_icon`` and ``parent_id`` are optional; all other fields are required.
+    """
+
+    page_id: str = Field(..., description="The page id")
+    page_name: str = Field(..., description="The page title")
+    page_icon: Optional[dict] = Field(None, description="The page icon")
+    type: str = Field(..., description="The type of the page")
+    last_edited_time: str = Field(..., description="The last edited time")
+    parent_id: Optional[str] = Field(None, description="The parent page id")
+
+
+class OnlineDocumentInfo(BaseModel):
+    """
+    Online document info: a workspace together with its pages.
+
+    All fields are required.
+    """
+
+    workspace_id: str = Field(..., description="The workspace id")
+    workspace_name: str = Field(..., description="The workspace name")
+    workspace_icon: str = Field(..., description="The workspace icon")
+    total: int = Field(..., description="The total number of documents")
+    pages: list[OnlineDocumentPage] = Field(..., description="The pages of the online document")
+
+
+class OnlineDocumentPagesMessage(BaseModel):
+    """
+    Response to a "get online document pages" call; wraps a list of
+    ``OnlineDocumentInfo`` entries.
+    """
+
+    result: list[OnlineDocumentInfo]
+
+
+class GetOnlineDocumentPageContentRequest(BaseModel):
+    """
+    Request for the content of one online document page.
+
+    All fields are required.
+    """
+
+    workspace_id: str = Field(..., description="The workspace id")
+    page_id: str = Field(..., description="The page id")
+    type: str = Field(..., description="The type of the page")
+
+
+class OnlineDocumentPageContent(BaseModel):
+    """
+    Content of one online document page, identified by workspace and page id.
+    """
+
+    workspace_id: str = Field(..., description="The workspace id")
+    page_id: str = Field(..., description="The page id")
+    content: str = Field(..., description="The content of the page")
+
+
+class GetOnlineDocumentPageContentResponse(BaseModel):
+    """
+    Response to a "get online document page content" call; wraps a single
+    ``OnlineDocumentPageContent``.
+    """
+
+    result: OnlineDocumentPageContent
+
+
+class GetWebsiteCrawlRequest(BaseModel):
+    """
+    Request to crawl a website; carries the crawl parameters as a free-form dict.
+    """
+
+    crawl_parameters: dict = Field(..., description="The crawl parameters")
+
+
+class WebSiteInfoDetail(BaseModel):
+    """
+    One crawled web page: url, content, title and description (all required).
+    """
+
+    source_url: str = Field(..., description="The url of the website")
+    content: str = Field(..., description="The content of the website")
+    title: str = Field(..., description="The title of the website")
+    description: str = Field(..., description="The description of the website")
+
+
+class WebSiteInfo(BaseModel):
+    """
+    Website crawl result: job status, crawled pages and progress counters.
+    """
+
+    # Required (no default) even though the type allows None.
+    status: Optional[str] = Field(..., description="crawl job status")
+    web_info_list: Optional[list[WebSiteInfoDetail]] = []
+    total: Optional[int] = Field(default=0, description="The total number of websites")
+    completed: Optional[int] = Field(default=0, description="The number of completed websites")
+
+
+class WebsiteCrawlMessage(BaseModel):
+    """
+    Response to a website crawl call; wraps a ``WebSiteInfo``.
+    """
+
+    # Defaults to an empty result: blank status, no pages, zero counters.
+    result: WebSiteInfo = WebSiteInfo(status="", web_info_list=[], total=0, completed=0)
+
+
+class DatasourceMessage(ToolInvokeMessage):
+    # Datasource-specific name for ToolInvokeMessage; adds no fields or behavior.
+    pass
+
+
+#########################
+# Online drive file
+#########################
+
+
+class OnlineDriveFile(BaseModel):
+    """
+    A file in an online drive, described by its key and size.
+    """
+
+    key: str = Field(..., description="The key of the file")
+    size: int = Field(..., description="The size of the file")
+
+
+class OnlineDriveFileBucket(BaseModel):
+    """
+    A listing of online drive files, optionally tied to a bucket.
+
+    ``is_truncated`` defaults to False; ``files`` is required.
+    """
+
+    bucket: Optional[str] = Field(None, description="The bucket of the file")
+    files: list[OnlineDriveFile] = Field(..., description="The files of the bucket")
+    is_truncated: bool = Field(False, description="Whether the bucket has more files")
+
+
+class OnlineDriveBrowseFilesRequest(BaseModel):
+    """
+    Request to list files in an online drive.
+
+    Every field is optional; ``max_keys`` defaults to 20.
+    """
+
+    prefix: Optional[str] = Field(None, description="File path prefix for filtering eg: 'docs/dify/'")
+    bucket: Optional[str] = Field(None, description="Storage bucket name")
+    max_keys: int = Field(20, description="Maximum number of files to return")
+    start_after: Optional[str] = Field(
+        None, description="Pagination token for continuing from a specific file eg: 'docs/dify/1.txt'"
+    )
+
+
+class OnlineDriveBrowseFilesResponse(BaseModel):
+    """
+    Response to an online drive file listing; wraps a list of
+    ``OnlineDriveFileBucket`` entries.
+    """
+
+    result: list[OnlineDriveFileBucket] = Field(..., description="The bucket of the files")
+
+
+class OnlineDriveDownloadFileRequest(BaseModel):
+    """
+    Request to download one online drive file, identified by key and an
+    optional bucket.
+    """
+
+    key: str = Field(..., description="The name of the file")
+    bucket: Optional[str] = Field(None, description="The name of the bucket")
--- a/api/core/datasource/errors.py
+++ b/api/core/datasource/errors.py
@ -0,0 +1,37 @@
+from core.datasource.entities.datasource_entities import DatasourceInvokeMeta
+
+
+# The exception classes below all subclass ValueError and add no behavior of
+# their own, so callers that catch ValueError also catch them.
+class DatasourceProviderNotFoundError(ValueError):
+    pass
+
+
+class DatasourceNotFoundError(ValueError):
+    pass
+
+
+class DatasourceParameterValidationError(ValueError):
+    pass
+
+
+class DatasourceProviderCredentialValidationError(ValueError):
+    pass
+
+
+class DatasourceNotSupportedError(ValueError):
+    pass
+
+
+class DatasourceInvokeError(ValueError):
+    pass
+
+
+class DatasourceApiSchemaError(ValueError):
+    pass
+
+
+class DatasourceEngineInvokeError(Exception):
+    """
+    Exception that carries the ``DatasourceInvokeMeta`` of the invocation it reports.
+    """
+
+    meta: DatasourceInvokeMeta
+
+    def __init__(self, meta, **kwargs):
+        """
+        :param meta: the invoke meta to attach to the exception
+        :param kwargs: extra details; their values become the exception's ``args``
+        """
+        self.meta = meta
+        # Exception.__init__ accepts positional arguments only, so forwarding
+        # **kwargs raised TypeError whenever a keyword was supplied. Pass the
+        # values positionally instead; with no kwargs this is unchanged.
+        super().__init__(*kwargs.values())
--- a/api/core/datasource/local_file/local_file_plugin.py
+++ b/api/core/datasource/local_file/local_file_plugin.py
@ -0,0 +1,28 @@
+from core.datasource.__base.datasource_plugin import DatasourcePlugin
+from core.datasource.__base.datasource_runtime import DatasourceRuntime
+from core.datasource.entities.datasource_entities import (
+    DatasourceEntity,
+    DatasourceProviderType,
+)
+
+
+class LocalFileDatasourcePlugin(DatasourcePlugin):
+    """
+    Datasource plugin for the local-file provider type.
+    """
+
+    tenant_id: str
+    icon: str
+    plugin_unique_identifier: str
+
+    def __init__(
+        self,
+        entity: DatasourceEntity,
+        runtime: DatasourceRuntime,
+        tenant_id: str,
+        icon: str,
+        plugin_unique_identifier: str,
+    ) -> None:
+        # entity and runtime are handed to the base class; the rest is stored here.
+        super().__init__(entity, runtime)
+        self.tenant_id = tenant_id
+        self.icon = icon
+        self.plugin_unique_identifier = plugin_unique_identifier
+
+    def datasource_provider_type(self) -> str:
+        # DatasourceProviderType is a StrEnum, so the member is itself a str.
+        return DatasourceProviderType.LOCAL_FILE
--- a/api/core/datasource/local_file/local_file_provider.py
+++ b/api/core/datasource/local_file/local_file_provider.py
@ -0,0 +1,56 @@
+from typing import Any
+
+from core.datasource.__base.datasource_provider import DatasourcePluginProviderController
+from core.datasource.__base.datasource_runtime import DatasourceRuntime
+from core.datasource.entities.datasource_entities import DatasourceProviderEntityWithPlugin, DatasourceProviderType
+from core.datasource.local_file.local_file_plugin import LocalFileDatasourcePlugin
+
+
+class LocalFileDatasourcePluginProviderController(DatasourcePluginProviderController):
+    """
+    Provider controller for local-file datasources.
+    """
+
+    entity: DatasourceProviderEntityWithPlugin
+    plugin_id: str
+    plugin_unique_identifier: str
+
+    def __init__(
+        self, entity: DatasourceProviderEntityWithPlugin, plugin_id: str, plugin_unique_identifier: str, tenant_id: str
+    ) -> None:
+        super().__init__(entity, tenant_id)
+        self.plugin_unique_identifier = plugin_unique_identifier
+        self.plugin_id = plugin_id
+
+    @property
+    def provider_type(self) -> DatasourceProviderType:
+        """
+        the provider type, always LOCAL_FILE
+        """
+        return DatasourceProviderType.LOCAL_FILE
+
+    def _validate_credentials(self, user_id: str, credentials: dict[str, Any]) -> None:
+        """
+        no-op: nothing is checked for this provider
+        """
+        pass
+
+    def get_datasource(self, datasource_name: str) -> LocalFileDatasourcePlugin:  # type: ignore
+        """
+        return the datasource whose identity name equals ``datasource_name``
+
+        :raises ValueError: if the provider declares no datasource with that name
+        """
+        # first declared datasource with a matching name wins
+        matched = None
+        for candidate in self.entity.datasources:
+            if candidate.identity.name == datasource_name:
+                matched = candidate
+                break
+
+        if not matched:
+            raise ValueError(f"Datasource with name {datasource_name} not found")
+
+        return LocalFileDatasourcePlugin(
+            entity=matched,
+            runtime=DatasourceRuntime(tenant_id=self.tenant_id),
+            tenant_id=self.tenant_id,
+            icon=self.entity.identity.icon,
+            plugin_unique_identifier=self.plugin_unique_identifier,
+        )
--- a/api/core/datasource/online_document/online_document_plugin.py
+++ b/api/core/datasource/online_document/online_document_plugin.py
@ -0,0 +1,73 @@
+from collections.abc import Generator, Mapping
+from typing import Any
+
+from core.datasource.__base.datasource_plugin import DatasourcePlugin
+from core.datasource.__base.datasource_runtime import DatasourceRuntime
+from core.datasource.entities.datasource_entities import (
+    DatasourceEntity,
+    DatasourceMessage,
+    DatasourceProviderType,
+    GetOnlineDocumentPageContentRequest,
+    OnlineDocumentPagesMessage,
+)
+from core.plugin.impl.datasource import PluginDatasourceManager
+
+
+class OnlineDocumentDatasourcePlugin(DatasourcePlugin):
+    tenant_id: str
+    icon: str
+    plugin_unique_identifier: str
+    entity: DatasourceEntity
+    runtime: DatasourceRuntime
+
+    def __init__(
+        self,
+        entity: DatasourceEntity,
+        runtime: DatasourceRuntime,
+        tenant_id: str,
+        icon: str,
+        plugin_unique_identifier: str,
+    ) -> None:
+        super().__init__(entity, runtime)
+        self.tenant_id = tenant_id
+        self.icon = icon
+        self.plugin_unique_identifier = plugin_unique_identifier
+
+    def get_online_document_pages(
+        self,
+        user_id: str,
+        datasource_parameters: Mapping[str, Any],
+        provider_type: str,
+    ) -> Generator[OnlineDocumentPagesMessage, None, None]:
+        manager = PluginDatasourceManager()
+
+        return manager.get_online_document_pages(
+            tenant_id=self.tenant_id,
+            user_id=user_id,
+            datasource_provider=self.entity.identity.provider,
+            datasource_name=self.entity.identity.name,
+            credentials=self.runtime.credentials,
+            datasource_parameters=datasource_parameters,
+            provider_type=provider_type,
+        )
+
+    def get_online_document_page_content(
+        self,
+        user_id: str,
+        datasource_parameters: GetOnlineDocumentPageContentRequest,
+        provider_type: str,
+    ) -> Generator[DatasourceMessage, None, None]:
+        manager = PluginDatasourceManager()
+
+        return manager.get_online_document_page_content(
+            tenant_id=self.tenant_id,
+            user_id=user_id,
+            datasource_provider=self.entity.identity.provider,
+            datasource_name=self.entity.identity.name,
+            credentials=self.runtime.credentials,
+            datasource_parameters=datasource_parameters,
+            provider_type=provider_type,
+        )
+
+    def datasource_provider_type(self) -> str:
+        return DatasourceProviderType.ONLINE_DOCUMENT
--- a/api/core/datasource/online_document/online_document_provider.py
+++ b/api/core/datasource/online_document/online_document_provider.py
@ -0,0 +1,48 @@
+from core.datasource.__base.datasource_provider import DatasourcePluginProviderController
+from core.datasource.__base.datasource_runtime import DatasourceRuntime
+from core.datasource.entities.datasource_entities import DatasourceProviderEntityWithPlugin, DatasourceProviderType
+from core.datasource.online_document.online_document_plugin import OnlineDocumentDatasourcePlugin
+
+
+class OnlineDocumentDatasourcePluginProviderController(DatasourcePluginProviderController):
+    entity: DatasourceProviderEntityWithPlugin
+    plugin_id: str
+    plugin_unique_identifier: str
+
+    def __init__(
+        self, entity: DatasourceProviderEntityWithPlugin, plugin_id: str, plugin_unique_identifier: str, tenant_id: str
+    ) -> None:
+        super().__init__(entity, tenant_id)
+        self.plugin_id = plugin_id
+        self.plugin_unique_identifier = plugin_unique_identifier
+
+    @property
+    def provider_type(self) -> DatasourceProviderType:
+        """
+        returns the type of the provider
+        """
+        return DatasourceProviderType.ONLINE_DOCUMENT
+
+    def get_datasource(self, datasource_name: str) -> OnlineDocumentDatasourcePlugin:  # type: ignore
+        """
+        return datasource with given name
+        """
+        datasource_entity = next(
+            (
+                datasource_entity
+                for datasource_entity in self.entity.datasources
+                if datasource_entity.identity.name == datasource_name
+            ),
+            None,
+        )
+
+        if not datasource_entity:
+            raise ValueError(f"Datasource with name {datasource_name} not found")
+
+        return OnlineDocumentDatasourcePlugin(
+            entity=datasource_entity,
+            runtime=DatasourceRuntime(tenant_id=self.tenant_id),
+            tenant_id=self.tenant_id,
+            icon=self.entity.identity.icon,
+            plugin_unique_identifier=self.plugin_unique_identifier,
+        )
--- a/api/core/datasource/online_drive/online_drive_plugin.py
+++ b/api/core/datasource/online_drive/online_drive_plugin.py
@ -0,0 +1,73 @@
+from collections.abc import Generator
+
+from core.datasource.__base.datasource_plugin import DatasourcePlugin
+from core.datasource.__base.datasource_runtime import DatasourceRuntime
+from core.datasource.entities.datasource_entities import (
+    DatasourceEntity,
+    DatasourceMessage,
+    DatasourceProviderType,
+    OnlineDriveBrowseFilesRequest,
+    OnlineDriveBrowseFilesResponse,
+    OnlineDriveDownloadFileRequest,
+)
+from core.plugin.impl.datasource import PluginDatasourceManager
+
+
+class OnlineDriveDatasourcePlugin(DatasourcePlugin):
+    tenant_id: str
+    icon: str
+    plugin_unique_identifier: str
+    entity: DatasourceEntity
+    runtime: DatasourceRuntime
+
+    def __init__(
+        self,
+        entity: DatasourceEntity,
+        runtime: DatasourceRuntime,
+        tenant_id: str,
+        icon: str,
+        plugin_unique_identifier: str,
+    ) -> None:
+        super().__init__(entity, runtime)
+        self.tenant_id = tenant_id
+        self.icon = icon
+        self.plugin_unique_identifier = plugin_unique_identifier
+
+    def online_drive_browse_files(
+        self,
+        user_id: str,
+        request: OnlineDriveBrowseFilesRequest,
+        provider_type: str,
+    ) -> Generator[OnlineDriveBrowseFilesResponse, None, None]:
+        manager = PluginDatasourceManager()
+
+        return manager.online_drive_browse_files(
+            tenant_id=self.tenant_id,
+            user_id=user_id,
+            datasource_provider=self.entity.identity.provider,
+            datasource_name=self.entity.identity.name,
+            credentials=self.runtime.credentials,
+            request=request,
+            provider_type=provider_type,
+        )
+
+    def online_drive_download_file(
+        self,
+        user_id: str,
+        request: OnlineDriveDownloadFileRequest,
+        provider_type: str,
+    ) -> Generator[DatasourceMessage, None, None]:
+        manager = PluginDatasourceManager()
+
+        return manager.online_drive_download_file(
+            tenant_id=self.tenant_id,
+            user_id=user_id,
+            datasource_provider=self.entity.identity.provider,
+            datasource_name=self.entity.identity.name,
+            credentials=self.runtime.credentials,
+            request=request,
+            provider_type=provider_type,
+        )
+
+    def datasource_provider_type(self) -> str:
+        return DatasourceProviderType.ONLINE_DRIVE
--- a/api/core/datasource/online_drive/online_drive_provider.py
+++ b/api/core/datasource/online_drive/online_drive_provider.py
@ -0,0 +1,48 @@
+from core.datasource.__base.datasource_provider import DatasourcePluginProviderController
+from core.datasource.__base.datasource_runtime import DatasourceRuntime
+from core.datasource.entities.datasource_entities import DatasourceProviderEntityWithPlugin, DatasourceProviderType
+from core.datasource.online_drive.online_drive_plugin import OnlineDriveDatasourcePlugin
+
+
+class OnlineDriveDatasourcePluginProviderController(DatasourcePluginProviderController):
+    entity: DatasourceProviderEntityWithPlugin
+    plugin_id: str
+    plugin_unique_identifier: str
+
+    def __init__(
+        self, entity: DatasourceProviderEntityWithPlugin, plugin_id: str, plugin_unique_identifier: str, tenant_id: str
+    ) -> None:
+        super().__init__(entity, tenant_id)
+        self.plugin_id = plugin_id
+        self.plugin_unique_identifier = plugin_unique_identifier
+
+    @property
+    def provider_type(self) -> DatasourceProviderType:
+        """
+        returns the type of the provider
+        """
+        return DatasourceProviderType.ONLINE_DRIVE
+
+    def get_datasource(self, datasource_name: str) -> OnlineDriveDatasourcePlugin:  # type: ignore
+        """
+        return datasource with given name
+        """
+        datasource_entity = next(
+            (
+                datasource_entity
+                for datasource_entity in self.entity.datasources
+                if datasource_entity.identity.name == datasource_name
+            ),
+            None,
+        )
+
+        if not datasource_entity:
+            raise ValueError(f"Datasource with name {datasource_name} not found")
+
+        return OnlineDriveDatasourcePlugin(
+            entity=datasource_entity,
+            runtime=DatasourceRuntime(tenant_id=self.tenant_id),
+            tenant_id=self.tenant_id,
+            icon=self.entity.identity.icon,
+            plugin_unique_identifier=self.plugin_unique_identifier,
+        )
--- a/api/core/ops/aliyun_trace/data_exporter/init.py
+++ b/api/core/ops/aliyun_trace/data_exporter/init.py
--- a/api/core/datasource/utils/configuration.py
+++ b/api/core/datasource/utils/configuration.py
@ -0,0 +1,265 @@
+from copy import deepcopy
+from typing import Any
+
+from pydantic import BaseModel
+
+from core.entities.provider_entities import BasicProviderConfig
+from core.helper import encrypter
+from core.helper.tool_parameter_cache import ToolParameterCache, ToolParameterCacheType
+from core.helper.tool_provider_cache import ToolProviderCredentialsCache, ToolProviderCredentialsCacheType
+from core.tools.__base.tool import Tool
+from core.tools.entities.tool_entities import (
+    ToolParameter,
+    ToolProviderType,
+)
+
+
+class ProviderConfigEncrypter(BaseModel):
+    tenant_id: str
+    config: list[BasicProviderConfig]
+    provider_type: str
+    provider_identity: str
+
+    def _deep_copy(self, data: dict[str, str]) -> dict[str, str]:
+        """
+        deep copy data
+        """
+        return deepcopy(data)
+
+    def encrypt(self, data: dict[str, str]) -> dict[str, str]:
+        """
+        encrypt tool credentials with tenant id
+
+        return a deep copy of credentials with encrypted values
+        """
+        data = self._deep_copy(data)
+
+        # get fields need to be decrypted
+        fields = dict[str, BasicProviderConfig]()
+        for credential in self.config:
+            fields[credential.name] = credential
+
+        for field_name, field in fields.items():
+            if field.type == BasicProviderConfig.Type.SECRET_INPUT:
+                if field_name in data:
+                    encrypted = encrypter.encrypt_token(self.tenant_id, data[field_name] or "")
+                    data[field_name] = encrypted
+
+        return data
+
+    def mask_tool_credentials(self, data: dict[str, Any]) -> dict[str, Any]:
+        """
+        mask tool credentials
+
+        return a deep copy of credentials with masked values
+        """
+        data = self._deep_copy(data)
+
+        # get fields need to be decrypted
+        fields = dict[str, BasicProviderConfig]()
+        for credential in self.config:
+            fields[credential.name] = credential
+
+        for field_name, field in fields.items():
+            if field.type == BasicProviderConfig.Type.SECRET_INPUT:
+                if field_name in data:
+                    if len(data[field_name]) > 6:
+                        data[field_name] = (
+                            data[field_name][:2] + "*" * (len(data[field_name]) - 4) + data[field_name][-2:]
+                        )
+                    else:
+                        data[field_name] = "*" * len(data[field_name])
+
+        return data
+
+    def decrypt(self, data: dict[str, str]) -> dict[str, str]:
+        """
+        decrypt tool credentials with tenant id
+
+        return a deep copy of credentials with decrypted values
+        """
+        cache = ToolProviderCredentialsCache(
+            tenant_id=self.tenant_id,
+            identity_id=f"{self.provider_type}.{self.provider_identity}",
+            cache_type=ToolProviderCredentialsCacheType.PROVIDER,
+        )
+        cached_credentials = cache.get()
+        if cached_credentials:
+            return cached_credentials
+        data = self._deep_copy(data)
+        # get fields need to be decrypted
+        fields = dict[str, BasicProviderConfig]()
+        for credential in self.config:
+            fields[credential.name] = credential
+
+        for field_name, field in fields.items():
+            if field.type == BasicProviderConfig.Type.SECRET_INPUT:
+                if field_name in data:
+                    try:
+                        # if the value is None or empty string, skip decrypt
+                        if not data[field_name]:
+                            continue
+
+                        data[field_name] = encrypter.decrypt_token(self.tenant_id, data[field_name])
+                    except Exception:
+                        pass
+
+        cache.set(data)
+        return data
+
+    def delete_tool_credentials_cache(self):
+        cache = ToolProviderCredentialsCache(
+            tenant_id=self.tenant_id,
+            identity_id=f"{self.provider_type}.{self.provider_identity}",
+            cache_type=ToolProviderCredentialsCacheType.PROVIDER,
+        )
+        cache.delete()
+
+
+class ToolParameterConfigurationManager:
+    """
+    Tool parameter configuration manager
+    """
+
+    tenant_id: str
+    tool_runtime: Tool
+    provider_name: str
+    provider_type: ToolProviderType
+    identity_id: str
+
+    def __init__(
+        self, tenant_id: str, tool_runtime: Tool, provider_name: str, provider_type: ToolProviderType, identity_id: str
+    ) -> None:
+        self.tenant_id = tenant_id
+        self.tool_runtime = tool_runtime
+        self.provider_name = provider_name
+        self.provider_type = provider_type
+        self.identity_id = identity_id
+
+    def _deep_copy(self, parameters: dict[str, Any]) -> dict[str, Any]:
+        """
+        deep copy parameters
+        """
+        return deepcopy(parameters)
+
+    def _merge_parameters(self) -> list[ToolParameter]:
+        """
+        merge parameters
+        """
+        # get tool parameters
+        tool_parameters = self.tool_runtime.entity.parameters or []
+        # get tool runtime parameters
+        runtime_parameters = self.tool_runtime.get_runtime_parameters()
+        # override parameters
+        current_parameters = tool_parameters.copy()
+        for runtime_parameter in runtime_parameters:
+            found = False
+            for index, parameter in enumerate(current_parameters):
+                if parameter.name == runtime_parameter.name and parameter.form == runtime_parameter.form:
+                    current_parameters[index] = runtime_parameter
+                    found = True
+                    break
+
+            if not found and runtime_parameter.form == ToolParameter.ToolParameterForm.FORM:
+                current_parameters.append(runtime_parameter)
+
+        return current_parameters
+
+    def mask_tool_parameters(self, parameters: dict[str, Any]) -> dict[str, Any]:
+        """
+        mask tool parameters
+
+        return a deep copy of parameters with masked values
+        """
+        parameters = self._deep_copy(parameters)
+
+        # override parameters
+        current_parameters = self._merge_parameters()
+
+        for parameter in current_parameters:
+            if (
+                parameter.form == ToolParameter.ToolParameterForm.FORM
+                and parameter.type == ToolParameter.ToolParameterType.SECRET_INPUT
+            ):
+                if parameter.name in parameters:
+                    if len(parameters[parameter.name]) > 6:
+                        parameters[parameter.name] = (
+                            parameters[parameter.name][:2]
+                            + "*" * (len(parameters[parameter.name]) - 4)
+                            + parameters[parameter.name][-2:]
+                        )
+                    else:
+                        parameters[parameter.name] = "*" * len(parameters[parameter.name])
+
+        return parameters
+
+    def encrypt_tool_parameters(self, parameters: dict[str, Any]) -> dict[str, Any]:
+        """
+        encrypt tool parameters with tenant id
+
+        return a deep copy of parameters with encrypted values
+        """
+        # override parameters
+        current_parameters = self._merge_parameters()
+
+        parameters = self._deep_copy(parameters)
+
+        for parameter in current_parameters:
+            if (
+                parameter.form == ToolParameter.ToolParameterForm.FORM
+                and parameter.type == ToolParameter.ToolParameterType.SECRET_INPUT
+            ):
+                if parameter.name in parameters:
+                    encrypted = encrypter.encrypt_token(self.tenant_id, parameters[parameter.name])
+                    parameters[parameter.name] = encrypted
+
+        return parameters
+
+    def decrypt_tool_parameters(self, parameters: dict[str, Any]) -> dict[str, Any]:
+        """
+        decrypt tool parameters with tenant id
+
+        return a deep copy of parameters with decrypted values
+        """
+
+        cache = ToolParameterCache(
+            tenant_id=self.tenant_id,
+            provider=f"{self.provider_type.value}.{self.provider_name}",
+            tool_name=self.tool_runtime.entity.identity.name,
+            cache_type=ToolParameterCacheType.PARAMETER,
+            identity_id=self.identity_id,
+        )
+        cached_parameters = cache.get()
+        if cached_parameters:
+            return cached_parameters
+
+        # override parameters
+        current_parameters = self._merge_parameters()
+        has_secret_input = False
+
+        for parameter in current_parameters:
+            if (
+                parameter.form == ToolParameter.ToolParameterForm.FORM
+                and parameter.type == ToolParameter.ToolParameterType.SECRET_INPUT
+            ):
+                if parameter.name in parameters:
+                    try:
+                        has_secret_input = True
+                        parameters[parameter.name] = encrypter.decrypt_token(self.tenant_id, parameters[parameter.name])
+                    except Exception:
+                        pass
+
+        if has_secret_input:
+            cache.set(parameters)
+
+        return parameters
+
+    def delete_tool_parameters_cache(self):
+        cache = ToolParameterCache(
+            tenant_id=self.tenant_id,
+            provider=f"{self.provider_type.value}.{self.provider_name}",
+            tool_name=self.tool_runtime.entity.identity.name,
+            cache_type=ToolParameterCacheType.PARAMETER,
+            identity_id=self.identity_id,
+        )
+        cache.delete()
--- a/api/core/datasource/utils/message_transformer.py
+++ b/api/core/datasource/utils/message_transformer.py
@ -0,0 +1,121 @@
+import logging
+from collections.abc import Generator
+from mimetypes import guess_extension
+from typing import Optional
+
+from core.datasource.datasource_file_manager import DatasourceFileManager
+from core.datasource.entities.datasource_entities import DatasourceMessage
+from core.file import File, FileTransferMethod, FileType
+
+logger = logging.getLogger(__name__)
+
+
+class DatasourceFileMessageTransformer:
+    @classmethod
+    def transform_datasource_invoke_messages(
+        cls,
+        messages: Generator[DatasourceMessage, None, None],
+        user_id: str,
+        tenant_id: str,
+        conversation_id: Optional[str] = None,
+    ) -> Generator[DatasourceMessage, None, None]:
+        """
+        Transform datasource message and handle file download
+        """
+        for message in messages:
+            if message.type in {DatasourceMessage.MessageType.TEXT, DatasourceMessage.MessageType.LINK}:
+                yield message
+            elif message.type == DatasourceMessage.MessageType.IMAGE and isinstance(
+                message.message, DatasourceMessage.TextMessage
+            ):
+                # try to download image
+                try:
+                    assert isinstance(message.message, DatasourceMessage.TextMessage)
+
+                    file = DatasourceFileManager.create_file_by_url(
+                        user_id=user_id,
+                        tenant_id=tenant_id,
+                        file_url=message.message.text,
+                        conversation_id=conversation_id,
+                    )
+
+                    url = f"/files/datasources/{file.id}{guess_extension(file.mime_type) or '.png'}"
+
+                    yield DatasourceMessage(
+                        type=DatasourceMessage.MessageType.IMAGE_LINK,
+                        message=DatasourceMessage.TextMessage(text=url),
+                        meta=message.meta.copy() if message.meta is not None else {},
+                    )
+                except Exception as e:
+                    yield DatasourceMessage(
+                        type=DatasourceMessage.MessageType.TEXT,
+                        message=DatasourceMessage.TextMessage(
+                            text=f"Failed to download image: {message.message.text}: {e}"
+                        ),
+                        meta=message.meta.copy() if message.meta is not None else {},
+                    )
+            elif message.type == DatasourceMessage.MessageType.BLOB:
+                # get mime type and save blob to storage
+                meta = message.meta or {}
+
+                mimetype = meta.get("mime_type", "application/octet-stream")
+                # get filename from meta
+                filename = meta.get("file_name", None)
+                # if message is str, encode it to bytes
+
+                if not isinstance(message.message, DatasourceMessage.BlobMessage):
+                    raise ValueError("unexpected message type")
+
+                # FIXME: should do a type check here.
+                assert isinstance(message.message.blob, bytes)
+                file = DatasourceFileManager.create_file_by_raw(
+                    user_id=user_id,
+                    tenant_id=tenant_id,
+                    conversation_id=conversation_id,
+                    file_binary=message.message.blob,
+                    mimetype=mimetype,
+                    filename=filename,
+                )
+
+                url = cls.get_datasource_file_url(datasource_file_id=file.id, extension=guess_extension(file.mime_type))
+
+                # check if file is image
+                if "image" in mimetype:
+                    yield DatasourceMessage(
+                        type=DatasourceMessage.MessageType.IMAGE_LINK,
+                        message=DatasourceMessage.TextMessage(text=url),
+                        meta=meta.copy() if meta is not None else {},
+                    )
+                else:
+                    yield DatasourceMessage(
+                        type=DatasourceMessage.MessageType.BINARY_LINK,
+                        message=DatasourceMessage.TextMessage(text=url),
+                        meta=meta.copy() if meta is not None else {},
+                    )
+            elif message.type == DatasourceMessage.MessageType.FILE:
+                meta = message.meta or {}
+                file = meta.get("file", None)
+                if isinstance(file, File):
+                    if file.transfer_method == FileTransferMethod.TOOL_FILE:
+                        assert file.related_id is not None
+                        url = cls.get_datasource_file_url(datasource_file_id=file.related_id, extension=file.extension)
+                        if file.type == FileType.IMAGE:
+                            yield DatasourceMessage(
+                                type=DatasourceMessage.MessageType.IMAGE_LINK,
+                                message=DatasourceMessage.TextMessage(text=url),
+                                meta=meta.copy() if meta is not None else {},
+                            )
+                        else:
+                            yield DatasourceMessage(
+                                type=DatasourceMessage.MessageType.LINK,
+                                message=DatasourceMessage.TextMessage(text=url),
+                                meta=meta.copy() if meta is not None else {},
+                            )
+                    else:
+                        yield message
+            else:
+                yield message
+
+    @classmethod
+    def get_datasource_file_url(cls, datasource_file_id: str, extension: Optional[str]) -> str:
+        return f"/files/datasources/{datasource_file_id}{extension or '.bin'}"
--- a/api/core/datasource/utils/parser.py
+++ b/api/core/datasource/utils/parser.py
@ -0,0 +1,389 @@
+import re
+import uuid
+from json import dumps as json_dumps
+from json import loads as json_loads
+from json.decoder import JSONDecodeError
+from typing import Optional
+
+from flask import request
+from requests import get
+from yaml import YAMLError, safe_load  # type: ignore
+
+from core.tools.entities.common_entities import I18nObject
+from core.tools.entities.tool_bundle import ApiToolBundle
+from core.tools.entities.tool_entities import ApiProviderSchemaType, ToolParameter
+from core.tools.errors import ToolApiSchemaError, ToolNotSupportedError, ToolProviderNotFoundError
+
+
+class ApiBasedToolSchemaParser:
+    @staticmethod
+    def parse_openapi_to_tool_bundle(
+        openapi: dict, extra_info: dict | None = None, warning: dict | None = None
+    ) -> list[ApiToolBundle]:
+        """
+        parse an openapi dict to tool bundles, one bundle per (path, method) operation
+
+        Note that ``openapi`` is modified in place: request body ``$ref`` schemas are
+        replaced by the schema they point to, and operations without an
+        ``operationId`` get a generated one.
+
+        :param openapi: the openapi dict; "info", "servers" and "paths" are read directly
+        :param extra_info: the extra info, receives the document description under "description"
+        :param warning: the warning message, receives "duplicated_parameter" when a name repeats
+        :return: the tool bundle
+        :raises ToolProviderNotFoundError: if the "servers" list is empty
+        """
+        warning = warning if warning is not None else {}
+        extra_info = extra_info if extra_info is not None else {}
+
+        # set description to extra_info
+        extra_info["description"] = openapi["info"].get("description", "")
+
+        if len(openapi["servers"]) == 0:
+            raise ToolProviderNotFoundError("No server found in the openapi yaml.")
+
+        # the first server is the default; a server tagged with the requested env takes precedence
+        server_url = openapi["servers"][0]["url"]
+        request_env = request.headers.get("X-Request-Env")
+        if request_env:
+            # "env" is a custom key, so servers that do not declare it must simply not match
+            matched_servers = [server["url"] for server in openapi["servers"] if server.get("env") == request_env]
+            server_url = matched_servers[0] if matched_servers else server_url
+
+        # list all interfaces
+        methods = ["get", "post", "put", "delete", "patch", "head", "options", "trace"]
+        interfaces = []
+        for path, path_item in openapi["paths"].items():
+            for method in methods:
+                if method in path_item:
+                    interfaces.append(
+                        {
+                            "path": path,
+                            "method": method,
+                            "operation": path_item[method],
+                        }
+                    )
+
+        # get all parameters
+        bundles = []
+        for interface in interfaces:
+            operation = interface["operation"]
+
+            # convert parameters
+            parameters = []
+            if "parameters" in operation:
+                for parameter in operation["parameters"]:
+                    description = parameter.get("description", "")
+                    tool_parameter = ToolParameter(
+                        name=parameter["name"],
+                        label=I18nObject(en_US=parameter["name"], zh_Hans=parameter["name"]),
+                        human_description=I18nObject(en_US=description, zh_Hans=description),
+                        type=ToolParameter.ToolParameterType.STRING,
+                        required=parameter.get("required", False),
+                        form=ToolParameter.ToolParameterForm.LLM,
+                        llm_description=parameter.get("description"),
+                        default=parameter["schema"]["default"]
+                        if "schema" in parameter and "default" in parameter["schema"]
+                        else None,
+                        placeholder=I18nObject(en_US=description, zh_Hans=description),
+                    )
+
+                    # STRING is only the fallback; use the declared type when it can be mapped
+                    typ = ApiBasedToolSchemaParser._get_tool_parameter_type(parameter)
+                    if typ:
+                        tool_parameter.type = typ
+
+                    parameters.append(tool_parameter)
+
+            # check if there is a request body
+            if "requestBody" in operation:
+                request_body = operation["requestBody"]
+                if "content" in request_body:
+                    body_content = request_body["content"]
+                    # remember the last media type explicitly so that an empty "content"
+                    # mapping cannot leave the name unbound (or stale from a previous operation)
+                    last_content_type = None
+                    for content_type, content in body_content.items():
+                        last_content_type = content_type
+                        if "schema" not in content:
+                            continue
+
+                        # if there is a reference, get the reference and overwrite the content
+                        if "$ref" in content["schema"]:
+                            # walk the document along the reference path, skipping the leading "#"
+                            root = openapi
+                            reference = content["schema"]["$ref"].split("/")[1:]
+                            for ref in reference:
+                                root = root[ref]
+                            # overwrite the content
+                            body_content[content_type]["schema"] = root
+
+                    # parse body parameters; only the last listed media type is used
+                    if last_content_type is not None and "schema" in body_content[last_content_type]:
+                        body_schema = body_content[last_content_type]["schema"]
+                        required = body_schema.get("required", [])
+                        properties = body_schema.get("properties", {})
+                        for name, prop in properties.items():
+                            description = prop.get("description", "")
+                            tool = ToolParameter(
+                                name=name,
+                                label=I18nObject(en_US=name, zh_Hans=name),
+                                human_description=I18nObject(en_US=description, zh_Hans=description),
+                                type=ToolParameter.ToolParameterType.STRING,
+                                required=name in required,
+                                form=ToolParameter.ToolParameterForm.LLM,
+                                llm_description=description,
+                                default=prop.get("default", None),
+                                placeholder=I18nObject(en_US=description, zh_Hans=description),
+                            )
+
+                            # STRING is only the fallback; use the declared type when it can be mapped
+                            typ = ApiBasedToolSchemaParser._get_tool_parameter_type(prop)
+                            if typ:
+                                tool.type = typ
+
+                            parameters.append(tool)
+
+            # check if parameters is duplicated
+            parameters_count: dict = {}
+            for parameter in parameters:
+                parameters_count[parameter.name] = parameters_count.get(parameter.name, 0) + 1
+            for name, count in parameters_count.items():
+                if count > 1:
+                    warning["duplicated_parameter"] = f"Parameter {name} is duplicated."
+
+            # check if there is a operation id, use $path_$method as operation id if not
+            if "operationId" not in operation:
+                path = interface["path"]
+                if path.startswith("/"):
+                    path = path[1:]
+                # remove special characters like / to ensure the operation id is valid ^[a-zA-Z0-9_-]{1,64}$
+                path = re.sub(r"[^a-zA-Z0-9_-]", "", path)
+                if not path:
+                    # nothing usable is left of the path (e.g. "/"), fall back to a random id
+                    path = str(uuid.uuid4())
+
+                operation["operationId"] = f"{path}_{interface['method']}"
+
+            # create tool bundle
+            bundles.append(
+                ApiToolBundle(
+                    server_url=server_url + interface["path"],
+                    method=interface["method"],
+                    summary=operation["description"]
+                    if "description" in operation
+                    else operation.get("summary", None),
+                    operation_id=operation["operationId"],
+                    parameters=parameters,
+                    author="",
+                    icon=None,
+                    openapi=operation,
+                )
+            )
+
+        return bundles
+
+    @staticmethod
+    def _get_tool_parameter_type(parameter: dict) -> Optional[ToolParameter.ToolParameterType]:
+        """
+        map an openapi parameter or property definition to a tool parameter type
+
+        A top-level "format" of "binary" always maps to FILE. Otherwise the "type" is
+        taken from the definition itself or, failing that, from its nested "schema".
+
+        :param parameter: the parameter or property definition, a falsy value is treated as empty
+        :return: the matching tool parameter type, None if nothing can be mapped
+        """
+        spec = parameter or {}
+        if spec.get("format") == "binary":
+            return ToolParameter.ToolParameterType.FILE
+
+        declared: Optional[str] = None
+        if "type" in spec:
+            declared = spec["type"]
+        elif "schema" in spec and "type" in spec["schema"]:
+            declared = spec["schema"]["type"]
+
+        scalar_types = {
+            "integer": ToolParameter.ToolParameterType.NUMBER,
+            "number": ToolParameter.ToolParameterType.NUMBER,
+            "boolean": ToolParameter.ToolParameterType.BOOLEAN,
+            "string": ToolParameter.ToolParameterType.STRING,
+        }
+        if declared in scalar_types:
+            return scalar_types[declared]
+        if declared != "array":
+            return None
+
+        # an array only maps to a type when its items are binary, i.e. a list of files
+        items = spec.get("items") or spec.get("schema", {}).get("items")
+        if items and items.get("format") == "binary":
+            return ToolParameter.ToolParameterType.FILES
+        return None
+
+    @staticmethod
+    def parse_openapi_yaml_to_tool_bundle(
+        yaml: str, extra_info: dict | None = None, warning: dict | None = None
+    ) -> list[ApiToolBundle]:
+        """
+        parse openapi yaml to tool bundle
+
+        :param yaml: the yaml string
+        :param extra_info: the extra info
+        :param warning: the warning message
+        :return: the tool bundle
+        :raises ToolApiSchemaError: if the string is not valid yaml or does not contain a mapping
+        """
+        warning = warning if warning is not None else {}
+        extra_info = extra_info if extra_info is not None else {}
+
+        try:
+            openapi = safe_load(yaml)
+        except YAMLError as e:
+            # report malformed yaml the same way as the other invalid-schema cases
+            raise ToolApiSchemaError(f"Invalid openapi yaml: {e}") from e
+
+        # empty input loads as None, plain text as a str, a top-level sequence as a list;
+        # none of them can be an openapi document
+        if not isinstance(openapi, dict):
+            raise ToolApiSchemaError("Invalid openapi yaml.")
+        return ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(openapi, extra_info=extra_info, warning=warning)
+
+    @staticmethod
+    def parse_swagger_to_openapi(swagger: dict, extra_info: dict | None = None, warning: dict | None = None) -> dict:
+        """
+        parse swagger to openapi
+
+        :param swagger: the swagger dict
+        :param extra_info: the extra info, not read by this function
+        :param warning: the warning message, receives "missing_summary" when an operation has
+            neither a summary nor a description
+        :return: the openapi dict
+        :raises ToolApiSchemaError: if there is no server, no path, or an operation without operationId
+        """
+        # `warning or {}` would swap a caller's empty dict for a new one and lose the warnings
+        warning = warning if warning is not None else {}
+
+        # convert swagger to openapi
+        info = swagger.get("info", {"title": "Swagger", "description": "Swagger", "version": "1.0.0"})
+
+        servers = swagger.get("servers", [])
+
+        if not servers:
+            raise ToolApiSchemaError("No server found in the swagger yaml.")
+
+        openapi = {
+            "openapi": "3.0.0",
+            "info": {
+                "title": info.get("title", "Swagger"),
+                "description": info.get("description", "Swagger"),
+                "version": info.get("version", "1.0.0"),
+            },
+            "servers": servers,
+            "paths": {},
+            "components": {"schemas": {}},
+        }
+
+        # check paths
+        if "paths" not in swagger or len(swagger["paths"]) == 0:
+            raise ToolApiSchemaError("No paths found in the swagger yaml.")
+
+        # convert paths
+        for path, path_item in swagger["paths"].items():
+            openapi["paths"][path] = {}
+            for method, operation in path_item.items():
+                if "operationId" not in operation:
+                    raise ToolApiSchemaError(f"No operationId found in operation {method} {path}.")
+
+                if ("summary" not in operation or len(operation["summary"]) == 0) and (
+                    "description" not in operation or len(operation["description"]) == 0
+                ):
+                    warning["missing_summary"] = f"No summary or description found in operation {method} {path}."
+
+                openapi["paths"][path][method] = {
+                    "operationId": operation["operationId"],
+                    "summary": operation.get("summary", ""),
+                    "description": operation.get("description", ""),
+                    "parameters": operation.get("parameters", []),
+                    "responses": operation.get("responses", {}),
+                }
+
+                if "requestBody" in operation:
+                    openapi["paths"][path][method]["requestBody"] = operation["requestBody"]
+
+        # convert definitions; a document without a "definitions" section has nothing to copy
+        for name, definition in (swagger.get("definitions") or {}).items():
+            openapi["components"]["schemas"][name] = definition
+
+        return openapi
+
+    @staticmethod
+    def parse_openai_plugin_json_to_tool_bundle(
+        json: str, extra_info: dict | None = None, warning: dict | None = None
+    ) -> list[ApiToolBundle]:
+        """
+        parse openai plugin json to tool bundle
+
+        The manifest only points at the schema: its api url is downloaded and the
+        response body is parsed as openapi yaml.
+
+        :param json: the json string
+        :param extra_info: the extra info
+        :param warning: the warning message
+        :return: the tool bundle
+        :raises ToolProviderNotFoundError: if the manifest is not valid json, lacks the
+            api url/type, or the url does not answer with status 200
+        :raises ToolNotSupportedError: if the api type is not "openapi"
+        """
+        warning = warning if warning is not None else {}
+        extra_info = extra_info if extra_info is not None else {}
+
+        try:
+            openai_plugin = json_loads(json)
+            api = openai_plugin["api"]
+            api_url = api["url"]
+            api_type = api["type"]
+        except (JSONDecodeError, KeyError, TypeError) as e:
+            # KeyError/TypeError: valid json that does not have the manifest shape
+            raise ToolProviderNotFoundError("Invalid openai plugin json.") from e
+
+        if api_type != "openapi":
+            raise ToolNotSupportedError("Only openapi is supported now.")
+
+        # get openapi yaml
+        # NOTE(review): api_url is taken from the supplied manifest and fetched as-is;
+        # confirm that callers restrict which hosts may be reached from here.
+        response = get(api_url, headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "}, timeout=5)
+
+        if response.status_code != 200:
+            raise ToolProviderNotFoundError("cannot get openapi yaml from url.")
+
+        return ApiBasedToolSchemaParser.parse_openapi_yaml_to_tool_bundle(
+            response.text, extra_info=extra_info, warning=warning
+        )
+
+    @staticmethod
+    def auto_parse_to_tool_bundle(
+        content: str, extra_info: dict | None = None, warning: dict | None = None
+    ) -> tuple[list[ApiToolBundle], str]:
+        """
+        auto parse to tool bundle
+
+        The content is loaded as json first and as yaml second, then tried as
+        openapi, swagger and openai plugin manifest, in that order.
+
+        :param content: the content
+        :param extra_info: the extra info
+        :param warning: the warning message
+        :return: tools bundle, schema_type
+        :raises ToolApiSchemaError: if the content is neither json nor yaml, is not a
+            mapping, or matches none of the supported schema types
+        """
+        warning = warning if warning is not None else {}
+        extra_info = extra_info if extra_info is not None else {}
+
+        content = content.strip()
+        loaded_content = None
+        json_error = None
+        yaml_error = None
+
+        try:
+            loaded_content = json_loads(content)
+        except JSONDecodeError as e:
+            json_error = e
+
+        if loaded_content is None:
+            try:
+                loaded_content = safe_load(content)
+            except YAMLError as e:
+                yaml_error = e
+        if loaded_content is None:
+            raise ToolApiSchemaError(
+                f"Invalid api schema, schema is neither json nor yaml. json error: {str(json_error)},"
+                f" yaml error: {str(yaml_error)}"
+            )
+
+        # scalars and lists load fine (e.g. "123" or plain text) but cannot be a schema;
+        # reject them here instead of failing on key access further down
+        if not isinstance(loaded_content, dict):
+            raise ToolApiSchemaError("Invalid api schema, schema must be a json or yaml object.")
+
+        swagger_error = None
+        openapi_error = None
+        openapi_plugin_error = None
+
+        try:
+            openapi = ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(
+                loaded_content, extra_info=extra_info, warning=warning
+            )
+            return openapi, ApiProviderSchemaType.OPENAPI.value
+        except ToolApiSchemaError as e:
+            openapi_error = e
+
+        # openapi parse error, fallback to swagger
+        try:
+            converted_swagger = ApiBasedToolSchemaParser.parse_swagger_to_openapi(
+                loaded_content, extra_info=extra_info, warning=warning
+            )
+            schema_type = ApiProviderSchemaType.SWAGGER.value
+            return ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(
+                converted_swagger, extra_info=extra_info, warning=warning
+            ), schema_type
+        except ToolApiSchemaError as e:
+            swagger_error = e
+
+        # swagger parse error, fallback to openai plugin
+        try:
+            openapi_plugin = ApiBasedToolSchemaParser.parse_openai_plugin_json_to_tool_bundle(
+                json_dumps(loaded_content), extra_info=extra_info, warning=warning
+            )
+            return openapi_plugin, ApiProviderSchemaType.OPENAI_PLUGIN.value
+        except (ToolNotSupportedError, KeyError, TypeError) as e:
+            # maybe it's not plugin at all: content without the manifest keys ends up here too,
+            # so that the combined error below is raised instead of a bare KeyError
+            openapi_plugin_error = e
+
+        raise ToolApiSchemaError(
+            f"Invalid api schema, openapi error: {str(openapi_error)}, swagger error: {str(swagger_error)},"
+            f" openapi plugin error: {str(openapi_plugin_error)}"
+        )
--- a/api/core/datasource/utils/text_processing_utils.py
+++ b/api/core/datasource/utils/text_processing_utils.py
@ -0,0 +1,17 @@
+import re
+
+
+def remove_leading_symbols(text: str) -> str:
+    """
+    Strip the run of punctuation or symbol characters at the very start of the text.
+
+    Args:
+        text (str): The input text to process.
+
+    Returns:
+        str: The text without its leading punctuation or symbols; unchanged when it starts with none.
+    """
+    # The class covers U+2000-U+206F, U+2E00-U+2E7F, U+3000-U+303F and a fixed set of ASCII punctuation.
+    # FIXME this pattern is a confusing quick fix for #11868, maybe refactor it later
+    pattern = r"^[\u2000-\u206F\u2E00-\u2E7F\u3000-\u303F!\"#$%&'()*+,./:;<=>?@^_`~]+"
+    leading = re.match(pattern, text)
+    if leading is None:
+        return text
+    return text[leading.end() :]
--- a/api/core/datasource/utils/uuid_utils.py
+++ b/api/core/datasource/utils/uuid_utils.py
@ -0,0 +1,9 @@
+import uuid
+
+
+def is_valid_uuid(uuid_str: str) -> bool:
+    """
+    Tell whether the given value can be parsed by ``uuid.UUID``.
+
+    :param uuid_str: the candidate string
+    :return: True if ``uuid.UUID`` accepts it, False if it raises any exception
+    """
+    try:
+        uuid.UUID(uuid_str)
+    except Exception:
+        # any failure means "not a uuid", including non-string input
+        return False
+    return True
--- a/api/core/datasource/utils/workflow_configuration_sync.py
+++ b/api/core/datasource/utils/workflow_configuration_sync.py
@ -0,0 +1,43 @@
+from collections.abc import Mapping, Sequence
+from typing import Any
+
+from core.app.app_config.entities import VariableEntity
+from core.tools.entities.tool_entities import WorkflowToolParameterConfiguration
+
+
+class WorkflowToolConfigurationUtils:
+    """Helpers for checking workflow tool parameter configurations against a workflow graph."""
+
+    @classmethod
+    def check_parameter_configurations(cls, configurations: list[Mapping[str, Any]]):
+        """
+        validate every configuration with WorkflowToolParameterConfiguration
+
+        The validated models are discarded; an error raised by validation propagates.
+        """
+        for raw_configuration in configurations:
+            WorkflowToolParameterConfiguration.model_validate(raw_configuration)
+
+    @classmethod
+    def get_workflow_graph_variables(cls, graph: Mapping[str, Any]) -> Sequence[VariableEntity]:
+        """
+        get workflow graph variables
+
+        The variables are read from the first node whose data type is "start";
+        an empty list is returned when the graph has no such node.
+        """
+        start_node = None
+        for node in graph.get("nodes", []):
+            if node.get("data", {}).get("type") == "start":
+                start_node = node
+                break
+
+        if not start_node:
+            return []
+
+        raw_variables = start_node.get("data", {}).get("variables", [])
+        return [VariableEntity.model_validate(raw_variable) for raw_variable in raw_variables]
+
+    @classmethod
+    def check_is_synced(
+        cls, variables: list[VariableEntity], tool_configurations: list[WorkflowToolParameterConfiguration]
+    ):
+        """
+        check is synced
+
+        The two lists must have the same length and every configured parameter
+        name must be the name of one of the variables.
+
+        raise ValueError if not synced
+        """
+        known_names = [variable.variable for variable in variables]
+
+        if len(tool_configurations) != len(variables):
+            raise ValueError("parameter configuration mismatch, please republish the tool to update")
+
+        if any(configuration.name not in known_names for configuration in tool_configurations):
+            raise ValueError("parameter configuration mismatch, please republish the tool to update")
--- a/api/core/datasource/utils/yaml_utils.py
+++ b/api/core/datasource/utils/yaml_utils.py
@ -0,0 +1,35 @@
+import logging
+from pathlib import Path
+from typing import Any
+
+import yaml  # type: ignore
+from yaml import YAMLError
+
+logger = logging.getLogger(__name__)
+
+
+def load_yaml_file(file_path: str, ignore_error: bool = True, default_value: Any = {}) -> Any:
+    """
+    Safe loading a YAML file
+
+    NOTE: the default for ``default_value`` is one dict shared by all calls; callers
+    that receive it must not mutate it.
+
+    :param file_path: the path of the YAML file
+    :param ignore_error:
+        if True, return default_value if error occurs and the error will be logged in debug level
+        if False, raise error if error occurs
+    :param default_value: the value returned when errors ignored, also returned when the
+        loaded content is falsy (e.g. an empty file)
+    :return: an object of the YAML content
+    :raises FileNotFoundError: if the path is empty or does not exist and errors are not ignored
+    :raises OSError: if the file cannot be opened and errors are not ignored
+    :raises YAMLError: if reading or parsing fails and errors are not ignored
+    """
+    if not file_path or not Path(file_path).exists():
+        if ignore_error:
+            logger.debug("YAML file not found, using the default value: %s", file_path)
+            return default_value
+        raise FileNotFoundError(f"File not found: {file_path}")
+
+    try:
+        yaml_file = open(file_path, encoding="utf-8")
+    except OSError as e:
+        # the path exists but cannot be opened, e.g. it is a directory or not readable
+        if ignore_error:
+            logger.debug("Failed to open YAML file %s: %s", file_path, e)
+            return default_value
+        raise
+
+    with yaml_file:
+        try:
+            yaml_content = yaml.safe_load(yaml_file)
+            return yaml_content or default_value
+        except Exception as e:
+            if ignore_error:
+                logger.debug("Failed to load YAML file %s: %s", file_path, e)
+                return default_value
+            raise YAMLError(f"Failed to load YAML file {file_path}: {e}") from e
--- a/Show More
+++ b/Show More