Merge branch 'main' into feat/mcp-06-18

Novice
2025-10-10 10:53:15 +08:00
2074 changed files with 112378 additions and 37113 deletions

View File

@ -90,7 +90,9 @@ class BaseAgentRunner(AppRunner):
tenant_id=tenant_id,
dataset_ids=app_config.dataset.dataset_ids if app_config.dataset else [],
retrieve_config=app_config.dataset.retrieve_config if app_config.dataset else None,
return_resource=app_config.additional_features.show_retrieve_source,
return_resource=(
app_config.additional_features.show_retrieve_source if app_config.additional_features else False
),
invoke_from=application_generate_entity.invoke_from,
hit_callback=hit_callback,
user_id=user_id,

View File

@ -1,4 +1,5 @@
import uuid
from typing import Literal, cast
from core.app.app_config.entities import (
DatasetEntity,
@ -74,6 +75,9 @@ class DatasetConfigManager:
return None
query_variable = config.get("dataset_query_variable")
metadata_model_config_dict = dataset_configs.get("metadata_model_config")
metadata_filtering_conditions_dict = dataset_configs.get("metadata_filtering_conditions")
if dataset_configs["retrieval_model"] == "single":
return DatasetEntity(
dataset_ids=dataset_ids,
@ -82,18 +86,23 @@ class DatasetConfigManager:
retrieve_strategy=DatasetRetrieveConfigEntity.RetrieveStrategy.value_of(
dataset_configs["retrieval_model"]
),
metadata_filtering_mode=dataset_configs.get("metadata_filtering_mode", "disabled"),
metadata_model_config=ModelConfig(**dataset_configs.get("metadata_model_config"))
if dataset_configs.get("metadata_model_config")
metadata_filtering_mode=cast(
Literal["disabled", "automatic", "manual"],
dataset_configs.get("metadata_filtering_mode", "disabled"),
),
metadata_model_config=ModelConfig(**metadata_model_config_dict)
if isinstance(metadata_model_config_dict, dict)
else None,
metadata_filtering_conditions=MetadataFilteringCondition(
**dataset_configs.get("metadata_filtering_conditions", {})
)
if dataset_configs.get("metadata_filtering_conditions")
metadata_filtering_conditions=MetadataFilteringCondition(**metadata_filtering_conditions_dict)
if isinstance(metadata_filtering_conditions_dict, dict)
else None,
),
)
else:
score_threshold_val = dataset_configs.get("score_threshold")
reranking_model_val = dataset_configs.get("reranking_model")
weights_val = dataset_configs.get("weights")
return DatasetEntity(
dataset_ids=dataset_ids,
retrieve_config=DatasetRetrieveConfigEntity(
@ -101,22 +110,23 @@ class DatasetConfigManager:
retrieve_strategy=DatasetRetrieveConfigEntity.RetrieveStrategy.value_of(
dataset_configs["retrieval_model"]
),
top_k=dataset_configs.get("top_k", 4),
score_threshold=dataset_configs.get("score_threshold")
if dataset_configs.get("score_threshold_enabled", False)
top_k=int(dataset_configs.get("top_k", 4)),
score_threshold=float(score_threshold_val)
if dataset_configs.get("score_threshold_enabled", False) and score_threshold_val is not None
else None,
reranking_model=dataset_configs.get("reranking_model"),
weights=dataset_configs.get("weights"),
reranking_enabled=dataset_configs.get("reranking_enabled", True),
reranking_model=reranking_model_val if isinstance(reranking_model_val, dict) else None,
weights=weights_val if isinstance(weights_val, dict) else None,
reranking_enabled=bool(dataset_configs.get("reranking_enabled", True)),
rerank_mode=dataset_configs.get("reranking_mode", "reranking_model"),
metadata_filtering_mode=dataset_configs.get("metadata_filtering_mode", "disabled"),
metadata_model_config=ModelConfig(**dataset_configs.get("metadata_model_config"))
if dataset_configs.get("metadata_model_config")
metadata_filtering_mode=cast(
Literal["disabled", "automatic", "manual"],
dataset_configs.get("metadata_filtering_mode", "disabled"),
),
metadata_model_config=ModelConfig(**metadata_model_config_dict)
if isinstance(metadata_model_config_dict, dict)
else None,
metadata_filtering_conditions=MetadataFilteringCondition(
**dataset_configs.get("metadata_filtering_conditions", {})
)
if dataset_configs.get("metadata_filtering_conditions")
metadata_filtering_conditions=MetadataFilteringCondition(**metadata_filtering_conditions_dict)
if isinstance(metadata_filtering_conditions_dict, dict)
else None,
),
)
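
The hunk above only builds the nested metadata models when the raw config value really is a dict. A minimal, self-contained sketch of that guard, using a simplified stand-in model rather than the real Dify ModelConfig:

from pydantic import BaseModel


class ModelConfigStub(BaseModel):
    provider: str = ""
    name: str = ""


def parse_metadata_model_config(dataset_configs: dict) -> ModelConfigStub | None:
    raw = dataset_configs.get("metadata_model_config")
    # the isinstance check guards against None, strings, or other junk values
    return ModelConfigStub(**raw) if isinstance(raw, dict) else None


print(parse_metadata_model_config({"metadata_model_config": {"provider": "openai", "name": "gpt-4o"}}))
print(parse_metadata_model_config({"metadata_model_config": None}))  # -> None
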
@ -134,18 +144,17 @@ class DatasetConfigManager:
config = cls.extract_dataset_config_for_legacy_compatibility(tenant_id, app_mode, config)
# dataset_configs
if not config.get("dataset_configs"):
config["dataset_configs"] = {"retrieval_model": "single"}
if "dataset_configs" not in config or not config.get("dataset_configs"):
config["dataset_configs"] = {}
config["dataset_configs"]["retrieval_model"] = config["dataset_configs"].get("retrieval_model", "single")
if not isinstance(config["dataset_configs"], dict):
raise ValueError("dataset_configs must be of object type")
if not config["dataset_configs"].get("datasets"):
if "datasets" not in config["dataset_configs"] or not config["dataset_configs"].get("datasets"):
config["dataset_configs"]["datasets"] = {"strategy": "router", "datasets": []}
need_manual_query_datasets = config.get("dataset_configs") and config["dataset_configs"].get(
"datasets", {}
).get("datasets")
need_manual_query_datasets = config.get("dataset_configs", {}).get("datasets", {}).get("datasets")
if need_manual_query_datasets and app_mode == AppMode.COMPLETION:
# Only check when mode is completion
@ -166,8 +175,8 @@ class DatasetConfigManager:
:param config: app model config args
"""
# Extract dataset config for legacy compatibility
if not config.get("agent_mode"):
config["agent_mode"] = {"enabled": False, "tools": []}
if "agent_mode" not in config or not config.get("agent_mode"):
config["agent_mode"] = {}
if not isinstance(config["agent_mode"], dict):
raise ValueError("agent_mode must be of object type")
@ -180,19 +189,22 @@ class DatasetConfigManager:
raise ValueError("enabled in agent_mode must be of boolean type")
# tools
if not config["agent_mode"].get("tools"):
if "tools" not in config["agent_mode"] or not config["agent_mode"].get("tools"):
config["agent_mode"]["tools"] = []
if not isinstance(config["agent_mode"]["tools"], list):
raise ValueError("tools in agent_mode must be a list of objects")
# strategy
if not config["agent_mode"].get("strategy"):
if "strategy" not in config["agent_mode"] or not config["agent_mode"].get("strategy"):
config["agent_mode"]["strategy"] = PlanningStrategy.ROUTER.value
has_datasets = False
if config["agent_mode"]["strategy"] in {PlanningStrategy.ROUTER.value, PlanningStrategy.REACT_ROUTER.value}:
for tool in config["agent_mode"]["tools"]:
if config.get("agent_mode", {}).get("strategy") in {
PlanningStrategy.ROUTER.value,
PlanningStrategy.REACT_ROUTER.value,
}:
for tool in config.get("agent_mode", {}).get("tools", []):
key = list(tool.keys())[0]
if key == "dataset":
# old style, use tool name as key
@ -217,7 +229,7 @@ class DatasetConfigManager:
has_datasets = True
need_manual_query_datasets = has_datasets and config["agent_mode"]["enabled"]
need_manual_query_datasets = has_datasets and config.get("agent_mode", {}).get("enabled")
if need_manual_query_datasets and app_mode == AppMode.COMPLETION:
# Only check when mode is completion
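
The defaulting changes in this file make sure the nested dataset_configs dict exists before individual defaults are filled in, so later code can index it safely. A reduced sketch of that pattern; the function name is illustrative, keys and defaults mirror the hunks:

def ensure_dataset_configs(config: dict) -> dict:
    if "dataset_configs" not in config or not config.get("dataset_configs"):
        config["dataset_configs"] = {}
    if not isinstance(config["dataset_configs"], dict):
        raise ValueError("dataset_configs must be of object type")
    config["dataset_configs"]["retrieval_model"] = config["dataset_configs"].get("retrieval_model", "single")
    if "datasets" not in config["dataset_configs"] or not config["dataset_configs"].get("datasets"):
        config["dataset_configs"]["datasets"] = {"strategy": "router", "datasets": []}
    return config


print(ensure_dataset_configs({}))
# -> {'dataset_configs': {'retrieval_model': 'single', 'datasets': {'strategy': 'router', 'datasets': []}}}
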

View File

@ -4,8 +4,8 @@ from typing import Any
from core.app.app_config.entities import ModelConfigEntity
from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType
from core.model_runtime.model_providers.model_provider_factory import ModelProviderFactory
from core.plugin.entities.plugin import ModelProviderID
from core.provider_manager import ProviderManager
from models.provider_ids import ModelProviderID
class ModelConfigManager:

View File

@ -114,9 +114,9 @@ class VariableEntity(BaseModel):
hide: bool = False
max_length: int | None = None
options: Sequence[str] = Field(default_factory=list)
allowed_file_types: Sequence[FileType] = Field(default_factory=list)
allowed_file_extensions: Sequence[str] = Field(default_factory=list)
allowed_file_upload_methods: Sequence[FileTransferMethod] = Field(default_factory=list)
allowed_file_types: Sequence[FileType] | None = Field(default_factory=list)
allowed_file_extensions: Sequence[str] | None = Field(default_factory=list)
allowed_file_upload_methods: Sequence[FileTransferMethod] | None = Field(default_factory=list)
@field_validator("description", mode="before")
@classmethod
@ -129,6 +129,16 @@ class VariableEntity(BaseModel):
return v or []
class RagPipelineVariableEntity(VariableEntity):
"""
Rag Pipeline Variable Entity.
"""
tooltips: str | None = None
placeholder: str | None = None
belong_to_node_id: str
class ExternalDataVariableEntity(BaseModel):
"""
External Data Variable Entity.
@ -288,7 +298,7 @@ class AppConfig(BaseModel):
tenant_id: str
app_id: str
app_mode: AppMode
additional_features: AppAdditionalFeatures
additional_features: AppAdditionalFeatures | None = None
variables: list[VariableEntity] = []
sensitive_word_avoidance: SensitiveWordAvoidanceEntity | None = None
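
Because additional_features becomes optional here, call sites elsewhere in this commit read it through a None guard. A minimal sketch of that access pattern with simplified stand-in models (not the real AppConfig entities):

from pydantic import BaseModel


class AdditionalFeaturesStub(BaseModel):
    show_retrieve_source: bool = False


class AppConfigStub(BaseModel):
    additional_features: AdditionalFeaturesStub | None = None


def should_return_resource(app_config: AppConfigStub) -> bool:
    return app_config.additional_features.show_retrieve_source if app_config.additional_features else False


print(should_return_resource(AppConfigStub()))  # False: no features configured
print(should_return_resource(AppConfigStub(additional_features=AdditionalFeaturesStub(show_retrieve_source=True))))
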

View File

@ -1,4 +1,6 @@
from core.app.app_config.entities import VariableEntity
import re
from core.app.app_config.entities import RagPipelineVariableEntity, VariableEntity
from models.workflow import Workflow
@ -20,3 +22,48 @@ class WorkflowVariablesConfigManager:
variables.append(VariableEntity.model_validate(variable))
return variables
@classmethod
def convert_rag_pipeline_variable(cls, workflow: Workflow, start_node_id: str) -> list[RagPipelineVariableEntity]:
"""
Convert workflow start variables to variables
:param workflow: workflow instance
"""
variables = []
# get second step node
rag_pipeline_variables = workflow.rag_pipeline_variables
if not rag_pipeline_variables:
return []
variables_map = {item["variable"]: item for item in rag_pipeline_variables}
# get datasource node data
datasource_node_data = None
datasource_nodes = workflow.graph_dict.get("nodes", [])
for datasource_node in datasource_nodes:
if datasource_node.get("id") == start_node_id:
datasource_node_data = datasource_node.get("data", {})
break
if datasource_node_data:
datasource_parameters = datasource_node_data.get("datasource_parameters", {})
for _, value in datasource_parameters.items():
if value.get("value") and isinstance(value.get("value"), str):
pattern = r"\{\{#([a-zA-Z0-9_]{1,50}(?:\.[a-zA-Z0-9_][a-zA-Z0-9_]{0,29}){1,10})#\}\}"
match = re.match(pattern, value["value"])
if match:
full_path = match.group(1)
last_part = full_path.split(".")[-1]
variables_map.pop(last_part, None)
if value.get("value") and isinstance(value.get("value"), list):
last_part = value.get("value")[-1]
variables_map.pop(last_part, None)
all_second_step_variables = list(variables_map.values())
for item in all_second_step_variables:
if item.get("belong_to_node_id") == start_node_id or item.get("belong_to_node_id") == "shared":
variables.append(RagPipelineVariableEntity.model_validate(item))
return variables
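
A runnable sketch of the reference-matching logic above: datasource parameter values like "{{#start_node.file_url#}}" point at pipeline variables, and the last path segment names the variable that can be dropped from the manual input list. The regex is copied from the method; the sample data is made up:

import re

PATTERN = r"\{\{#([a-zA-Z0-9_]{1,50}(?:\.[a-zA-Z0-9_][a-zA-Z0-9_]{0,29}){1,10})#\}\}"

variables_map = {"file_url": {"variable": "file_url"}, "limit": {"variable": "limit"}}
datasource_parameters = {"url": {"value": "{{#start_node.file_url#}}"}}

for value in datasource_parameters.values():
    raw = value.get("value")
    if isinstance(raw, str):
        match = re.match(PATTERN, raw)
        if match:
            last_part = match.group(1).split(".")[-1]
            variables_map.pop(last_part, None)  # already satisfied by the datasource node

print(list(variables_map))  # -> ['limit']
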

View File

@ -154,7 +154,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
if invoke_from == InvokeFrom.DEBUGGER:
# always enable retriever resource in debugger mode
app_config.additional_features.show_retrieve_source = True
app_config.additional_features.show_retrieve_source = True # type: ignore
workflow_run_id = str(uuid.uuid4())
# init application generate entity
@ -420,7 +420,9 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
db.session.refresh(conversation)
# get conversation dialogue count
self._dialogue_count = get_thread_messages_length(conversation.id)
# NOTE: dialogue_count should not start from 0,
# because during the first conversation, dialogue_count should be 1.
self._dialogue_count = get_thread_messages_length(conversation.id) + 1
# init queue manager
queue_manager = MessageBasedAppQueueManager(
@ -467,7 +469,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
workflow_execution_repository=workflow_execution_repository,
workflow_node_execution_repository=workflow_node_execution_repository,
stream=stream,
draft_var_saver_factory=self._get_draft_var_saver_factory(invoke_from),
draft_var_saver_factory=self._get_draft_var_saver_factory(invoke_from, account=user),
)
return AdvancedChatAppGenerateResponseConverter.convert(response=response, invoke_from=invoke_from)
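
A one-line illustration of the off-by-one note above: a brand-new conversation has no earlier thread messages, so the first exchange should still count as dialogue number 1 (the length helper is stubbed here):

def get_thread_messages_length_stub(conversation_id: str) -> int:
    return 0  # pretend this is the very first message in the conversation


dialogue_count = get_thread_messages_length_stub("conv-1") + 1
assert dialogue_count == 1
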

View File

@ -1,11 +1,11 @@
import logging
import time
from collections.abc import Mapping
from typing import Any, cast
from sqlalchemy import select
from sqlalchemy.orm import Session
from configs import dify_config
from core.app.apps.advanced_chat.app_config_manager import AdvancedChatAppConfig
from core.app.apps.base_app_queue_manager import AppQueueManager
from core.app.apps.workflow_app_runner import WorkflowBasedAppRunner
@ -23,16 +23,17 @@ from core.app.features.annotation_reply.annotation_reply import AnnotationReplyF
from core.moderation.base import ModerationError
from core.moderation.input_moderation import InputModeration
from core.variables.variables import VariableUnion
from core.workflow.callbacks import WorkflowCallback, WorkflowLoggingCallback
from core.workflow.entities.variable_pool import VariablePool
from core.workflow.entities import GraphRuntimeState, VariablePool
from core.workflow.graph_engine.command_channels.redis_channel import RedisChannel
from core.workflow.system_variable import SystemVariable
from core.workflow.variable_loader import VariableLoader
from core.workflow.workflow_entry import WorkflowEntry
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from models import Workflow
from models.enums import UserFrom
from models.model import App, Conversation, Message, MessageAnnotation
from models.workflow import ConversationVariable, WorkflowType
from models.workflow import ConversationVariable
logger = logging.getLogger(__name__)
@ -78,23 +79,12 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner):
if not app_record:
raise ValueError("App not found")
workflow_callbacks: list[WorkflowCallback] = []
if dify_config.DEBUG:
workflow_callbacks.append(WorkflowLoggingCallback())
if self.application_generate_entity.single_iteration_run:
# if only single iteration run is requested
graph, variable_pool = self._get_graph_and_variable_pool_of_single_iteration(
if self.application_generate_entity.single_iteration_run or self.application_generate_entity.single_loop_run:
# Handle single iteration or single loop run
graph, variable_pool, graph_runtime_state = self._prepare_single_node_execution(
workflow=self._workflow,
node_id=self.application_generate_entity.single_iteration_run.node_id,
user_inputs=dict(self.application_generate_entity.single_iteration_run.inputs),
)
elif self.application_generate_entity.single_loop_run:
# if only single loop run is requested
graph, variable_pool = self._get_graph_and_variable_pool_of_single_loop(
workflow=self._workflow,
node_id=self.application_generate_entity.single_loop_run.node_id,
user_inputs=dict(self.application_generate_entity.single_loop_run.inputs),
single_iteration_run=self.application_generate_entity.single_iteration_run,
single_loop_run=self.application_generate_entity.single_loop_run,
)
else:
inputs = self.application_generate_entity.inputs
@ -146,16 +136,27 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner):
)
# init graph
graph = self._init_graph(graph_config=self._workflow.graph_dict)
graph_runtime_state = GraphRuntimeState(variable_pool=variable_pool, start_at=time.time())
graph = self._init_graph(
graph_config=self._workflow.graph_dict,
graph_runtime_state=graph_runtime_state,
workflow_id=self._workflow.id,
tenant_id=self._workflow.tenant_id,
user_id=self.application_generate_entity.user_id,
)
db.session.close()
# RUN WORKFLOW
# Create Redis command channel for this workflow execution
task_id = self.application_generate_entity.task_id
channel_key = f"workflow:{task_id}:commands"
command_channel = RedisChannel(redis_client, channel_key)
workflow_entry = WorkflowEntry(
tenant_id=self._workflow.tenant_id,
app_id=self._workflow.app_id,
workflow_id=self._workflow.id,
workflow_type=WorkflowType.value_of(self._workflow.type),
graph=graph,
graph_config=self._workflow.graph_dict,
user_id=self.application_generate_entity.user_id,
@ -167,11 +168,11 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner):
invoke_from=self.application_generate_entity.invoke_from,
call_depth=self.application_generate_entity.call_depth,
variable_pool=variable_pool,
graph_runtime_state=graph_runtime_state,
command_channel=command_channel,
)
generator = workflow_entry.run(
callbacks=workflow_callbacks,
)
generator = workflow_entry.run()
for event in generator:
self._handle_event(workflow_entry, event)
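
The runner above routes per-run control commands through a dedicated Redis key derived from the task id. A minimal sketch of the key convention, with an in-memory dict standing in for Redis so it runs anywhere; RedisChannel's actual payload format is not reproduced here and the command JSON below is hypothetical:

task_id = "3f2b1c9e-0000-4000-8000-000000000000"
channel_key = f"workflow:{task_id}:commands"  # same convention as the runner above

fake_store: dict[str, list[str]] = {}  # stand-in for redis_client
fake_store.setdefault(channel_key, []).append('{"command": "stop"}')  # hypothetical payload
print(channel_key, fake_store[channel_key])
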

View File

@ -31,14 +31,9 @@ from core.app.entities.queue_entities import (
QueueMessageReplaceEvent,
QueueNodeExceptionEvent,
QueueNodeFailedEvent,
QueueNodeInIterationFailedEvent,
QueueNodeInLoopFailedEvent,
QueueNodeRetryEvent,
QueueNodeStartedEvent,
QueueNodeSucceededEvent,
QueueParallelBranchRunFailedEvent,
QueueParallelBranchRunStartedEvent,
QueueParallelBranchRunSucceededEvent,
QueuePingEvent,
QueueRetrieverResourcesEvent,
QueueStopEvent,
@ -65,8 +60,8 @@ from core.app.task_pipeline.message_cycle_manager import MessageCycleManager
from core.base.tts import AppGeneratorTTSPublisher, AudioTrunk
from core.model_runtime.entities.llm_entities import LLMUsage
from core.ops.ops_trace_manager import TraceQueueManager
from core.workflow.entities.workflow_execution import WorkflowExecutionStatus, WorkflowType
from core.workflow.graph_engine.entities.graph_runtime_state import GraphRuntimeState
from core.workflow.entities import GraphRuntimeState
from core.workflow.enums import WorkflowExecutionStatus, WorkflowType
from core.workflow.nodes import NodeType
from core.workflow.repositories.draft_variable_repository import DraftVariableSaverFactory
from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
@ -387,9 +382,7 @@ class AdvancedChatAppGenerateTaskPipeline:
def _handle_node_failed_events(
self,
event: Union[
QueueNodeFailedEvent, QueueNodeInIterationFailedEvent, QueueNodeInLoopFailedEvent, QueueNodeExceptionEvent
],
event: Union[QueueNodeFailedEvent, QueueNodeExceptionEvent],
**kwargs,
) -> Generator[StreamResponse, None, None]:
"""Handle various node failure events."""
@ -434,32 +427,6 @@ class AdvancedChatAppGenerateTaskPipeline:
answer=delta_text, message_id=self._message_id, from_variable_selector=event.from_variable_selector
)
def _handle_parallel_branch_started_event(
self, event: QueueParallelBranchRunStartedEvent, **kwargs
) -> Generator[StreamResponse, None, None]:
"""Handle parallel branch started events."""
self._ensure_workflow_initialized()
parallel_start_resp = self._workflow_response_converter.workflow_parallel_branch_start_to_stream_response(
task_id=self._application_generate_entity.task_id,
workflow_execution_id=self._workflow_run_id,
event=event,
)
yield parallel_start_resp
def _handle_parallel_branch_finished_events(
self, event: Union[QueueParallelBranchRunSucceededEvent, QueueParallelBranchRunFailedEvent], **kwargs
) -> Generator[StreamResponse, None, None]:
"""Handle parallel branch finished events."""
self._ensure_workflow_initialized()
parallel_finish_resp = self._workflow_response_converter.workflow_parallel_branch_finished_to_stream_response(
task_id=self._application_generate_entity.task_id,
workflow_execution_id=self._workflow_run_id,
event=event,
)
yield parallel_finish_resp
def _handle_iteration_start_event(
self, event: QueueIterationStartEvent, **kwargs
) -> Generator[StreamResponse, None, None]:
@ -584,7 +551,7 @@ class AdvancedChatAppGenerateTaskPipeline:
total_steps=validated_state.node_run_steps,
outputs=event.outputs,
exceptions_count=event.exceptions_count,
conversation_id=None,
conversation_id=self._conversation_id,
trace_manager=trace_manager,
external_trace_id=self._application_generate_entity.extras.get("external_trace_id"),
)
@ -751,8 +718,6 @@ class AdvancedChatAppGenerateTaskPipeline:
QueueNodeRetryEvent: self._handle_node_retry_event,
QueueNodeStartedEvent: self._handle_node_started_event,
QueueNodeSucceededEvent: self._handle_node_succeeded_event,
# Parallel branch events
QueueParallelBranchRunStartedEvent: self._handle_parallel_branch_started_event,
# Iteration events
QueueIterationStartEvent: self._handle_iteration_start_event,
QueueIterationNextEvent: self._handle_iteration_next_event,
@ -800,8 +765,6 @@ class AdvancedChatAppGenerateTaskPipeline:
event,
(
QueueNodeFailedEvent,
QueueNodeInIterationFailedEvent,
QueueNodeInLoopFailedEvent,
QueueNodeExceptionEvent,
),
):
@ -814,17 +777,6 @@ class AdvancedChatAppGenerateTaskPipeline:
)
return
# Handle parallel branch finished events with isinstance check
if isinstance(event, (QueueParallelBranchRunSucceededEvent, QueueParallelBranchRunFailedEvent)):
yield from self._handle_parallel_branch_finished_events(
event,
graph_runtime_state=graph_runtime_state,
tts_publisher=tts_publisher,
trace_manager=trace_manager,
queue_message=queue_message,
)
return
# For unhandled events, we continue (original behavior)
return
@ -848,11 +800,6 @@ class AdvancedChatAppGenerateTaskPipeline:
graph_runtime_state = event.graph_runtime_state
yield from self._handle_workflow_started_event(event)
case QueueTextChunkEvent():
yield from self._handle_text_chunk_event(
event, tts_publisher=tts_publisher, queue_message=queue_message
)
case QueueErrorEvent():
yield from self._handle_error_event(event)
break

View File

@ -6,7 +6,7 @@ from sqlalchemy.orm import Session
from core.app.app_config.entities import VariableEntityType
from core.app.entities.app_invoke_entities import InvokeFrom
from core.file import File, FileUploadConfig
from core.workflow.nodes.enums import NodeType
from core.workflow.enums import NodeType
from core.workflow.repositories.draft_variable_repository import (
DraftVariableSaver,
DraftVariableSaverFactory,
@ -14,6 +14,7 @@ from core.workflow.repositories.draft_variable_repository import (
)
from factories import file_factory
from libs.orjson import orjson_dumps
from models import Account, EndUser
from services.workflow_draft_variable_service import DraftVariableSaver as DraftVariableSaverImpl
if TYPE_CHECKING:
@ -44,9 +45,9 @@ class BaseAppGenerator:
mapping=v,
tenant_id=tenant_id,
config=FileUploadConfig(
allowed_file_types=entity_dictionary[k].allowed_file_types,
allowed_file_extensions=entity_dictionary[k].allowed_file_extensions,
allowed_file_upload_methods=entity_dictionary[k].allowed_file_upload_methods,
allowed_file_types=entity_dictionary[k].allowed_file_types or [],
allowed_file_extensions=entity_dictionary[k].allowed_file_extensions or [],
allowed_file_upload_methods=entity_dictionary[k].allowed_file_upload_methods or [],
),
strict_type_validation=strict_type_validation,
)
@ -59,9 +60,9 @@ class BaseAppGenerator:
mappings=v,
tenant_id=tenant_id,
config=FileUploadConfig(
allowed_file_types=entity_dictionary[k].allowed_file_types,
allowed_file_extensions=entity_dictionary[k].allowed_file_extensions,
allowed_file_upload_methods=entity_dictionary[k].allowed_file_upload_methods,
allowed_file_types=entity_dictionary[k].allowed_file_types or [],
allowed_file_extensions=entity_dictionary[k].allowed_file_extensions or [],
allowed_file_upload_methods=entity_dictionary[k].allowed_file_upload_methods or [],
),
)
for k, v in user_inputs.items()
@ -182,8 +183,9 @@ class BaseAppGenerator:
@final
@staticmethod
def _get_draft_var_saver_factory(invoke_from: InvokeFrom) -> DraftVariableSaverFactory:
def _get_draft_var_saver_factory(invoke_from: InvokeFrom, account: Account | EndUser) -> DraftVariableSaverFactory:
if invoke_from == InvokeFrom.DEBUGGER:
assert isinstance(account, Account)
def draft_var_saver_factory(
session: Session,
@ -200,6 +202,7 @@ class BaseAppGenerator:
node_type=node_type,
node_execution_id=node_execution_id,
enclosing_node_id=enclosing_node_id,
user=account,
)
else:

View File

@ -1,9 +1,11 @@
import logging
import queue
import time
from abc import abstractmethod
from enum import IntEnum, auto
from typing import Any
from redis.exceptions import RedisError
from sqlalchemy.orm import DeclarativeMeta
from configs import dify_config
@ -18,6 +20,8 @@ from core.app.entities.queue_entities import (
)
from extensions.ext_redis import redis_client
logger = logging.getLogger(__name__)
class PublishFrom(IntEnum):
APPLICATION_MANAGER = auto()
@ -35,9 +39,8 @@ class AppQueueManager:
self.invoke_from = invoke_from # Public accessor for invoke_from
user_prefix = "account" if self._invoke_from in {InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER} else "end-user"
redis_client.setex(
AppQueueManager._generate_task_belong_cache_key(self._task_id), 1800, f"{user_prefix}-{self._user_id}"
)
self._task_belong_cache_key = AppQueueManager._generate_task_belong_cache_key(self._task_id)
redis_client.setex(self._task_belong_cache_key, 1800, f"{user_prefix}-{self._user_id}")
q: queue.Queue[WorkflowQueueMessage | MessageQueueMessage | None] = queue.Queue()
@ -79,9 +82,21 @@ class AppQueueManager:
Stop listen to queue
:return:
"""
self._clear_task_belong_cache()
self._q.put(None)
def publish_error(self, e, pub_from: PublishFrom):
def _clear_task_belong_cache(self) -> None:
"""
Remove the task belong cache key once listening is finished.
"""
try:
redis_client.delete(self._task_belong_cache_key)
except RedisError:
logger.exception(
"Failed to clear task belong cache for task %s (key: %s)", self._task_id, self._task_belong_cache_key
)
def publish_error(self, e, pub_from: PublishFrom) -> None:
"""
Publish error
:param e: error
@ -127,6 +142,21 @@ class AppQueueManager:
stopped_cache_key = cls._generate_stopped_cache_key(task_id)
redis_client.setex(stopped_cache_key, 600, 1)
@classmethod
def set_stop_flag_no_user_check(cls, task_id: str) -> None:
"""
Set task stop flag without user permission check.
This method allows stopping workflows without user context.
:param task_id: The task ID to stop
:return:
"""
if not task_id:
return
stopped_cache_key = cls._generate_stopped_cache_key(task_id)
redis_client.setex(stopped_cache_key, 600, 1)
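
set_stop_flag_no_user_check above only needs the task id: it derives the stopped-cache key and sets it with a ten-minute TTL, and _is_stopped polls the same key. A compressed sketch of that flag protocol using an in-memory stand-in; the key format below is a guess, only the 600-second TTL comes from the hunk:

import time

_flags: dict[str, tuple[int, float]] = {}  # key -> (value, expires_at)


def setex(key: str, ttl: int, value: int) -> None:
    _flags[key] = (value, time.time() + ttl)


def get(key: str) -> int | None:
    item = _flags.get(key)
    if item is None or item[1] < time.time():
        return None
    return item[0]


def set_stop_flag_no_user_check(task_id: str) -> None:
    if not task_id:
        return
    setex(f"generate_task_stopped:{task_id}", 600, 1)  # key name is hypothetical


def is_stopped(task_id: str) -> bool:
    return get(f"generate_task_stopped:{task_id}") is not None


set_stop_flag_no_user_check("task-123")
print(is_stopped("task-123"))  # True for the next ten minutes
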
def _is_stopped(self) -> bool:
"""
Check if task is stopped

View File

@ -164,7 +164,9 @@ class ChatAppRunner(AppRunner):
config=app_config.dataset,
query=query,
invoke_from=application_generate_entity.invoke_from,
show_retrieve_source=app_config.additional_features.show_retrieve_source,
show_retrieve_source=(
app_config.additional_features.show_retrieve_source if app_config.additional_features else False
),
hit_callback=hit_callback,
memory=memory,
message_id=message.id,

View File

@ -1,7 +1,7 @@
import time
from collections.abc import Mapping, Sequence
from datetime import UTC, datetime
from typing import Any, Union, cast
from typing import Any, Union
from sqlalchemy.orm import Session
@ -16,14 +16,9 @@ from core.app.entities.queue_entities import (
QueueLoopStartEvent,
QueueNodeExceptionEvent,
QueueNodeFailedEvent,
QueueNodeInIterationFailedEvent,
QueueNodeInLoopFailedEvent,
QueueNodeRetryEvent,
QueueNodeStartedEvent,
QueueNodeSucceededEvent,
QueueParallelBranchRunFailedEvent,
QueueParallelBranchRunStartedEvent,
QueueParallelBranchRunSucceededEvent,
)
from core.app.entities.task_entities import (
AgentLogStreamResponse,
@ -36,24 +31,23 @@ from core.app.entities.task_entities import (
NodeFinishStreamResponse,
NodeRetryStreamResponse,
NodeStartStreamResponse,
ParallelBranchFinishedStreamResponse,
ParallelBranchStartStreamResponse,
WorkflowFinishStreamResponse,
WorkflowStartStreamResponse,
)
from core.file import FILE_MODEL_IDENTITY, File
from core.plugin.impl.datasource import PluginDatasourceManager
from core.tools.entities.tool_entities import ToolProviderType
from core.tools.tool_manager import ToolManager
from core.variables.segments import ArrayFileSegment, FileSegment, Segment
from core.workflow.entities.workflow_execution import WorkflowExecution
from core.workflow.entities.workflow_node_execution import WorkflowNodeExecution, WorkflowNodeExecutionStatus
from core.workflow.nodes import NodeType
from core.workflow.nodes.tool.entities import ToolNodeData
from core.workflow.entities import WorkflowExecution, WorkflowNodeExecution
from core.workflow.enums import NodeType, WorkflowNodeExecutionStatus
from core.workflow.workflow_type_encoder import WorkflowRuntimeTypeConverter
from libs.datetime_utils import naive_utc_now
from models import (
Account,
EndUser,
)
from services.variable_truncator import VariableTruncator
class WorkflowResponseConverter:
@ -65,6 +59,7 @@ class WorkflowResponseConverter:
):
self._application_generate_entity = application_generate_entity
self._user = user
self._truncator = VariableTruncator.default()
def workflow_start_to_stream_response(
self,
@ -156,7 +151,8 @@ class WorkflowResponseConverter:
title=workflow_node_execution.title,
index=workflow_node_execution.index,
predecessor_node_id=workflow_node_execution.predecessor_node_id,
inputs=workflow_node_execution.inputs,
inputs=workflow_node_execution.get_response_inputs(),
inputs_truncated=workflow_node_execution.inputs_truncated,
created_at=int(workflow_node_execution.created_at.timestamp()),
parallel_id=event.parallel_id,
parallel_start_node_id=event.parallel_start_node_id,
@ -171,11 +167,19 @@ class WorkflowResponseConverter:
# extras logic
if event.node_type == NodeType.TOOL:
node_data = cast(ToolNodeData, event.node_data)
response.data.extras["icon"] = ToolManager.get_tool_icon(
tenant_id=self._application_generate_entity.app_config.tenant_id,
provider_type=node_data.provider_type,
provider_id=node_data.provider_id,
provider_type=ToolProviderType(event.provider_type),
provider_id=event.provider_id,
)
elif event.node_type == NodeType.DATASOURCE:
manager = PluginDatasourceManager()
provider_entity = manager.fetch_datasource_provider(
self._application_generate_entity.app_config.tenant_id,
event.provider_id,
)
response.data.extras["icon"] = provider_entity.declaration.identity.generate_datasource_icon_url(
self._application_generate_entity.app_config.tenant_id
)
return response
@ -183,11 +187,7 @@ class WorkflowResponseConverter:
def workflow_node_finish_to_stream_response(
self,
*,
event: QueueNodeSucceededEvent
| QueueNodeFailedEvent
| QueueNodeInIterationFailedEvent
| QueueNodeInLoopFailedEvent
| QueueNodeExceptionEvent,
event: QueueNodeSucceededEvent | QueueNodeFailedEvent | QueueNodeExceptionEvent,
task_id: str,
workflow_node_execution: WorkflowNodeExecution,
) -> NodeFinishStreamResponse | None:
@ -210,9 +210,12 @@ class WorkflowResponseConverter:
index=workflow_node_execution.index,
title=workflow_node_execution.title,
predecessor_node_id=workflow_node_execution.predecessor_node_id,
inputs=workflow_node_execution.inputs,
process_data=workflow_node_execution.process_data,
outputs=json_converter.to_json_encodable(workflow_node_execution.outputs),
inputs=workflow_node_execution.get_response_inputs(),
inputs_truncated=workflow_node_execution.inputs_truncated,
process_data=workflow_node_execution.get_response_process_data(),
process_data_truncated=workflow_node_execution.process_data_truncated,
outputs=json_converter.to_json_encodable(workflow_node_execution.get_response_outputs()),
outputs_truncated=workflow_node_execution.outputs_truncated,
status=workflow_node_execution.status,
error=workflow_node_execution.error,
elapsed_time=workflow_node_execution.elapsed_time,
@ -221,9 +224,6 @@ class WorkflowResponseConverter:
finished_at=int(workflow_node_execution.finished_at.timestamp()),
files=self.fetch_files_from_node_outputs(workflow_node_execution.outputs or {}),
parallel_id=event.parallel_id,
parallel_start_node_id=event.parallel_start_node_id,
parent_parallel_id=event.parent_parallel_id,
parent_parallel_start_node_id=event.parent_parallel_start_node_id,
iteration_id=event.in_iteration_id,
loop_id=event.in_loop_id,
),
@ -255,9 +255,12 @@ class WorkflowResponseConverter:
index=workflow_node_execution.index,
title=workflow_node_execution.title,
predecessor_node_id=workflow_node_execution.predecessor_node_id,
inputs=workflow_node_execution.inputs,
process_data=workflow_node_execution.process_data,
outputs=json_converter.to_json_encodable(workflow_node_execution.outputs),
inputs=workflow_node_execution.get_response_inputs(),
inputs_truncated=workflow_node_execution.inputs_truncated,
process_data=workflow_node_execution.get_response_process_data(),
process_data_truncated=workflow_node_execution.process_data_truncated,
outputs=json_converter.to_json_encodable(workflow_node_execution.get_response_outputs()),
outputs_truncated=workflow_node_execution.outputs_truncated,
status=workflow_node_execution.status,
error=workflow_node_execution.error,
elapsed_time=workflow_node_execution.elapsed_time,
@ -275,50 +278,6 @@ class WorkflowResponseConverter:
),
)
def workflow_parallel_branch_start_to_stream_response(
self,
*,
task_id: str,
workflow_execution_id: str,
event: QueueParallelBranchRunStartedEvent,
) -> ParallelBranchStartStreamResponse:
return ParallelBranchStartStreamResponse(
task_id=task_id,
workflow_run_id=workflow_execution_id,
data=ParallelBranchStartStreamResponse.Data(
parallel_id=event.parallel_id,
parallel_branch_id=event.parallel_start_node_id,
parent_parallel_id=event.parent_parallel_id,
parent_parallel_start_node_id=event.parent_parallel_start_node_id,
iteration_id=event.in_iteration_id,
loop_id=event.in_loop_id,
created_at=int(time.time()),
),
)
def workflow_parallel_branch_finished_to_stream_response(
self,
*,
task_id: str,
workflow_execution_id: str,
event: QueueParallelBranchRunSucceededEvent | QueueParallelBranchRunFailedEvent,
) -> ParallelBranchFinishedStreamResponse:
return ParallelBranchFinishedStreamResponse(
task_id=task_id,
workflow_run_id=workflow_execution_id,
data=ParallelBranchFinishedStreamResponse.Data(
parallel_id=event.parallel_id,
parallel_branch_id=event.parallel_start_node_id,
parent_parallel_id=event.parent_parallel_id,
parent_parallel_start_node_id=event.parent_parallel_start_node_id,
iteration_id=event.in_iteration_id,
loop_id=event.in_loop_id,
status="succeeded" if isinstance(event, QueueParallelBranchRunSucceededEvent) else "failed",
error=event.error if isinstance(event, QueueParallelBranchRunFailedEvent) else None,
created_at=int(time.time()),
),
)
def workflow_iteration_start_to_stream_response(
self,
*,
@ -326,6 +285,7 @@ class WorkflowResponseConverter:
workflow_execution_id: str,
event: QueueIterationStartEvent,
) -> IterationNodeStartStreamResponse:
new_inputs, truncated = self._truncator.truncate_variable_mapping(event.inputs or {})
return IterationNodeStartStreamResponse(
task_id=task_id,
workflow_run_id=workflow_execution_id,
@ -333,13 +293,12 @@ class WorkflowResponseConverter:
id=event.node_id,
node_id=event.node_id,
node_type=event.node_type.value,
title=event.node_data.title,
title=event.node_title,
created_at=int(time.time()),
extras={},
inputs=event.inputs or {},
inputs=new_inputs,
inputs_truncated=truncated,
metadata=event.metadata or {},
parallel_id=event.parallel_id,
parallel_start_node_id=event.parallel_start_node_id,
),
)
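
The converter now passes every inputs/outputs mapping through VariableTruncator.truncate_variable_mapping, which returns the (possibly shortened) mapping plus the flag exposed as the *_truncated fields in the stream responses. A simplified stand-in with the same call shape, assuming a plain max-length policy; the real truncation rules live in services.variable_truncator:

from collections.abc import Mapping
from typing import Any


class ToyVariableTruncator:
    """Stand-in with the same (mapping) -> (mapping, truncated) call shape."""

    def __init__(self, max_chars: int = 20):
        self._max_chars = max_chars

    def truncate_variable_mapping(self, mapping: Mapping[str, Any]) -> tuple[dict[str, Any], bool]:
        truncated = False
        result: dict[str, Any] = {}
        for key, value in mapping.items():
            text = value if isinstance(value, str) else repr(value)
            if len(text) > self._max_chars:
                result[key] = text[: self._max_chars] + "..."
                truncated = True
            else:
                result[key] = value
        return result, truncated


new_inputs, inputs_truncated = ToyVariableTruncator().truncate_variable_mapping({"query": "x" * 100, "top_k": 4})
print(inputs_truncated, len(new_inputs["query"]))  # True 23
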
@ -357,15 +316,10 @@ class WorkflowResponseConverter:
id=event.node_id,
node_id=event.node_id,
node_type=event.node_type.value,
title=event.node_data.title,
title=event.node_title,
index=event.index,
pre_iteration_output=event.output,
created_at=int(time.time()),
extras={},
parallel_id=event.parallel_id,
parallel_start_node_id=event.parallel_start_node_id,
parallel_mode_run_id=event.parallel_mode_run_id,
duration=event.duration,
),
)
@ -377,6 +331,11 @@ class WorkflowResponseConverter:
event: QueueIterationCompletedEvent,
) -> IterationNodeCompletedStreamResponse:
json_converter = WorkflowRuntimeTypeConverter()
new_inputs, inputs_truncated = self._truncator.truncate_variable_mapping(event.inputs or {})
new_outputs, outputs_truncated = self._truncator.truncate_variable_mapping(
json_converter.to_json_encodable(event.outputs) or {}
)
return IterationNodeCompletedStreamResponse(
task_id=task_id,
workflow_run_id=workflow_execution_id,
@ -384,28 +343,29 @@ class WorkflowResponseConverter:
id=event.node_id,
node_id=event.node_id,
node_type=event.node_type.value,
title=event.node_data.title,
outputs=json_converter.to_json_encodable(event.outputs),
title=event.node_title,
outputs=new_outputs,
outputs_truncated=outputs_truncated,
created_at=int(time.time()),
extras={},
inputs=event.inputs or {},
inputs=new_inputs,
inputs_truncated=inputs_truncated,
status=WorkflowNodeExecutionStatus.SUCCEEDED
if event.error is None
else WorkflowNodeExecutionStatus.FAILED,
error=None,
elapsed_time=(naive_utc_now() - event.start_at).total_seconds(),
total_tokens=event.metadata.get("total_tokens", 0) if event.metadata else 0,
total_tokens=(lambda x: x if isinstance(x, int) else 0)(event.metadata.get("total_tokens", 0)),
execution_metadata=event.metadata,
finished_at=int(time.time()),
steps=event.steps,
parallel_id=event.parallel_id,
parallel_start_node_id=event.parallel_start_node_id,
),
)
def workflow_loop_start_to_stream_response(
self, *, task_id: str, workflow_execution_id: str, event: QueueLoopStartEvent
) -> LoopNodeStartStreamResponse:
new_inputs, truncated = self._truncator.truncate_variable_mapping(event.inputs or {})
return LoopNodeStartStreamResponse(
task_id=task_id,
workflow_run_id=workflow_execution_id,
@ -413,10 +373,11 @@ class WorkflowResponseConverter:
id=event.node_id,
node_id=event.node_id,
node_type=event.node_type.value,
title=event.node_data.title,
title=event.node_title,
created_at=int(time.time()),
extras={},
inputs=event.inputs or {},
inputs=new_inputs,
inputs_truncated=truncated,
metadata=event.metadata or {},
parallel_id=event.parallel_id,
parallel_start_node_id=event.parallel_start_node_id,
@ -437,15 +398,16 @@ class WorkflowResponseConverter:
id=event.node_id,
node_id=event.node_id,
node_type=event.node_type.value,
title=event.node_data.title,
title=event.node_title,
index=event.index,
pre_loop_output=event.output,
# The `pre_loop_output` field is not utilized by the frontend.
# Previously, it was assigned the value of `event.output`.
pre_loop_output={},
created_at=int(time.time()),
extras={},
parallel_id=event.parallel_id,
parallel_start_node_id=event.parallel_start_node_id,
parallel_mode_run_id=event.parallel_mode_run_id,
duration=event.duration,
),
)
@ -456,6 +418,11 @@ class WorkflowResponseConverter:
workflow_execution_id: str,
event: QueueLoopCompletedEvent,
) -> LoopNodeCompletedStreamResponse:
json_converter = WorkflowRuntimeTypeConverter()
new_inputs, inputs_truncated = self._truncator.truncate_variable_mapping(event.inputs or {})
new_outputs, outputs_truncated = self._truncator.truncate_variable_mapping(
json_converter.to_json_encodable(event.outputs) or {}
)
return LoopNodeCompletedStreamResponse(
task_id=task_id,
workflow_run_id=workflow_execution_id,
@ -463,17 +430,19 @@ class WorkflowResponseConverter:
id=event.node_id,
node_id=event.node_id,
node_type=event.node_type.value,
title=event.node_data.title,
outputs=WorkflowRuntimeTypeConverter().to_json_encodable(event.outputs),
title=event.node_title,
outputs=new_outputs,
outputs_truncated=outputs_truncated,
created_at=int(time.time()),
extras={},
inputs=event.inputs or {},
inputs=new_inputs,
inputs_truncated=inputs_truncated,
status=WorkflowNodeExecutionStatus.SUCCEEDED
if event.error is None
else WorkflowNodeExecutionStatus.FAILED,
error=None,
elapsed_time=(naive_utc_now() - event.start_at).total_seconds(),
total_tokens=event.metadata.get("total_tokens", 0) if event.metadata else 0,
total_tokens=(lambda x: x if isinstance(x, int) else 0)(event.metadata.get("total_tokens", 0)),
execution_metadata=event.metadata,
finished_at=int(time.time()),
steps=event.steps,

View File

@ -124,7 +124,9 @@ class CompletionAppRunner(AppRunner):
config=dataset_config,
query=query or "",
invoke_from=application_generate_entity.invoke_from,
show_retrieve_source=app_config.additional_features.show_retrieve_source,
show_retrieve_source=app_config.additional_features.show_retrieve_source
if app_config.additional_features
else False,
hit_callback=hit_callback,
message_id=message.id,
inputs=inputs,

View File

@ -0,0 +1,95 @@
from collections.abc import Generator
from typing import cast
from core.app.apps.base_app_generate_response_converter import AppGenerateResponseConverter
from core.app.entities.task_entities import (
AppStreamResponse,
ErrorStreamResponse,
NodeFinishStreamResponse,
NodeStartStreamResponse,
PingStreamResponse,
WorkflowAppBlockingResponse,
WorkflowAppStreamResponse,
)
class WorkflowAppGenerateResponseConverter(AppGenerateResponseConverter):
_blocking_response_type = WorkflowAppBlockingResponse
@classmethod
def convert_blocking_full_response(cls, blocking_response: WorkflowAppBlockingResponse) -> dict: # type: ignore[override]
"""
Convert blocking full response.
:param blocking_response: blocking response
:return:
"""
return dict(blocking_response.model_dump())
@classmethod
def convert_blocking_simple_response(cls, blocking_response: WorkflowAppBlockingResponse) -> dict: # type: ignore[override]
"""
Convert blocking simple response.
:param blocking_response: blocking response
:return:
"""
return cls.convert_blocking_full_response(blocking_response)
@classmethod
def convert_stream_full_response(
cls, stream_response: Generator[AppStreamResponse, None, None]
) -> Generator[dict | str, None, None]:
"""
Convert stream full response.
:param stream_response: stream response
:return:
"""
for chunk in stream_response:
chunk = cast(WorkflowAppStreamResponse, chunk)
sub_stream_response = chunk.stream_response
if isinstance(sub_stream_response, PingStreamResponse):
yield "ping"
continue
response_chunk = {
"event": sub_stream_response.event.value,
"workflow_run_id": chunk.workflow_run_id,
}
if isinstance(sub_stream_response, ErrorStreamResponse):
data = cls._error_to_stream_response(sub_stream_response.err)
response_chunk.update(cast(dict, data))
else:
response_chunk.update(sub_stream_response.model_dump())
yield response_chunk
@classmethod
def convert_stream_simple_response(
cls, stream_response: Generator[AppStreamResponse, None, None]
) -> Generator[dict | str, None, None]:
"""
Convert stream simple response.
:param stream_response: stream response
:return:
"""
for chunk in stream_response:
chunk = cast(WorkflowAppStreamResponse, chunk)
sub_stream_response = chunk.stream_response
if isinstance(sub_stream_response, PingStreamResponse):
yield "ping"
continue
response_chunk = {
"event": sub_stream_response.event.value,
"workflow_run_id": chunk.workflow_run_id,
}
if isinstance(sub_stream_response, ErrorStreamResponse):
data = cls._error_to_stream_response(sub_stream_response.err)
response_chunk.update(cast(dict, data))
elif isinstance(sub_stream_response, NodeStartStreamResponse | NodeFinishStreamResponse):
response_chunk.update(cast(dict, sub_stream_response.to_ignore_detail_dict()))
else:
response_chunk.update(sub_stream_response.model_dump())
yield response_chunk
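
Downstream, each chunk emitted by this converter is either the literal string "ping" or a dict carrying the event name, the workflow_run_id, and the response payload. A toy reproduction of that per-chunk shape with simplified stand-in response classes:

from dataclasses import dataclass


@dataclass
class ToyPing:
    event: str = "ping"


@dataclass
class ToyNodeFinished:
    event: str = "node_finished"
    data: dict | None = None


def convert_stream(chunks, workflow_run_id: str):
    for sub in chunks:
        if isinstance(sub, ToyPing):
            yield "ping"
            continue
        payload = {"event": sub.event, "workflow_run_id": workflow_run_id}
        payload["data"] = sub.data
        yield payload


for item in convert_stream([ToyPing(), ToyNodeFinished(data={"status": "succeeded"})], "run-1"):
    print(item)
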

View File

@ -0,0 +1,66 @@
from core.app.app_config.base_app_config_manager import BaseAppConfigManager
from core.app.app_config.common.sensitive_word_avoidance.manager import SensitiveWordAvoidanceConfigManager
from core.app.app_config.entities import RagPipelineVariableEntity, WorkflowUIBasedAppConfig
from core.app.app_config.features.file_upload.manager import FileUploadConfigManager
from core.app.app_config.features.text_to_speech.manager import TextToSpeechConfigManager
from core.app.app_config.workflow_ui_based_app.variables.manager import WorkflowVariablesConfigManager
from models.dataset import Pipeline
from models.model import AppMode
from models.workflow import Workflow
class PipelineConfig(WorkflowUIBasedAppConfig):
"""
Pipeline Config Entity.
"""
rag_pipeline_variables: list[RagPipelineVariableEntity] = []
pass
class PipelineConfigManager(BaseAppConfigManager):
@classmethod
def get_pipeline_config(cls, pipeline: Pipeline, workflow: Workflow, start_node_id: str) -> PipelineConfig:
pipeline_config = PipelineConfig(
tenant_id=pipeline.tenant_id,
app_id=pipeline.id,
app_mode=AppMode.RAG_PIPELINE,
workflow_id=workflow.id,
rag_pipeline_variables=WorkflowVariablesConfigManager.convert_rag_pipeline_variable(
workflow=workflow, start_node_id=start_node_id
),
)
return pipeline_config
@classmethod
def config_validate(cls, tenant_id: str, config: dict, only_structure_validate: bool = False) -> dict:
"""
Validate for pipeline config
:param tenant_id: tenant id
:param config: app model config args
:param only_structure_validate: only validate the structure of the config
"""
related_config_keys = []
# file upload validation
config, current_related_config_keys = FileUploadConfigManager.validate_and_set_defaults(config=config)
related_config_keys.extend(current_related_config_keys)
# text_to_speech
config, current_related_config_keys = TextToSpeechConfigManager.validate_and_set_defaults(config)
related_config_keys.extend(current_related_config_keys)
# moderation validation
config, current_related_config_keys = SensitiveWordAvoidanceConfigManager.validate_and_set_defaults(
tenant_id=tenant_id, config=config, only_structure_validate=only_structure_validate
)
related_config_keys.extend(current_related_config_keys)
related_config_keys = list(set(related_config_keys))
# Filter out extra parameters
filtered_config = {key: config.get(key) for key in related_config_keys}
return filtered_config
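
config_validate above runs each feature validator, collects the config keys that validator owns, and finally filters the incoming config down to just those keys. A reduced sketch of that collect-then-filter pattern with made-up validators standing in for the real managers:

def validate_file_upload(config: dict) -> tuple[dict, list[str]]:
    config.setdefault("file_upload", {"enabled": False})
    return config, ["file_upload"]


def validate_tts(config: dict) -> tuple[dict, list[str]]:
    config.setdefault("text_to_speech", {"enabled": False})
    return config, ["text_to_speech"]


def config_validate(config: dict) -> dict:
    related_config_keys: list[str] = []
    for validator in (validate_file_upload, validate_tts):
        config, keys = validator(config)
        related_config_keys.extend(keys)
    related_config_keys = list(set(related_config_keys))
    # drop anything the validators do not know about
    return {key: config.get(key) for key in related_config_keys}


print(config_validate({"text_to_speech": {"enabled": True}, "unknown": 1}))
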

View File

@ -0,0 +1,856 @@
import contextvars
import datetime
import json
import logging
import secrets
import threading
import time
import uuid
from collections.abc import Generator, Mapping
from typing import Any, Literal, Union, cast, overload
from flask import Flask, current_app
from pydantic import ValidationError
from sqlalchemy import select
from sqlalchemy.orm import Session, sessionmaker
import contexts
from configs import dify_config
from core.app.apps.base_app_generator import BaseAppGenerator
from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
from core.app.apps.exc import GenerateTaskStoppedError
from core.app.apps.pipeline.pipeline_config_manager import PipelineConfigManager
from core.app.apps.pipeline.pipeline_queue_manager import PipelineQueueManager
from core.app.apps.pipeline.pipeline_runner import PipelineRunner
from core.app.apps.workflow.generate_response_converter import WorkflowAppGenerateResponseConverter
from core.app.apps.workflow.generate_task_pipeline import WorkflowAppGenerateTaskPipeline
from core.app.entities.app_invoke_entities import InvokeFrom, RagPipelineGenerateEntity
from core.app.entities.rag_pipeline_invoke_entities import RagPipelineInvokeEntity
from core.app.entities.task_entities import WorkflowAppBlockingResponse, WorkflowAppStreamResponse
from core.datasource.entities.datasource_entities import (
DatasourceProviderType,
OnlineDriveBrowseFilesRequest,
)
from core.datasource.online_drive.online_drive_plugin import OnlineDriveDatasourcePlugin
from core.entities.knowledge_entities import PipelineDataset, PipelineDocument
from core.model_runtime.errors.invoke import InvokeAuthorizationError
from core.rag.index_processor.constant.built_in_field import BuiltInField
from core.repositories.factory import DifyCoreRepositoryFactory
from core.workflow.repositories.draft_variable_repository import DraftVariableSaverFactory
from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository
from core.workflow.variable_loader import DUMMY_VARIABLE_LOADER, VariableLoader
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from libs.flask_utils import preserve_flask_contexts
from models import Account, EndUser, Workflow, WorkflowNodeExecutionTriggeredFrom
from models.dataset import Document, DocumentPipelineExecutionLog, Pipeline
from models.enums import WorkflowRunTriggeredFrom
from models.model import AppMode
from services.datasource_provider_service import DatasourceProviderService
from services.feature_service import FeatureService
from services.file_service import FileService
from services.workflow_draft_variable_service import DraftVarLoader, WorkflowDraftVariableService
from tasks.rag_pipeline.priority_rag_pipeline_run_task import priority_rag_pipeline_run_task
from tasks.rag_pipeline.rag_pipeline_run_task import rag_pipeline_run_task
logger = logging.getLogger(__name__)
class PipelineGenerator(BaseAppGenerator):
@overload
def generate(
self,
*,
pipeline: Pipeline,
workflow: Workflow,
user: Union[Account, EndUser],
args: Mapping[str, Any],
invoke_from: InvokeFrom,
streaming: Literal[True],
call_depth: int,
workflow_thread_pool_id: str | None,
is_retry: bool = False,
) -> Generator[Mapping | str, None, None]: ...
@overload
def generate(
self,
*,
pipeline: Pipeline,
workflow: Workflow,
user: Union[Account, EndUser],
args: Mapping[str, Any],
invoke_from: InvokeFrom,
streaming: Literal[False],
call_depth: int,
workflow_thread_pool_id: str | None,
is_retry: bool = False,
) -> Mapping[str, Any]: ...
@overload
def generate(
self,
*,
pipeline: Pipeline,
workflow: Workflow,
user: Union[Account, EndUser],
args: Mapping[str, Any],
invoke_from: InvokeFrom,
streaming: bool,
call_depth: int,
workflow_thread_pool_id: str | None,
is_retry: bool = False,
) -> Union[Mapping[str, Any], Generator[Mapping | str, None, None]]: ...
def generate(
self,
*,
pipeline: Pipeline,
workflow: Workflow,
user: Union[Account, EndUser],
args: Mapping[str, Any],
invoke_from: InvokeFrom,
streaming: bool = True,
call_depth: int = 0,
workflow_thread_pool_id: str | None = None,
is_retry: bool = False,
) -> Union[Mapping[str, Any], Generator[Mapping | str, None, None], None]:
# Add null check for dataset
with Session(db.engine, expire_on_commit=False) as session:
dataset = pipeline.retrieve_dataset(session)
if not dataset:
raise ValueError("Pipeline dataset is required")
inputs: Mapping[str, Any] = args["inputs"]
start_node_id: str = args["start_node_id"]
datasource_type: str = args["datasource_type"]
datasource_info_list: list[Mapping[str, Any]] = self._format_datasource_info_list(
datasource_type, args["datasource_info_list"], pipeline, workflow, start_node_id, user
)
batch = time.strftime("%Y%m%d%H%M%S") + str(secrets.randbelow(900000) + 100000)
# convert to app config
pipeline_config = PipelineConfigManager.get_pipeline_config(
pipeline=pipeline, workflow=workflow, start_node_id=start_node_id
)
documents: list[Document] = []
if invoke_from == InvokeFrom.PUBLISHED and not is_retry and not args.get("original_document_id"):
from services.dataset_service import DocumentService
for datasource_info in datasource_info_list:
position = DocumentService.get_documents_position(dataset.id)
document = self._build_document(
tenant_id=pipeline.tenant_id,
dataset_id=dataset.id,
built_in_field_enabled=dataset.built_in_field_enabled,
datasource_type=datasource_type,
datasource_info=datasource_info,
created_from="rag-pipeline",
position=position,
account=user,
batch=batch,
document_form=dataset.chunk_structure,
)
db.session.add(document)
documents.append(document)
db.session.commit()
# run in child thread
rag_pipeline_invoke_entities = []
for i, datasource_info in enumerate(datasource_info_list):
workflow_run_id = str(uuid.uuid4())
document_id = args.get("original_document_id") or None
if invoke_from == InvokeFrom.PUBLISHED and not is_retry:
document_id = document_id or documents[i].id
document_pipeline_execution_log = DocumentPipelineExecutionLog(
document_id=document_id,
datasource_type=datasource_type,
datasource_info=json.dumps(datasource_info),
datasource_node_id=start_node_id,
input_data=inputs,
pipeline_id=pipeline.id,
created_by=user.id,
)
db.session.add(document_pipeline_execution_log)
db.session.commit()
application_generate_entity = RagPipelineGenerateEntity(
task_id=str(uuid.uuid4()),
app_config=pipeline_config,
pipeline_config=pipeline_config,
datasource_type=datasource_type,
datasource_info=datasource_info,
dataset_id=dataset.id,
original_document_id=args.get("original_document_id"),
start_node_id=start_node_id,
batch=batch,
document_id=document_id,
inputs=self._prepare_user_inputs(
user_inputs=inputs,
variables=pipeline_config.rag_pipeline_variables,
tenant_id=pipeline.tenant_id,
strict_type_validation=True if invoke_from == InvokeFrom.SERVICE_API else False,
),
files=[],
user_id=user.id,
stream=streaming,
invoke_from=invoke_from,
call_depth=call_depth,
workflow_execution_id=workflow_run_id,
)
contexts.plugin_tool_providers.set({})
contexts.plugin_tool_providers_lock.set(threading.Lock())
if invoke_from == InvokeFrom.DEBUGGER:
workflow_triggered_from = WorkflowRunTriggeredFrom.RAG_PIPELINE_DEBUGGING
else:
workflow_triggered_from = WorkflowRunTriggeredFrom.RAG_PIPELINE_RUN
# Create workflow node execution repository
session_factory = sessionmaker(bind=db.engine, expire_on_commit=False)
workflow_execution_repository = DifyCoreRepositoryFactory.create_workflow_execution_repository(
session_factory=session_factory,
user=user,
app_id=application_generate_entity.app_config.app_id,
triggered_from=workflow_triggered_from,
)
workflow_node_execution_repository = DifyCoreRepositoryFactory.create_workflow_node_execution_repository(
session_factory=session_factory,
user=user,
app_id=application_generate_entity.app_config.app_id,
triggered_from=WorkflowNodeExecutionTriggeredFrom.RAG_PIPELINE_RUN,
)
if invoke_from == InvokeFrom.DEBUGGER or is_retry:
return self._generate(
flask_app=current_app._get_current_object(), # type: ignore
context=contextvars.copy_context(),
pipeline=pipeline,
workflow_id=workflow.id,
user=user,
application_generate_entity=application_generate_entity,
invoke_from=invoke_from,
workflow_execution_repository=workflow_execution_repository,
workflow_node_execution_repository=workflow_node_execution_repository,
streaming=streaming,
workflow_thread_pool_id=workflow_thread_pool_id,
)
else:
rag_pipeline_invoke_entities.append(
RagPipelineInvokeEntity(
pipeline_id=pipeline.id,
user_id=user.id,
tenant_id=pipeline.tenant_id,
workflow_id=workflow.id,
streaming=streaming,
workflow_execution_id=workflow_run_id,
workflow_thread_pool_id=workflow_thread_pool_id,
application_generate_entity=application_generate_entity.model_dump(),
)
)
if rag_pipeline_invoke_entities:
# store the rag_pipeline_invoke_entities to object storage
text = [item.model_dump() for item in rag_pipeline_invoke_entities]
name = "rag_pipeline_invoke_entities.json"
# Convert list to proper JSON string
json_text = json.dumps(text)
upload_file = FileService(db.engine).upload_text(json_text, name, user.id, dataset.tenant_id)
features = FeatureService.get_features(dataset.tenant_id)
if features.billing.subscription.plan == "sandbox":
tenant_pipeline_task_key = f"tenant_pipeline_task:{dataset.tenant_id}"
tenant_self_pipeline_task_queue = f"tenant_self_pipeline_task_queue:{dataset.tenant_id}"
if redis_client.get(tenant_pipeline_task_key):
# Add to waiting queue using List operations (lpush)
redis_client.lpush(tenant_self_pipeline_task_queue, upload_file.id)
else:
# Set flag and execute task
redis_client.set(tenant_pipeline_task_key, 1, ex=60 * 60)
rag_pipeline_run_task.delay( # type: ignore
rag_pipeline_invoke_entities_file_id=upload_file.id,
tenant_id=dataset.tenant_id,
)
else:
priority_rag_pipeline_run_task.delay( # type: ignore
rag_pipeline_invoke_entities_file_id=upload_file.id,
tenant_id=dataset.tenant_id,
)
# return batch, dataset, documents
return {
"batch": batch,
"dataset": PipelineDataset(
id=dataset.id,
name=dataset.name,
description=dataset.description,
chunk_structure=dataset.chunk_structure,
).model_dump(),
"documents": [
PipelineDocument(
id=document.id,
position=document.position,
data_source_type=document.data_source_type,
data_source_info=json.loads(document.data_source_info) if document.data_source_info else None,
name=document.name,
indexing_status=document.indexing_status,
error=document.error,
enabled=document.enabled,
).model_dump()
for document in documents
],
}
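
For sandbox-plan tenants the generator above serialises pipeline runs: a per-tenant flag key gates execution, and extra runs are parked on a per-tenant Redis list until the flag clears, while other plans go straight to the priority task. A compressed sketch of that gate-and-queue decision with an in-memory stand-in for Redis; the key names mirror the hunk, the one-hour expiry is only noted in a comment:

flags: dict[str, int] = {}          # stands in for redis GET / SET EX
queues: dict[str, list[str]] = {}   # stands in for redis LPUSH lists


def dispatch_pipeline_run(tenant_id: str, upload_file_id: str, plan: str) -> str:
    if plan != "sandbox":
        return f"priority_rag_pipeline_run_task({upload_file_id})"
    task_key = f"tenant_pipeline_task:{tenant_id}"
    queue_key = f"tenant_self_pipeline_task_queue:{tenant_id}"
    if flags.get(task_key):
        # another run is already in flight: park this one on the waiting list
        queues.setdefault(queue_key, []).insert(0, upload_file_id)
        return f"queued on {queue_key}"
    flags[task_key] = 1  # the real code sets this flag with a one-hour expiry
    return f"rag_pipeline_run_task({upload_file_id})"


print(dispatch_pipeline_run("t1", "file-a", "sandbox"))  # starts immediately
print(dispatch_pipeline_run("t1", "file-b", "sandbox"))  # queued behind file-a
print(dispatch_pipeline_run("t2", "file-c", "team"))     # priority path
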
def _generate(
self,
*,
flask_app: Flask,
context: contextvars.Context,
pipeline: Pipeline,
workflow_id: str,
user: Union[Account, EndUser],
application_generate_entity: RagPipelineGenerateEntity,
invoke_from: InvokeFrom,
workflow_execution_repository: WorkflowExecutionRepository,
workflow_node_execution_repository: WorkflowNodeExecutionRepository,
streaming: bool = True,
variable_loader: VariableLoader = DUMMY_VARIABLE_LOADER,
workflow_thread_pool_id: str | None = None,
) -> Union[Mapping[str, Any], Generator[str | Mapping[str, Any], None, None]]:
"""
Generate App response.
:param pipeline: Pipeline
:param workflow: Workflow
:param user: account or end user
:param application_generate_entity: application generate entity
:param invoke_from: invoke from source
:param workflow_execution_repository: repository for workflow execution
:param workflow_node_execution_repository: repository for workflow node execution
:param streaming: is stream
:param workflow_thread_pool_id: workflow thread pool id
"""
with preserve_flask_contexts(flask_app, context_vars=context):
# init queue manager
workflow = db.session.query(Workflow).where(Workflow.id == workflow_id).first()
if not workflow:
raise ValueError(f"Workflow not found: {workflow_id}")
queue_manager = PipelineQueueManager(
task_id=application_generate_entity.task_id,
user_id=application_generate_entity.user_id,
invoke_from=application_generate_entity.invoke_from,
app_mode=AppMode.RAG_PIPELINE,
)
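# Copy the current context so the worker thread inherits the same contextvars
# (e.g. the plugin tool provider cache set up by the caller).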
context = contextvars.copy_context()
# new thread
worker_thread = threading.Thread(
target=self._generate_worker,
kwargs={
"flask_app": current_app._get_current_object(), # type: ignore
"context": context,
"queue_manager": queue_manager,
"application_generate_entity": application_generate_entity,
"workflow_thread_pool_id": workflow_thread_pool_id,
"variable_loader": variable_loader,
},
)
worker_thread.start()
draft_var_saver_factory = self._get_draft_var_saver_factory(
invoke_from,
user,
)
# return response or stream generator
response = self._handle_response(
application_generate_entity=application_generate_entity,
workflow=workflow,
queue_manager=queue_manager,
user=user,
workflow_execution_repository=workflow_execution_repository,
workflow_node_execution_repository=workflow_node_execution_repository,
stream=streaming,
draft_var_saver_factory=draft_var_saver_factory,
)
return WorkflowAppGenerateResponseConverter.convert(response=response, invoke_from=invoke_from)
def single_iteration_generate(
self,
pipeline: Pipeline,
workflow: Workflow,
node_id: str,
user: Account | EndUser,
args: Mapping[str, Any],
streaming: bool = True,
) -> Mapping[str, Any] | Generator[str | Mapping[str, Any], None, None]:
"""
Generate App response.
:param app_model: App
:param workflow: Workflow
:param node_id: the node id
:param user: account or end user
:param args: request args
:param streaming: is streamed
"""
if not node_id:
raise ValueError("node_id is required")
if args.get("inputs") is None:
raise ValueError("inputs is required")
# convert to app config
pipeline_config = PipelineConfigManager.get_pipeline_config(
pipeline=pipeline, workflow=workflow, start_node_id=args.get("start_node_id", "shared")
)
with Session(db.engine) as session:
dataset = pipeline.retrieve_dataset(session)
if not dataset:
raise ValueError("Pipeline dataset is required")
# init application generate entity - use RagPipelineGenerateEntity instead
application_generate_entity = RagPipelineGenerateEntity(
task_id=str(uuid.uuid4()),
app_config=pipeline_config,
pipeline_config=pipeline_config,
datasource_type=args.get("datasource_type", ""),
datasource_info=args.get("datasource_info", {}),
dataset_id=dataset.id,
batch=args.get("batch", ""),
document_id=args.get("document_id"),
inputs={},
files=[],
user_id=user.id,
stream=streaming,
invoke_from=InvokeFrom.DEBUGGER,
call_depth=0,
workflow_execution_id=str(uuid.uuid4()),
single_iteration_run=RagPipelineGenerateEntity.SingleIterationRunEntity(
node_id=node_id, inputs=args["inputs"]
),
)
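# Set up the per-request plugin tool provider cache and its lock for this context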
contexts.plugin_tool_providers.set({})
contexts.plugin_tool_providers_lock.set(threading.Lock())
# Create workflow node execution repository
session_factory = sessionmaker(bind=db.engine, expire_on_commit=False)
workflow_execution_repository = DifyCoreRepositoryFactory.create_workflow_execution_repository(
session_factory=session_factory,
user=user,
app_id=application_generate_entity.app_config.app_id,
triggered_from=WorkflowRunTriggeredFrom.RAG_PIPELINE_DEBUGGING,
)
workflow_node_execution_repository = DifyCoreRepositoryFactory.create_workflow_node_execution_repository(
session_factory=session_factory,
user=user,
app_id=application_generate_entity.app_config.app_id,
triggered_from=WorkflowNodeExecutionTriggeredFrom.SINGLE_STEP,
)
draft_var_srv = WorkflowDraftVariableService(db.session())
draft_var_srv.prefill_conversation_variable_default_values(workflow)
var_loader = DraftVarLoader(
engine=db.engine,
app_id=application_generate_entity.app_config.app_id,
tenant_id=application_generate_entity.app_config.tenant_id,
)
return self._generate(
flask_app=current_app._get_current_object(), # type: ignore
pipeline=pipeline,
workflow_id=workflow.id,
user=user,
invoke_from=InvokeFrom.DEBUGGER,
application_generate_entity=application_generate_entity,
workflow_execution_repository=workflow_execution_repository,
workflow_node_execution_repository=workflow_node_execution_repository,
streaming=streaming,
variable_loader=var_loader,
context=contextvars.copy_context(),
)
def single_loop_generate(
self,
pipeline: Pipeline,
workflow: Workflow,
node_id: str,
user: Account | EndUser,
args: Mapping[str, Any],
streaming: bool = True,
) -> Mapping[str, Any] | Generator[str | Mapping[str, Any], None, None]:
"""
Generate App response.
:param app_model: App
:param workflow: Workflow
:param node_id: the node id
:param user: account or end user
:param args: request args
:param streaming: is streamed
"""
if not node_id:
raise ValueError("node_id is required")
if args.get("inputs") is None:
raise ValueError("inputs is required")
with Session(db.engine) as session:
dataset = pipeline.retrieve_dataset(session)
if not dataset:
raise ValueError("Pipeline dataset is required")
# convert to app config
pipeline_config = PipelineConfigManager.get_pipeline_config(
pipeline=pipeline, workflow=workflow, start_node_id=args.get("start_node_id", "shared")
)
# init application generate entity
application_generate_entity = RagPipelineGenerateEntity(
task_id=str(uuid.uuid4()),
app_config=pipeline_config,
pipeline_config=pipeline_config,
datasource_type=args.get("datasource_type", ""),
datasource_info=args.get("datasource_info", {}),
batch=args.get("batch", ""),
document_id=args.get("document_id"),
dataset_id=dataset.id,
inputs={},
files=[],
user_id=user.id,
stream=streaming,
invoke_from=InvokeFrom.DEBUGGER,
extras={"auto_generate_conversation_name": False},
single_loop_run=RagPipelineGenerateEntity.SingleLoopRunEntity(node_id=node_id, inputs=args["inputs"]),
workflow_execution_id=str(uuid.uuid4()),
)
contexts.plugin_tool_providers.set({})
contexts.plugin_tool_providers_lock.set(threading.Lock())
# Create workflow node execution repository
session_factory = sessionmaker(bind=db.engine, expire_on_commit=False)
workflow_execution_repository = DifyCoreRepositoryFactory.create_workflow_execution_repository(
session_factory=session_factory,
user=user,
app_id=application_generate_entity.app_config.app_id,
triggered_from=WorkflowRunTriggeredFrom.RAG_PIPELINE_DEBUGGING,
)
workflow_node_execution_repository = DifyCoreRepositoryFactory.create_workflow_node_execution_repository(
session_factory=session_factory,
user=user,
app_id=application_generate_entity.app_config.app_id,
triggered_from=WorkflowNodeExecutionTriggeredFrom.SINGLE_STEP,
)
draft_var_srv = WorkflowDraftVariableService(db.session())
draft_var_srv.prefill_conversation_variable_default_values(workflow)
var_loader = DraftVarLoader(
engine=db.engine,
app_id=application_generate_entity.app_config.app_id,
tenant_id=application_generate_entity.app_config.tenant_id,
)
return self._generate(
flask_app=current_app._get_current_object(), # type: ignore
pipeline=pipeline,
workflow_id=workflow.id,
user=user,
invoke_from=InvokeFrom.DEBUGGER,
application_generate_entity=application_generate_entity,
workflow_execution_repository=workflow_execution_repository,
workflow_node_execution_repository=workflow_node_execution_repository,
streaming=streaming,
variable_loader=var_loader,
context=contextvars.copy_context(),
)
def _generate_worker(
self,
flask_app: Flask,
application_generate_entity: RagPipelineGenerateEntity,
queue_manager: AppQueueManager,
context: contextvars.Context,
variable_loader: VariableLoader,
workflow_thread_pool_id: str | None = None,
) -> None:
"""
Generate worker in a new thread.
:param flask_app: Flask app
:param application_generate_entity: application generate entity
:param queue_manager: queue manager
:param workflow_thread_pool_id: workflow thread pool id
:return:
"""
with preserve_flask_contexts(flask_app, context_vars=context):
try:
with Session(db.engine, expire_on_commit=False) as session:
workflow = session.scalar(
select(Workflow).where(
Workflow.tenant_id == application_generate_entity.app_config.tenant_id,
Workflow.app_id == application_generate_entity.app_config.app_id,
Workflow.id == application_generate_entity.app_config.workflow_id,
)
)
if workflow is None:
raise ValueError("Workflow not found")
# Determine system_user_id based on invocation source
is_external_api_call = application_generate_entity.invoke_from in {
InvokeFrom.WEB_APP,
InvokeFrom.SERVICE_API,
}
if is_external_api_call:
# For external API calls, use end user's session ID
end_user = session.scalar(
select(EndUser).where(EndUser.id == application_generate_entity.user_id)
)
system_user_id = end_user.session_id if end_user else ""
else:
# For internal calls, use the original user ID
system_user_id = application_generate_entity.user_id
# init pipeline runner
runner = PipelineRunner(
application_generate_entity=application_generate_entity,
queue_manager=queue_manager,
workflow_thread_pool_id=workflow_thread_pool_id,
variable_loader=variable_loader,
workflow=workflow,
system_user_id=system_user_id,
)
runner.run()
except GenerateTaskStoppedError:
pass
except InvokeAuthorizationError:
queue_manager.publish_error(
InvokeAuthorizationError("Incorrect API key provided"), PublishFrom.APPLICATION_MANAGER
)
except ValidationError as e:
logger.exception("Validation Error when generating")
queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)
except ValueError as e:
if dify_config.DEBUG:
logger.exception("Error when generating")
queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)
except Exception as e:
logger.exception("Unknown Error when generating")
queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)
finally:
db.session.close()
def _handle_response(
self,
application_generate_entity: RagPipelineGenerateEntity,
workflow: Workflow,
queue_manager: AppQueueManager,
user: Union[Account, EndUser],
workflow_execution_repository: WorkflowExecutionRepository,
workflow_node_execution_repository: WorkflowNodeExecutionRepository,
draft_var_saver_factory: DraftVariableSaverFactory,
stream: bool = False,
) -> Union[WorkflowAppBlockingResponse, Generator[WorkflowAppStreamResponse, None, None]]:
"""
Handle response.
:param application_generate_entity: application generate entity
:param workflow: workflow
:param queue_manager: queue manager
:param user: account or end user
:param stream: is stream
:param workflow_node_execution_repository: optional repository for workflow node execution
:return:
"""
# init generate task pipeline
generate_task_pipeline = WorkflowAppGenerateTaskPipeline(
application_generate_entity=application_generate_entity,
workflow=workflow,
queue_manager=queue_manager,
user=user,
stream=stream,
workflow_node_execution_repository=workflow_node_execution_repository,
workflow_execution_repository=workflow_execution_repository,
draft_var_saver_factory=draft_var_saver_factory,
)
try:
return generate_task_pipeline.process()
except ValueError as e:
if len(e.args) > 0 and e.args[0] == "I/O operation on closed file.": # ignore this error
raise GenerateTaskStoppedError()
else:
logger.exception(
"Fails to process generate task pipeline, task_id: %r",
application_generate_entity.task_id,
)
raise e
def _build_document(
self,
tenant_id: str,
dataset_id: str,
built_in_field_enabled: bool,
datasource_type: str,
datasource_info: Mapping[str, Any],
created_from: str,
position: int,
account: Union[Account, EndUser],
batch: str,
document_form: str,
):
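# Resolve a human-readable document name from the datasource payload;
# each datasource type stores it under a different key.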
if datasource_type == "local_file":
name = datasource_info.get("name", "untitled")
elif datasource_type == "online_document":
name = datasource_info.get("page", {}).get("page_name", "untitled")
elif datasource_type == "website_crawl":
name = datasource_info.get("title", "untitled")
elif datasource_type == "online_drive":
name = datasource_info.get("name", "untitled")
else:
raise ValueError(f"Unsupported datasource type: {datasource_type}")
document = Document(
tenant_id=tenant_id,
dataset_id=dataset_id,
position=position,
data_source_type=datasource_type,
data_source_info=json.dumps(datasource_info),
batch=batch,
name=name,
created_from=created_from,
created_by=account.id,
doc_form=document_form,
)
doc_metadata = {}
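# When built-in fields are enabled for the dataset, pre-fill document metadata
# (name, uploader, upload/update timestamps, source).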
if built_in_field_enabled:
doc_metadata = {
BuiltInField.document_name: name,
BuiltInField.uploader: account.name,
BuiltInField.upload_date: datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%d %H:%M:%S"),
BuiltInField.last_update_date: datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%d %H:%M:%S"),
BuiltInField.source: datasource_type,
}
if doc_metadata:
document.doc_metadata = doc_metadata
return document
def _format_datasource_info_list(
self,
datasource_type: str,
datasource_info_list: list[Mapping[str, Any]],
pipeline: Pipeline,
workflow: Workflow,
start_node_id: str,
user: Union[Account, EndUser],
) -> list[Mapping[str, Any]]:
"""
Format datasource info list.
"""
if datasource_type == "online_drive":
all_files: list[Mapping[str, Any]] = []
datasource_node_data = None
datasource_nodes = workflow.graph_dict.get("nodes", [])
for datasource_node in datasource_nodes:
if datasource_node.get("id") == start_node_id:
datasource_node_data = datasource_node.get("data", {})
break
if not datasource_node_data:
raise ValueError("Datasource node data not found")
from core.datasource.datasource_manager import DatasourceManager
datasource_runtime = DatasourceManager.get_datasource_runtime(
provider_id=f"{datasource_node_data.get('plugin_id')}/{datasource_node_data.get('provider_name')}",
datasource_name=datasource_node_data.get("datasource_name"),
tenant_id=pipeline.tenant_id,
datasource_type=DatasourceProviderType(datasource_type),
)
datasource_provider_service = DatasourceProviderService()
credentials = datasource_provider_service.get_datasource_credentials(
tenant_id=pipeline.tenant_id,
provider=datasource_node_data.get("provider_name"),
plugin_id=datasource_node_data.get("plugin_id"),
credential_id=datasource_node_data.get("credential_id"),
)
if credentials:
datasource_runtime.runtime.credentials = credentials
datasource_runtime = cast(OnlineDriveDatasourcePlugin, datasource_runtime)
for datasource_info in datasource_info_list:
if datasource_info.get("id") and datasource_info.get("type") == "folder":
# get all files in the folder
self._get_files_in_folder(
datasource_runtime,
datasource_info.get("id", ""),
datasource_info.get("bucket", None),
user.id,
all_files,
datasource_info,
None,
)
else:
all_files.append(
{
"id": datasource_info.get("id", ""),
"name": datasource_info.get("name", "untitled"),
"bucket": datasource_info.get("bucket", None),
}
)
return all_files
else:
return datasource_info_list
def _get_files_in_folder(
self,
datasource_runtime: OnlineDriveDatasourcePlugin,
prefix: str,
bucket: str | None,
user_id: str,
all_files: list,
datasource_info: Mapping[str, Any],
next_page_parameters: dict | None = None,
):
"""
Get files in a folder.
"""
result_generator = datasource_runtime.online_drive_browse_files(
user_id=user_id,
request=OnlineDriveBrowseFilesRequest(
bucket=bucket,
prefix=prefix,
max_keys=20,
next_page_parameters=next_page_parameters,
),
provider_type=datasource_runtime.datasource_provider_type(),
)
is_truncated = False
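# Walk the folder listing: recurse into sub-folders, collect files, and keep
# fetching pages while the listing reports it is truncated.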
for result in result_generator:
for files in result.result:
for file in files.files:
if file.type == "folder":
self._get_files_in_folder(
datasource_runtime,
file.id,
bucket,
user_id,
all_files,
datasource_info,
None,
)
else:
all_files.append(
{
"id": file.id,
"name": file.name,
"bucket": bucket,
}
)
is_truncated = files.is_truncated
next_page_parameters = files.next_page_parameters
if is_truncated:
self._get_files_in_folder(
datasource_runtime, prefix, bucket, user_id, all_files, datasource_info, next_page_parameters
)

View File

@ -0,0 +1,45 @@
from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
from core.app.apps.exc import GenerateTaskStoppedError
from core.app.entities.app_invoke_entities import InvokeFrom
from core.app.entities.queue_entities import (
AppQueueEvent,
QueueErrorEvent,
QueueMessageEndEvent,
QueueStopEvent,
QueueWorkflowFailedEvent,
QueueWorkflowPartialSuccessEvent,
QueueWorkflowSucceededEvent,
WorkflowQueueMessage,
)
class PipelineQueueManager(AppQueueManager):
def __init__(self, task_id: str, user_id: str, invoke_from: InvokeFrom, app_mode: str) -> None:
super().__init__(task_id, user_id, invoke_from)
self._app_mode = app_mode
def _publish(self, event: AppQueueEvent, pub_from: PublishFrom) -> None:
"""
Publish event to queue
:param event:
:param pub_from:
:return:
"""
message = WorkflowQueueMessage(task_id=self._task_id, app_mode=self._app_mode, event=event)
self._q.put(message)
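# Terminal events close the listener; a stop detected while publishing from the
# application manager is surfaced as GenerateTaskStoppedError.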
if isinstance(
event,
QueueStopEvent
| QueueErrorEvent
| QueueMessageEndEvent
| QueueWorkflowSucceededEvent
| QueueWorkflowFailedEvent
| QueueWorkflowPartialSuccessEvent,
):
self.stop_listen()
if pub_from == PublishFrom.APPLICATION_MANAGER and self._is_stopped():
raise GenerateTaskStoppedError()

View File

@ -0,0 +1,263 @@
import logging
import time
from typing import cast
from core.app.apps.base_app_queue_manager import AppQueueManager
from core.app.apps.pipeline.pipeline_config_manager import PipelineConfig
from core.app.apps.workflow_app_runner import WorkflowBasedAppRunner
from core.app.entities.app_invoke_entities import (
InvokeFrom,
RagPipelineGenerateEntity,
)
from core.variables.variables import RAGPipelineVariable, RAGPipelineVariableInput
from core.workflow.entities.graph_init_params import GraphInitParams
from core.workflow.entities.graph_runtime_state import GraphRuntimeState
from core.workflow.entities.variable_pool import VariablePool
from core.workflow.graph import Graph
from core.workflow.graph_events import GraphEngineEvent, GraphRunFailedEvent
from core.workflow.nodes.node_factory import DifyNodeFactory
from core.workflow.system_variable import SystemVariable
from core.workflow.variable_loader import VariableLoader
from core.workflow.workflow_entry import WorkflowEntry
from extensions.ext_database import db
from models.dataset import Document, Pipeline
from models.enums import UserFrom
from models.model import EndUser
from models.workflow import Workflow
logger = logging.getLogger(__name__)
class PipelineRunner(WorkflowBasedAppRunner):
"""
Pipeline Application Runner
"""
def __init__(
self,
application_generate_entity: RagPipelineGenerateEntity,
queue_manager: AppQueueManager,
variable_loader: VariableLoader,
workflow: Workflow,
system_user_id: str,
workflow_thread_pool_id: str | None = None,
) -> None:
"""
:param application_generate_entity: application generate entity
:param queue_manager: application queue manager
:param workflow_thread_pool_id: workflow thread pool id
"""
super().__init__(
queue_manager=queue_manager,
variable_loader=variable_loader,
app_id=application_generate_entity.app_config.app_id,
)
self.application_generate_entity = application_generate_entity
self.workflow_thread_pool_id = workflow_thread_pool_id
self._workflow = workflow
self._sys_user_id = system_user_id
def _get_app_id(self) -> str:
return self.application_generate_entity.app_config.app_id
def run(self) -> None:
"""
Run application
"""
app_config = self.application_generate_entity.app_config
app_config = cast(PipelineConfig, app_config)
user_id = None
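# External (web app / service API) calls resolve to the end user's session id;
# internal calls keep the original account id.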
if self.application_generate_entity.invoke_from in {InvokeFrom.WEB_APP, InvokeFrom.SERVICE_API}:
end_user = db.session.query(EndUser).where(EndUser.id == self.application_generate_entity.user_id).first()
if end_user:
user_id = end_user.session_id
else:
user_id = self.application_generate_entity.user_id
pipeline = db.session.query(Pipeline).where(Pipeline.id == app_config.app_id).first()
if not pipeline:
raise ValueError("Pipeline not found")
workflow = self.get_workflow(pipeline=pipeline, workflow_id=app_config.workflow_id)
if not workflow:
raise ValueError("Workflow not initialized")
db.session.close()
# if only a single iteration or single loop run is requested
if self.application_generate_entity.single_iteration_run or self.application_generate_entity.single_loop_run:
# Handle single iteration or single loop run
graph, variable_pool, graph_runtime_state = self._prepare_single_node_execution(
workflow=workflow,
single_iteration_run=self.application_generate_entity.single_iteration_run,
single_loop_run=self.application_generate_entity.single_loop_run,
)
else:
inputs = self.application_generate_entity.inputs
files = self.application_generate_entity.files
# Create a variable pool.
system_inputs = SystemVariable(
files=files,
user_id=user_id,
app_id=app_config.app_id,
workflow_id=app_config.workflow_id,
workflow_execution_id=self.application_generate_entity.workflow_execution_id,
document_id=self.application_generate_entity.document_id,
original_document_id=self.application_generate_entity.original_document_id,
batch=self.application_generate_entity.batch,
dataset_id=self.application_generate_entity.dataset_id,
datasource_type=self.application_generate_entity.datasource_type,
datasource_info=self.application_generate_entity.datasource_info,
invoke_from=self.application_generate_entity.invoke_from.value,
)
rag_pipeline_variables = []
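# Only inject pipeline variables that belong to the selected start node (or are
# shared) and were actually supplied in the inputs.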
if workflow.rag_pipeline_variables:
for v in workflow.rag_pipeline_variables:
rag_pipeline_variable = RAGPipelineVariable(**v)
if (
rag_pipeline_variable.belong_to_node_id
in (self.application_generate_entity.start_node_id, "shared")
) and rag_pipeline_variable.variable in inputs:
rag_pipeline_variables.append(
RAGPipelineVariableInput(
variable=rag_pipeline_variable,
value=inputs[rag_pipeline_variable.variable],
)
)
variable_pool = VariablePool(
system_variables=system_inputs,
user_inputs=inputs,
environment_variables=workflow.environment_variables,
conversation_variables=[],
rag_pipeline_variables=rag_pipeline_variables,
)
graph_runtime_state = GraphRuntimeState(variable_pool=variable_pool, start_at=time.perf_counter())
# init graph
graph = self._init_rag_pipeline_graph(
graph_runtime_state=graph_runtime_state,
start_node_id=self.application_generate_entity.start_node_id,
workflow=workflow,
)
# RUN WORKFLOW
workflow_entry = WorkflowEntry(
tenant_id=workflow.tenant_id,
app_id=workflow.app_id,
workflow_id=workflow.id,
graph=graph,
graph_config=workflow.graph_dict,
user_id=self.application_generate_entity.user_id,
user_from=(
UserFrom.ACCOUNT
if self.application_generate_entity.invoke_from in {InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER}
else UserFrom.END_USER
),
invoke_from=self.application_generate_entity.invoke_from,
call_depth=self.application_generate_entity.call_depth,
graph_runtime_state=graph_runtime_state,
variable_pool=variable_pool,
)
generator = workflow_entry.run()
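# Consume graph engine events: mark the document as errored if the graph run fails
# and forward every event to the queue manager.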
for event in generator:
self._update_document_status(
event, self.application_generate_entity.document_id, self.application_generate_entity.dataset_id
)
self._handle_event(workflow_entry, event)
def get_workflow(self, pipeline: Pipeline, workflow_id: str) -> Workflow | None:
"""
Get workflow
"""
# fetch workflow by workflow_id
workflow = (
db.session.query(Workflow)
.where(Workflow.tenant_id == pipeline.tenant_id, Workflow.app_id == pipeline.id, Workflow.id == workflow_id)
.first()
)
# return workflow
return workflow
def _init_rag_pipeline_graph(
self, workflow: Workflow, graph_runtime_state: GraphRuntimeState, start_node_id: str | None = None
) -> Graph:
"""
Init pipeline graph
"""
graph_config = workflow.graph_dict
if "nodes" not in graph_config or "edges" not in graph_config:
raise ValueError("nodes or edges not found in workflow graph")
if not isinstance(graph_config.get("nodes"), list):
raise ValueError("nodes in workflow graph must be a list")
if not isinstance(graph_config.get("edges"), list):
raise ValueError("edges in workflow graph must be a list")
# nodes = graph_config.get("nodes", [])
# edges = graph_config.get("edges", [])
# real_run_nodes = []
# real_edges = []
# exclude_node_ids = []
# for node in nodes:
# node_id = node.get("id")
# node_type = node.get("data", {}).get("type", "")
# if node_type == "datasource":
# if start_node_id != node_id:
# exclude_node_ids.append(node_id)
# continue
# real_run_nodes.append(node)
# for edge in edges:
# if edge.get("source") in exclude_node_ids:
# continue
# real_edges.append(edge)
# graph_config = dict(graph_config)
# graph_config["nodes"] = real_run_nodes
# graph_config["edges"] = real_edges
# init graph
# Create required parameters for Graph.init
graph_init_params = GraphInitParams(
tenant_id=workflow.tenant_id,
app_id=self._app_id,
workflow_id=workflow.id,
graph_config=graph_config,
user_id=self.application_generate_entity.user_id,
user_from=UserFrom.ACCOUNT.value,
invoke_from=InvokeFrom.SERVICE_API.value,
call_depth=0,
)
node_factory = DifyNodeFactory(
graph_init_params=graph_init_params,
graph_runtime_state=graph_runtime_state,
)
graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id=start_node_id)
if not graph:
raise ValueError("graph not found in workflow")
return graph
def _update_document_status(self, event: GraphEngineEvent, document_id: str | None, dataset_id: str | None) -> None:
"""
Update document status
"""
if isinstance(event, GraphRunFailedEvent):
if document_id and dataset_id:
document = (
db.session.query(Document)
.where(Document.id == document_id, Document.dataset_id == dataset_id)
.first()
)
if document:
document.indexing_status = "error"
document.error = event.error or "Unknown error"
db.session.add(document)
db.session.commit()

View File

@ -53,7 +53,6 @@ class WorkflowAppGenerator(BaseAppGenerator):
invoke_from: InvokeFrom,
streaming: Literal[True],
call_depth: int,
workflow_thread_pool_id: str | None,
) -> Generator[Mapping | str, None, None]: ...
@overload
@ -67,7 +66,6 @@ class WorkflowAppGenerator(BaseAppGenerator):
invoke_from: InvokeFrom,
streaming: Literal[False],
call_depth: int,
workflow_thread_pool_id: str | None,
) -> Mapping[str, Any]: ...
@overload
@ -81,7 +79,6 @@ class WorkflowAppGenerator(BaseAppGenerator):
invoke_from: InvokeFrom,
streaming: bool,
call_depth: int,
workflow_thread_pool_id: str | None,
) -> Union[Mapping[str, Any], Generator[Mapping | str, None, None]]: ...
def generate(
@ -94,7 +91,6 @@ class WorkflowAppGenerator(BaseAppGenerator):
invoke_from: InvokeFrom,
streaming: bool = True,
call_depth: int = 0,
workflow_thread_pool_id: str | None = None,
) -> Union[Mapping[str, Any], Generator[Mapping | str, None, None]]:
files: Sequence[Mapping[str, Any]] = args.get("files") or []
@ -186,7 +182,6 @@ class WorkflowAppGenerator(BaseAppGenerator):
workflow_execution_repository=workflow_execution_repository,
workflow_node_execution_repository=workflow_node_execution_repository,
streaming=streaming,
workflow_thread_pool_id=workflow_thread_pool_id,
)
def _generate(
@ -200,7 +195,6 @@ class WorkflowAppGenerator(BaseAppGenerator):
workflow_execution_repository: WorkflowExecutionRepository,
workflow_node_execution_repository: WorkflowNodeExecutionRepository,
streaming: bool = True,
workflow_thread_pool_id: str | None = None,
variable_loader: VariableLoader = DUMMY_VARIABLE_LOADER,
) -> Union[Mapping[str, Any], Generator[str | Mapping[str, Any], None, None]]:
"""
@ -214,7 +208,6 @@ class WorkflowAppGenerator(BaseAppGenerator):
:param workflow_execution_repository: repository for workflow execution
:param workflow_node_execution_repository: repository for workflow node execution
:param streaming: is stream
:param workflow_thread_pool_id: workflow thread pool id
"""
# init queue manager
queue_manager = WorkflowAppQueueManager(
@ -237,16 +230,13 @@ class WorkflowAppGenerator(BaseAppGenerator):
"application_generate_entity": application_generate_entity,
"queue_manager": queue_manager,
"context": context,
"workflow_thread_pool_id": workflow_thread_pool_id,
"variable_loader": variable_loader,
},
)
worker_thread.start()
draft_var_saver_factory = self._get_draft_var_saver_factory(
invoke_from,
)
draft_var_saver_factory = self._get_draft_var_saver_factory(invoke_from, user)
# return response or stream generator
response = self._handle_response(
@ -434,8 +424,7 @@ class WorkflowAppGenerator(BaseAppGenerator):
queue_manager: AppQueueManager,
context: contextvars.Context,
variable_loader: VariableLoader,
workflow_thread_pool_id: str | None = None,
):
) -> None:
"""
Generate worker in a new thread.
:param flask_app: Flask app
@ -444,7 +433,6 @@ class WorkflowAppGenerator(BaseAppGenerator):
:param workflow_thread_pool_id: workflow thread pool id
:return:
"""
with preserve_flask_contexts(flask_app, context_vars=context):
with Session(db.engine, expire_on_commit=False) as session:
workflow = session.scalar(
@ -474,7 +462,6 @@ class WorkflowAppGenerator(BaseAppGenerator):
runner = WorkflowAppRunner(
application_generate_entity=application_generate_entity,
queue_manager=queue_manager,
workflow_thread_pool_id=workflow_thread_pool_id,
variable_loader=variable_loader,
workflow=workflow,
system_user_id=system_user_id,

View File

@ -1,7 +1,7 @@
import logging
import time
from typing import cast
from configs import dify_config
from core.app.apps.base_app_queue_manager import AppQueueManager
from core.app.apps.workflow.app_config_manager import WorkflowAppConfig
from core.app.apps.workflow_app_runner import WorkflowBasedAppRunner
@ -9,13 +9,14 @@ from core.app.entities.app_invoke_entities import (
InvokeFrom,
WorkflowAppGenerateEntity,
)
from core.workflow.callbacks import WorkflowCallback, WorkflowLoggingCallback
from core.workflow.entities.variable_pool import VariablePool
from core.workflow.entities import GraphRuntimeState, VariablePool
from core.workflow.graph_engine.command_channels.redis_channel import RedisChannel
from core.workflow.system_variable import SystemVariable
from core.workflow.variable_loader import VariableLoader
from core.workflow.workflow_entry import WorkflowEntry
from extensions.ext_redis import redis_client
from models.enums import UserFrom
from models.workflow import Workflow, WorkflowType
from models.workflow import Workflow
logger = logging.getLogger(__name__)
@ -31,7 +32,6 @@ class WorkflowAppRunner(WorkflowBasedAppRunner):
application_generate_entity: WorkflowAppGenerateEntity,
queue_manager: AppQueueManager,
variable_loader: VariableLoader,
workflow_thread_pool_id: str | None = None,
workflow: Workflow,
system_user_id: str,
):
@ -41,7 +41,6 @@ class WorkflowAppRunner(WorkflowBasedAppRunner):
app_id=application_generate_entity.app_config.app_id,
)
self.application_generate_entity = application_generate_entity
self.workflow_thread_pool_id = workflow_thread_pool_id
self._workflow = workflow
self._sys_user_id = system_user_id
@ -52,24 +51,12 @@ class WorkflowAppRunner(WorkflowBasedAppRunner):
app_config = self.application_generate_entity.app_config
app_config = cast(WorkflowAppConfig, app_config)
workflow_callbacks: list[WorkflowCallback] = []
if dify_config.DEBUG:
workflow_callbacks.append(WorkflowLoggingCallback())
# if only single iteration run is requested
if self.application_generate_entity.single_iteration_run:
# if only single iteration run is requested
graph, variable_pool = self._get_graph_and_variable_pool_of_single_iteration(
# if only single iteration or single loop run is requested
if self.application_generate_entity.single_iteration_run or self.application_generate_entity.single_loop_run:
graph, variable_pool, graph_runtime_state = self._prepare_single_node_execution(
workflow=self._workflow,
node_id=self.application_generate_entity.single_iteration_run.node_id,
user_inputs=self.application_generate_entity.single_iteration_run.inputs,
)
elif self.application_generate_entity.single_loop_run:
# if only single loop run is requested
graph, variable_pool = self._get_graph_and_variable_pool_of_single_loop(
workflow=self._workflow,
node_id=self.application_generate_entity.single_loop_run.node_id,
user_inputs=self.application_generate_entity.single_loop_run.inputs,
single_iteration_run=self.application_generate_entity.single_iteration_run,
single_loop_run=self.application_generate_entity.single_loop_run,
)
else:
inputs = self.application_generate_entity.inputs
@ -92,15 +79,27 @@ class WorkflowAppRunner(WorkflowBasedAppRunner):
conversation_variables=[],
)
graph_runtime_state = GraphRuntimeState(variable_pool=variable_pool, start_at=time.perf_counter())
# init graph
graph = self._init_graph(graph_config=self._workflow.graph_dict)
graph = self._init_graph(
graph_config=self._workflow.graph_dict,
graph_runtime_state=graph_runtime_state,
workflow_id=self._workflow.id,
tenant_id=self._workflow.tenant_id,
user_id=self.application_generate_entity.user_id,
)
# RUN WORKFLOW
# Create Redis command channel for this workflow execution
task_id = self.application_generate_entity.task_id
channel_key = f"workflow:{task_id}:commands"
command_channel = RedisChannel(redis_client, channel_key)
workflow_entry = WorkflowEntry(
tenant_id=self._workflow.tenant_id,
app_id=self._workflow.app_id,
workflow_id=self._workflow.id,
workflow_type=WorkflowType.value_of(self._workflow.type),
graph=graph,
graph_config=self._workflow.graph_dict,
user_id=self.application_generate_entity.user_id,
@ -112,10 +111,11 @@ class WorkflowAppRunner(WorkflowBasedAppRunner):
invoke_from=self.application_generate_entity.invoke_from,
call_depth=self.application_generate_entity.call_depth,
variable_pool=variable_pool,
thread_pool_id=self.workflow_thread_pool_id,
graph_runtime_state=graph_runtime_state,
command_channel=command_channel,
)
generator = workflow_entry.run(callbacks=workflow_callbacks)
generator = workflow_entry.run()
for event in generator:
self._handle_event(workflow_entry, event)

View File

@ -2,7 +2,7 @@ import logging
import time
from collections.abc import Callable, Generator
from contextlib import contextmanager
from typing import Any, Union
from typing import Union
from sqlalchemy.orm import Session
@ -14,6 +14,7 @@ from core.app.entities.app_invoke_entities import (
WorkflowAppGenerateEntity,
)
from core.app.entities.queue_entities import (
AppQueueEvent,
MessageQueueMessage,
QueueAgentLogEvent,
QueueErrorEvent,
@ -25,14 +26,9 @@ from core.app.entities.queue_entities import (
QueueLoopStartEvent,
QueueNodeExceptionEvent,
QueueNodeFailedEvent,
QueueNodeInIterationFailedEvent,
QueueNodeInLoopFailedEvent,
QueueNodeRetryEvent,
QueueNodeStartedEvent,
QueueNodeSucceededEvent,
QueueParallelBranchRunFailedEvent,
QueueParallelBranchRunStartedEvent,
QueueParallelBranchRunSucceededEvent,
QueuePingEvent,
QueueStopEvent,
QueueTextChunkEvent,
@ -57,8 +53,8 @@ from core.app.entities.task_entities import (
from core.app.task_pipeline.based_generate_task_pipeline import BasedGenerateTaskPipeline
from core.base.tts import AppGeneratorTTSPublisher, AudioTrunk
from core.ops.ops_trace_manager import TraceQueueManager
from core.workflow.entities.workflow_execution import WorkflowExecution, WorkflowExecutionStatus, WorkflowType
from core.workflow.graph_engine.entities.graph_runtime_state import GraphRuntimeState
from core.workflow.entities import GraphRuntimeState, WorkflowExecution
from core.workflow.enums import WorkflowExecutionStatus, WorkflowType
from core.workflow.repositories.draft_variable_repository import DraftVariableSaverFactory
from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository
@ -349,9 +345,7 @@ class WorkflowAppGenerateTaskPipeline:
def _handle_node_failed_events(
self,
event: Union[
QueueNodeFailedEvent, QueueNodeInIterationFailedEvent, QueueNodeInLoopFailedEvent, QueueNodeExceptionEvent
],
event: Union[QueueNodeFailedEvent, QueueNodeExceptionEvent],
**kwargs,
) -> Generator[StreamResponse, None, None]:
"""Handle various node failure events."""
@ -370,32 +364,6 @@ class WorkflowAppGenerateTaskPipeline:
if node_failed_response:
yield node_failed_response
def _handle_parallel_branch_started_event(
self, event: QueueParallelBranchRunStartedEvent, **kwargs
) -> Generator[StreamResponse, None, None]:
"""Handle parallel branch started events."""
self._ensure_workflow_initialized()
parallel_start_resp = self._workflow_response_converter.workflow_parallel_branch_start_to_stream_response(
task_id=self._application_generate_entity.task_id,
workflow_execution_id=self._workflow_run_id,
event=event,
)
yield parallel_start_resp
def _handle_parallel_branch_finished_events(
self, event: Union[QueueParallelBranchRunSucceededEvent, QueueParallelBranchRunFailedEvent], **kwargs
) -> Generator[StreamResponse, None, None]:
"""Handle parallel branch finished events."""
self._ensure_workflow_initialized()
parallel_finish_resp = self._workflow_response_converter.workflow_parallel_branch_finished_to_stream_response(
task_id=self._application_generate_entity.task_id,
workflow_execution_id=self._workflow_run_id,
event=event,
)
yield parallel_finish_resp
def _handle_iteration_start_event(
self, event: QueueIterationStartEvent, **kwargs
) -> Generator[StreamResponse, None, None]:
@ -617,8 +585,6 @@ class WorkflowAppGenerateTaskPipeline:
QueueNodeRetryEvent: self._handle_node_retry_event,
QueueNodeStartedEvent: self._handle_node_started_event,
QueueNodeSucceededEvent: self._handle_node_succeeded_event,
# Parallel branch events
QueueParallelBranchRunStartedEvent: self._handle_parallel_branch_started_event,
# Iteration events
QueueIterationStartEvent: self._handle_iteration_start_event,
QueueIterationNextEvent: self._handle_iteration_next_event,
@ -633,7 +599,7 @@ class WorkflowAppGenerateTaskPipeline:
def _dispatch_event(
self,
event: Any,
event: AppQueueEvent,
*,
graph_runtime_state: GraphRuntimeState | None = None,
tts_publisher: AppGeneratorTTSPublisher | None = None,
@ -660,8 +626,6 @@ class WorkflowAppGenerateTaskPipeline:
event,
(
QueueNodeFailedEvent,
QueueNodeInIterationFailedEvent,
QueueNodeInLoopFailedEvent,
QueueNodeExceptionEvent,
),
):
@ -674,17 +638,6 @@ class WorkflowAppGenerateTaskPipeline:
)
return
# Handle parallel branch finished events with isinstance check
if isinstance(event, (QueueParallelBranchRunSucceededEvent, QueueParallelBranchRunFailedEvent)):
yield from self._handle_parallel_branch_finished_events(
event,
graph_runtime_state=graph_runtime_state,
tts_publisher=tts_publisher,
trace_manager=trace_manager,
queue_message=queue_message,
)
return
# Handle workflow failed and stop events with isinstance check
if isinstance(event, (QueueWorkflowFailedEvent, QueueStopEvent)):
yield from self._handle_workflow_failed_and_stop_events(

View File

@ -1,7 +1,9 @@
import time
from collections.abc import Mapping
from typing import Any, cast
from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
from core.app.entities.app_invoke_entities import InvokeFrom
from core.app.entities.queue_entities import (
AppQueueEvent,
QueueAgentLogEvent,
@ -13,14 +15,9 @@ from core.app.entities.queue_entities import (
QueueLoopStartEvent,
QueueNodeExceptionEvent,
QueueNodeFailedEvent,
QueueNodeInIterationFailedEvent,
QueueNodeInLoopFailedEvent,
QueueNodeRetryEvent,
QueueNodeStartedEvent,
QueueNodeSucceededEvent,
QueueParallelBranchRunFailedEvent,
QueueParallelBranchRunStartedEvent,
QueueParallelBranchRunSucceededEvent,
QueueRetrieverResourcesEvent,
QueueTextChunkEvent,
QueueWorkflowFailedEvent,
@ -28,42 +25,39 @@ from core.app.entities.queue_entities import (
QueueWorkflowStartedEvent,
QueueWorkflowSucceededEvent,
)
from core.workflow.entities.variable_pool import VariablePool
from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionMetadataKey
from core.workflow.graph_engine.entities.event import (
AgentLogEvent,
from core.workflow.entities import GraphInitParams, GraphRuntimeState, VariablePool
from core.workflow.graph import Graph
from core.workflow.graph_events import (
GraphEngineEvent,
GraphRunFailedEvent,
GraphRunPartialSucceededEvent,
GraphRunStartedEvent,
GraphRunSucceededEvent,
IterationRunFailedEvent,
IterationRunNextEvent,
IterationRunStartedEvent,
IterationRunSucceededEvent,
LoopRunFailedEvent,
LoopRunNextEvent,
LoopRunStartedEvent,
LoopRunSucceededEvent,
NodeInIterationFailedEvent,
NodeInLoopFailedEvent,
NodeRunAgentLogEvent,
NodeRunExceptionEvent,
NodeRunFailedEvent,
NodeRunIterationFailedEvent,
NodeRunIterationNextEvent,
NodeRunIterationStartedEvent,
NodeRunIterationSucceededEvent,
NodeRunLoopFailedEvent,
NodeRunLoopNextEvent,
NodeRunLoopStartedEvent,
NodeRunLoopSucceededEvent,
NodeRunRetrieverResourceEvent,
NodeRunRetryEvent,
NodeRunStartedEvent,
NodeRunStreamChunkEvent,
NodeRunSucceededEvent,
ParallelBranchRunFailedEvent,
ParallelBranchRunStartedEvent,
ParallelBranchRunSucceededEvent,
)
from core.workflow.graph_engine.entities.graph import Graph
from core.workflow.graph_events.graph import GraphRunAbortedEvent
from core.workflow.nodes import NodeType
from core.workflow.nodes.node_factory import DifyNodeFactory
from core.workflow.nodes.node_mapping import NODE_TYPE_CLASSES_MAPPING
from core.workflow.system_variable import SystemVariable
from core.workflow.variable_loader import DUMMY_VARIABLE_LOADER, VariableLoader, load_into_variable_pool
from core.workflow.workflow_entry import WorkflowEntry
from models.enums import UserFrom
from models.workflow import Workflow
@ -79,7 +73,14 @@ class WorkflowBasedAppRunner:
self._variable_loader = variable_loader
self._app_id = app_id
def _init_graph(self, graph_config: Mapping[str, Any]) -> Graph:
def _init_graph(
self,
graph_config: Mapping[str, Any],
graph_runtime_state: GraphRuntimeState,
workflow_id: str = "",
tenant_id: str = "",
user_id: str = "",
) -> Graph:
"""
Init graph
"""
@ -91,22 +92,109 @@ class WorkflowBasedAppRunner:
if not isinstance(graph_config.get("edges"), list):
raise ValueError("edges in workflow graph must be a list")
# Create required parameters for Graph.init
graph_init_params = GraphInitParams(
tenant_id=tenant_id or "",
app_id=self._app_id,
workflow_id=workflow_id,
graph_config=graph_config,
user_id=user_id,
user_from=UserFrom.ACCOUNT.value,
invoke_from=InvokeFrom.SERVICE_API.value,
call_depth=0,
)
# Use the provided graph_runtime_state for consistent state management
node_factory = DifyNodeFactory(
graph_init_params=graph_init_params,
graph_runtime_state=graph_runtime_state,
)
# init graph
graph = Graph.init(graph_config=graph_config)
graph = Graph.init(graph_config=graph_config, node_factory=node_factory)
if not graph:
raise ValueError("graph not found in workflow")
return graph
def _get_graph_and_variable_pool_of_single_iteration(
def _prepare_single_node_execution(
self,
workflow: Workflow,
single_iteration_run: Any | None = None,
single_loop_run: Any | None = None,
) -> tuple[Graph, VariablePool, GraphRuntimeState]:
"""
Prepare graph, variable pool, and runtime state for single node execution
(either single iteration or single loop).
Args:
workflow: The workflow instance
single_iteration_run: SingleIterationRunEntity if running single iteration, None otherwise
single_loop_run: SingleLoopRunEntity if running single loop, None otherwise
Returns:
A tuple containing (graph, variable_pool, graph_runtime_state)
Raises:
ValueError: If neither single_iteration_run nor single_loop_run is specified
"""
# Create initial runtime state with variable pool containing environment variables
graph_runtime_state = GraphRuntimeState(
variable_pool=VariablePool(
system_variables=SystemVariable.empty(),
user_inputs={},
environment_variables=workflow.environment_variables,
),
start_at=time.time(),
)
# Determine which type of single node execution and get graph/variable_pool
if single_iteration_run:
graph, variable_pool = self._get_graph_and_variable_pool_of_single_iteration(
workflow=workflow,
node_id=single_iteration_run.node_id,
user_inputs=dict(single_iteration_run.inputs),
graph_runtime_state=graph_runtime_state,
)
elif single_loop_run:
graph, variable_pool = self._get_graph_and_variable_pool_of_single_loop(
workflow=workflow,
node_id=single_loop_run.node_id,
user_inputs=dict(single_loop_run.inputs),
graph_runtime_state=graph_runtime_state,
)
else:
raise ValueError("Neither single_iteration_run nor single_loop_run is specified")
# Return the graph, variable_pool, and the same graph_runtime_state used during graph creation
# This ensures all nodes in the graph reference the same GraphRuntimeState instance
return graph, variable_pool, graph_runtime_state
def _get_graph_and_variable_pool_for_single_node_run(
self,
workflow: Workflow,
node_id: str,
user_inputs: dict,
user_inputs: dict[str, Any],
graph_runtime_state: GraphRuntimeState,
node_type_filter_key: str, # 'iteration_id' or 'loop_id'
node_type_label: str = "node", # 'iteration' or 'loop' for error messages
) -> tuple[Graph, VariablePool]:
"""
Get variable pool of single iteration
Get graph and variable pool for single node execution (iteration or loop).
Args:
workflow: The workflow instance
node_id: The node ID to execute
user_inputs: User inputs for the node
graph_runtime_state: The graph runtime state
node_type_filter_key: The key to filter nodes ('iteration_id' or 'loop_id')
node_type_label: Label for error messages ('iteration' or 'loop')
Returns:
A tuple containing (graph, variable_pool)
"""
# fetch workflow graph
graph_config = workflow.graph_dict
@ -124,18 +212,22 @@ class WorkflowBasedAppRunner:
if not isinstance(graph_config.get("edges"), list):
raise ValueError("edges in workflow graph must be a list")
# filter nodes only in iteration
# filter nodes only in the specified node type (iteration or loop)
main_node_config = next((n for n in graph_config.get("nodes", []) if n.get("id") == node_id), None)
start_node_id = main_node_config.get("data", {}).get("start_node_id") if main_node_config else None
node_configs = [
node
for node in graph_config.get("nodes", [])
if node.get("id") == node_id or node.get("data", {}).get("iteration_id", "") == node_id
if node.get("id") == node_id
or node.get("data", {}).get(node_type_filter_key, "") == node_id
or (start_node_id and node.get("id") == start_node_id)
]
graph_config["nodes"] = node_configs
node_ids = [node.get("id") for node in node_configs]
# filter edges only in iteration
# filter edges only in the specified node type
edge_configs = [
edge
for edge in graph_config.get("edges", [])
@ -145,37 +237,50 @@ class WorkflowBasedAppRunner:
graph_config["edges"] = edge_configs
# Create required parameters for Graph.init
graph_init_params = GraphInitParams(
tenant_id=workflow.tenant_id,
app_id=self._app_id,
workflow_id=workflow.id,
graph_config=graph_config,
user_id="",
user_from=UserFrom.ACCOUNT.value,
invoke_from=InvokeFrom.SERVICE_API.value,
call_depth=0,
)
node_factory = DifyNodeFactory(
graph_init_params=graph_init_params,
graph_runtime_state=graph_runtime_state,
)
# init graph
graph = Graph.init(graph_config=graph_config, root_node_id=node_id)
graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id=node_id)
if not graph:
raise ValueError("graph not found in workflow")
# fetch node config from node id
iteration_node_config = None
target_node_config = None
for node in node_configs:
if node.get("id") == node_id:
iteration_node_config = node
target_node_config = node
break
if not iteration_node_config:
raise ValueError("iteration node id not found in workflow graph")
if not target_node_config:
raise ValueError(f"{node_type_label} node id not found in workflow graph")
# Get node class
node_type = NodeType(iteration_node_config.get("data", {}).get("type"))
node_version = iteration_node_config.get("data", {}).get("version", "1")
node_type = NodeType(target_node_config.get("data", {}).get("type"))
node_version = target_node_config.get("data", {}).get("version", "1")
node_cls = NODE_TYPE_CLASSES_MAPPING[node_type][node_version]
# init variable pool
variable_pool = VariablePool(
system_variables=SystemVariable.empty(),
user_inputs={},
environment_variables=workflow.environment_variables,
)
# Use the variable pool from graph_runtime_state instead of creating a new one
variable_pool = graph_runtime_state.variable_pool
try:
variable_mapping = node_cls.extract_variable_selector_to_variable_mapping(
graph_config=workflow.graph_dict, config=iteration_node_config
graph_config=workflow.graph_dict, config=target_node_config
)
except NotImplementedError:
variable_mapping = {}
@ -196,102 +301,44 @@ class WorkflowBasedAppRunner:
return graph, variable_pool
def _get_graph_and_variable_pool_of_single_iteration(
self,
workflow: Workflow,
node_id: str,
user_inputs: dict[str, Any],
graph_runtime_state: GraphRuntimeState,
) -> tuple[Graph, VariablePool]:
"""
Get variable pool of single iteration
"""
return self._get_graph_and_variable_pool_for_single_node_run(
workflow=workflow,
node_id=node_id,
user_inputs=user_inputs,
graph_runtime_state=graph_runtime_state,
node_type_filter_key="iteration_id",
node_type_label="iteration",
)
def _get_graph_and_variable_pool_of_single_loop(
self,
workflow: Workflow,
node_id: str,
user_inputs: dict,
user_inputs: dict[str, Any],
graph_runtime_state: GraphRuntimeState,
) -> tuple[Graph, VariablePool]:
"""
Get variable pool of single loop
"""
# fetch workflow graph
graph_config = workflow.graph_dict
if not graph_config:
raise ValueError("workflow graph not found")
graph_config = cast(dict[str, Any], graph_config)
if "nodes" not in graph_config or "edges" not in graph_config:
raise ValueError("nodes or edges not found in workflow graph")
if not isinstance(graph_config.get("nodes"), list):
raise ValueError("nodes in workflow graph must be a list")
if not isinstance(graph_config.get("edges"), list):
raise ValueError("edges in workflow graph must be a list")
# filter nodes only in loop
node_configs = [
node
for node in graph_config.get("nodes", [])
if node.get("id") == node_id or node.get("data", {}).get("loop_id", "") == node_id
]
graph_config["nodes"] = node_configs
node_ids = [node.get("id") for node in node_configs]
# filter edges only in loop
edge_configs = [
edge
for edge in graph_config.get("edges", [])
if (edge.get("source") is None or edge.get("source") in node_ids)
and (edge.get("target") is None or edge.get("target") in node_ids)
]
graph_config["edges"] = edge_configs
# init graph
graph = Graph.init(graph_config=graph_config, root_node_id=node_id)
if not graph:
raise ValueError("graph not found in workflow")
# fetch node config from node id
loop_node_config = None
for node in node_configs:
if node.get("id") == node_id:
loop_node_config = node
break
if not loop_node_config:
raise ValueError("loop node id not found in workflow graph")
# Get node class
node_type = NodeType(loop_node_config.get("data", {}).get("type"))
node_version = loop_node_config.get("data", {}).get("version", "1")
node_cls = NODE_TYPE_CLASSES_MAPPING[node_type][node_version]
# init variable pool
variable_pool = VariablePool(
system_variables=SystemVariable.empty(),
user_inputs={},
environment_variables=workflow.environment_variables,
)
try:
variable_mapping = node_cls.extract_variable_selector_to_variable_mapping(
graph_config=workflow.graph_dict, config=loop_node_config
)
except NotImplementedError:
variable_mapping = {}
load_into_variable_pool(
self._variable_loader,
variable_pool=variable_pool,
variable_mapping=variable_mapping,
return self._get_graph_and_variable_pool_for_single_node_run(
workflow=workflow,
node_id=node_id,
user_inputs=user_inputs,
graph_runtime_state=graph_runtime_state,
node_type_filter_key="loop_id",
node_type_label="loop",
)
WorkflowEntry.mapping_user_inputs_to_variable_pool(
variable_mapping=variable_mapping,
user_inputs=user_inputs,
variable_pool=variable_pool,
tenant_id=workflow.tenant_id,
)
return graph, variable_pool
def _handle_event(self, workflow_entry: WorkflowEntry, event: GraphEngineEvent):
"""
Handle event
@ -310,39 +357,32 @@ class WorkflowBasedAppRunner:
)
elif isinstance(event, GraphRunFailedEvent):
self._publish_event(QueueWorkflowFailedEvent(error=event.error, exceptions_count=event.exceptions_count))
elif isinstance(event, GraphRunAbortedEvent):
self._publish_event(QueueWorkflowFailedEvent(error=event.reason or "Unknown error", exceptions_count=0))
elif isinstance(event, NodeRunRetryEvent):
node_run_result = event.route_node_state.node_run_result
inputs: Mapping[str, Any] | None = {}
process_data: Mapping[str, Any] | None = {}
outputs: Mapping[str, Any] | None = {}
execution_metadata: Mapping[WorkflowNodeExecutionMetadataKey, Any] | None = {}
if node_run_result:
inputs = node_run_result.inputs
process_data = node_run_result.process_data
outputs = node_run_result.outputs
execution_metadata = node_run_result.metadata
node_run_result = event.node_run_result
inputs = node_run_result.inputs
process_data = node_run_result.process_data
outputs = node_run_result.outputs
execution_metadata = node_run_result.metadata
self._publish_event(
QueueNodeRetryEvent(
node_execution_id=event.id,
node_id=event.node_id,
node_title=event.node_title,
node_type=event.node_type,
node_data=event.node_data,
parallel_id=event.parallel_id,
parallel_start_node_id=event.parallel_start_node_id,
parent_parallel_id=event.parent_parallel_id,
parent_parallel_start_node_id=event.parent_parallel_start_node_id,
start_at=event.start_at,
node_run_index=event.route_node_state.index,
predecessor_node_id=event.predecessor_node_id,
in_iteration_id=event.in_iteration_id,
in_loop_id=event.in_loop_id,
parallel_mode_run_id=event.parallel_mode_run_id,
inputs=inputs,
process_data=process_data,
outputs=outputs,
error=event.error,
execution_metadata=execution_metadata,
retry_index=event.retry_index,
provider_type=event.provider_type,
provider_id=event.provider_id,
)
)
elif isinstance(event, NodeRunStartedEvent):
@ -350,44 +390,29 @@ class WorkflowBasedAppRunner:
QueueNodeStartedEvent(
node_execution_id=event.id,
node_id=event.node_id,
node_title=event.node_title,
node_type=event.node_type,
node_data=event.node_data,
parallel_id=event.parallel_id,
parallel_start_node_id=event.parallel_start_node_id,
parent_parallel_id=event.parent_parallel_id,
parent_parallel_start_node_id=event.parent_parallel_start_node_id,
start_at=event.route_node_state.start_at,
node_run_index=event.route_node_state.index,
start_at=event.start_at,
predecessor_node_id=event.predecessor_node_id,
in_iteration_id=event.in_iteration_id,
in_loop_id=event.in_loop_id,
parallel_mode_run_id=event.parallel_mode_run_id,
agent_strategy=event.agent_strategy,
provider_type=event.provider_type,
provider_id=event.provider_id,
)
)
elif isinstance(event, NodeRunSucceededEvent):
node_run_result = event.route_node_state.node_run_result
if node_run_result:
inputs = node_run_result.inputs
process_data = node_run_result.process_data
outputs = node_run_result.outputs
execution_metadata = node_run_result.metadata
else:
inputs = {}
process_data = {}
outputs = {}
execution_metadata = {}
node_run_result = event.node_run_result
inputs = node_run_result.inputs
process_data = node_run_result.process_data
outputs = node_run_result.outputs
execution_metadata = node_run_result.metadata
self._publish_event(
QueueNodeSucceededEvent(
node_execution_id=event.id,
node_id=event.node_id,
node_type=event.node_type,
node_data=event.node_data,
parallel_id=event.parallel_id,
parallel_start_node_id=event.parallel_start_node_id,
parent_parallel_id=event.parent_parallel_id,
parent_parallel_start_node_id=event.parent_parallel_start_node_id,
start_at=event.route_node_state.start_at,
start_at=event.start_at,
inputs=inputs,
process_data=process_data,
outputs=outputs,
@ -396,34 +421,18 @@ class WorkflowBasedAppRunner:
in_loop_id=event.in_loop_id,
)
)
elif isinstance(event, NodeRunFailedEvent):
self._publish_event(
QueueNodeFailedEvent(
node_execution_id=event.id,
node_id=event.node_id,
node_type=event.node_type,
node_data=event.node_data,
parallel_id=event.parallel_id,
parallel_start_node_id=event.parallel_start_node_id,
parent_parallel_id=event.parent_parallel_id,
parent_parallel_start_node_id=event.parent_parallel_start_node_id,
start_at=event.route_node_state.start_at,
inputs=event.route_node_state.node_run_result.inputs
if event.route_node_state.node_run_result
else {},
process_data=event.route_node_state.node_run_result.process_data
if event.route_node_state.node_run_result
else {},
outputs=event.route_node_state.node_run_result.outputs or {}
if event.route_node_state.node_run_result
else {},
error=event.route_node_state.node_run_result.error
if event.route_node_state.node_run_result and event.route_node_state.node_run_result.error
else "Unknown error",
execution_metadata=event.route_node_state.node_run_result.metadata
if event.route_node_state.node_run_result
else {},
start_at=event.start_at,
inputs=event.node_run_result.inputs,
process_data=event.node_run_result.process_data,
outputs=event.node_run_result.outputs,
error=event.node_run_result.error or "Unknown error",
execution_metadata=event.node_run_result.metadata,
in_iteration_id=event.in_iteration_id,
in_loop_id=event.in_loop_id,
)
@ -434,93 +443,21 @@ class WorkflowBasedAppRunner:
node_execution_id=event.id,
node_id=event.node_id,
node_type=event.node_type,
node_data=event.node_data,
parallel_id=event.parallel_id,
parallel_start_node_id=event.parallel_start_node_id,
parent_parallel_id=event.parent_parallel_id,
parent_parallel_start_node_id=event.parent_parallel_start_node_id,
start_at=event.route_node_state.start_at,
inputs=event.route_node_state.node_run_result.inputs
if event.route_node_state.node_run_result
else {},
process_data=event.route_node_state.node_run_result.process_data
if event.route_node_state.node_run_result
else {},
outputs=event.route_node_state.node_run_result.outputs
if event.route_node_state.node_run_result
else {},
error=event.route_node_state.node_run_result.error
if event.route_node_state.node_run_result and event.route_node_state.node_run_result.error
else "Unknown error",
execution_metadata=event.route_node_state.node_run_result.metadata
if event.route_node_state.node_run_result
else {},
start_at=event.start_at,
inputs=event.node_run_result.inputs,
process_data=event.node_run_result.process_data,
outputs=event.node_run_result.outputs,
error=event.node_run_result.error or "Unknown error",
execution_metadata=event.node_run_result.metadata,
in_iteration_id=event.in_iteration_id,
in_loop_id=event.in_loop_id,
)
)
elif isinstance(event, NodeInIterationFailedEvent):
self._publish_event(
QueueNodeInIterationFailedEvent(
node_execution_id=event.id,
node_id=event.node_id,
node_type=event.node_type,
node_data=event.node_data,
parallel_id=event.parallel_id,
parallel_start_node_id=event.parallel_start_node_id,
parent_parallel_id=event.parent_parallel_id,
parent_parallel_start_node_id=event.parent_parallel_start_node_id,
start_at=event.route_node_state.start_at,
inputs=event.route_node_state.node_run_result.inputs
if event.route_node_state.node_run_result
else {},
process_data=event.route_node_state.node_run_result.process_data
if event.route_node_state.node_run_result
else {},
outputs=event.route_node_state.node_run_result.outputs or {}
if event.route_node_state.node_run_result
else {},
execution_metadata=event.route_node_state.node_run_result.metadata
if event.route_node_state.node_run_result
else {},
in_iteration_id=event.in_iteration_id,
error=event.error,
)
)
elif isinstance(event, NodeInLoopFailedEvent):
self._publish_event(
QueueNodeInLoopFailedEvent(
node_execution_id=event.id,
node_id=event.node_id,
node_type=event.node_type,
node_data=event.node_data,
parallel_id=event.parallel_id,
parallel_start_node_id=event.parallel_start_node_id,
parent_parallel_id=event.parent_parallel_id,
parent_parallel_start_node_id=event.parent_parallel_start_node_id,
start_at=event.route_node_state.start_at,
inputs=event.route_node_state.node_run_result.inputs
if event.route_node_state.node_run_result
else {},
process_data=event.route_node_state.node_run_result.process_data
if event.route_node_state.node_run_result
else {},
outputs=event.route_node_state.node_run_result.outputs or {}
if event.route_node_state.node_run_result
else {},
execution_metadata=event.route_node_state.node_run_result.metadata
if event.route_node_state.node_run_result
else {},
in_loop_id=event.in_loop_id,
error=event.error,
)
)
elif isinstance(event, NodeRunStreamChunkEvent):
self._publish_event(
QueueTextChunkEvent(
text=event.chunk_content,
from_variable_selector=event.from_variable_selector,
text=event.chunk,
from_variable_selector=list(event.selector),
in_iteration_id=event.in_iteration_id,
in_loop_id=event.in_loop_id,
)
@ -533,10 +470,10 @@ class WorkflowBasedAppRunner:
in_loop_id=event.in_loop_id,
)
)
elif isinstance(event, AgentLogEvent):
elif isinstance(event, NodeRunAgentLogEvent):
self._publish_event(
QueueAgentLogEvent(
id=event.id,
id=event.message_id,
label=event.label,
node_execution_id=event.node_execution_id,
parent_id=event.parent_id,
@ -547,51 +484,13 @@ class WorkflowBasedAppRunner:
node_id=event.node_id,
)
)
elif isinstance(event, ParallelBranchRunStartedEvent):
self._publish_event(
QueueParallelBranchRunStartedEvent(
parallel_id=event.parallel_id,
parallel_start_node_id=event.parallel_start_node_id,
parent_parallel_id=event.parent_parallel_id,
parent_parallel_start_node_id=event.parent_parallel_start_node_id,
in_iteration_id=event.in_iteration_id,
in_loop_id=event.in_loop_id,
)
)
elif isinstance(event, ParallelBranchRunSucceededEvent):
self._publish_event(
QueueParallelBranchRunSucceededEvent(
parallel_id=event.parallel_id,
parallel_start_node_id=event.parallel_start_node_id,
parent_parallel_id=event.parent_parallel_id,
parent_parallel_start_node_id=event.parent_parallel_start_node_id,
in_iteration_id=event.in_iteration_id,
in_loop_id=event.in_loop_id,
)
)
elif isinstance(event, ParallelBranchRunFailedEvent):
self._publish_event(
QueueParallelBranchRunFailedEvent(
parallel_id=event.parallel_id,
parallel_start_node_id=event.parallel_start_node_id,
parent_parallel_id=event.parent_parallel_id,
parent_parallel_start_node_id=event.parent_parallel_start_node_id,
in_iteration_id=event.in_iteration_id,
in_loop_id=event.in_loop_id,
error=event.error,
)
)
elif isinstance(event, IterationRunStartedEvent):
elif isinstance(event, NodeRunIterationStartedEvent):
self._publish_event(
QueueIterationStartEvent(
node_execution_id=event.iteration_id,
node_id=event.iteration_node_id,
node_type=event.iteration_node_type,
node_data=event.iteration_node_data,
parallel_id=event.parallel_id,
parallel_start_node_id=event.parallel_start_node_id,
parent_parallel_id=event.parent_parallel_id,
parent_parallel_start_node_id=event.parent_parallel_start_node_id,
node_execution_id=event.id,
node_id=event.node_id,
node_type=event.node_type,
node_title=event.node_title,
start_at=event.start_at,
node_run_index=workflow_entry.graph_engine.graph_runtime_state.node_run_steps,
inputs=event.inputs,
@ -599,55 +498,41 @@ class WorkflowBasedAppRunner:
metadata=event.metadata,
)
)
elif isinstance(event, IterationRunNextEvent):
elif isinstance(event, NodeRunIterationNextEvent):
self._publish_event(
QueueIterationNextEvent(
node_execution_id=event.iteration_id,
node_id=event.iteration_node_id,
node_type=event.iteration_node_type,
node_data=event.iteration_node_data,
parallel_id=event.parallel_id,
parallel_start_node_id=event.parallel_start_node_id,
parent_parallel_id=event.parent_parallel_id,
parent_parallel_start_node_id=event.parent_parallel_start_node_id,
node_execution_id=event.id,
node_id=event.node_id,
node_type=event.node_type,
node_title=event.node_title,
index=event.index,
node_run_index=workflow_entry.graph_engine.graph_runtime_state.node_run_steps,
output=event.pre_iteration_output,
parallel_mode_run_id=event.parallel_mode_run_id,
duration=event.duration,
)
)
elif isinstance(event, (IterationRunSucceededEvent | IterationRunFailedEvent)):
elif isinstance(event, (NodeRunIterationSucceededEvent | NodeRunIterationFailedEvent)):
self._publish_event(
QueueIterationCompletedEvent(
node_execution_id=event.iteration_id,
node_id=event.iteration_node_id,
node_type=event.iteration_node_type,
node_data=event.iteration_node_data,
parallel_id=event.parallel_id,
parallel_start_node_id=event.parallel_start_node_id,
parent_parallel_id=event.parent_parallel_id,
parent_parallel_start_node_id=event.parent_parallel_start_node_id,
node_execution_id=event.id,
node_id=event.node_id,
node_type=event.node_type,
node_title=event.node_title,
start_at=event.start_at,
node_run_index=workflow_entry.graph_engine.graph_runtime_state.node_run_steps,
inputs=event.inputs,
outputs=event.outputs,
metadata=event.metadata,
steps=event.steps,
error=event.error if isinstance(event, IterationRunFailedEvent) else None,
error=event.error if isinstance(event, NodeRunIterationFailedEvent) else None,
)
)
elif isinstance(event, LoopRunStartedEvent):
elif isinstance(event, NodeRunLoopStartedEvent):
self._publish_event(
QueueLoopStartEvent(
node_execution_id=event.loop_id,
node_id=event.loop_node_id,
node_type=event.loop_node_type,
node_data=event.loop_node_data,
parallel_id=event.parallel_id,
parallel_start_node_id=event.parallel_start_node_id,
parent_parallel_id=event.parent_parallel_id,
parent_parallel_start_node_id=event.parent_parallel_start_node_id,
node_execution_id=event.id,
node_id=event.node_id,
node_type=event.node_type,
node_title=event.node_title,
start_at=event.start_at,
node_run_index=workflow_entry.graph_engine.graph_runtime_state.node_run_steps,
inputs=event.inputs,
@ -655,42 +540,32 @@ class WorkflowBasedAppRunner:
metadata=event.metadata,
)
)
elif isinstance(event, LoopRunNextEvent):
elif isinstance(event, NodeRunLoopNextEvent):
self._publish_event(
QueueLoopNextEvent(
node_execution_id=event.loop_id,
node_id=event.loop_node_id,
node_type=event.loop_node_type,
node_data=event.loop_node_data,
parallel_id=event.parallel_id,
parallel_start_node_id=event.parallel_start_node_id,
parent_parallel_id=event.parent_parallel_id,
parent_parallel_start_node_id=event.parent_parallel_start_node_id,
node_execution_id=event.id,
node_id=event.node_id,
node_type=event.node_type,
node_title=event.node_title,
index=event.index,
node_run_index=workflow_entry.graph_engine.graph_runtime_state.node_run_steps,
output=event.pre_loop_output,
parallel_mode_run_id=event.parallel_mode_run_id,
duration=event.duration,
)
)
elif isinstance(event, (LoopRunSucceededEvent | LoopRunFailedEvent)):
elif isinstance(event, (NodeRunLoopSucceededEvent | NodeRunLoopFailedEvent)):
self._publish_event(
QueueLoopCompletedEvent(
node_execution_id=event.loop_id,
node_id=event.loop_node_id,
node_type=event.loop_node_type,
node_data=event.loop_node_data,
parallel_id=event.parallel_id,
parallel_start_node_id=event.parallel_start_node_id,
parent_parallel_id=event.parent_parallel_id,
parent_parallel_start_node_id=event.parent_parallel_start_node_id,
node_execution_id=event.id,
node_id=event.node_id,
node_type=event.node_type,
node_title=event.node_title,
start_at=event.start_at,
node_run_index=workflow_entry.graph_engine.graph_runtime_state.node_run_steps,
inputs=event.inputs,
outputs=event.outputs,
metadata=event.metadata,
steps=event.steps,
error=event.error if isinstance(event, LoopRunFailedEvent) else None,
error=event.error if isinstance(event, NodeRunLoopFailedEvent) else None,
)
)

View File

@ -1,9 +1,12 @@
from collections.abc import Mapping, Sequence
from enum import StrEnum
from typing import Any
from typing import TYPE_CHECKING, Any, Optional
from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, field_validator
if TYPE_CHECKING:
from core.ops.ops_trace_manager import TraceQueueManager
from constants import UUID_NIL
from core.app.app_config.entities import EasyUIBasedAppConfig, WorkflowUIBasedAppConfig
from core.entities.provider_configuration import ProviderModelBundle
@ -35,6 +38,7 @@ class InvokeFrom(StrEnum):
# DEBUGGER indicates that this invocation is from
# the workflow (or chatflow) edit page.
DEBUGGER = "debugger"
PUBLISHED = "published"
@classmethod
def value_of(cls, value: str):
@ -113,8 +117,7 @@ class AppGenerateEntity(BaseModel):
extras: dict[str, Any] = Field(default_factory=dict)
# tracing instance
# Using Any to avoid circular import with TraceQueueManager
trace_manager: Any | None = None
trace_manager: Optional["TraceQueueManager"] = None
class EasyUIBasedAppGenerateEntity(AppGenerateEntity):
@ -240,3 +243,34 @@ class WorkflowAppGenerateEntity(AppGenerateEntity):
inputs: dict
single_loop_run: SingleLoopRunEntity | None = None
class RagPipelineGenerateEntity(WorkflowAppGenerateEntity):
"""
RAG Pipeline Application Generate Entity.
"""
# pipeline config
pipeline_config: WorkflowUIBasedAppConfig
datasource_type: str
datasource_info: Mapping[str, Any]
dataset_id: str
batch: str
document_id: str | None = None
original_document_id: str | None = None
start_node_id: str | None = None
# Import TraceQueueManager at runtime to resolve forward references
from core.ops.ops_trace_manager import TraceQueueManager
# Rebuild models that use forward references
AppGenerateEntity.model_rebuild()
EasyUIBasedAppGenerateEntity.model_rebuild()
ConversationAppGenerateEntity.model_rebuild()
ChatAppGenerateEntity.model_rebuild()
CompletionAppGenerateEntity.model_rebuild()
AgentChatAppGenerateEntity.model_rebuild()
AdvancedChatAppGenerateEntity.model_rebuild()
WorkflowAppGenerateEntity.model_rebuild()
RagPipelineGenerateEntity.model_rebuild()
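A minimal, self-contained sketch (not Dify code) of the forward-reference pattern used above, assuming Pydantic v2: the heavy type is imported only under TYPE_CHECKING, annotated as a string, then resolved with model_rebuild() once a runtime import has made the name available. queue.Queue stands in for TraceQueueManager here.
from typing import TYPE_CHECKING, Optional

from pydantic import BaseModel, ConfigDict

if TYPE_CHECKING:
    from queue import Queue  # stands in for TraceQueueManager

class Entity(BaseModel):
    model_config = ConfigDict(arbitrary_types_allowed=True)
    # string annotation, so creating the class needs no runtime import
    trace_manager: Optional["Queue"] = None

from queue import Queue  # runtime import puts the name in module globals

Entity.model_rebuild()  # resolves the forward reference
print(Entity(trace_manager=Queue()))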

View File

@ -3,15 +3,13 @@ from datetime import datetime
from enum import StrEnum, auto
from typing import Any
from pydantic import BaseModel
from pydantic import BaseModel, Field
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk
from core.rag.entities.citation_metadata import RetrievalSourceMetadata
from core.workflow.entities.node_entities import AgentNodeStrategyInit
from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionMetadataKey
from core.workflow.graph_engine.entities.graph_runtime_state import GraphRuntimeState
from core.workflow.entities import AgentNodeStrategyInit, GraphRuntimeState
from core.workflow.enums import WorkflowNodeExecutionMetadataKey
from core.workflow.nodes import NodeType
from core.workflow.nodes.base import BaseNodeData
class QueueEvent(StrEnum):
@ -43,9 +41,6 @@ class QueueEvent(StrEnum):
ANNOTATION_REPLY = "annotation_reply"
AGENT_THOUGHT = "agent_thought"
MESSAGE_FILE = "message_file"
PARALLEL_BRANCH_RUN_STARTED = "parallel_branch_run_started"
PARALLEL_BRANCH_RUN_SUCCEEDED = "parallel_branch_run_succeeded"
PARALLEL_BRANCH_RUN_FAILED = "parallel_branch_run_failed"
AGENT_LOG = "agent_log"
ERROR = "error"
PING = "ping"
@ -80,21 +75,13 @@ class QueueIterationStartEvent(AppQueueEvent):
node_execution_id: str
node_id: str
node_type: NodeType
node_data: BaseNodeData
parallel_id: str | None = None
"""parallel id if node is in parallel"""
parallel_start_node_id: str | None = None
"""parallel start node id if node is in parallel"""
parent_parallel_id: str | None = None
"""parent parallel id if node is in parallel"""
parent_parallel_start_node_id: str | None = None
"""parent parallel start node id if node is in parallel"""
node_title: str
start_at: datetime
node_run_index: int
inputs: Mapping[str, Any] | None = None
inputs: Mapping[str, object] = Field(default_factory=dict)
predecessor_node_id: str | None = None
metadata: Mapping[str, Any] | None = None
metadata: Mapping[str, object] = Field(default_factory=dict)
class QueueIterationNextEvent(AppQueueEvent):
@ -108,20 +95,9 @@ class QueueIterationNextEvent(AppQueueEvent):
node_execution_id: str
node_id: str
node_type: NodeType
node_data: BaseNodeData
parallel_id: str | None = None
"""parallel id if node is in parallel"""
parallel_start_node_id: str | None = None
"""parallel start node id if node is in parallel"""
parent_parallel_id: str | None = None
"""parent parallel id if node is in parallel"""
parent_parallel_start_node_id: str | None = None
"""parent parallel start node id if node is in parallel"""
parallel_mode_run_id: str | None = None
"""iteration run in parallel mode run id"""
node_title: str
node_run_index: int
output: Any | None = None # output for the current iteration
duration: float | None = None
output: Any = None # output for the current iteration
class QueueIterationCompletedEvent(AppQueueEvent):
@ -134,21 +110,13 @@ class QueueIterationCompletedEvent(AppQueueEvent):
node_execution_id: str
node_id: str
node_type: NodeType
node_data: BaseNodeData
parallel_id: str | None = None
"""parallel id if node is in parallel"""
parallel_start_node_id: str | None = None
"""parallel start node id if node is in parallel"""
parent_parallel_id: str | None = None
"""parent parallel id if node is in parallel"""
parent_parallel_start_node_id: str | None = None
"""parent parallel start node id if node is in parallel"""
node_title: str
start_at: datetime
node_run_index: int
inputs: Mapping[str, Any] | None = None
outputs: Mapping[str, Any] | None = None
metadata: Mapping[str, Any] | None = None
inputs: Mapping[str, object] = Field(default_factory=dict)
outputs: Mapping[str, object] = Field(default_factory=dict)
metadata: Mapping[str, object] = Field(default_factory=dict)
steps: int = 0
error: str | None = None
@ -163,7 +131,7 @@ class QueueLoopStartEvent(AppQueueEvent):
node_execution_id: str
node_id: str
node_type: NodeType
node_data: BaseNodeData
node_title: str
parallel_id: str | None = None
"""parallel id if node is in parallel"""
parallel_start_node_id: str | None = None
@ -175,9 +143,9 @@ class QueueLoopStartEvent(AppQueueEvent):
start_at: datetime
node_run_index: int
inputs: Mapping[str, Any] | None = None
inputs: Mapping[str, object] = Field(default_factory=dict)
predecessor_node_id: str | None = None
metadata: Mapping[str, Any] | None = None
metadata: Mapping[str, object] = Field(default_factory=dict)
class QueueLoopNextEvent(AppQueueEvent):
@ -191,7 +159,7 @@ class QueueLoopNextEvent(AppQueueEvent):
node_execution_id: str
node_id: str
node_type: NodeType
node_data: BaseNodeData
node_title: str
parallel_id: str | None = None
"""parallel id if node is in parallel"""
parallel_start_node_id: str | None = None
@ -203,8 +171,7 @@ class QueueLoopNextEvent(AppQueueEvent):
parallel_mode_run_id: str | None = None
"""iteration run in parallel mode run id"""
node_run_index: int
output: Any | None = None # output for the current loop
duration: float | None = None
output: Any = None # output for the current loop
class QueueLoopCompletedEvent(AppQueueEvent):
@ -217,7 +184,7 @@ class QueueLoopCompletedEvent(AppQueueEvent):
node_execution_id: str
node_id: str
node_type: NodeType
node_data: BaseNodeData
node_title: str
parallel_id: str | None = None
"""parallel id if node is in parallel"""
parallel_start_node_id: str | None = None
@ -229,9 +196,9 @@ class QueueLoopCompletedEvent(AppQueueEvent):
start_at: datetime
node_run_index: int
inputs: Mapping[str, Any] | None = None
outputs: Mapping[str, Any] | None = None
metadata: Mapping[str, Any] | None = None
inputs: Mapping[str, object] = Field(default_factory=dict)
outputs: Mapping[str, object] = Field(default_factory=dict)
metadata: Mapping[str, object] = Field(default_factory=dict)
steps: int = 0
error: str | None = None
@ -332,7 +299,7 @@ class QueueWorkflowSucceededEvent(AppQueueEvent):
"""
event: QueueEvent = QueueEvent.WORKFLOW_SUCCEEDED
outputs: dict[str, Any] | None = None
outputs: Mapping[str, object] = Field(default_factory=dict)
class QueueWorkflowFailedEvent(AppQueueEvent):
@ -352,7 +319,7 @@ class QueueWorkflowPartialSuccessEvent(AppQueueEvent):
event: QueueEvent = QueueEvent.WORKFLOW_PARTIAL_SUCCEEDED
exceptions_count: int
outputs: dict[str, Any] | None = None
outputs: Mapping[str, object] = Field(default_factory=dict)
class QueueNodeStartedEvent(AppQueueEvent):
@ -364,27 +331,24 @@ class QueueNodeStartedEvent(AppQueueEvent):
node_execution_id: str
node_id: str
node_title: str
node_type: NodeType
node_data: BaseNodeData
node_run_index: int = 1
node_run_index: int = 1  # FIXME(-LAN-): may not be used
predecessor_node_id: str | None = None
parallel_id: str | None = None
"""parallel id if node is in parallel"""
parallel_start_node_id: str | None = None
"""parallel start node id if node is in parallel"""
parent_parallel_id: str | None = None
"""parent parallel id if node is in parallel"""
parent_parallel_start_node_id: str | None = None
"""parent parallel start node id if node is in parallel"""
in_iteration_id: str | None = None
"""iteration id if node is in iteration"""
in_loop_id: str | None = None
"""loop id if node is in loop"""
start_at: datetime
parallel_mode_run_id: str | None = None
"""iteration run in parallel mode run id"""
agent_strategy: AgentNodeStrategyInit | None = None
# FIXME(-LAN-): only for ToolNode, need to refactor
provider_type: str # should be a core.tools.entities.tool_entities.ToolProviderType
provider_id: str
class QueueNodeSucceededEvent(AppQueueEvent):
"""
@ -396,7 +360,6 @@ class QueueNodeSucceededEvent(AppQueueEvent):
node_execution_id: str
node_id: str
node_type: NodeType
node_data: BaseNodeData
parallel_id: str | None = None
"""parallel id if node is in parallel"""
parallel_start_node_id: str | None = None
@ -411,16 +374,12 @@ class QueueNodeSucceededEvent(AppQueueEvent):
"""loop id if node is in loop"""
start_at: datetime
inputs: Mapping[str, Any] | None = None
process_data: Mapping[str, Any] | None = None
outputs: Mapping[str, Any] | None = None
inputs: Mapping[str, object] = Field(default_factory=dict)
process_data: Mapping[str, object] = Field(default_factory=dict)
outputs: Mapping[str, object] = Field(default_factory=dict)
execution_metadata: Mapping[WorkflowNodeExecutionMetadataKey, Any] | None = None
error: str | None = None
"""single iteration duration map"""
iteration_duration_map: dict[str, float] | None = None
"""single loop duration map"""
loop_duration_map: dict[str, float] | None = None
class QueueAgentLogEvent(AppQueueEvent):
@ -436,7 +395,7 @@ class QueueAgentLogEvent(AppQueueEvent):
error: str | None = None
status: str
data: Mapping[str, Any]
metadata: Mapping[str, Any] | None = None
metadata: Mapping[str, object] = Field(default_factory=dict)
node_id: str
@ -445,81 +404,15 @@ class QueueNodeRetryEvent(QueueNodeStartedEvent):
event: QueueEvent = QueueEvent.RETRY
inputs: Mapping[str, Any] | None = None
process_data: Mapping[str, Any] | None = None
outputs: Mapping[str, Any] | None = None
inputs: Mapping[str, object] = Field(default_factory=dict)
process_data: Mapping[str, object] = Field(default_factory=dict)
outputs: Mapping[str, object] = Field(default_factory=dict)
execution_metadata: Mapping[WorkflowNodeExecutionMetadataKey, Any] | None = None
error: str
retry_index: int # retry index
class QueueNodeInIterationFailedEvent(AppQueueEvent):
"""
QueueNodeInIterationFailedEvent entity
"""
event: QueueEvent = QueueEvent.NODE_FAILED
node_execution_id: str
node_id: str
node_type: NodeType
node_data: BaseNodeData
parallel_id: str | None = None
"""parallel id if node is in parallel"""
parallel_start_node_id: str | None = None
"""parallel start node id if node is in parallel"""
parent_parallel_id: str | None = None
"""parent parallel id if node is in parallel"""
parent_parallel_start_node_id: str | None = None
"""parent parallel start node id if node is in parallel"""
in_iteration_id: str | None = None
"""iteration id if node is in iteration"""
in_loop_id: str | None = None
"""loop id if node is in loop"""
start_at: datetime
inputs: Mapping[str, Any] | None = None
process_data: Mapping[str, Any] | None = None
outputs: Mapping[str, Any] | None = None
execution_metadata: Mapping[WorkflowNodeExecutionMetadataKey, Any] | None = None
error: str
class QueueNodeInLoopFailedEvent(AppQueueEvent):
"""
QueueNodeInLoopFailedEvent entity
"""
event: QueueEvent = QueueEvent.NODE_FAILED
node_execution_id: str
node_id: str
node_type: NodeType
node_data: BaseNodeData
parallel_id: str | None = None
"""parallel id if node is in parallel"""
parallel_start_node_id: str | None = None
"""parallel start node id if node is in parallel"""
parent_parallel_id: str | None = None
"""parent parallel id if node is in parallel"""
parent_parallel_start_node_id: str | None = None
"""parent parallel start node id if node is in parallel"""
in_iteration_id: str | None = None
"""iteration id if node is in iteration"""
in_loop_id: str | None = None
"""loop id if node is in loop"""
start_at: datetime
inputs: Mapping[str, Any] | None = None
process_data: Mapping[str, Any] | None = None
outputs: Mapping[str, Any] | None = None
execution_metadata: Mapping[WorkflowNodeExecutionMetadataKey, Any] | None = None
error: str
class QueueNodeExceptionEvent(AppQueueEvent):
"""
QueueNodeExceptionEvent entity
@ -530,7 +423,6 @@ class QueueNodeExceptionEvent(AppQueueEvent):
node_execution_id: str
node_id: str
node_type: NodeType
node_data: BaseNodeData
parallel_id: str | None = None
"""parallel id if node is in parallel"""
parallel_start_node_id: str | None = None
@ -545,9 +437,9 @@ class QueueNodeExceptionEvent(AppQueueEvent):
"""loop id if node is in loop"""
start_at: datetime
inputs: Mapping[str, Any] | None = None
process_data: Mapping[str, Any] | None = None
outputs: Mapping[str, Any] | None = None
inputs: Mapping[str, object] = Field(default_factory=dict)
process_data: Mapping[str, object] = Field(default_factory=dict)
outputs: Mapping[str, object] = Field(default_factory=dict)
execution_metadata: Mapping[WorkflowNodeExecutionMetadataKey, Any] | None = None
error: str
@ -563,24 +455,16 @@ class QueueNodeFailedEvent(AppQueueEvent):
node_execution_id: str
node_id: str
node_type: NodeType
node_data: BaseNodeData
parallel_id: str | None = None
"""parallel id if node is in parallel"""
parallel_start_node_id: str | None = None
"""parallel start node id if node is in parallel"""
parent_parallel_id: str | None = None
"""parent parallel id if node is in parallel"""
parent_parallel_start_node_id: str | None = None
"""parent parallel start node id if node is in parallel"""
in_iteration_id: str | None = None
"""iteration id if node is in iteration"""
in_loop_id: str | None = None
"""loop id if node is in loop"""
start_at: datetime
inputs: Mapping[str, Any] | None = None
process_data: Mapping[str, Any] | None = None
outputs: Mapping[str, Any] | None = None
inputs: Mapping[str, object] = Field(default_factory=dict)
process_data: Mapping[str, object] = Field(default_factory=dict)
outputs: Mapping[str, object] = Field(default_factory=dict)
execution_metadata: Mapping[WorkflowNodeExecutionMetadataKey, Any] | None = None
error: str
@ -610,7 +494,7 @@ class QueueErrorEvent(AppQueueEvent):
"""
event: QueueEvent = QueueEvent.ERROR
error: Any | None = None
error: Any = None
class QueuePingEvent(AppQueueEvent):
@ -678,61 +562,3 @@ class WorkflowQueueMessage(QueueMessage):
"""
pass
class QueueParallelBranchRunStartedEvent(AppQueueEvent):
"""
QueueParallelBranchRunStartedEvent entity
"""
event: QueueEvent = QueueEvent.PARALLEL_BRANCH_RUN_STARTED
parallel_id: str
parallel_start_node_id: str
parent_parallel_id: str | None = None
"""parent parallel id if node is in parallel"""
parent_parallel_start_node_id: str | None = None
"""parent parallel start node id if node is in parallel"""
in_iteration_id: str | None = None
"""iteration id if node is in iteration"""
in_loop_id: str | None = None
"""loop id if node is in loop"""
class QueueParallelBranchRunSucceededEvent(AppQueueEvent):
"""
QueueParallelBranchRunSucceededEvent entity
"""
event: QueueEvent = QueueEvent.PARALLEL_BRANCH_RUN_SUCCEEDED
parallel_id: str
parallel_start_node_id: str
parent_parallel_id: str | None = None
"""parent parallel id if node is in parallel"""
parent_parallel_start_node_id: str | None = None
"""parent parallel start node id if node is in parallel"""
in_iteration_id: str | None = None
"""iteration id if node is in iteration"""
in_loop_id: str | None = None
"""loop id if node is in loop"""
class QueueParallelBranchRunFailedEvent(AppQueueEvent):
"""
QueueParallelBranchRunFailedEvent entity
"""
event: QueueEvent = QueueEvent.PARALLEL_BRANCH_RUN_FAILED
parallel_id: str
parallel_start_node_id: str
parent_parallel_id: str | None = None
"""parent parallel id if node is in parallel"""
parent_parallel_start_node_id: str | None = None
"""parent parallel start node id if node is in parallel"""
in_iteration_id: str | None = None
"""iteration id if node is in iteration"""
in_loop_id: str | None = None
"""loop id if node is in loop"""
error: str

View File

@ -0,0 +1,14 @@
from typing import Any
from pydantic import BaseModel
class RagPipelineInvokeEntity(BaseModel):
pipeline_id: str
application_generate_entity: dict[str, Any]
user_id: str
tenant_id: str
workflow_id: str
streaming: bool
workflow_execution_id: str | None = None
workflow_thread_pool_id: str | None = None
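A hedged instantiation sketch of RagPipelineInvokeEntity above; all field values are placeholders and the optional fields are left at their defaults.
entity = RagPipelineInvokeEntity(
    pipeline_id="pl-1",
    application_generate_entity={},
    user_id="user-1",
    tenant_id="tenant-1",
    workflow_id="wf-1",
    streaming=True,
)
assert entity.workflow_execution_id is None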

View File

@ -1,13 +1,13 @@
from collections.abc import Mapping, Sequence
from enum import StrEnum, auto
from enum import StrEnum
from typing import Any
from pydantic import BaseModel, ConfigDict, Field
from core.model_runtime.entities.llm_entities import LLMResult, LLMUsage
from core.rag.entities.citation_metadata import RetrievalSourceMetadata
from core.workflow.entities.node_entities import AgentNodeStrategyInit
from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus
from core.workflow.entities import AgentNodeStrategyInit
from core.workflow.enums import WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus
class AnnotationReplyAccount(BaseModel):
@ -55,32 +55,30 @@ class StreamEvent(StrEnum):
Stream event
"""
PING = auto()
ERROR = auto()
MESSAGE = auto()
MESSAGE_END = auto()
TTS_MESSAGE = auto()
TTS_MESSAGE_END = auto()
MESSAGE_FILE = auto()
MESSAGE_REPLACE = auto()
AGENT_THOUGHT = auto()
AGENT_MESSAGE = auto()
WORKFLOW_STARTED = auto()
WORKFLOW_FINISHED = auto()
NODE_STARTED = auto()
NODE_FINISHED = auto()
NODE_RETRY = auto()
PARALLEL_BRANCH_STARTED = auto()
PARALLEL_BRANCH_FINISHED = auto()
ITERATION_STARTED = auto()
ITERATION_NEXT = auto()
ITERATION_COMPLETED = auto()
LOOP_STARTED = auto()
LOOP_NEXT = auto()
LOOP_COMPLETED = auto()
TEXT_CHUNK = auto()
TEXT_REPLACE = auto()
AGENT_LOG = auto()
PING = "ping"
ERROR = "error"
MESSAGE = "message"
MESSAGE_END = "message_end"
TTS_MESSAGE = "tts_message"
TTS_MESSAGE_END = "tts_message_end"
MESSAGE_FILE = "message_file"
MESSAGE_REPLACE = "message_replace"
AGENT_THOUGHT = "agent_thought"
AGENT_MESSAGE = "agent_message"
WORKFLOW_STARTED = "workflow_started"
WORKFLOW_FINISHED = "workflow_finished"
NODE_STARTED = "node_started"
NODE_FINISHED = "node_finished"
NODE_RETRY = "node_retry"
ITERATION_STARTED = "iteration_started"
ITERATION_NEXT = "iteration_next"
ITERATION_COMPLETED = "iteration_completed"
LOOP_STARTED = "loop_started"
LOOP_NEXT = "loop_next"
LOOP_COMPLETED = "loop_completed"
TEXT_CHUNK = "text_chunk"
TEXT_REPLACE = "text_replace"
AGENT_LOG = "agent_log"
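The switch from auto() to explicit values does not change what goes over the wire: with StrEnum, auto() already yields the lower-cased member name, so spelling the values out only makes the contract explicit. A small standalone sketch (Python 3.11+, not Dify code):
from enum import StrEnum, auto

class StreamEventSketch(StrEnum):
    MESSAGE_END = auto()        # value is "message_end", the lower-cased member name
    TEXT_CHUNK = "text_chunk"   # explicit value, identical on the wire

assert StreamEventSketch.MESSAGE_END == "message_end"
assert f"event: {StreamEventSketch.TEXT_CHUNK}" == "event: text_chunk"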
class StreamResponse(BaseModel):
@ -138,7 +136,7 @@ class MessageEndStreamResponse(StreamResponse):
event: StreamEvent = StreamEvent.MESSAGE_END
id: str
metadata: dict = Field(default_factory=dict)
metadata: Mapping[str, object] = Field(default_factory=dict)
files: Sequence[Mapping[str, Any]] | None = None
@ -175,7 +173,7 @@ class AgentThoughtStreamResponse(StreamResponse):
thought: str | None = None
observation: str | None = None
tool: str | None = None
tool_labels: dict | None = None
tool_labels: Mapping[str, object] = Field(default_factory=dict)
tool_input: str | None = None
message_files: list[str] | None = None
@ -228,7 +226,7 @@ class WorkflowFinishStreamResponse(StreamResponse):
elapsed_time: float
total_tokens: int
total_steps: int
created_by: dict | None = None
created_by: Mapping[str, object] = Field(default_factory=dict)
created_at: int
finished_at: int
exceptions_count: int | None = 0
@ -256,8 +254,9 @@ class NodeStartStreamResponse(StreamResponse):
index: int
predecessor_node_id: str | None = None
inputs: Mapping[str, Any] | None = None
inputs_truncated: bool = False
created_at: int
extras: dict = Field(default_factory=dict)
extras: dict[str, object] = Field(default_factory=dict)
parallel_id: str | None = None
parallel_start_node_id: str | None = None
parent_parallel_id: str | None = None
@ -313,8 +312,11 @@ class NodeFinishStreamResponse(StreamResponse):
index: int
predecessor_node_id: str | None = None
inputs: Mapping[str, Any] | None = None
inputs_truncated: bool = False
process_data: Mapping[str, Any] | None = None
process_data_truncated: bool = False
outputs: Mapping[str, Any] | None = None
outputs_truncated: bool = True
status: str
error: str | None = None
elapsed_time: float
@ -382,8 +384,11 @@ class NodeRetryStreamResponse(StreamResponse):
index: int
predecessor_node_id: str | None = None
inputs: Mapping[str, Any] | None = None
inputs_truncated: bool = False
process_data: Mapping[str, Any] | None = None
process_data_truncated: bool = False
outputs: Mapping[str, Any] | None = None
outputs_truncated: bool = False
status: str
error: str | None = None
elapsed_time: float
@ -436,54 +441,6 @@ class NodeRetryStreamResponse(StreamResponse):
}
class ParallelBranchStartStreamResponse(StreamResponse):
"""
ParallelBranchStartStreamResponse entity
"""
class Data(BaseModel):
"""
Data entity
"""
parallel_id: str
parallel_branch_id: str
parent_parallel_id: str | None = None
parent_parallel_start_node_id: str | None = None
iteration_id: str | None = None
loop_id: str | None = None
created_at: int
event: StreamEvent = StreamEvent.PARALLEL_BRANCH_STARTED
workflow_run_id: str
data: Data
class ParallelBranchFinishedStreamResponse(StreamResponse):
"""
ParallelBranchFinishedStreamResponse entity
"""
class Data(BaseModel):
"""
Data entity
"""
parallel_id: str
parallel_branch_id: str
parent_parallel_id: str | None = None
parent_parallel_start_node_id: str | None = None
iteration_id: str | None = None
loop_id: str | None = None
status: str
error: str | None = None
created_at: int
event: StreamEvent = StreamEvent.PARALLEL_BRANCH_FINISHED
workflow_run_id: str
data: Data
class IterationNodeStartStreamResponse(StreamResponse):
"""
NodeStartStreamResponse entity
@ -502,8 +459,7 @@ class IterationNodeStartStreamResponse(StreamResponse):
extras: dict = Field(default_factory=dict)
metadata: Mapping = {}
inputs: Mapping = {}
parallel_id: str | None = None
parallel_start_node_id: str | None = None
inputs_truncated: bool = False
event: StreamEvent = StreamEvent.ITERATION_STARTED
workflow_run_id: str
@ -526,12 +482,7 @@ class IterationNodeNextStreamResponse(StreamResponse):
title: str
index: int
created_at: int
pre_iteration_output: Any | None = None
extras: dict = Field(default_factory=dict)
parallel_id: str | None = None
parallel_start_node_id: str | None = None
parallel_mode_run_id: str | None = None
duration: float | None = None
event: StreamEvent = StreamEvent.ITERATION_NEXT
workflow_run_id: str
@ -553,18 +504,18 @@ class IterationNodeCompletedStreamResponse(StreamResponse):
node_type: str
title: str
outputs: Mapping | None = None
outputs_truncated: bool = False
created_at: int
extras: dict | None = None
inputs: Mapping | None = None
inputs_truncated: bool = False
status: WorkflowNodeExecutionStatus
error: str | None = None
elapsed_time: float
total_tokens: int
execution_metadata: Mapping | None = None
execution_metadata: Mapping[str, object] = Field(default_factory=dict)
finished_at: int
steps: int
parallel_id: str | None = None
parallel_start_node_id: str | None = None
event: StreamEvent = StreamEvent.ITERATION_COMPLETED
workflow_run_id: str
@ -589,6 +540,7 @@ class LoopNodeStartStreamResponse(StreamResponse):
extras: dict = Field(default_factory=dict)
metadata: Mapping = {}
inputs: Mapping = {}
inputs_truncated: bool = False
parallel_id: str | None = None
parallel_start_node_id: str | None = None
@ -613,12 +565,11 @@ class LoopNodeNextStreamResponse(StreamResponse):
title: str
index: int
created_at: int
pre_loop_output: Any | None = None
extras: dict = Field(default_factory=dict)
pre_loop_output: Any = None
extras: Mapping[str, object] = Field(default_factory=dict)
parallel_id: str | None = None
parallel_start_node_id: str | None = None
parallel_mode_run_id: str | None = None
duration: float | None = None
event: StreamEvent = StreamEvent.LOOP_NEXT
workflow_run_id: str
@ -640,14 +591,16 @@ class LoopNodeCompletedStreamResponse(StreamResponse):
node_type: str
title: str
outputs: Mapping | None = None
outputs_truncated: bool = False
created_at: int
extras: dict | None = None
inputs: Mapping | None = None
inputs_truncated: bool = False
status: WorkflowNodeExecutionStatus
error: str | None = None
elapsed_time: float
total_tokens: int
execution_metadata: Mapping | None = None
execution_metadata: Mapping[str, object] = Field(default_factory=dict)
finished_at: int
steps: int
parallel_id: str | None = None
@ -757,7 +710,7 @@ class ChatbotAppBlockingResponse(AppBlockingResponse):
conversation_id: str
message_id: str
answer: str
metadata: dict = Field(default_factory=dict)
metadata: Mapping[str, object] = Field(default_factory=dict)
created_at: int
data: Data
@ -777,7 +730,7 @@ class CompletionAppBlockingResponse(AppBlockingResponse):
mode: str
message_id: str
answer: str
metadata: dict = Field(default_factory=dict)
metadata: Mapping[str, object] = Field(default_factory=dict)
created_at: int
data: Data
@ -825,7 +778,7 @@ class AgentLogStreamResponse(StreamResponse):
error: str | None = None
status: str
data: Mapping[str, Any]
metadata: Mapping[str, Any] | None = None
metadata: Mapping[str, object] = Field(default_factory=dict)
node_id: str
event: StreamEvent = StreamEvent.AGENT_LOG

View File

@ -107,7 +107,6 @@ class MessageCycleManager:
if dify_config.DEBUG:
logger.exception("generate conversation name failed, conversation_id: %s", conversation_id)
db.session.merge(conversation)
db.session.commit()
db.session.close()
@ -138,6 +137,8 @@ class MessageCycleManager:
:param event: event
:return:
"""
if not self._application_generate_entity.app_config.additional_features:
raise ValueError("Additional features not found")
if self._application_generate_entity.app_config.additional_features.show_retrieve_source:
self._task_state.metadata.retriever_resources = event.retriever_resources

View File

@ -109,7 +109,9 @@ class AppGeneratorTTSPublisher:
elif isinstance(message.event, QueueNodeSucceededEvent):
if message.event.outputs is None:
continue
self.msg_text += message.event.outputs.get("output", "")
output = message.event.outputs.get("output", "")
if isinstance(output, str):
self.msg_text += output
self.last_message = message
sentence_arr, text_tmp = self._extract_sentence(self.msg_text)
if len(sentence_arr) >= min(self.max_sentence, 7):
@ -119,7 +121,7 @@ class AppGeneratorTTSPublisher:
_invoice_tts, text_content, self.model_instance, self.tenant_id, self.voice
)
future_queue.put(futures_result)
if text_tmp:
if isinstance(text_tmp, str):
self.msg_text = text_tmp
else:
self.msg_text = ""

View File

@ -105,6 +105,14 @@ class DifyAgentCallbackHandler(BaseModel):
self.current_loop += 1
def on_datasource_start(self, datasource_name: str, datasource_inputs: Mapping[str, Any]) -> None:
"""Run on datasource start."""
if dify_config.DEBUG:
print_text(
"\n[on_datasource_start] DatasourceCall:" + datasource_name + "\n" + str(datasource_inputs) + "\n",
color=self.color,
)
@property
def ignore_agent(self) -> bool:
"""Whether to ignore agent callbacks."""

View File

@ -0,0 +1,41 @@
from abc import ABC, abstractmethod
from configs import dify_config
from core.datasource.__base.datasource_runtime import DatasourceRuntime
from core.datasource.entities.datasource_entities import (
DatasourceEntity,
DatasourceProviderType,
)
class DatasourcePlugin(ABC):
entity: DatasourceEntity
runtime: DatasourceRuntime
icon: str
def __init__(
self,
entity: DatasourceEntity,
runtime: DatasourceRuntime,
icon: str,
) -> None:
self.entity = entity
self.runtime = runtime
self.icon = icon
@abstractmethod
def datasource_provider_type(self) -> str:
"""
returns the type of the datasource provider
"""
return DatasourceProviderType.LOCAL_FILE
def fork_datasource_runtime(self, runtime: DatasourceRuntime) -> "DatasourcePlugin":
return self.__class__(
entity=self.entity.model_copy(),
runtime=runtime,
icon=self.icon,
)
def get_icon_url(self, tenant_id: str) -> str:
return f"{dify_config.CONSOLE_API_URL}/console/api/workspaces/current/plugin/icon?tenant_id={tenant_id}&filename={self.icon}" # noqa: E501

View File

@ -0,0 +1,118 @@
from abc import ABC, abstractmethod
from typing import Any
from core.datasource.__base.datasource_plugin import DatasourcePlugin
from core.datasource.entities.datasource_entities import DatasourceProviderEntityWithPlugin, DatasourceProviderType
from core.entities.provider_entities import ProviderConfig
from core.plugin.impl.tool import PluginToolManager
from core.tools.errors import ToolProviderCredentialValidationError
class DatasourcePluginProviderController(ABC):
entity: DatasourceProviderEntityWithPlugin
tenant_id: str
def __init__(self, entity: DatasourceProviderEntityWithPlugin, tenant_id: str) -> None:
self.entity = entity
self.tenant_id = tenant_id
@property
def need_credentials(self) -> bool:
"""
returns whether the provider needs credentials
:return: whether the provider needs credentials
"""
return self.entity.credentials_schema is not None and len(self.entity.credentials_schema) != 0
def _validate_credentials(self, user_id: str, credentials: dict[str, Any]) -> None:
"""
validate the credentials of the provider
"""
manager = PluginToolManager()
if not manager.validate_datasource_credentials(
tenant_id=self.tenant_id,
user_id=user_id,
provider=self.entity.identity.name,
credentials=credentials,
):
raise ToolProviderCredentialValidationError("Invalid credentials")
@property
def provider_type(self) -> DatasourceProviderType:
"""
returns the type of the provider
"""
return DatasourceProviderType.LOCAL_FILE
@abstractmethod
def get_datasource(self, datasource_name: str) -> DatasourcePlugin:
"""
return datasource with given name
"""
pass
def validate_credentials_format(self, credentials: dict[str, Any]) -> None:
"""
validate the format of the credentials of the provider and set the default value if needed
:param credentials: the credentials of the tool
"""
credentials_schema = dict[str, ProviderConfig]()
if self.entity.credentials_schema is None:
return
for credential in self.entity.credentials_schema:
credentials_schema[credential.name] = credential
credentials_need_to_validate: dict[str, ProviderConfig] = {}
for credential_name in credentials_schema:
credentials_need_to_validate[credential_name] = credentials_schema[credential_name]
for credential_name in credentials:
if credential_name not in credentials_need_to_validate:
raise ToolProviderCredentialValidationError(
f"credential {credential_name} not found in provider {self.entity.identity.name}"
)
# check type
credential_schema = credentials_need_to_validate[credential_name]
if not credential_schema.required and credentials[credential_name] is None:
continue
if credential_schema.type in {ProviderConfig.Type.SECRET_INPUT, ProviderConfig.Type.TEXT_INPUT}:
if not isinstance(credentials[credential_name], str):
raise ToolProviderCredentialValidationError(f"credential {credential_name} should be string")
elif credential_schema.type == ProviderConfig.Type.SELECT:
if not isinstance(credentials[credential_name], str):
raise ToolProviderCredentialValidationError(f"credential {credential_name} should be string")
options = credential_schema.options
if not isinstance(options, list):
raise ToolProviderCredentialValidationError(f"credential {credential_name} options should be list")
if credentials[credential_name] not in [x.value for x in options]:
raise ToolProviderCredentialValidationError(
f"credential {credential_name} should be one of {options}"
)
credentials_need_to_validate.pop(credential_name)
for credential_name in credentials_need_to_validate:
credential_schema = credentials_need_to_validate[credential_name]
if credential_schema.required:
raise ToolProviderCredentialValidationError(f"credential {credential_name} is required")
# the credential is not set currently, set the default value if needed
if credential_schema.default is not None:
default_value = credential_schema.default
# parse default value into the correct type
if credential_schema.type in {
ProviderConfig.Type.SECRET_INPUT,
ProviderConfig.Type.TEXT_INPUT,
ProviderConfig.Type.SELECT,
}:
default_value = str(default_value)
credentials[credential_name] = default_value

View File

@ -0,0 +1,39 @@
from typing import TYPE_CHECKING, Any, Optional
from pydantic import BaseModel, Field
# Import InvokeFrom locally to avoid circular import
from core.app.entities.app_invoke_entities import InvokeFrom
from core.datasource.entities.datasource_entities import DatasourceInvokeFrom
if TYPE_CHECKING:
from core.app.entities.app_invoke_entities import InvokeFrom
class DatasourceRuntime(BaseModel):
"""
Meta data of a datasource call processing
"""
tenant_id: str
datasource_id: str | None = None
invoke_from: Optional["InvokeFrom"] = None
datasource_invoke_from: DatasourceInvokeFrom | None = None
credentials: dict[str, Any] = Field(default_factory=dict)
runtime_parameters: dict[str, Any] = Field(default_factory=dict)
class FakeDatasourceRuntime(DatasourceRuntime):
"""
Fake datasource runtime for testing
"""
def __init__(self):
super().__init__(
tenant_id="fake_tenant_id",
datasource_id="fake_datasource_id",
invoke_from=InvokeFrom.DEBUGGER,
datasource_invoke_from=DatasourceInvokeFrom.RAG_PIPELINE,
credentials={},
runtime_parameters={},
)
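A hedged usage sketch, assuming the classes above are in scope inside a Dify backend context; the credential key is a placeholder.
runtime = DatasourceRuntime(
    tenant_id="tenant-123",
    datasource_invoke_from=DatasourceInvokeFrom.RAG_PIPELINE,
    credentials={"access_token": "..."},
)
fake_runtime = FakeDatasourceRuntime()  # pre-filled values for unit tests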

View File

View File

@ -0,0 +1,218 @@
import base64
import hashlib
import hmac
import logging
import os
import time
from datetime import datetime
from mimetypes import guess_extension, guess_type
from typing import Union
from uuid import uuid4
import httpx
from configs import dify_config
from core.helper import ssrf_proxy
from extensions.ext_database import db
from extensions.ext_storage import storage
from models.enums import CreatorUserRole
from models.model import MessageFile, UploadFile
from models.tools import ToolFile
logger = logging.getLogger(__name__)
class DatasourceFileManager:
@staticmethod
def sign_file(datasource_file_id: str, extension: str) -> str:
"""
sign file to get a temporary url
"""
base_url = dify_config.FILES_URL
file_preview_url = f"{base_url}/files/datasources/{datasource_file_id}{extension}"
timestamp = str(int(time.time()))
nonce = os.urandom(16).hex()
data_to_sign = f"file-preview|{datasource_file_id}|{timestamp}|{nonce}"
secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
encoded_sign = base64.urlsafe_b64encode(sign).decode()
return f"{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}"
@staticmethod
def verify_file(datasource_file_id: str, timestamp: str, nonce: str, sign: str) -> bool:
"""
verify signature
"""
data_to_sign = f"file-preview|{datasource_file_id}|{timestamp}|{nonce}"
secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
recalculated_sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
recalculated_encoded_sign = base64.urlsafe_b64encode(recalculated_sign).decode()
# verify signature
if sign != recalculated_encoded_sign:
return False
current_time = int(time.time())
return current_time - int(timestamp) <= dify_config.FILES_ACCESS_TIMEOUT
@staticmethod
def create_file_by_raw(
*,
user_id: str,
tenant_id: str,
conversation_id: str | None,
file_binary: bytes,
mimetype: str,
filename: str | None = None,
) -> UploadFile:
extension = guess_extension(mimetype) or ".bin"
unique_name = uuid4().hex
unique_filename = f"{unique_name}{extension}"
# default just as before
present_filename = unique_filename
if filename is not None:
has_extension = len(filename.split(".")) > 1
# Add extension flexibly
present_filename = filename if has_extension else f"{filename}{extension}"
filepath = f"datasources/{tenant_id}/{unique_filename}"
storage.save(filepath, file_binary)
upload_file = UploadFile(
tenant_id=tenant_id,
storage_type=dify_config.STORAGE_TYPE,
key=filepath,
name=present_filename,
size=len(file_binary),
extension=extension,
mime_type=mimetype,
created_by_role=CreatorUserRole.ACCOUNT,
created_by=user_id,
used=False,
hash=hashlib.sha3_256(file_binary).hexdigest(),
source_url="",
created_at=datetime.now(),
)
db.session.add(upload_file)
db.session.commit()
db.session.refresh(upload_file)
return upload_file
@staticmethod
def create_file_by_url(
user_id: str,
tenant_id: str,
file_url: str,
conversation_id: str | None = None,
) -> ToolFile:
# try to download the file
try:
response = ssrf_proxy.get(file_url)
response.raise_for_status()
blob = response.content
except httpx.TimeoutException:
raise ValueError(f"timeout when downloading file from {file_url}")
mimetype = (
guess_type(file_url)[0]
or response.headers.get("Content-Type", "").split(";")[0].strip()
or "application/octet-stream"
)
extension = guess_extension(mimetype) or ".bin"
unique_name = uuid4().hex
filename = f"{unique_name}{extension}"
filepath = f"tools/{tenant_id}/{filename}"
storage.save(filepath, blob)
tool_file = ToolFile(
tenant_id=tenant_id,
user_id=user_id,
conversation_id=conversation_id,
file_key=filepath,
mimetype=mimetype,
original_url=file_url,
name=filename,
size=len(blob),
)
db.session.add(tool_file)
db.session.commit()
return tool_file
@staticmethod
def get_file_binary(id: str) -> Union[tuple[bytes, str], None]:
"""
get file binary
:param id: the id of the file
:return: the binary of the file, mime type
"""
upload_file: UploadFile | None = db.session.query(UploadFile).where(UploadFile.id == id).first()
if not upload_file:
return None
blob = storage.load_once(upload_file.key)
return blob, upload_file.mime_type
@staticmethod
def get_file_binary_by_message_file_id(id: str) -> Union[tuple[bytes, str], None]:
"""
get file binary
:param id: the id of the file
:return: the binary of the file, mime type
"""
message_file: MessageFile | None = db.session.query(MessageFile).where(MessageFile.id == id).first()
# Check if message_file is not None
if message_file is not None:
# get tool file id
if message_file.url is not None:
tool_file_id = message_file.url.split("/")[-1]
# trim extension
tool_file_id = tool_file_id.split(".")[0]
else:
tool_file_id = None
else:
tool_file_id = None
tool_file: ToolFile | None = db.session.query(ToolFile).where(ToolFile.id == tool_file_id).first()
if not tool_file:
return None
blob = storage.load_once(tool_file.file_key)
return blob, tool_file.mimetype
@staticmethod
def get_file_generator_by_upload_file_id(upload_file_id: str):
"""
get file binary
:param tool_file_id: the id of the tool file
:return: the binary of the file, mime type
"""
upload_file: UploadFile | None = db.session.query(UploadFile).where(UploadFile.id == upload_file_id).first()
if not upload_file:
return None, None
stream = storage.load_stream(upload_file.key)
return stream, upload_file.mime_type
# init tool_file_parser
# from core.file.datasource_file_parser import datasource_file_manager
#
# datasource_file_manager["manager"] = DatasourceFileManager
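A standalone sketch (not Dify code) of the sign/verify round trip implemented by sign_file and verify_file above; the secret key and timeout are stand-in values.
import base64
import hashlib
import hmac
import os
import time

SECRET_KEY = b"stand-in-secret"
FILES_ACCESS_TIMEOUT = 300

def sign(file_id: str) -> tuple[str, str, str]:
    timestamp, nonce = str(int(time.time())), os.urandom(16).hex()
    digest = hmac.new(SECRET_KEY, f"file-preview|{file_id}|{timestamp}|{nonce}".encode(), hashlib.sha256).digest()
    return timestamp, nonce, base64.urlsafe_b64encode(digest).decode()

def verify(file_id: str, timestamp: str, nonce: str, sig: str) -> bool:
    digest = hmac.new(SECRET_KEY, f"file-preview|{file_id}|{timestamp}|{nonce}".encode(), hashlib.sha256).digest()
    expected = base64.urlsafe_b64encode(digest).decode()
    return hmac.compare_digest(sig, expected) and int(time.time()) - int(timestamp) <= FILES_ACCESS_TIMEOUT

ts, nonce, sig = sign("file-abc")
assert verify("file-abc", ts, nonce, sig)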

View File

@ -0,0 +1,112 @@
import logging
from threading import Lock
from typing import Union
import contexts
from core.datasource.__base.datasource_plugin import DatasourcePlugin
from core.datasource.__base.datasource_provider import DatasourcePluginProviderController
from core.datasource.entities.common_entities import I18nObject
from core.datasource.entities.datasource_entities import DatasourceProviderType
from core.datasource.errors import DatasourceProviderNotFoundError
from core.datasource.local_file.local_file_provider import LocalFileDatasourcePluginProviderController
from core.datasource.online_document.online_document_provider import OnlineDocumentDatasourcePluginProviderController
from core.datasource.online_drive.online_drive_provider import OnlineDriveDatasourcePluginProviderController
from core.datasource.website_crawl.website_crawl_provider import WebsiteCrawlDatasourcePluginProviderController
from core.plugin.impl.datasource import PluginDatasourceManager
logger = logging.getLogger(__name__)
class DatasourceManager:
_builtin_provider_lock = Lock()
_hardcoded_providers: dict[str, DatasourcePluginProviderController] = {}
_builtin_providers_loaded = False
_builtin_tools_labels: dict[str, Union[I18nObject, None]] = {}
@classmethod
def get_datasource_plugin_provider(
cls, provider_id: str, tenant_id: str, datasource_type: DatasourceProviderType
) -> DatasourcePluginProviderController:
"""
get the datasource plugin provider
"""
# check if context is set
try:
contexts.datasource_plugin_providers.get()
except LookupError:
contexts.datasource_plugin_providers.set({})
contexts.datasource_plugin_providers_lock.set(Lock())
with contexts.datasource_plugin_providers_lock.get():
datasource_plugin_providers = contexts.datasource_plugin_providers.get()
if provider_id in datasource_plugin_providers:
return datasource_plugin_providers[provider_id]
manager = PluginDatasourceManager()
provider_entity = manager.fetch_datasource_provider(tenant_id, provider_id)
if not provider_entity:
raise DatasourceProviderNotFoundError(f"plugin provider {provider_id} not found")
controller: DatasourcePluginProviderController | None = None
match datasource_type:
case DatasourceProviderType.ONLINE_DOCUMENT:
controller = OnlineDocumentDatasourcePluginProviderController(
entity=provider_entity.declaration,
plugin_id=provider_entity.plugin_id,
plugin_unique_identifier=provider_entity.plugin_unique_identifier,
tenant_id=tenant_id,
)
case DatasourceProviderType.ONLINE_DRIVE:
controller = OnlineDriveDatasourcePluginProviderController(
entity=provider_entity.declaration,
plugin_id=provider_entity.plugin_id,
plugin_unique_identifier=provider_entity.plugin_unique_identifier,
tenant_id=tenant_id,
)
case DatasourceProviderType.WEBSITE_CRAWL:
controller = WebsiteCrawlDatasourcePluginProviderController(
entity=provider_entity.declaration,
plugin_id=provider_entity.plugin_id,
plugin_unique_identifier=provider_entity.plugin_unique_identifier,
tenant_id=tenant_id,
)
case DatasourceProviderType.LOCAL_FILE:
controller = LocalFileDatasourcePluginProviderController(
entity=provider_entity.declaration,
plugin_id=provider_entity.plugin_id,
plugin_unique_identifier=provider_entity.plugin_unique_identifier,
tenant_id=tenant_id,
)
case _:
raise ValueError(f"Unsupported datasource type: {datasource_type}")
if controller:
datasource_plugin_providers[provider_id] = controller
if controller is None:
raise DatasourceProviderNotFoundError(f"Datasource provider {provider_id} not found.")
return controller
@classmethod
def get_datasource_runtime(
cls,
provider_id: str,
datasource_name: str,
tenant_id: str,
datasource_type: DatasourceProviderType,
) -> DatasourcePlugin:
"""
get the datasource runtime
:param provider_id: the id of the provider
:param datasource_name: the name of the datasource
:param tenant_id: the tenant id
:param datasource_type: the type of the datasource provider
:return: the datasource plugin
"""
return cls.get_datasource_plugin_provider(
provider_id,
tenant_id,
datasource_type,
).get_datasource(datasource_name)
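A hedged usage sketch, assuming a worker context where the plugin daemon is reachable; the provider and datasource names are hypothetical.
plugin = DatasourceManager.get_datasource_runtime(
    provider_id="langgenius/notion/notion",
    datasource_name="notion_page_search",
    tenant_id="tenant-123",
    datasource_type=DatasourceProviderType.ONLINE_DOCUMENT,
)
print(plugin.get_icon_url(tenant_id="tenant-123"))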

View File

@ -0,0 +1,71 @@
from typing import Literal, Optional
from pydantic import BaseModel, Field, field_validator
from core.datasource.entities.datasource_entities import DatasourceParameter
from core.model_runtime.utils.encoders import jsonable_encoder
from core.tools.entities.common_entities import I18nObject
class DatasourceApiEntity(BaseModel):
author: str
name: str # identifier
label: I18nObject # label
description: I18nObject
parameters: list[DatasourceParameter] | None = None
labels: list[str] = Field(default_factory=list)
output_schema: dict | None = None
ToolProviderTypeApiLiteral = Optional[Literal["builtin", "api", "workflow"]]
class DatasourceProviderApiEntity(BaseModel):
id: str
author: str
name: str # identifier
description: I18nObject
icon: str | dict
label: I18nObject # label
type: str
masked_credentials: dict | None = None
original_credentials: dict | None = None
is_team_authorization: bool = False
allow_delete: bool = True
plugin_id: str | None = Field(default="", description="The plugin id of the datasource")
plugin_unique_identifier: str | None = Field(default="", description="The unique identifier of the datasource")
datasources: list[DatasourceApiEntity] = Field(default_factory=list)
labels: list[str] = Field(default_factory=list)
@field_validator("datasources", mode="before")
@classmethod
def convert_none_to_empty_list(cls, v):
return v if v is not None else []
def to_dict(self) -> dict:
# -------------
# overwrite datasource parameter types for temp fix
datasources = jsonable_encoder(self.datasources)
for datasource in datasources:
if datasource.get("parameters"):
for parameter in datasource.get("parameters"):
if parameter.get("type") == DatasourceParameter.DatasourceParameterType.SYSTEM_FILES.value:
parameter["type"] = "files"
# -------------
return {
"id": self.id,
"author": self.author,
"name": self.name,
"plugin_id": self.plugin_id,
"plugin_unique_identifier": self.plugin_unique_identifier,
"description": self.description.to_dict(),
"icon": self.icon,
"label": self.label.to_dict(),
"type": self.type,
"team_credentials": self.masked_credentials,
"is_team_authorization": self.is_team_authorization,
"allow_delete": self.allow_delete,
"datasources": datasources,
"labels": self.labels,
}
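A hedged construction sketch, assuming the classes above are in scope; the field values are illustrative, not real provider data.
entity = DatasourceProviderApiEntity(
    id="prov-1",
    author="langgenius",
    name="notion",
    description=I18nObject(en_US="Notion datasource"),
    icon="icon.svg",
    label=I18nObject(en_US="Notion"),
    type="online_document",
    datasources=None,  # the field validator above converts None to an empty list
)
assert entity.datasources == []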

View File

@ -0,0 +1,21 @@
from pydantic import BaseModel, Field
class I18nObject(BaseModel):
"""
Model class for i18n object.
"""
en_US: str
zh_Hans: str | None = Field(default=None)
pt_BR: str | None = Field(default=None)
ja_JP: str | None = Field(default=None)
def __init__(self, **data):
super().__init__(**data)
self.zh_Hans = self.zh_Hans or self.en_US
self.pt_BR = self.pt_BR or self.en_US
self.ja_JP = self.ja_JP or self.en_US
def to_dict(self) -> dict:
return {"zh_Hans": self.zh_Hans, "en_US": self.en_US, "pt_BR": self.pt_BR, "ja_JP": self.ja_JP}
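A hedged usage sketch of the fallback behaviour above, assuming the class is in scope: locales left unset fall back to en_US.
obj = I18nObject(en_US="Local File", zh_Hans="本地文件")
assert obj.pt_BR == "Local File"
assert obj.to_dict()["ja_JP"] == "Local File"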

View File

@ -0,0 +1,380 @@
import enum
from enum import Enum
from typing import Any
from pydantic import BaseModel, Field, ValidationInfo, field_validator
from yarl import URL
from configs import dify_config
from core.entities.provider_entities import ProviderConfig
from core.plugin.entities.oauth import OAuthSchema
from core.plugin.entities.parameters import (
PluginParameter,
PluginParameterOption,
PluginParameterType,
as_normal_type,
cast_parameter_value,
init_frontend_parameter,
)
from core.tools.entities.common_entities import I18nObject
from core.tools.entities.tool_entities import ToolInvokeMessage, ToolLabelEnum
class DatasourceProviderType(enum.StrEnum):
"""
Enum class for datasource provider
"""
ONLINE_DOCUMENT = "online_document"
LOCAL_FILE = "local_file"
WEBSITE_CRAWL = "website_crawl"
ONLINE_DRIVE = "online_drive"
@classmethod
def value_of(cls, value: str) -> "DatasourceProviderType":
"""
Get value of given mode.
:param value: mode value
:return: mode
"""
for mode in cls:
if mode.value == value:
return mode
raise ValueError(f"invalid mode value {value}")
class DatasourceParameter(PluginParameter):
"""
Overrides type
"""
class DatasourceParameterType(enum.StrEnum):
"""
removes TOOLS_SELECTOR from PluginParameterType
"""
STRING = PluginParameterType.STRING.value
NUMBER = PluginParameterType.NUMBER.value
BOOLEAN = PluginParameterType.BOOLEAN.value
SELECT = PluginParameterType.SELECT.value
SECRET_INPUT = PluginParameterType.SECRET_INPUT.value
FILE = PluginParameterType.FILE.value
FILES = PluginParameterType.FILES.value
# deprecated, should not use.
SYSTEM_FILES = PluginParameterType.SYSTEM_FILES.value
def as_normal_type(self):
return as_normal_type(self)
def cast_value(self, value: Any):
return cast_parameter_value(self, value)
type: DatasourceParameterType = Field(..., description="The type of the parameter")
description: I18nObject = Field(..., description="The description of the parameter")
@classmethod
def get_simple_instance(
cls,
name: str,
typ: DatasourceParameterType,
required: bool,
options: list[str] | None = None,
) -> "DatasourceParameter":
"""
get a simple datasource parameter
:param name: the name of the parameter
:param typ: the type of the parameter
:param required: if the parameter is required
:param options: the options of the parameter
"""
# convert options to ToolParameterOption
# FIXME fix the type error
if options:
option_objs = [
PluginParameterOption(value=option, label=I18nObject(en_US=option, zh_Hans=option))
for option in options
]
else:
option_objs = []
return cls(
name=name,
label=I18nObject(en_US="", zh_Hans=""),
placeholder=None,
type=typ,
required=required,
options=option_objs,
description=I18nObject(en_US="", zh_Hans=""),
)
def init_frontend_parameter(self, value: Any):
return init_frontend_parameter(self, self.type, value)
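# Illustrative helper call (assumes the inherited PluginParameter fields touched by
# get_simple_instance all have defaults): build a required select parameter.
def _example_select_parameter() -> DatasourceParameter:
    return DatasourceParameter.get_simple_instance(
        name="page_size",
        typ=DatasourceParameter.DatasourceParameterType.SELECT,
        required=True,
        options=["10", "20", "50"],
    )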
class DatasourceIdentity(BaseModel):
author: str = Field(..., description="The author of the datasource")
name: str = Field(..., description="The name of the datasource")
label: I18nObject = Field(..., description="The label of the datasource")
provider: str = Field(..., description="The provider of the datasource")
icon: str | None = None
class DatasourceEntity(BaseModel):
identity: DatasourceIdentity
parameters: list[DatasourceParameter] = Field(default_factory=list)
description: I18nObject = Field(..., description="The description of the datasource")
output_schema: dict | None = None
@field_validator("parameters", mode="before")
@classmethod
def set_parameters(cls, v, validation_info: ValidationInfo) -> list[DatasourceParameter]:
return v or []
class DatasourceProviderIdentity(BaseModel):
author: str = Field(..., description="The author of the datasource provider")
name: str = Field(..., description="The name of the datasource provider")
description: I18nObject = Field(..., description="The description of the datasource provider")
icon: str = Field(..., description="The icon of the datasource provider")
label: I18nObject = Field(..., description="The label of the datasource provider")
tags: list[ToolLabelEnum] | None = Field(
default=[],
description="The tags of the datasource provider",
)
def generate_datasource_icon_url(self, tenant_id: str) -> str:
HARD_CODED_DATASOURCE_ICONS = ["https://assets.dify.ai/images/File%20Upload.svg"]
if self.icon in HARD_CODED_DATASOURCE_ICONS:
return self.icon
return str(
URL(dify_config.CONSOLE_API_URL or "/")
/ "console"
/ "api"
/ "workspaces"
/ "current"
/ "plugin"
/ "icon"
% {"tenant_id": tenant_id, "filename": self.icon}
)
class DatasourceProviderEntity(BaseModel):
"""
Datasource provider entity
"""
identity: DatasourceProviderIdentity
credentials_schema: list[ProviderConfig] = Field(default_factory=list)
oauth_schema: OAuthSchema | None = None
provider_type: DatasourceProviderType
class DatasourceProviderEntityWithPlugin(DatasourceProviderEntity):
datasources: list[DatasourceEntity] = Field(default_factory=list)
class DatasourceInvokeMeta(BaseModel):
"""
Datasource invoke meta
"""
time_cost: float = Field(..., description="The time cost of the datasource invoke")
error: str | None = None
tool_config: dict | None = None
@classmethod
def empty(cls) -> "DatasourceInvokeMeta":
"""
Get an empty instance of DatasourceInvokeMeta
"""
return cls(time_cost=0.0, error=None, tool_config={})
@classmethod
def error_instance(cls, error: str) -> "DatasourceInvokeMeta":
"""
Get an instance of DatasourceInvokeMeta with error
"""
return cls(time_cost=0.0, error=error, tool_config={})
def to_dict(self) -> dict:
return {
"time_cost": self.time_cost,
"error": self.error,
"tool_config": self.tool_config,
}
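# Example (illustrative): recording a failed invocation with the helpers above.
def _example_failed_invoke_meta() -> dict:
    meta = DatasourceInvokeMeta.error_instance("credentials expired")
    return meta.to_dict()  # {"time_cost": 0.0, "error": "credentials expired", "tool_config": {}}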
class DatasourceLabel(BaseModel):
"""
Datasource label
"""
name: str = Field(..., description="The name of the datasource")
label: I18nObject = Field(..., description="The label of the datasource")
icon: str = Field(..., description="The icon of the datasource")
class DatasourceInvokeFrom(Enum):
"""
Enum class for datasource invoke
"""
RAG_PIPELINE = "rag_pipeline"
class OnlineDocumentPage(BaseModel):
"""
Online document page
"""
page_id: str = Field(..., description="The page id")
page_name: str = Field(..., description="The page title")
page_icon: dict | None = Field(None, description="The page icon")
type: str = Field(..., description="The type of the page")
last_edited_time: str = Field(..., description="The last edited time")
parent_id: str | None = Field(None, description="The parent page id")
class OnlineDocumentInfo(BaseModel):
"""
Online document info
"""
workspace_id: str | None = Field(None, description="The workspace id")
workspace_name: str | None = Field(None, description="The workspace name")
workspace_icon: str | None = Field(None, description="The workspace icon")
total: int = Field(..., description="The total number of documents")
pages: list[OnlineDocumentPage] = Field(..., description="The pages of the online document")
class OnlineDocumentPagesMessage(BaseModel):
"""
Get online document pages response
"""
result: list[OnlineDocumentInfo]
class GetOnlineDocumentPageContentRequest(BaseModel):
"""
Get online document page content request
"""
workspace_id: str = Field(..., description="The workspace id")
page_id: str = Field(..., description="The page id")
type: str = Field(..., description="The type of the page")
class OnlineDocumentPageContent(BaseModel):
"""
Online document page content
"""
workspace_id: str = Field(..., description="The workspace id")
page_id: str = Field(..., description="The page id")
content: str = Field(..., description="The content of the page")
class GetOnlineDocumentPageContentResponse(BaseModel):
"""
Get online document page content response
"""
result: OnlineDocumentPageContent
class GetWebsiteCrawlRequest(BaseModel):
"""
Get website crawl request
"""
crawl_parameters: dict = Field(..., description="The crawl parameters")
class WebSiteInfoDetail(BaseModel):
source_url: str = Field(..., description="The url of the website")
content: str = Field(..., description="The content of the website")
title: str = Field(..., description="The title of the website")
description: str = Field(..., description="The description of the website")
class WebSiteInfo(BaseModel):
"""
Website info
"""
status: str | None = Field(..., description="crawl job status")
web_info_list: list[WebSiteInfoDetail] | None = []
total: int | None = Field(default=0, description="The total number of websites")
completed: int | None = Field(default=0, description="The number of completed websites")
class WebsiteCrawlMessage(BaseModel):
"""
Get website crawl response
"""
result: WebSiteInfo = WebSiteInfo(status="", web_info_list=[], total=0, completed=0)
class DatasourceMessage(ToolInvokeMessage):
pass
#########################
# Online drive file
#########################
class OnlineDriveFile(BaseModel):
"""
Online drive file
"""
id: str = Field(..., description="The file ID")
name: str = Field(..., description="The file name")
size: int = Field(..., description="The file size")
type: str = Field(..., description="The file type: folder or file")
class OnlineDriveFileBucket(BaseModel):
"""
Online drive file bucket
"""
bucket: str | None = Field(None, description="The file bucket")
files: list[OnlineDriveFile] = Field(..., description="The file list")
is_truncated: bool = Field(False, description="Whether the result is truncated")
next_page_parameters: dict | None = Field(None, description="Parameters for fetching the next page")
class OnlineDriveBrowseFilesRequest(BaseModel):
"""
Get online drive file list request
"""
bucket: str | None = Field(None, description="The file bucket")
prefix: str = Field(..., description="The parent folder ID")
max_keys: int = Field(20, description="Page size for pagination")
next_page_parameters: dict | None = Field(None, description="Parameters for fetching the next page")
class OnlineDriveBrowseFilesResponse(BaseModel):
"""
Get online drive file list response
"""
result: list[OnlineDriveFileBucket] = Field(..., description="The list of file buckets")
class OnlineDriveDownloadFileRequest(BaseModel):
"""
Get online drive file
"""
id: str = Field(..., description="The id of the file")
bucket: str | None = Field(None, description="The name of the bucket")
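# Sketch of paging through an online drive with the request/response models above;
# `browse` stands in for the actual plugin call and is hypothetical.
def _list_all_drive_files(browse) -> list[OnlineDriveFile]:
    files: list[OnlineDriveFile] = []
    next_page: dict | None = None
    while True:
        request = OnlineDriveBrowseFilesRequest(prefix="", max_keys=20, next_page_parameters=next_page)
        response: OnlineDriveBrowseFilesResponse = browse(request)
        for bucket in response.result:
            files.extend(bucket.files)
            next_page = bucket.next_page_parameters
        if not response.result or not response.result[-1].is_truncated:
            return files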

View File

@ -0,0 +1,37 @@
from core.datasource.entities.datasource_entities import DatasourceInvokeMeta
class DatasourceProviderNotFoundError(ValueError):
pass
class DatasourceNotFoundError(ValueError):
pass
class DatasourceParameterValidationError(ValueError):
pass
class DatasourceProviderCredentialValidationError(ValueError):
pass
class DatasourceNotSupportedError(ValueError):
pass
class DatasourceInvokeError(ValueError):
pass
class DatasourceApiSchemaError(ValueError):
pass
class DatasourceEngineInvokeError(Exception):
meta: DatasourceInvokeMeta
def __init__(self, meta, **kwargs):
self.meta = meta
super().__init__(**kwargs)
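# Example (illustrative): wrapping an engine failure so callers can inspect the
# invoke metadata attached to the exception.
def _example_engine_error() -> str | None:
    try:
        raise DatasourceEngineInvokeError(meta=DatasourceInvokeMeta.error_instance("timeout"))
    except DatasourceEngineInvokeError as e:
        return e.meta.error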

View File

@ -0,0 +1,29 @@
from core.datasource.__base.datasource_plugin import DatasourcePlugin
from core.datasource.__base.datasource_runtime import DatasourceRuntime
from core.datasource.entities.datasource_entities import (
DatasourceEntity,
DatasourceProviderType,
)
class LocalFileDatasourcePlugin(DatasourcePlugin):
tenant_id: str
plugin_unique_identifier: str
def __init__(
self,
entity: DatasourceEntity,
runtime: DatasourceRuntime,
tenant_id: str,
icon: str,
plugin_unique_identifier: str,
) -> None:
super().__init__(entity, runtime, icon)
self.tenant_id = tenant_id
self.plugin_unique_identifier = plugin_unique_identifier
def datasource_provider_type(self) -> str:
return DatasourceProviderType.LOCAL_FILE
def get_icon_url(self, tenant_id: str) -> str:
return self.icon

View File

@ -0,0 +1,56 @@
from typing import Any
from core.datasource.__base.datasource_provider import DatasourcePluginProviderController
from core.datasource.__base.datasource_runtime import DatasourceRuntime
from core.datasource.entities.datasource_entities import DatasourceProviderEntityWithPlugin, DatasourceProviderType
from core.datasource.local_file.local_file_plugin import LocalFileDatasourcePlugin
class LocalFileDatasourcePluginProviderController(DatasourcePluginProviderController):
entity: DatasourceProviderEntityWithPlugin
plugin_id: str
plugin_unique_identifier: str
def __init__(
self, entity: DatasourceProviderEntityWithPlugin, plugin_id: str, plugin_unique_identifier: str, tenant_id: str
) -> None:
super().__init__(entity, tenant_id)
self.plugin_id = plugin_id
self.plugin_unique_identifier = plugin_unique_identifier
@property
def provider_type(self) -> DatasourceProviderType:
"""
returns the type of the provider
"""
return DatasourceProviderType.LOCAL_FILE
def _validate_credentials(self, user_id: str, credentials: dict[str, Any]) -> None:
"""
validate the credentials of the provider
"""
pass
def get_datasource(self, datasource_name: str) -> LocalFileDatasourcePlugin: # type: ignore
"""
return datasource with given name
"""
datasource_entity = next(
(
datasource_entity
for datasource_entity in self.entity.datasources
if datasource_entity.identity.name == datasource_name
),
None,
)
if not datasource_entity:
raise ValueError(f"Datasource with name {datasource_name} not found")
return LocalFileDatasourcePlugin(
entity=datasource_entity,
runtime=DatasourceRuntime(tenant_id=self.tenant_id),
tenant_id=self.tenant_id,
icon=self.entity.identity.icon,
plugin_unique_identifier=self.plugin_unique_identifier,
)

View File

@ -0,0 +1,71 @@
from collections.abc import Generator, Mapping
from typing import Any
from core.datasource.__base.datasource_plugin import DatasourcePlugin
from core.datasource.__base.datasource_runtime import DatasourceRuntime
from core.datasource.entities.datasource_entities import (
DatasourceEntity,
DatasourceMessage,
DatasourceProviderType,
GetOnlineDocumentPageContentRequest,
OnlineDocumentPagesMessage,
)
from core.plugin.impl.datasource import PluginDatasourceManager
class OnlineDocumentDatasourcePlugin(DatasourcePlugin):
tenant_id: str
plugin_unique_identifier: str
entity: DatasourceEntity
runtime: DatasourceRuntime
def __init__(
self,
entity: DatasourceEntity,
runtime: DatasourceRuntime,
tenant_id: str,
icon: str,
plugin_unique_identifier: str,
) -> None:
super().__init__(entity, runtime, icon)
self.tenant_id = tenant_id
self.plugin_unique_identifier = plugin_unique_identifier
def get_online_document_pages(
self,
user_id: str,
datasource_parameters: Mapping[str, Any],
provider_type: str,
) -> Generator[OnlineDocumentPagesMessage, None, None]:
manager = PluginDatasourceManager()
return manager.get_online_document_pages(
tenant_id=self.tenant_id,
user_id=user_id,
datasource_provider=self.entity.identity.provider,
datasource_name=self.entity.identity.name,
credentials=self.runtime.credentials,
datasource_parameters=datasource_parameters,
provider_type=provider_type,
)
def get_online_document_page_content(
self,
user_id: str,
datasource_parameters: GetOnlineDocumentPageContentRequest,
provider_type: str,
) -> Generator[DatasourceMessage, None, None]:
manager = PluginDatasourceManager()
return manager.get_online_document_page_content(
tenant_id=self.tenant_id,
user_id=user_id,
datasource_provider=self.entity.identity.provider,
datasource_name=self.entity.identity.name,
credentials=self.runtime.credentials,
datasource_parameters=datasource_parameters,
provider_type=provider_type,
)
def datasource_provider_type(self) -> str:
return DatasourceProviderType.ONLINE_DOCUMENT

View File

@ -0,0 +1,48 @@
from core.datasource.__base.datasource_provider import DatasourcePluginProviderController
from core.datasource.__base.datasource_runtime import DatasourceRuntime
from core.datasource.entities.datasource_entities import DatasourceProviderEntityWithPlugin, DatasourceProviderType
from core.datasource.online_document.online_document_plugin import OnlineDocumentDatasourcePlugin
class OnlineDocumentDatasourcePluginProviderController(DatasourcePluginProviderController):
entity: DatasourceProviderEntityWithPlugin
plugin_id: str
plugin_unique_identifier: str
def __init__(
self, entity: DatasourceProviderEntityWithPlugin, plugin_id: str, plugin_unique_identifier: str, tenant_id: str
) -> None:
super().__init__(entity, tenant_id)
self.plugin_id = plugin_id
self.plugin_unique_identifier = plugin_unique_identifier
@property
def provider_type(self) -> DatasourceProviderType:
"""
returns the type of the provider
"""
return DatasourceProviderType.ONLINE_DOCUMENT
def get_datasource(self, datasource_name: str) -> OnlineDocumentDatasourcePlugin: # type: ignore
"""
return datasource with given name
"""
datasource_entity = next(
(
datasource_entity
for datasource_entity in self.entity.datasources
if datasource_entity.identity.name == datasource_name
),
None,
)
if not datasource_entity:
raise ValueError(f"Datasource with name {datasource_name} not found")
return OnlineDocumentDatasourcePlugin(
entity=datasource_entity,
runtime=DatasourceRuntime(tenant_id=self.tenant_id),
tenant_id=self.tenant_id,
icon=self.entity.identity.icon,
plugin_unique_identifier=self.plugin_unique_identifier,
)

View File

@ -0,0 +1,71 @@
from collections.abc import Generator
from core.datasource.__base.datasource_plugin import DatasourcePlugin
from core.datasource.__base.datasource_runtime import DatasourceRuntime
from core.datasource.entities.datasource_entities import (
DatasourceEntity,
DatasourceMessage,
DatasourceProviderType,
OnlineDriveBrowseFilesRequest,
OnlineDriveBrowseFilesResponse,
OnlineDriveDownloadFileRequest,
)
from core.plugin.impl.datasource import PluginDatasourceManager
class OnlineDriveDatasourcePlugin(DatasourcePlugin):
tenant_id: str
plugin_unique_identifier: str
entity: DatasourceEntity
runtime: DatasourceRuntime
def __init__(
self,
entity: DatasourceEntity,
runtime: DatasourceRuntime,
tenant_id: str,
icon: str,
plugin_unique_identifier: str,
) -> None:
super().__init__(entity, runtime, icon)
self.tenant_id = tenant_id
self.plugin_unique_identifier = plugin_unique_identifier
def online_drive_browse_files(
self,
user_id: str,
request: OnlineDriveBrowseFilesRequest,
provider_type: str,
) -> Generator[OnlineDriveBrowseFilesResponse, None, None]:
manager = PluginDatasourceManager()
return manager.online_drive_browse_files(
tenant_id=self.tenant_id,
user_id=user_id,
datasource_provider=self.entity.identity.provider,
datasource_name=self.entity.identity.name,
credentials=self.runtime.credentials,
request=request,
provider_type=provider_type,
)
def online_drive_download_file(
self,
user_id: str,
request: OnlineDriveDownloadFileRequest,
provider_type: str,
) -> Generator[DatasourceMessage, None, None]:
manager = PluginDatasourceManager()
return manager.online_drive_download_file(
tenant_id=self.tenant_id,
user_id=user_id,
datasource_provider=self.entity.identity.provider,
datasource_name=self.entity.identity.name,
credentials=self.runtime.credentials,
request=request,
provider_type=provider_type,
)
def datasource_provider_type(self) -> str:
return DatasourceProviderType.ONLINE_DRIVE

View File

@ -0,0 +1,48 @@
from core.datasource.__base.datasource_provider import DatasourcePluginProviderController
from core.datasource.__base.datasource_runtime import DatasourceRuntime
from core.datasource.entities.datasource_entities import DatasourceProviderEntityWithPlugin, DatasourceProviderType
from core.datasource.online_drive.online_drive_plugin import OnlineDriveDatasourcePlugin
class OnlineDriveDatasourcePluginProviderController(DatasourcePluginProviderController):
entity: DatasourceProviderEntityWithPlugin
plugin_id: str
plugin_unique_identifier: str
def __init__(
self, entity: DatasourceProviderEntityWithPlugin, plugin_id: str, plugin_unique_identifier: str, tenant_id: str
) -> None:
super().__init__(entity, tenant_id)
self.plugin_id = plugin_id
self.plugin_unique_identifier = plugin_unique_identifier
@property
def provider_type(self) -> DatasourceProviderType:
"""
returns the type of the provider
"""
return DatasourceProviderType.ONLINE_DRIVE
def get_datasource(self, datasource_name: str) -> OnlineDriveDatasourcePlugin: # type: ignore
"""
return datasource with given name
"""
datasource_entity = next(
(
datasource_entity
for datasource_entity in self.entity.datasources
if datasource_entity.identity.name == datasource_name
),
None,
)
if not datasource_entity:
raise ValueError(f"Datasource with name {datasource_name} not found")
return OnlineDriveDatasourcePlugin(
entity=datasource_entity,
runtime=DatasourceRuntime(tenant_id=self.tenant_id),
tenant_id=self.tenant_id,
icon=self.entity.identity.icon,
plugin_unique_identifier=self.plugin_unique_identifier,
)

View File

View File

@ -0,0 +1,127 @@
import logging
from collections.abc import Generator
from mimetypes import guess_extension, guess_type
from core.datasource.entities.datasource_entities import DatasourceMessage
from core.file import File, FileTransferMethod, FileType
from core.tools.tool_file_manager import ToolFileManager
from models.tools import ToolFile
logger = logging.getLogger(__name__)
class DatasourceFileMessageTransformer:
@classmethod
def transform_datasource_invoke_messages(
cls,
messages: Generator[DatasourceMessage, None, None],
user_id: str,
tenant_id: str,
conversation_id: str | None = None,
) -> Generator[DatasourceMessage, None, None]:
"""
Transform datasource message and handle file download
"""
for message in messages:
if message.type in {DatasourceMessage.MessageType.TEXT, DatasourceMessage.MessageType.LINK}:
yield message
elif message.type == DatasourceMessage.MessageType.IMAGE and isinstance(
message.message, DatasourceMessage.TextMessage
):
# try to download image
try:
assert isinstance(message.message, DatasourceMessage.TextMessage)
tool_file_manager = ToolFileManager()
tool_file: ToolFile | None = tool_file_manager.create_file_by_url(
user_id=user_id,
tenant_id=tenant_id,
file_url=message.message.text,
conversation_id=conversation_id,
)
if tool_file:
url = f"/files/datasources/{tool_file.id}{guess_extension(tool_file.mimetype) or '.png'}"
yield DatasourceMessage(
type=DatasourceMessage.MessageType.IMAGE_LINK,
message=DatasourceMessage.TextMessage(text=url),
meta=message.meta.copy() if message.meta is not None else {},
)
except Exception as e:
yield DatasourceMessage(
type=DatasourceMessage.MessageType.TEXT,
message=DatasourceMessage.TextMessage(
text=f"Failed to download image: {message.message.text}: {e}"
),
meta=message.meta.copy() if message.meta is not None else {},
)
elif message.type == DatasourceMessage.MessageType.BLOB:
# get mime type and save blob to storage
meta = message.meta or {}
# get filename from meta
filename = meta.get("file_name", None)
mimetype = meta.get("mime_type")
if not mimetype:
mimetype = (guess_type(filename)[0] if filename else None) or "application/octet-stream"
# if message is str, encode it to bytes
if not isinstance(message.message, DatasourceMessage.BlobMessage):
raise ValueError("unexpected message type")
# FIXME: should do a type check here.
assert isinstance(message.message.blob, bytes)
tool_file_manager = ToolFileManager()
blob_tool_file: ToolFile | None = tool_file_manager.create_file_by_raw(
user_id=user_id,
tenant_id=tenant_id,
conversation_id=conversation_id,
file_binary=message.message.blob,
mimetype=mimetype,
filename=filename,
)
if blob_tool_file:
url = cls.get_datasource_file_url(
datasource_file_id=blob_tool_file.id, extension=guess_extension(blob_tool_file.mimetype)
)
# check if file is image
if "image" in mimetype:
yield DatasourceMessage(
type=DatasourceMessage.MessageType.IMAGE_LINK,
message=DatasourceMessage.TextMessage(text=url),
meta=meta.copy() if meta is not None else {},
)
else:
yield DatasourceMessage(
type=DatasourceMessage.MessageType.BINARY_LINK,
message=DatasourceMessage.TextMessage(text=url),
meta=meta.copy() if meta is not None else {},
)
elif message.type == DatasourceMessage.MessageType.FILE:
meta = message.meta or {}
file: File | None = meta.get("file")
if isinstance(file, File):
if file.transfer_method == FileTransferMethod.TOOL_FILE:
assert file.related_id is not None
url = cls.get_datasource_file_url(datasource_file_id=file.related_id, extension=file.extension)
if file.type == FileType.IMAGE:
yield DatasourceMessage(
type=DatasourceMessage.MessageType.IMAGE_LINK,
message=DatasourceMessage.TextMessage(text=url),
meta=meta.copy() if meta is not None else {},
)
else:
yield DatasourceMessage(
type=DatasourceMessage.MessageType.LINK,
message=DatasourceMessage.TextMessage(text=url),
meta=meta.copy() if meta is not None else {},
)
else:
yield message
else:
yield message
@classmethod
def get_datasource_file_url(cls, datasource_file_id: str, extension: str | None) -> str:
return f"/files/datasources/{datasource_file_id}{extension or '.bin'}"

View File

@ -0,0 +1,51 @@
from collections.abc import Generator, Mapping
from typing import Any
from core.datasource.__base.datasource_plugin import DatasourcePlugin
from core.datasource.__base.datasource_runtime import DatasourceRuntime
from core.datasource.entities.datasource_entities import (
DatasourceEntity,
DatasourceProviderType,
WebsiteCrawlMessage,
)
from core.plugin.impl.datasource import PluginDatasourceManager
class WebsiteCrawlDatasourcePlugin(DatasourcePlugin):
tenant_id: str
plugin_unique_identifier: str
entity: DatasourceEntity
runtime: DatasourceRuntime
def __init__(
self,
entity: DatasourceEntity,
runtime: DatasourceRuntime,
tenant_id: str,
icon: str,
plugin_unique_identifier: str,
) -> None:
super().__init__(entity, runtime, icon)
self.tenant_id = tenant_id
self.plugin_unique_identifier = plugin_unique_identifier
def get_website_crawl(
self,
user_id: str,
datasource_parameters: Mapping[str, Any],
provider_type: str,
) -> Generator[WebsiteCrawlMessage, None, None]:
manager = PluginDatasourceManager()
return manager.get_website_crawl(
tenant_id=self.tenant_id,
user_id=user_id,
datasource_provider=self.entity.identity.provider,
datasource_name=self.entity.identity.name,
credentials=self.runtime.credentials,
datasource_parameters=datasource_parameters,
provider_type=provider_type,
)
def datasource_provider_type(self) -> str:
return DatasourceProviderType.WEBSITE_CRAWL

View File

@ -0,0 +1,52 @@
from core.datasource.__base.datasource_provider import DatasourcePluginProviderController
from core.datasource.__base.datasource_runtime import DatasourceRuntime
from core.datasource.entities.datasource_entities import DatasourceProviderEntityWithPlugin, DatasourceProviderType
from core.datasource.website_crawl.website_crawl_plugin import WebsiteCrawlDatasourcePlugin
class WebsiteCrawlDatasourcePluginProviderController(DatasourcePluginProviderController):
entity: DatasourceProviderEntityWithPlugin
plugin_id: str
plugin_unique_identifier: str
def __init__(
self,
entity: DatasourceProviderEntityWithPlugin,
plugin_id: str,
plugin_unique_identifier: str,
tenant_id: str,
) -> None:
super().__init__(entity, tenant_id)
self.plugin_id = plugin_id
self.plugin_unique_identifier = plugin_unique_identifier
@property
def provider_type(self) -> DatasourceProviderType:
"""
returns the type of the provider
"""
return DatasourceProviderType.WEBSITE_CRAWL
def get_datasource(self, datasource_name: str) -> WebsiteCrawlDatasourcePlugin: # type: ignore
"""
return datasource with given name
"""
datasource_entity = next(
(
datasource_entity
for datasource_entity in self.entity.datasources
if datasource_entity.identity.name == datasource_name
),
None,
)
if not datasource_entity:
raise ValueError(f"Datasource with name {datasource_name} not found")
return WebsiteCrawlDatasourcePlugin(
entity=datasource_entity,
runtime=DatasourceRuntime(tenant_id=self.tenant_id),
tenant_id=self.tenant_id,
icon=self.entity.identity.icon,
plugin_unique_identifier=self.plugin_unique_identifier,
)

View File

@ -15,3 +15,27 @@ class IndexingEstimate(BaseModel):
total_segments: int
preview: list[PreviewDetail]
qa_preview: list[QAPreviewDetail] | None = None
class PipelineDataset(BaseModel):
id: str
name: str
description: str
chunk_structure: str
class PipelineDocument(BaseModel):
id: str
position: int
data_source_type: str
data_source_info: dict | None = None
name: str
indexing_status: str
error: str | None = None
enabled: bool
class PipelineGenerateResponse(BaseModel):
batch: str
dataset: PipelineDataset
documents: list[PipelineDocument]
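# Illustrative construction of the response models above; all values are placeholders.
def _example_pipeline_response() -> PipelineGenerateResponse:
    return PipelineGenerateResponse(
        batch="example-batch",
        dataset=PipelineDataset(id="ds-1", name="Docs", description="", chunk_structure="paragraph"),
        documents=[
            PipelineDocument(
                id="doc-1",
                position=1,
                data_source_type="upload_file",
                name="readme.md",
                indexing_status="waiting",
                enabled=True,
            )
        ],
    )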

View File

@ -28,7 +28,6 @@ from core.model_runtime.entities.provider_entities import (
)
from core.model_runtime.model_providers.__base.ai_model import AIModel
from core.model_runtime.model_providers.model_provider_factory import ModelProviderFactory
from core.plugin.entities.plugin import ModelProviderID
from extensions.ext_database import db
from libs.datetime_utils import naive_utc_now
from models.provider import (
@ -41,6 +40,7 @@ from models.provider import (
ProviderType,
TenantPreferredModelProvider,
)
from models.provider_ids import ModelProviderID
from services.enterprise.plugin_manager_service import PluginCredentialType
logger = logging.getLogger(__name__)
@ -205,16 +205,10 @@ class ProviderConfiguration(BaseModel):
"""
Get custom provider record.
"""
# get provider
model_provider_id = ModelProviderID(self.provider.provider)
provider_names = [self.provider.provider]
if model_provider_id.is_langgenius():
provider_names.append(model_provider_id.provider_name)
stmt = select(Provider).where(
Provider.tenant_id == self.tenant_id,
Provider.provider_type == ProviderType.CUSTOM.value,
Provider.provider_name.in_(provider_names),
Provider.provider_name.in_(self._get_provider_names()),
)
return session.execute(stmt).scalar_one_or_none()
@ -276,7 +270,7 @@ class ProviderConfiguration(BaseModel):
"""
stmt = select(ProviderCredential.id).where(
ProviderCredential.tenant_id == self.tenant_id,
ProviderCredential.provider_name == self.provider.provider,
ProviderCredential.provider_name.in_(self._get_provider_names()),
ProviderCredential.credential_name == credential_name,
)
if exclude_id:
@ -324,7 +318,7 @@ class ProviderConfiguration(BaseModel):
try:
stmt = select(ProviderCredential).where(
ProviderCredential.tenant_id == self.tenant_id,
ProviderCredential.provider_name == self.provider.provider,
ProviderCredential.provider_name.in_(self._get_provider_names()),
ProviderCredential.id == credential_id,
)
credential_record = s.execute(stmt).scalar_one_or_none()
@ -374,7 +368,7 @@ class ProviderConfiguration(BaseModel):
session=session,
query_factory=lambda: select(ProviderCredential).where(
ProviderCredential.tenant_id == self.tenant_id,
ProviderCredential.provider_name == self.provider.provider,
ProviderCredential.provider_name.in_(self._get_provider_names()),
),
)
@ -387,7 +381,7 @@ class ProviderConfiguration(BaseModel):
session=session,
query_factory=lambda: select(ProviderModelCredential).where(
ProviderModelCredential.tenant_id == self.tenant_id,
ProviderModelCredential.provider_name == self.provider.provider,
ProviderModelCredential.provider_name.in_(self._get_provider_names()),
ProviderModelCredential.model_name == model,
ProviderModelCredential.model_type == model_type.to_origin_model_type(),
),
@ -423,6 +417,16 @@ class ProviderConfiguration(BaseModel):
logger.warning("Error generating next credential name: %s", str(e))
return "API KEY 1"
def _get_provider_names(self):
"""
The provider name might be stored in the database as either `openai` or `langgenius/openai/openai`.
"""
model_provider_id = ModelProviderID(self.provider.provider)
provider_names = [self.provider.provider]
if model_provider_id.is_langgenius():
provider_names.append(model_provider_id.provider_name)
return provider_names
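# Illustration (not from the original code): for a provider stored as
# "langgenius/openai/openai" this returns both spellings, e.g.
# ["langgenius/openai/openai", "openai"], so the credential queries above
# match rows saved under either name.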
def create_provider_credential(self, credentials: dict, credential_name: str | None):
"""
Add custom provider credentials.
@ -501,7 +505,7 @@ class ProviderConfiguration(BaseModel):
stmt = select(ProviderCredential).where(
ProviderCredential.id == credential_id,
ProviderCredential.tenant_id == self.tenant_id,
ProviderCredential.provider_name == self.provider.provider,
ProviderCredential.provider_name.in_(self._get_provider_names()),
)
# Get the credential record to update
@ -554,7 +558,7 @@ class ProviderConfiguration(BaseModel):
# Find all load balancing configs that use this credential_id
stmt = select(LoadBalancingModelConfig).where(
LoadBalancingModelConfig.tenant_id == self.tenant_id,
LoadBalancingModelConfig.provider_name == self.provider.provider,
LoadBalancingModelConfig.provider_name.in_(self._get_provider_names()),
LoadBalancingModelConfig.credential_id == credential_id,
LoadBalancingModelConfig.credential_source_type == credential_source,
)
@ -591,7 +595,7 @@ class ProviderConfiguration(BaseModel):
stmt = select(ProviderCredential).where(
ProviderCredential.id == credential_id,
ProviderCredential.tenant_id == self.tenant_id,
ProviderCredential.provider_name == self.provider.provider,
ProviderCredential.provider_name.in_(self._get_provider_names()),
)
# Get the credential record to update
@ -602,7 +606,7 @@ class ProviderConfiguration(BaseModel):
# Check if this credential is used in load balancing configs
lb_stmt = select(LoadBalancingModelConfig).where(
LoadBalancingModelConfig.tenant_id == self.tenant_id,
LoadBalancingModelConfig.provider_name == self.provider.provider,
LoadBalancingModelConfig.provider_name.in_(self._get_provider_names()),
LoadBalancingModelConfig.credential_id == credential_id,
LoadBalancingModelConfig.credential_source_type == "provider",
)
@ -624,7 +628,7 @@ class ProviderConfiguration(BaseModel):
# if this is the last credential, we need to delete the provider record
count_stmt = select(func.count(ProviderCredential.id)).where(
ProviderCredential.tenant_id == self.tenant_id,
ProviderCredential.provider_name == self.provider.provider,
ProviderCredential.provider_name.in_(self._get_provider_names()),
)
available_credentials_count = session.execute(count_stmt).scalar() or 0
session.delete(credential_record)
@ -668,7 +672,7 @@ class ProviderConfiguration(BaseModel):
stmt = select(ProviderCredential).where(
ProviderCredential.id == credential_id,
ProviderCredential.tenant_id == self.tenant_id,
ProviderCredential.provider_name == self.provider.provider,
ProviderCredential.provider_name.in_(self._get_provider_names()),
)
credential_record = session.execute(stmt).scalar_one_or_none()
if not credential_record:
@ -704,6 +708,7 @@ class ProviderConfiguration(BaseModel):
Get custom model credentials.
"""
# get provider model
model_provider_id = ModelProviderID(self.provider.provider)
provider_names = [self.provider.provider]
if model_provider_id.is_langgenius():
@ -736,7 +741,7 @@ class ProviderConfiguration(BaseModel):
stmt = select(ProviderModelCredential).where(
ProviderModelCredential.id == credential_id,
ProviderModelCredential.tenant_id == self.tenant_id,
ProviderModelCredential.provider_name == self.provider.provider,
ProviderModelCredential.provider_name.in_(self._get_provider_names()),
ProviderModelCredential.model_name == model,
ProviderModelCredential.model_type == model_type.to_origin_model_type(),
)
@ -783,7 +788,7 @@ class ProviderConfiguration(BaseModel):
"""
stmt = select(ProviderModelCredential).where(
ProviderModelCredential.tenant_id == self.tenant_id,
ProviderModelCredential.provider_name == self.provider.provider,
ProviderModelCredential.provider_name.in_(self._get_provider_names()),
ProviderModelCredential.model_name == model,
ProviderModelCredential.model_type == model_type.to_origin_model_type(),
ProviderModelCredential.credential_name == credential_name,
@ -859,7 +864,7 @@ class ProviderConfiguration(BaseModel):
stmt = select(ProviderModelCredential).where(
ProviderModelCredential.id == credential_id,
ProviderModelCredential.tenant_id == self.tenant_id,
ProviderModelCredential.provider_name == self.provider.provider,
ProviderModelCredential.provider_name.in_(self._get_provider_names()),
ProviderModelCredential.model_name == model,
ProviderModelCredential.model_type == model_type.to_origin_model_type(),
)
@ -996,7 +1001,7 @@ class ProviderConfiguration(BaseModel):
stmt = select(ProviderModelCredential).where(
ProviderModelCredential.id == credential_id,
ProviderModelCredential.tenant_id == self.tenant_id,
ProviderModelCredential.provider_name == self.provider.provider,
ProviderModelCredential.provider_name.in_(self._get_provider_names()),
ProviderModelCredential.model_name == model,
ProviderModelCredential.model_type == model_type.to_origin_model_type(),
)
@ -1041,7 +1046,7 @@ class ProviderConfiguration(BaseModel):
stmt = select(ProviderModelCredential).where(
ProviderModelCredential.id == credential_id,
ProviderModelCredential.tenant_id == self.tenant_id,
ProviderModelCredential.provider_name == self.provider.provider,
ProviderModelCredential.provider_name.in_(self._get_provider_names()),
ProviderModelCredential.model_name == model,
ProviderModelCredential.model_type == model_type.to_origin_model_type(),
)
@ -1051,7 +1056,7 @@ class ProviderConfiguration(BaseModel):
lb_stmt = select(LoadBalancingModelConfig).where(
LoadBalancingModelConfig.tenant_id == self.tenant_id,
LoadBalancingModelConfig.provider_name == self.provider.provider,
LoadBalancingModelConfig.provider_name.in_(self._get_provider_names()),
LoadBalancingModelConfig.credential_id == credential_id,
LoadBalancingModelConfig.credential_source_type == "custom_model",
)
@ -1074,7 +1079,7 @@ class ProviderConfiguration(BaseModel):
# if this is the last credential, we need to delete the custom model record
count_stmt = select(func.count(ProviderModelCredential.id)).where(
ProviderModelCredential.tenant_id == self.tenant_id,
ProviderModelCredential.provider_name == self.provider.provider,
ProviderModelCredential.provider_name.in_(self._get_provider_names()),
ProviderModelCredential.model_name == model,
ProviderModelCredential.model_type == model_type.to_origin_model_type(),
)
@ -1114,7 +1119,7 @@ class ProviderConfiguration(BaseModel):
stmt = select(ProviderModelCredential).where(
ProviderModelCredential.id == credential_id,
ProviderModelCredential.tenant_id == self.tenant_id,
ProviderModelCredential.provider_name == self.provider.provider,
ProviderModelCredential.provider_name.in_(self._get_provider_names()),
ProviderModelCredential.model_name == model,
ProviderModelCredential.model_type == model_type.to_origin_model_type(),
)
@ -1156,7 +1161,7 @@ class ProviderConfiguration(BaseModel):
stmt = select(ProviderModelCredential).where(
ProviderModelCredential.id == credential_id,
ProviderModelCredential.tenant_id == self.tenant_id,
ProviderModelCredential.provider_name == self.provider.provider,
ProviderModelCredential.provider_name.in_(self._get_provider_names()),
ProviderModelCredential.model_name == model,
ProviderModelCredential.model_type == model_type.to_origin_model_type(),
)
@ -1203,14 +1208,9 @@ class ProviderConfiguration(BaseModel):
"""
Get provider model setting.
"""
model_provider_id = ModelProviderID(self.provider.provider)
provider_names = [self.provider.provider]
if model_provider_id.is_langgenius():
provider_names.append(model_provider_id.provider_name)
stmt = select(ProviderModelSetting).where(
ProviderModelSetting.tenant_id == self.tenant_id,
ProviderModelSetting.provider_name.in_(provider_names),
ProviderModelSetting.provider_name.in_(self._get_provider_names()),
ProviderModelSetting.model_type == model_type.to_origin_model_type(),
ProviderModelSetting.model_name == model,
)
@ -1286,6 +1286,7 @@ class ProviderConfiguration(BaseModel):
:param model: model name
:return:
"""
model_provider_id = ModelProviderID(self.provider.provider)
provider_names = [self.provider.provider]
if model_provider_id.is_langgenius():
@ -1381,15 +1382,9 @@ class ProviderConfiguration(BaseModel):
return
def _switch(s: Session):
# get preferred provider
model_provider_id = ModelProviderID(self.provider.provider)
provider_names = [self.provider.provider]
if model_provider_id.is_langgenius():
provider_names.append(model_provider_id.provider_name)
stmt = select(TenantPreferredModelProvider).where(
TenantPreferredModelProvider.tenant_id == self.tenant_id,
TenantPreferredModelProvider.provider_name.in_(provider_names),
TenantPreferredModelProvider.provider_name.in_(self._get_provider_names()),
)
preferred_model_provider = s.execute(stmt).scalars().first()
@ -1419,7 +1414,7 @@ class ProviderConfiguration(BaseModel):
"""
secret_input_form_variables = []
for credential_form_schema in credential_form_schemas:
if credential_form_schema.type == FormType.SECRET_INPUT:
if credential_form_schema.type.value == FormType.SECRET_INPUT.value:
secret_input_form_variables.append(credential_form_schema.variable)
return secret_input_form_variables

View File

@ -20,6 +20,7 @@ class FileTransferMethod(StrEnum):
REMOTE_URL = "remote_url"
LOCAL_FILE = "local_file"
TOOL_FILE = "tool_file"
DATASOURCE_FILE = "datasource_file"
@staticmethod
def value_of(value):

View File

@ -97,7 +97,11 @@ def to_prompt_message_content(
def download(f: File, /):
if f.transfer_method in (FileTransferMethod.TOOL_FILE, FileTransferMethod.LOCAL_FILE):
if f.transfer_method in (
FileTransferMethod.TOOL_FILE,
FileTransferMethod.LOCAL_FILE,
FileTransferMethod.DATASOURCE_FILE,
):
return _download_file_content(f.storage_key)
elif f.transfer_method == FileTransferMethod.REMOTE_URL:
response = ssrf_proxy.get(f.remote_url, follow_redirects=True)
@ -137,6 +141,8 @@ def _get_encoded_string(f: File, /):
data = _download_file_content(f.storage_key)
case FileTransferMethod.TOOL_FILE:
data = _download_file_content(f.storage_key)
case FileTransferMethod.DATASOURCE_FILE:
data = _download_file_content(f.storage_key)
encoded_string = base64.b64encode(data).decode("utf-8")
return encoded_string

View File

@ -3,11 +3,12 @@ import hashlib
import hmac
import os
import time
import urllib.parse
from configs import dify_config
def get_signed_file_url(upload_file_id: str) -> str:
def get_signed_file_url(upload_file_id: str, as_attachment=False) -> str:
url = f"{dify_config.FILES_URL}/files/{upload_file_id}/file-preview"
timestamp = str(int(time.time()))
@ -16,8 +17,12 @@ def get_signed_file_url(upload_file_id: str) -> str:
msg = f"file-preview|{upload_file_id}|{timestamp}|{nonce}"
sign = hmac.new(key, msg.encode(), hashlib.sha256).digest()
encoded_sign = base64.urlsafe_b64encode(sign).decode()
query = {"timestamp": timestamp, "nonce": nonce, "sign": encoded_sign}
if as_attachment:
query["as_attachment"] = "true"
query_string = urllib.parse.urlencode(query)
return f"{url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}"
return f"{url}?{query_string}"
def get_signed_file_url_for_plugin(filename: str, mimetype: str, tenant_id: str, user_id: str) -> str:
@ -30,7 +35,6 @@ def get_signed_file_url_for_plugin(filename: str, mimetype: str, tenant_id: str,
msg = f"upload|{filename}|{mimetype}|{tenant_id}|{user_id}|{timestamp}|{nonce}"
sign = hmac.new(key, msg.encode(), hashlib.sha256).digest()
encoded_sign = base64.urlsafe_b64encode(sign).decode()
return f"{url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}&user_id={user_id}&tenant_id={tenant_id}"

View File

@ -115,10 +115,11 @@ class File(BaseModel):
if self.related_id is None:
raise ValueError("Missing file related_id")
return helpers.get_signed_file_url(upload_file_id=self.related_id)
elif self.transfer_method == FileTransferMethod.TOOL_FILE:
elif self.transfer_method in [FileTransferMethod.TOOL_FILE, FileTransferMethod.DATASOURCE_FILE]:
assert self.related_id is not None
assert self.extension is not None
return sign_tool_file(tool_file_id=self.related_id, extension=self.extension)
return None
def to_plugin_parameter(self) -> dict[str, Any]:
return {
@ -145,6 +146,9 @@ class File(BaseModel):
case FileTransferMethod.TOOL_FILE:
if not self.related_id:
raise ValueError("Missing file related_id")
case FileTransferMethod.DATASOURCE_FILE:
if not self.related_id:
raise ValueError("Missing file related_id")
return self
@property

View File

@ -4,7 +4,7 @@ from enum import StrEnum
from threading import Lock
from typing import Any
from httpx import Timeout, post
import httpx
from pydantic import BaseModel
from yarl import URL
@ -13,9 +13,17 @@ from core.helper.code_executor.javascript.javascript_transformer import NodeJsTe
from core.helper.code_executor.jinja2.jinja2_transformer import Jinja2TemplateTransformer
from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer
from core.helper.code_executor.template_transformer import TemplateTransformer
from core.helper.http_client_pooling import get_pooled_http_client
logger = logging.getLogger(__name__)
code_execution_endpoint_url = URL(str(dify_config.CODE_EXECUTION_ENDPOINT))
CODE_EXECUTION_SSL_VERIFY = dify_config.CODE_EXECUTION_SSL_VERIFY
_CODE_EXECUTOR_CLIENT_LIMITS = httpx.Limits(
max_connections=dify_config.CODE_EXECUTION_POOL_MAX_CONNECTIONS,
max_keepalive_connections=dify_config.CODE_EXECUTION_POOL_MAX_KEEPALIVE_CONNECTIONS,
keepalive_expiry=dify_config.CODE_EXECUTION_POOL_KEEPALIVE_EXPIRY,
)
_CODE_EXECUTOR_CLIENT_KEY = "code_executor:http_client"
class CodeExecutionError(Exception):
@ -38,6 +46,13 @@ class CodeLanguage(StrEnum):
JAVASCRIPT = "javascript"
def _build_code_executor_client() -> httpx.Client:
return httpx.Client(
verify=CODE_EXECUTION_SSL_VERIFY,
limits=_CODE_EXECUTOR_CLIENT_LIMITS,
)
class CodeExecutor:
dependencies_cache: dict[str, str] = {}
dependencies_cache_lock = Lock()
@ -76,17 +91,21 @@ class CodeExecutor:
"enable_network": True,
}
timeout = httpx.Timeout(
connect=dify_config.CODE_EXECUTION_CONNECT_TIMEOUT,
read=dify_config.CODE_EXECUTION_READ_TIMEOUT,
write=dify_config.CODE_EXECUTION_WRITE_TIMEOUT,
pool=None,
)
client = get_pooled_http_client(_CODE_EXECUTOR_CLIENT_KEY, _build_code_executor_client)
try:
response = post(
response = client.post(
str(url),
json=data,
headers=headers,
timeout=Timeout(
connect=dify_config.CODE_EXECUTION_CONNECT_TIMEOUT,
read=dify_config.CODE_EXECUTION_READ_TIMEOUT,
write=dify_config.CODE_EXECUTION_WRITE_TIMEOUT,
pool=None,
),
timeout=timeout,
)
if response.status_code == 503:
raise CodeExecutionError("Code execution service is unavailable")
@ -106,8 +125,8 @@ class CodeExecutor:
try:
response_data = response.json()
except:
raise CodeExecutionError("Failed to parse response")
except Exception as e:
raise CodeExecutionError("Failed to parse response") from e
if (code := response_data.get("code")) != 0:
raise CodeExecutionError(f"Got error code: {code}. Got error msg: {response_data.get('message')}")

View File

@ -1,9 +1,33 @@
from abc import abstractmethod
from abc import ABC, abstractmethod
from collections.abc import Mapping, Sequence
from typing import TypedDict
from pydantic import BaseModel
class CodeNodeProvider(BaseModel):
class VariableConfig(TypedDict):
variable: str
value_selector: Sequence[str | int]
class OutputConfig(TypedDict):
type: str
children: None
class CodeConfig(TypedDict):
variables: Sequence[VariableConfig]
code_language: str
code: str
outputs: Mapping[str, OutputConfig]
class DefaultConfig(TypedDict):
type: str
config: CodeConfig
class CodeNodeProvider(BaseModel, ABC):
@staticmethod
@abstractmethod
def get_language() -> str:
@ -22,11 +46,14 @@ class CodeNodeProvider(BaseModel):
pass
@classmethod
def get_default_config(cls):
def get_default_config(cls) -> DefaultConfig:
return {
"type": "code",
"config": {
"variables": [{"variable": "arg1", "value_selector": []}, {"variable": "arg2", "value_selector": []}],
"variables": [
{"variable": "arg1", "value_selector": []},
{"variable": "arg2", "value_selector": []},
],
"code_language": cls.get_language(),
"code": cls.get_default_code(),
"outputs": {"result": {"type": "string", "children": None}},

View File

@ -16,8 +16,8 @@ def full_mask_token(token_length=20):
def encrypt_token(tenant_id: str, token: str):
from extensions.ext_database import db
from models.account import Tenant
from models.engine import db
if not (tenant := db.session.query(Tenant).where(Tenant.id == tenant_id).first()):
raise ValueError(f"Tenant with id {tenant_id} not found")

View File

@ -0,0 +1,59 @@
"""HTTP client pooling utilities."""
from __future__ import annotations
import atexit
import threading
from collections.abc import Callable
import httpx
ClientBuilder = Callable[[], httpx.Client]
class HttpClientPoolFactory:
"""Thread-safe factory that maintains reusable HTTP client instances."""
def __init__(self) -> None:
self._clients: dict[str, httpx.Client] = {}
self._lock = threading.Lock()
def get_or_create(self, key: str, builder: ClientBuilder) -> httpx.Client:
"""Return a pooled client associated with ``key`` creating it on demand."""
client = self._clients.get(key)
if client is not None:
return client
with self._lock:
client = self._clients.get(key)
if client is None:
client = builder()
self._clients[key] = client
return client
def close_all(self) -> None:
"""Close all pooled clients and clear the pool."""
with self._lock:
for client in self._clients.values():
client.close()
self._clients.clear()
_factory = HttpClientPoolFactory()
def get_pooled_http_client(key: str, builder: ClientBuilder) -> httpx.Client:
"""Return a pooled client for the given ``key`` using ``builder`` when missing."""
return _factory.get_or_create(key, builder)
def close_all_pooled_clients() -> None:
"""Close every client created through the pooling factory."""
_factory.close_all()
def _register_shutdown_hook() -> None:
atexit.register(close_all_pooled_clients)
_register_shutdown_hook()
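# Usage sketch (not part of this module): the builder runs only the first time a
# key is requested; later calls reuse the same pooled client.
def _example_pooled_client() -> httpx.Client:
    def _build() -> httpx.Client:
        return httpx.Client(timeout=5.0)

    client = get_pooled_http_client("example:client", _build)
    assert client is get_pooled_http_client("example:client", _build)
    return client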

View File

@ -23,7 +23,7 @@ def batch_fetch_plugin_manifests(plugin_ids: list[str]) -> Sequence[MarketplaceP
return []
url = str(marketplace_api_url / "api/v1/plugins/batch")
response = httpx.post(url, json={"plugin_ids": plugin_ids})
response = httpx.post(url, json={"plugin_ids": plugin_ids}, headers={"X-Dify-Version": dify_config.project.version})
response.raise_for_status()
return [MarketplacePluginDeclaration(**plugin) for plugin in response.json()["data"]["plugins"]]
@ -36,7 +36,7 @@ def batch_fetch_plugin_manifests_ignore_deserialization_error(
return []
url = str(marketplace_api_url / "api/v1/plugins/batch")
response = httpx.post(url, json={"plugin_ids": plugin_ids})
response = httpx.post(url, json={"plugin_ids": plugin_ids}, headers={"X-Dify-Version": dify_config.project.version})
response.raise_for_status()
result: list[MarketplacePluginDeclaration] = []
for plugin in response.json()["data"]["plugins"]:

View File

@ -0,0 +1,42 @@
import logging
import re
from collections.abc import Sequence
from typing import Any
from core.tools.entities.tool_entities import CredentialType
logger = logging.getLogger(__name__)
def generate_provider_name(
providers: Sequence[Any], credential_type: CredentialType, fallback_context: str = "provider"
) -> str:
try:
return generate_incremental_name(
[provider.name for provider in providers],
f"{credential_type.get_name()}",
)
except Exception as e:
logger.warning("Error generating next provider name for %r: %r", fallback_context, e)
return f"{credential_type.get_name()} 1"
def generate_incremental_name(
names: Sequence[str],
default_pattern: str,
) -> str:
pattern = rf"^{re.escape(default_pattern)}\s+(\d+)$"
numbers = []
for name in names:
if not name:
continue
match = re.match(pattern, name.strip())
if match:
numbers.append(int(match.group(1)))
if not numbers:
return f"{default_pattern} 1"
max_number = max(numbers)
return f"{default_pattern} {max_number + 1}"

View File

@ -8,27 +8,23 @@ import time
import httpx
from configs import dify_config
from core.helper.http_client_pooling import get_pooled_http_client
logger = logging.getLogger(__name__)
SSRF_DEFAULT_MAX_RETRIES = dify_config.SSRF_DEFAULT_MAX_RETRIES
http_request_node_ssl_verify = True # Default value for http_request_node_ssl_verify is True
try:
config_value = dify_config.HTTP_REQUEST_NODE_SSL_VERIFY
http_request_node_ssl_verify_lower = str(config_value).lower()
if http_request_node_ssl_verify_lower == "true":
http_request_node_ssl_verify = True
elif http_request_node_ssl_verify_lower == "false":
http_request_node_ssl_verify = False
else:
raise ValueError("Invalid value. HTTP_REQUEST_NODE_SSL_VERIFY should be 'True' or 'False'")
except NameError:
http_request_node_ssl_verify = True
BACKOFF_FACTOR = 0.5
STATUS_FORCELIST = [429, 500, 502, 503, 504]
_SSL_VERIFIED_POOL_KEY = "ssrf:verified"
_SSL_UNVERIFIED_POOL_KEY = "ssrf:unverified"
_SSRF_CLIENT_LIMITS = httpx.Limits(
max_connections=dify_config.SSRF_POOL_MAX_CONNECTIONS,
max_keepalive_connections=dify_config.SSRF_POOL_MAX_KEEPALIVE_CONNECTIONS,
keepalive_expiry=dify_config.SSRF_POOL_KEEPALIVE_EXPIRY,
)
class MaxRetriesExceededError(ValueError):
"""Raised when the maximum number of retries is exceeded."""
@ -36,6 +32,45 @@ class MaxRetriesExceededError(ValueError):
pass
def _create_proxy_mounts() -> dict[str, httpx.HTTPTransport]:
return {
"http://": httpx.HTTPTransport(
proxy=dify_config.SSRF_PROXY_HTTP_URL,
),
"https://": httpx.HTTPTransport(
proxy=dify_config.SSRF_PROXY_HTTPS_URL,
),
}
def _build_ssrf_client(verify: bool) -> httpx.Client:
if dify_config.SSRF_PROXY_ALL_URL:
return httpx.Client(
proxy=dify_config.SSRF_PROXY_ALL_URL,
verify=verify,
limits=_SSRF_CLIENT_LIMITS,
)
if dify_config.SSRF_PROXY_HTTP_URL and dify_config.SSRF_PROXY_HTTPS_URL:
return httpx.Client(
mounts=_create_proxy_mounts(),
verify=verify,
limits=_SSRF_CLIENT_LIMITS,
)
return httpx.Client(verify=verify, limits=_SSRF_CLIENT_LIMITS)
def _get_ssrf_client(ssl_verify_enabled: bool) -> httpx.Client:
if not isinstance(ssl_verify_enabled, bool):
raise ValueError("SSRF client verify flag must be a boolean")
return get_pooled_http_client(
_SSL_VERIFIED_POOL_KEY if ssl_verify_enabled else _SSL_UNVERIFIED_POOL_KEY,
lambda: _build_ssrf_client(verify=ssl_verify_enabled),
)
def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
if "allow_redirects" in kwargs:
allow_redirects = kwargs.pop("allow_redirects")
@ -50,33 +85,22 @@ def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
write=dify_config.SSRF_DEFAULT_WRITE_TIME_OUT,
)
if "ssl_verify" not in kwargs:
kwargs["ssl_verify"] = http_request_node_ssl_verify
ssl_verify = kwargs.pop("ssl_verify")
# prioritize per-call option, which can be switched on and off inside the HTTP node on the web UI
verify_option = kwargs.pop("ssl_verify", dify_config.HTTP_REQUEST_NODE_SSL_VERIFY)
client = _get_ssrf_client(verify_option)
retries = 0
while retries <= max_retries:
try:
if dify_config.SSRF_PROXY_ALL_URL:
with httpx.Client(proxy=dify_config.SSRF_PROXY_ALL_URL, verify=ssl_verify) as client:
response = client.request(method=method, url=url, **kwargs)
elif dify_config.SSRF_PROXY_HTTP_URL and dify_config.SSRF_PROXY_HTTPS_URL:
proxy_mounts = {
"http://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTP_URL, verify=ssl_verify),
"https://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTPS_URL, verify=ssl_verify),
}
with httpx.Client(mounts=proxy_mounts, verify=ssl_verify) as client:
response = client.request(method=method, url=url, **kwargs)
else:
with httpx.Client(verify=ssl_verify) as client:
response = client.request(method=method, url=url, **kwargs)
response = client.request(method=method, url=url, **kwargs)
if response.status_code not in STATUS_FORCELIST:
return response
else:
logger.warning(
"Received status code %s for URL %s which is in the force list", response.status_code, url
"Received status code %s for URL %s which is in the force list",
response.status_code,
url,
)
except httpx.RequestError as e:

View File

@ -358,6 +358,7 @@ class IndexingRunner:
extract_setting = ExtractSetting(
datasource_type=DatasourceType.NOTION.value,
notion_info={
"credential_id": data_source_info["credential_id"],
"notion_workspace_id": data_source_info["notion_workspace_id"],
"notion_obj_id": data_source_info["notion_page_id"],
"notion_page_type": data_source_info["type"],

View File

@ -2,7 +2,7 @@ import json
import logging
import re
from collections.abc import Sequence
from typing import cast
from typing import Protocol, cast
import json_repair
@ -28,12 +28,22 @@ from core.ops.ops_trace_manager import TraceQueueManager, TraceTask
from core.ops.utils import measure_time
from core.prompt.utils.prompt_template_parser import PromptTemplateParser
from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionMetadataKey
from core.workflow.graph_engine.entities.event import AgentLogEvent
from models import App, Message, WorkflowNodeExecutionModel, db
from extensions.ext_database import db
from extensions.ext_storage import storage
from models import App, Message, WorkflowNodeExecutionModel
from models.workflow import Workflow
logger = logging.getLogger(__name__)
class WorkflowServiceInterface(Protocol):
def get_draft_workflow(self, app_model: App, workflow_id: str | None = None) -> Workflow | None:
pass
def get_node_last_run(self, app_model: App, workflow: Workflow, node_id: str) -> WorkflowNodeExecutionModel | None:
pass
class LLMGenerator:
@classmethod
def generate_conversation_name(
@ -417,16 +427,17 @@ class LLMGenerator:
instruction: str,
model_config: dict,
ideal_output: str | None,
workflow_service: WorkflowServiceInterface,
):
from services.workflow_service import WorkflowService
session = db.session()
app: App | None = db.session.query(App).where(App.id == flow_id).first()
app: App | None = session.query(App).where(App.id == flow_id).first()
if not app:
raise ValueError("App not found.")
workflow = WorkflowService().get_draft_workflow(app_model=app)
workflow = workflow_service.get_draft_workflow(app_model=app)
if not workflow:
raise ValueError("Workflow not found for the given app model.")
last_run = WorkflowService().get_node_last_run(app_model=app, workflow=workflow, node_id=node_id)
last_run = workflow_service.get_node_last_run(app_model=app, workflow=workflow, node_id=node_id)
try:
node_type = cast(WorkflowNodeExecutionModel, last_run).node_type
except Exception:
@ -450,22 +461,22 @@ class LLMGenerator:
)
def agent_log_of(node_execution: WorkflowNodeExecutionModel) -> Sequence:
raw_agent_log = node_execution.execution_metadata_dict.get(WorkflowNodeExecutionMetadataKey.AGENT_LOG)
raw_agent_log = node_execution.execution_metadata_dict.get(WorkflowNodeExecutionMetadataKey.AGENT_LOG, [])
if not raw_agent_log:
return []
parsed: Sequence[AgentLogEvent] = json.loads(raw_agent_log)
def dict_of_event(event: AgentLogEvent):
return {
"status": event.status,
"error": event.error,
"data": event.data,
return [
{
"status": event["status"],
"error": event["error"],
"data": event["data"],
}
for event in raw_agent_log
]
return [dict_of_event(event) for event in parsed]
inputs = last_run.load_full_inputs(session, storage)
last_run_dict = {
"inputs": last_run.inputs_dict,
"inputs": inputs,
"status": last_run.status,
"error": last_run.error,
"agent_log": agent_log_of(last_run),

View File

@ -160,7 +160,7 @@ class ErrorData(BaseModel):
sentence.
"""
data: Any | None = None
data: Any = None
"""
Additional information about the error. The value of this member is defined by the
sender (e.g. detailed error information, nested errors etc.).

View File

@ -74,7 +74,7 @@ class TextPromptMessageContent(PromptMessageContent):
Model class for text prompt message content.
"""
type: Literal[PromptMessageContentType.TEXT] = PromptMessageContentType.TEXT
type: Literal[PromptMessageContentType.TEXT] = PromptMessageContentType.TEXT # type: ignore
data: str
@ -95,11 +95,11 @@ class MultiModalPromptMessageContent(PromptMessageContent):
class VideoPromptMessageContent(MultiModalPromptMessageContent):
type: Literal[PromptMessageContentType.VIDEO] = PromptMessageContentType.VIDEO
type: Literal[PromptMessageContentType.VIDEO] = PromptMessageContentType.VIDEO # type: ignore
class AudioPromptMessageContent(MultiModalPromptMessageContent):
type: Literal[PromptMessageContentType.AUDIO] = PromptMessageContentType.AUDIO
type: Literal[PromptMessageContentType.AUDIO] = PromptMessageContentType.AUDIO # type: ignore
class ImagePromptMessageContent(MultiModalPromptMessageContent):
@ -111,12 +111,12 @@ class ImagePromptMessageContent(MultiModalPromptMessageContent):
LOW = auto()
HIGH = auto()
type: Literal[PromptMessageContentType.IMAGE] = PromptMessageContentType.IMAGE
type: Literal[PromptMessageContentType.IMAGE] = PromptMessageContentType.IMAGE # type: ignore
detail: DETAIL = DETAIL.LOW
class DocumentPromptMessageContent(MultiModalPromptMessageContent):
type: Literal[PromptMessageContentType.DOCUMENT] = PromptMessageContentType.DOCUMENT
type: Literal[PromptMessageContentType.DOCUMENT] = PromptMessageContentType.DOCUMENT # type: ignore
PromptMessageContentUnionTypes = Annotated[

View File

@ -23,8 +23,7 @@ from core.model_runtime.errors.invoke import (
InvokeRateLimitError,
InvokeServerUnavailableError,
)
from core.plugin.entities.plugin_daemon import PluginDaemonInnerError, PluginModelProviderEntity
from core.plugin.impl.model import PluginModelClient
from core.plugin.entities.plugin_daemon import PluginModelProviderEntity
class AIModel(BaseModel):
@ -52,6 +51,8 @@ class AIModel(BaseModel):
:return: Invoke error mapping
"""
from core.plugin.entities.plugin_daemon import PluginDaemonInnerError
return {
InvokeConnectionError: [InvokeConnectionError],
InvokeServerUnavailableError: [InvokeServerUnavailableError],
@ -139,6 +140,8 @@ class AIModel(BaseModel):
:param credentials: model credentials
:return: model schema
"""
from core.plugin.impl.model import PluginModelClient
plugin_model_manager = PluginModelClient()
cache_key = f"{self.tenant_id}:{self.plugin_id}:{self.provider_name}:{self.model_type.value}:{model}"
# sort credentials
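This hunk and the ones that follow repeatedly move PluginModelClient (and PluginAssetManager, PluginDaemonInnerError) from module level into the methods that use them, deferring the import until first call so the import cycle is broken. A toy illustration of the deferred-import idiom, with json standing in for the real dependency:

class PluginBackedModel:
    """Illustration only: defer an import that would otherwise create a cycle."""

    def get_schema(self, model: str) -> dict:
        # Imported inside the method: the dependency is resolved on first call,
        # not when this module is imported, so module import stays cycle-free.
        import json  # stand-in for the real plugin client module

        return json.loads(f'{{"model": "{model}"}}')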

View File

@ -22,7 +22,6 @@ from core.model_runtime.entities.model_entities import (
PriceType,
)
from core.model_runtime.model_providers.__base.ai_model import AIModel
from core.plugin.impl.model import PluginModelClient
logger = logging.getLogger(__name__)
@ -142,6 +141,8 @@ class LargeLanguageModel(AIModel):
result: Union[LLMResult, Generator[LLMResultChunk, None, None]]
try:
from core.plugin.impl.model import PluginModelClient
plugin_model_manager = PluginModelClient()
result = plugin_model_manager.invoke_llm(
tenant_id=self.tenant_id,
@ -340,6 +341,8 @@ class LargeLanguageModel(AIModel):
:return:
"""
if dify_config.PLUGIN_BASED_TOKEN_COUNTING_ENABLED:
from core.plugin.impl.model import PluginModelClient
plugin_model_manager = PluginModelClient()
return plugin_model_manager.get_llm_num_tokens(
tenant_id=self.tenant_id,

View File

@ -4,7 +4,6 @@ from pydantic import ConfigDict
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.model_providers.__base.ai_model import AIModel
from core.plugin.impl.model import PluginModelClient
class ModerationModel(AIModel):
@ -30,6 +29,8 @@ class ModerationModel(AIModel):
self.started_at = time.perf_counter()
try:
from core.plugin.impl.model import PluginModelClient
plugin_model_manager = PluginModelClient()
return plugin_model_manager.invoke_moderation(
tenant_id=self.tenant_id,

View File

@ -1,7 +1,6 @@
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.entities.rerank_entities import RerankResult
from core.model_runtime.model_providers.__base.ai_model import AIModel
from core.plugin.impl.model import PluginModelClient
class RerankModel(AIModel):
@ -34,6 +33,8 @@ class RerankModel(AIModel):
:return: rerank result
"""
try:
from core.plugin.impl.model import PluginModelClient
plugin_model_manager = PluginModelClient()
return plugin_model_manager.invoke_rerank(
tenant_id=self.tenant_id,

View File

@ -4,7 +4,6 @@ from pydantic import ConfigDict
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.model_providers.__base.ai_model import AIModel
from core.plugin.impl.model import PluginModelClient
class Speech2TextModel(AIModel):
@ -28,6 +27,8 @@ class Speech2TextModel(AIModel):
:return: text for given audio file
"""
try:
from core.plugin.impl.model import PluginModelClient
plugin_model_manager = PluginModelClient()
return plugin_model_manager.invoke_speech_to_text(
tenant_id=self.tenant_id,

View File

@ -4,7 +4,6 @@ from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
from core.model_runtime.model_providers.__base.ai_model import AIModel
from core.plugin.impl.model import PluginModelClient
class TextEmbeddingModel(AIModel):
@ -35,6 +34,8 @@ class TextEmbeddingModel(AIModel):
:param input_type: input type
:return: embeddings result
"""
from core.plugin.impl.model import PluginModelClient
try:
plugin_model_manager = PluginModelClient()
return plugin_model_manager.invoke_text_embedding(
@ -59,6 +60,8 @@ class TextEmbeddingModel(AIModel):
:param texts: texts to embed
:return:
"""
from core.plugin.impl.model import PluginModelClient
plugin_model_manager = PluginModelClient()
return plugin_model_manager.get_text_embedding_num_tokens(
tenant_id=self.tenant_id,

View File

@ -15,7 +15,7 @@ class GPT2Tokenizer:
use gpt2 tokenizer to get num tokens
"""
_tokenizer = GPT2Tokenizer.get_encoder()
tokens = _tokenizer.encode(text)
tokens = _tokenizer.encode(text) # type: ignore
return len(tokens)
@staticmethod

View File

@ -5,7 +5,6 @@ from pydantic import ConfigDict
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.model_providers.__base.ai_model import AIModel
from core.plugin.impl.model import PluginModelClient
logger = logging.getLogger(__name__)
@ -41,6 +40,8 @@ class TTSModel(AIModel):
:return: translated audio file
"""
try:
from core.plugin.impl.model import PluginModelClient
plugin_model_manager = PluginModelClient()
return plugin_model_manager.invoke_tts(
tenant_id=self.tenant_id,
@ -64,6 +65,8 @@ class TTSModel(AIModel):
:param credentials: The credentials required to access the TTS model.
:return: A list of voices supported by the TTS model.
"""
from core.plugin.impl.model import PluginModelClient
plugin_model_manager = PluginModelClient()
return plugin_model_manager.get_tts_model_voices(
tenant_id=self.tenant_id,

View File

@ -15,16 +15,16 @@ from core.model_runtime.model_providers.__base.text_embedding_model import TextE
from core.model_runtime.model_providers.__base.tts_model import TTSModel
from core.model_runtime.schema_validators.model_credential_schema_validator import ModelCredentialSchemaValidator
from core.model_runtime.schema_validators.provider_credential_schema_validator import ProviderCredentialSchemaValidator
from core.plugin.entities.plugin import ModelProviderID
from core.plugin.entities.plugin_daemon import PluginModelProviderEntity
from core.plugin.impl.asset import PluginAssetManager
from core.plugin.impl.model import PluginModelClient
from models.provider_ids import ModelProviderID
logger = logging.getLogger(__name__)
class ModelProviderFactory:
def __init__(self, tenant_id: str):
from core.plugin.impl.model import PluginModelClient
self.tenant_id = tenant_id
self.plugin_model_manager = PluginModelClient()
@ -38,7 +38,7 @@ class ModelProviderFactory:
plugin_providers = self.get_plugin_model_providers()
return [provider.declaration for provider in plugin_providers]
def get_plugin_model_providers(self) -> Sequence[PluginModelProviderEntity]:
def get_plugin_model_providers(self) -> Sequence["PluginModelProviderEntity"]:
"""
Get all plugin model providers
:return: list of plugin model providers
@ -76,7 +76,7 @@ class ModelProviderFactory:
plugin_model_provider_entity = self.get_plugin_model_provider(provider=provider)
return plugin_model_provider_entity.declaration
def get_plugin_model_provider(self, provider: str) -> PluginModelProviderEntity:
def get_plugin_model_provider(self, provider: str) -> "PluginModelProviderEntity":
"""
Get plugin model provider
:param provider: provider name
@ -331,6 +331,8 @@ class ModelProviderFactory:
mime_type = image_mime_types.get(extension, "image/png")
# get icon bytes from plugin asset manager
from core.plugin.impl.asset import PluginAssetManager
plugin_asset_manager = PluginAssetManager()
return plugin_asset_manager.fetch_asset(tenant_id=self.tenant_id, id=file_name), mime_type
@ -340,5 +342,6 @@ class ModelProviderFactory:
:param provider: provider name
:return: plugin id and provider name
"""
provider_id = ModelProviderID(provider)
return provider_id.plugin_id, provider_id.provider_name

View File

@ -196,15 +196,15 @@ def jsonable_encoder(
return encoder(obj)
try:
data = dict(obj)
data = dict(obj) # type: ignore
except Exception as e:
errors: list[Exception] = []
errors.append(e)
try:
data = vars(obj)
data = vars(obj) # type: ignore
except Exception as e:
errors.append(e)
raise ValueError(errors) from e
raise ValueError(str(errors)) from e
return jsonable_encoder(
data,
by_alias=by_alias,

View File

@ -1,38 +1,28 @@
import json
import logging
from collections.abc import Sequence
from urllib.parse import urljoin
from opentelemetry.trace import Link, Status, StatusCode
from sqlalchemy import select
from sqlalchemy.orm import Session, sessionmaker
from sqlalchemy.orm import sessionmaker
from core.ops.aliyun_trace.data_exporter.traceclient import (
TraceClient,
build_endpoint,
convert_datetime_to_nanoseconds,
convert_to_span_id,
convert_to_trace_id,
create_link,
generate_span_id,
)
from core.ops.aliyun_trace.entities.aliyun_trace_entity import SpanData
from core.ops.aliyun_trace.entities.aliyun_trace_entity import SpanData, TraceMetadata
from core.ops.aliyun_trace.entities.semconv import (
GEN_AI_COMPLETION,
GEN_AI_FRAMEWORK,
GEN_AI_MODEL_NAME,
GEN_AI_INPUT_MESSAGE,
GEN_AI_OUTPUT_MESSAGE,
GEN_AI_PROMPT,
GEN_AI_PROMPT_TEMPLATE_TEMPLATE,
GEN_AI_PROMPT_TEMPLATE_VARIABLE,
GEN_AI_PROVIDER_NAME,
GEN_AI_REQUEST_MODEL,
GEN_AI_RESPONSE_FINISH_REASON,
GEN_AI_SESSION_ID,
GEN_AI_SPAN_KIND,
GEN_AI_SYSTEM,
GEN_AI_USAGE_INPUT_TOKENS,
GEN_AI_USAGE_OUTPUT_TOKENS,
GEN_AI_USAGE_TOTAL_TOKENS,
GEN_AI_USER_ID,
INPUT_VALUE,
OUTPUT_VALUE,
RETRIEVAL_DOCUMENT,
RETRIEVAL_QUERY,
TOOL_DESCRIPTION,
@ -40,6 +30,18 @@ from core.ops.aliyun_trace.entities.semconv import (
TOOL_PARAMETERS,
GenAISpanKind,
)
from core.ops.aliyun_trace.utils import (
create_common_span_attributes,
create_links_from_trace_id,
create_status_from_error,
extract_retrieval_documents,
format_input_messages,
format_output_messages,
format_retrieval_documents,
get_user_id_from_message_data,
get_workflow_node_status,
serialize_json_data,
)
from core.ops.base_trace_instance import BaseTraceInstance
from core.ops.entities.config_entity import AliyunConfig
from core.ops.entities.trace_entity import (
@ -52,15 +54,11 @@ from core.ops.entities.trace_entity import (
ToolTraceInfo,
WorkflowTraceInfo,
)
from core.rag.models.document import Document
from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository
from core.workflow.entities.workflow_node_execution import (
WorkflowNodeExecution,
WorkflowNodeExecutionMetadataKey,
WorkflowNodeExecutionStatus,
)
from core.workflow.nodes import NodeType
from models import Account, App, EndUser, TenantAccountJoin, WorkflowNodeExecutionTriggeredFrom, db
from core.workflow.entities import WorkflowNodeExecution
from core.workflow.enums import NodeType, WorkflowNodeExecutionMetadataKey
from extensions.ext_database import db
from models import WorkflowNodeExecutionTriggeredFrom
logger = logging.getLogger(__name__)
@ -71,8 +69,7 @@ class AliyunDataTrace(BaseTraceInstance):
aliyun_config: AliyunConfig,
):
super().__init__(aliyun_config)
base_url = aliyun_config.endpoint.rstrip("/")
endpoint = urljoin(base_url, f"adapt_{aliyun_config.license_key}/api/otlp/traces")
endpoint = build_endpoint(aliyun_config.endpoint, aliyun_config.license_key)
self.trace_client = TraceClient(service_name=aliyun_config.app_name, endpoint=endpoint)
def trace(self, trace_info: BaseTraceInfo):
@ -98,423 +95,425 @@ class AliyunDataTrace(BaseTraceInstance):
try:
return self.trace_client.get_project_url()
except Exception as e:
logger.info("Aliyun get run url failed: %s", str(e), exc_info=True)
raise ValueError(f"Aliyun get run url failed: {str(e)}")
logger.info("Aliyun get project url failed: %s", str(e), exc_info=True)
raise ValueError(f"Aliyun get project url failed: {str(e)}")
def workflow_trace(self, trace_info: WorkflowTraceInfo):
trace_id = convert_to_trace_id(trace_info.workflow_run_id)
links = []
if trace_info.trace_id:
links.append(create_link(trace_id_str=trace_info.trace_id))
workflow_span_id = convert_to_span_id(trace_info.workflow_run_id, "workflow")
self.add_workflow_span(trace_id, workflow_span_id, trace_info, links)
trace_metadata = TraceMetadata(
trace_id=convert_to_trace_id(trace_info.workflow_run_id),
workflow_span_id=convert_to_span_id(trace_info.workflow_run_id, "workflow"),
session_id=trace_info.metadata.get("conversation_id") or "",
user_id=str(trace_info.metadata.get("user_id") or ""),
links=create_links_from_trace_id(trace_info.trace_id),
)
self.add_workflow_span(trace_info, trace_metadata)
workflow_node_executions = self.get_workflow_node_executions(trace_info)
for node_execution in workflow_node_executions:
node_span = self.build_workflow_node_span(node_execution, trace_id, trace_info, workflow_span_id)
node_span = self.build_workflow_node_span(node_execution, trace_info, trace_metadata)
self.trace_client.add_span(node_span)
def message_trace(self, trace_info: MessageTraceInfo):
message_data = trace_info.message_data
if message_data is None:
return
message_id = trace_info.message_id
user_id = get_user_id_from_message_data(message_data)
status = create_status_from_error(trace_info.error)
user_id = message_data.from_account_id
if message_data.from_end_user_id:
end_user_data: EndUser | None = (
db.session.query(EndUser).where(EndUser.id == message_data.from_end_user_id).first()
)
if end_user_data is not None:
user_id = end_user_data.session_id
trace_metadata = TraceMetadata(
trace_id=convert_to_trace_id(message_id),
workflow_span_id=0,
session_id=trace_info.metadata.get("conversation_id") or "",
user_id=user_id,
links=create_links_from_trace_id(trace_info.trace_id),
)
status: Status = Status(StatusCode.OK)
if trace_info.error:
status = Status(StatusCode.ERROR, trace_info.error)
trace_id = convert_to_trace_id(message_id)
links = []
if trace_info.trace_id:
links.append(create_link(trace_id_str=trace_info.trace_id))
inputs_json = serialize_json_data(trace_info.inputs)
outputs_str = str(trace_info.outputs)
message_span_id = convert_to_span_id(message_id, "message")
message_span = SpanData(
trace_id=trace_id,
trace_id=trace_metadata.trace_id,
parent_span_id=None,
span_id=message_span_id,
name="message",
start_time=convert_datetime_to_nanoseconds(trace_info.start_time),
end_time=convert_datetime_to_nanoseconds(trace_info.end_time),
attributes={
GEN_AI_SESSION_ID: trace_info.metadata.get("conversation_id") or "",
GEN_AI_USER_ID: str(user_id),
GEN_AI_SPAN_KIND: GenAISpanKind.CHAIN.value,
GEN_AI_FRAMEWORK: "dify",
INPUT_VALUE: json.dumps(trace_info.inputs, ensure_ascii=False),
OUTPUT_VALUE: str(trace_info.outputs),
},
attributes=create_common_span_attributes(
session_id=trace_metadata.session_id,
user_id=trace_metadata.user_id,
span_kind=GenAISpanKind.CHAIN,
inputs=inputs_json,
outputs=outputs_str,
),
status=status,
links=links,
links=trace_metadata.links,
)
self.trace_client.add_span(message_span)
app_model_config = getattr(trace_info.message_data, "app_model_config", {})
pre_prompt = getattr(app_model_config, "pre_prompt", "")
inputs_data = getattr(trace_info.message_data, "inputs", {})
llm_span = SpanData(
trace_id=trace_id,
trace_id=trace_metadata.trace_id,
parent_span_id=message_span_id,
span_id=convert_to_span_id(message_id, "llm"),
name="llm",
start_time=convert_datetime_to_nanoseconds(trace_info.start_time),
end_time=convert_datetime_to_nanoseconds(trace_info.end_time),
attributes={
GEN_AI_SESSION_ID: trace_info.metadata.get("conversation_id") or "",
GEN_AI_USER_ID: str(user_id),
GEN_AI_SPAN_KIND: GenAISpanKind.LLM.value,
GEN_AI_FRAMEWORK: "dify",
GEN_AI_MODEL_NAME: trace_info.metadata.get("ls_model_name") or "",
GEN_AI_SYSTEM: trace_info.metadata.get("ls_provider") or "",
**create_common_span_attributes(
session_id=trace_metadata.session_id,
user_id=trace_metadata.user_id,
span_kind=GenAISpanKind.LLM,
inputs=inputs_json,
outputs=outputs_str,
),
GEN_AI_REQUEST_MODEL: trace_info.metadata.get("ls_model_name") or "",
GEN_AI_PROVIDER_NAME: trace_info.metadata.get("ls_provider") or "",
GEN_AI_USAGE_INPUT_TOKENS: str(trace_info.message_tokens),
GEN_AI_USAGE_OUTPUT_TOKENS: str(trace_info.answer_tokens),
GEN_AI_USAGE_TOTAL_TOKENS: str(trace_info.total_tokens),
GEN_AI_PROMPT_TEMPLATE_VARIABLE: json.dumps(inputs_data, ensure_ascii=False),
GEN_AI_PROMPT_TEMPLATE_TEMPLATE: pre_prompt,
GEN_AI_PROMPT: json.dumps(trace_info.inputs, ensure_ascii=False),
GEN_AI_COMPLETION: str(trace_info.outputs),
INPUT_VALUE: json.dumps(trace_info.inputs, ensure_ascii=False),
OUTPUT_VALUE: str(trace_info.outputs),
GEN_AI_PROMPT: inputs_json,
GEN_AI_COMPLETION: outputs_str,
},
status=status,
links=trace_metadata.links,
)
self.trace_client.add_span(llm_span)
def dataset_retrieval_trace(self, trace_info: DatasetRetrievalTraceInfo):
if trace_info.message_data is None:
return
message_id = trace_info.message_id
trace_id = convert_to_trace_id(message_id)
links = []
if trace_info.trace_id:
links.append(create_link(trace_id_str=trace_info.trace_id))
trace_metadata = TraceMetadata(
trace_id=convert_to_trace_id(message_id),
workflow_span_id=0,
session_id=trace_info.metadata.get("conversation_id") or "",
user_id=str(trace_info.metadata.get("user_id") or ""),
links=create_links_from_trace_id(trace_info.trace_id),
)
documents_data = extract_retrieval_documents(trace_info.documents)
documents_json = serialize_json_data(documents_data)
inputs_str = str(trace_info.inputs)
dataset_retrieval_span = SpanData(
trace_id=trace_id,
trace_id=trace_metadata.trace_id,
parent_span_id=convert_to_span_id(message_id, "message"),
span_id=generate_span_id(),
name="dataset_retrieval",
start_time=convert_datetime_to_nanoseconds(trace_info.start_time),
end_time=convert_datetime_to_nanoseconds(trace_info.end_time),
attributes={
GEN_AI_SPAN_KIND: GenAISpanKind.RETRIEVER.value,
GEN_AI_FRAMEWORK: "dify",
RETRIEVAL_QUERY: str(trace_info.inputs),
RETRIEVAL_DOCUMENT: json.dumps(documents_data, ensure_ascii=False),
INPUT_VALUE: str(trace_info.inputs),
OUTPUT_VALUE: json.dumps(documents_data, ensure_ascii=False),
**create_common_span_attributes(
session_id=trace_metadata.session_id,
user_id=trace_metadata.user_id,
span_kind=GenAISpanKind.RETRIEVER,
inputs=inputs_str,
outputs=documents_json,
),
RETRIEVAL_QUERY: inputs_str,
RETRIEVAL_DOCUMENT: documents_json,
},
links=links,
links=trace_metadata.links,
)
self.trace_client.add_span(dataset_retrieval_span)
def tool_trace(self, trace_info: ToolTraceInfo):
if trace_info.message_data is None:
return
message_id = trace_info.message_id
status = create_status_from_error(trace_info.error)
status: Status = Status(StatusCode.OK)
if trace_info.error:
status = Status(StatusCode.ERROR, trace_info.error)
trace_metadata = TraceMetadata(
trace_id=convert_to_trace_id(message_id),
workflow_span_id=0,
session_id=trace_info.metadata.get("conversation_id") or "",
user_id=str(trace_info.metadata.get("user_id") or ""),
links=create_links_from_trace_id(trace_info.trace_id),
)
trace_id = convert_to_trace_id(message_id)
links = []
if trace_info.trace_id:
links.append(create_link(trace_id_str=trace_info.trace_id))
tool_config_json = serialize_json_data(trace_info.tool_config)
tool_inputs_json = serialize_json_data(trace_info.tool_inputs)
inputs_json = serialize_json_data(trace_info.inputs)
tool_span = SpanData(
trace_id=trace_id,
trace_id=trace_metadata.trace_id,
parent_span_id=convert_to_span_id(message_id, "message"),
span_id=generate_span_id(),
name=trace_info.tool_name,
start_time=convert_datetime_to_nanoseconds(trace_info.start_time),
end_time=convert_datetime_to_nanoseconds(trace_info.end_time),
attributes={
GEN_AI_SPAN_KIND: GenAISpanKind.TOOL.value,
GEN_AI_FRAMEWORK: "dify",
**create_common_span_attributes(
session_id=trace_metadata.session_id,
user_id=trace_metadata.user_id,
span_kind=GenAISpanKind.TOOL,
inputs=inputs_json,
outputs=str(trace_info.tool_outputs),
),
TOOL_NAME: trace_info.tool_name,
TOOL_DESCRIPTION: json.dumps(trace_info.tool_config, ensure_ascii=False),
TOOL_PARAMETERS: json.dumps(trace_info.tool_inputs, ensure_ascii=False),
INPUT_VALUE: json.dumps(trace_info.inputs, ensure_ascii=False),
OUTPUT_VALUE: str(trace_info.tool_outputs),
TOOL_DESCRIPTION: tool_config_json,
TOOL_PARAMETERS: tool_inputs_json,
},
status=status,
links=links,
links=trace_metadata.links,
)
self.trace_client.add_span(tool_span)
def get_workflow_node_executions(self, trace_info: WorkflowTraceInfo) -> Sequence[WorkflowNodeExecution]:
# fetch all node executions for this workflow run via the repository
session_factory = sessionmaker(bind=db.engine)
# Find the app's creator account
with Session(db.engine, expire_on_commit=False) as session:
# Get the app to find its creator
app_id = trace_info.metadata.get("app_id")
if not app_id:
raise ValueError("No app_id found in trace_info metadata")
app_stmt = select(App).where(App.id == app_id)
app = session.scalar(app_stmt)
if not app:
raise ValueError(f"App with id {app_id} not found")
app_id = trace_info.metadata.get("app_id")
if not app_id:
raise ValueError("No app_id found in trace_info metadata")
if not app.created_by:
raise ValueError(f"App with id {app_id} has no creator (created_by is None)")
account_stmt = select(Account).where(Account.id == app.created_by)
service_account = session.scalar(account_stmt)
if not service_account:
raise ValueError(f"Creator account with id {app.created_by} not found for app {app_id}")
current_tenant = (
session.query(TenantAccountJoin).filter_by(account_id=service_account.id, current=True).first()
)
if not current_tenant:
raise ValueError(f"Current tenant not found for account {service_account.id}")
service_account.set_tenant_id(current_tenant.tenant_id)
service_account = self.get_service_account_with_tenant(app_id)
session_factory = sessionmaker(bind=db.engine)
workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository(
session_factory=session_factory,
user=service_account,
app_id=trace_info.metadata.get("app_id"),
app_id=app_id,
triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN,
)
# Get all executions for this workflow run
workflow_node_executions = workflow_node_execution_repository.get_by_workflow_run(
workflow_run_id=trace_info.workflow_run_id
)
return workflow_node_executions
return workflow_node_execution_repository.get_by_workflow_run(workflow_run_id=trace_info.workflow_run_id)
def build_workflow_node_span(
self, node_execution: WorkflowNodeExecution, trace_id: int, trace_info: WorkflowTraceInfo, workflow_span_id: int
self, node_execution: WorkflowNodeExecution, trace_info: WorkflowTraceInfo, trace_metadata: TraceMetadata
):
try:
if node_execution.node_type == NodeType.LLM:
node_span = self.build_workflow_llm_span(trace_id, workflow_span_id, trace_info, node_execution)
node_span = self.build_workflow_llm_span(trace_info, node_execution, trace_metadata)
elif node_execution.node_type == NodeType.KNOWLEDGE_RETRIEVAL:
node_span = self.build_workflow_retrieval_span(trace_id, workflow_span_id, trace_info, node_execution)
node_span = self.build_workflow_retrieval_span(trace_info, node_execution, trace_metadata)
elif node_execution.node_type == NodeType.TOOL:
node_span = self.build_workflow_tool_span(trace_id, workflow_span_id, trace_info, node_execution)
node_span = self.build_workflow_tool_span(trace_info, node_execution, trace_metadata)
else:
node_span = self.build_workflow_task_span(trace_id, workflow_span_id, trace_info, node_execution)
node_span = self.build_workflow_task_span(trace_info, node_execution, trace_metadata)
return node_span
except Exception as e:
logger.debug("Error occurred in build_workflow_node_span: %s", e, exc_info=True)
return None
def get_workflow_node_status(self, node_execution: WorkflowNodeExecution) -> Status:
span_status: Status = Status(StatusCode.UNSET)
if node_execution.status == WorkflowNodeExecutionStatus.SUCCEEDED:
span_status = Status(StatusCode.OK)
elif node_execution.status in [WorkflowNodeExecutionStatus.FAILED, WorkflowNodeExecutionStatus.EXCEPTION]:
span_status = Status(StatusCode.ERROR, str(node_execution.error))
return span_status
def build_workflow_task_span(
self, trace_id: int, workflow_span_id: int, trace_info: WorkflowTraceInfo, node_execution: WorkflowNodeExecution
self, trace_info: WorkflowTraceInfo, node_execution: WorkflowNodeExecution, trace_metadata: TraceMetadata
) -> SpanData:
inputs_json = serialize_json_data(node_execution.inputs)
outputs_json = serialize_json_data(node_execution.outputs)
return SpanData(
trace_id=trace_id,
parent_span_id=workflow_span_id,
trace_id=trace_metadata.trace_id,
parent_span_id=trace_metadata.workflow_span_id,
span_id=convert_to_span_id(node_execution.id, "node"),
name=node_execution.title,
start_time=convert_datetime_to_nanoseconds(node_execution.created_at),
end_time=convert_datetime_to_nanoseconds(node_execution.finished_at),
attributes={
GEN_AI_SESSION_ID: trace_info.metadata.get("conversation_id") or "",
GEN_AI_SPAN_KIND: GenAISpanKind.TASK.value,
GEN_AI_FRAMEWORK: "dify",
INPUT_VALUE: json.dumps(node_execution.inputs, ensure_ascii=False),
OUTPUT_VALUE: json.dumps(node_execution.outputs, ensure_ascii=False),
},
status=self.get_workflow_node_status(node_execution),
attributes=create_common_span_attributes(
session_id=trace_metadata.session_id,
user_id=trace_metadata.user_id,
span_kind=GenAISpanKind.TASK,
inputs=inputs_json,
outputs=outputs_json,
),
status=get_workflow_node_status(node_execution),
links=trace_metadata.links,
)
def build_workflow_tool_span(
self, trace_id: int, workflow_span_id: int, trace_info: WorkflowTraceInfo, node_execution: WorkflowNodeExecution
self, trace_info: WorkflowTraceInfo, node_execution: WorkflowNodeExecution, trace_metadata: TraceMetadata
) -> SpanData:
tool_des = {}
if node_execution.metadata:
tool_des = node_execution.metadata.get(WorkflowNodeExecutionMetadataKey.TOOL_INFO, {})
inputs_json = serialize_json_data(node_execution.inputs or {})
outputs_json = serialize_json_data(node_execution.outputs)
return SpanData(
trace_id=trace_id,
parent_span_id=workflow_span_id,
trace_id=trace_metadata.trace_id,
parent_span_id=trace_metadata.workflow_span_id,
span_id=convert_to_span_id(node_execution.id, "node"),
name=node_execution.title,
start_time=convert_datetime_to_nanoseconds(node_execution.created_at),
end_time=convert_datetime_to_nanoseconds(node_execution.finished_at),
attributes={
GEN_AI_SPAN_KIND: GenAISpanKind.TOOL.value,
GEN_AI_FRAMEWORK: "dify",
**create_common_span_attributes(
session_id=trace_metadata.session_id,
user_id=trace_metadata.user_id,
span_kind=GenAISpanKind.TOOL,
inputs=inputs_json,
outputs=outputs_json,
),
TOOL_NAME: node_execution.title,
TOOL_DESCRIPTION: json.dumps(tool_des, ensure_ascii=False),
TOOL_PARAMETERS: json.dumps(node_execution.inputs or {}, ensure_ascii=False),
INPUT_VALUE: json.dumps(node_execution.inputs or {}, ensure_ascii=False),
OUTPUT_VALUE: json.dumps(node_execution.outputs, ensure_ascii=False),
TOOL_DESCRIPTION: serialize_json_data(tool_des),
TOOL_PARAMETERS: inputs_json,
},
status=self.get_workflow_node_status(node_execution),
status=get_workflow_node_status(node_execution),
links=trace_metadata.links,
)
def build_workflow_retrieval_span(
self, trace_id: int, workflow_span_id: int, trace_info: WorkflowTraceInfo, node_execution: WorkflowNodeExecution
self, trace_info: WorkflowTraceInfo, node_execution: WorkflowNodeExecution, trace_metadata: TraceMetadata
) -> SpanData:
input_value = ""
if node_execution.inputs:
input_value = str(node_execution.inputs.get("query", ""))
output_value = ""
if node_execution.outputs:
output_value = json.dumps(node_execution.outputs.get("result", []), ensure_ascii=False)
input_value = str(node_execution.inputs.get("query", "")) if node_execution.inputs else ""
output_value = serialize_json_data(node_execution.outputs.get("result", [])) if node_execution.outputs else ""
retrieval_documents = node_execution.outputs.get("result", []) if node_execution.outputs else []
semantic_retrieval_documents = format_retrieval_documents(retrieval_documents)
semantic_retrieval_documents_json = serialize_json_data(semantic_retrieval_documents)
return SpanData(
trace_id=trace_id,
parent_span_id=workflow_span_id,
trace_id=trace_metadata.trace_id,
parent_span_id=trace_metadata.workflow_span_id,
span_id=convert_to_span_id(node_execution.id, "node"),
name=node_execution.title,
start_time=convert_datetime_to_nanoseconds(node_execution.created_at),
end_time=convert_datetime_to_nanoseconds(node_execution.finished_at),
attributes={
GEN_AI_SPAN_KIND: GenAISpanKind.RETRIEVER.value,
GEN_AI_FRAMEWORK: "dify",
**create_common_span_attributes(
session_id=trace_metadata.session_id,
user_id=trace_metadata.user_id,
span_kind=GenAISpanKind.RETRIEVER,
inputs=input_value,
outputs=output_value,
),
RETRIEVAL_QUERY: input_value,
RETRIEVAL_DOCUMENT: output_value,
INPUT_VALUE: input_value,
OUTPUT_VALUE: output_value,
RETRIEVAL_DOCUMENT: semantic_retrieval_documents_json,
},
status=self.get_workflow_node_status(node_execution),
status=get_workflow_node_status(node_execution),
links=trace_metadata.links,
)
def build_workflow_llm_span(
self, trace_id: int, workflow_span_id: int, trace_info: WorkflowTraceInfo, node_execution: WorkflowNodeExecution
self, trace_info: WorkflowTraceInfo, node_execution: WorkflowNodeExecution, trace_metadata: TraceMetadata
) -> SpanData:
process_data = node_execution.process_data or {}
outputs = node_execution.outputs or {}
usage_data = process_data.get("usage", {}) if "usage" in process_data else outputs.get("usage", {})
prompts_json = serialize_json_data(process_data.get("prompts", []))
text_output = str(outputs.get("text", ""))
gen_ai_input_message = format_input_messages(process_data)
gen_ai_output_message = format_output_messages(outputs)
return SpanData(
trace_id=trace_id,
parent_span_id=workflow_span_id,
trace_id=trace_metadata.trace_id,
parent_span_id=trace_metadata.workflow_span_id,
span_id=convert_to_span_id(node_execution.id, "node"),
name=node_execution.title,
start_time=convert_datetime_to_nanoseconds(node_execution.created_at),
end_time=convert_datetime_to_nanoseconds(node_execution.finished_at),
attributes={
GEN_AI_SESSION_ID: trace_info.metadata.get("conversation_id") or "",
GEN_AI_SPAN_KIND: GenAISpanKind.LLM.value,
GEN_AI_FRAMEWORK: "dify",
GEN_AI_MODEL_NAME: process_data.get("model_name") or "",
GEN_AI_SYSTEM: process_data.get("model_provider") or "",
**create_common_span_attributes(
session_id=trace_metadata.session_id,
user_id=trace_metadata.user_id,
span_kind=GenAISpanKind.LLM,
inputs=prompts_json,
outputs=text_output,
),
GEN_AI_REQUEST_MODEL: process_data.get("model_name") or "",
GEN_AI_PROVIDER_NAME: process_data.get("model_provider") or "",
GEN_AI_USAGE_INPUT_TOKENS: str(usage_data.get("prompt_tokens", 0)),
GEN_AI_USAGE_OUTPUT_TOKENS: str(usage_data.get("completion_tokens", 0)),
GEN_AI_USAGE_TOTAL_TOKENS: str(usage_data.get("total_tokens", 0)),
GEN_AI_PROMPT: json.dumps(process_data.get("prompts", []), ensure_ascii=False),
GEN_AI_COMPLETION: str(outputs.get("text", "")),
GEN_AI_PROMPT: prompts_json,
GEN_AI_COMPLETION: text_output,
GEN_AI_RESPONSE_FINISH_REASON: outputs.get("finish_reason") or "",
INPUT_VALUE: json.dumps(process_data.get("prompts", []), ensure_ascii=False),
OUTPUT_VALUE: str(outputs.get("text", "")),
GEN_AI_INPUT_MESSAGE: gen_ai_input_message,
GEN_AI_OUTPUT_MESSAGE: gen_ai_output_message,
},
status=self.get_workflow_node_status(node_execution),
status=get_workflow_node_status(node_execution),
links=trace_metadata.links,
)
def add_workflow_span(
self, trace_id: int, workflow_span_id: int, trace_info: WorkflowTraceInfo, links: Sequence[Link]
):
def add_workflow_span(self, trace_info: WorkflowTraceInfo, trace_metadata: TraceMetadata):
message_span_id = None
if trace_info.message_id:
message_span_id = convert_to_span_id(trace_info.message_id, "message")
user_id = trace_info.metadata.get("user_id")
status: Status = Status(StatusCode.OK)
if trace_info.error:
status = Status(StatusCode.ERROR, trace_info.error)
if message_span_id: # chatflow
status = create_status_from_error(trace_info.error)
inputs_json = serialize_json_data(trace_info.workflow_run_inputs)
outputs_json = serialize_json_data(trace_info.workflow_run_outputs)
if message_span_id:
message_span = SpanData(
trace_id=trace_id,
trace_id=trace_metadata.trace_id,
parent_span_id=None,
span_id=message_span_id,
name="message",
start_time=convert_datetime_to_nanoseconds(trace_info.start_time),
end_time=convert_datetime_to_nanoseconds(trace_info.end_time),
attributes={
GEN_AI_SESSION_ID: trace_info.metadata.get("conversation_id") or "",
GEN_AI_USER_ID: str(user_id),
GEN_AI_SPAN_KIND: GenAISpanKind.CHAIN.value,
GEN_AI_FRAMEWORK: "dify",
INPUT_VALUE: trace_info.workflow_run_inputs.get("sys.query") or "",
OUTPUT_VALUE: json.dumps(trace_info.workflow_run_outputs, ensure_ascii=False),
},
attributes=create_common_span_attributes(
session_id=trace_metadata.session_id,
user_id=trace_metadata.user_id,
span_kind=GenAISpanKind.CHAIN,
inputs=trace_info.workflow_run_inputs.get("sys.query") or "",
outputs=outputs_json,
),
status=status,
links=links,
links=trace_metadata.links,
)
self.trace_client.add_span(message_span)
workflow_span = SpanData(
trace_id=trace_id,
trace_id=trace_metadata.trace_id,
parent_span_id=message_span_id,
span_id=workflow_span_id,
span_id=trace_metadata.workflow_span_id,
name="workflow",
start_time=convert_datetime_to_nanoseconds(trace_info.start_time),
end_time=convert_datetime_to_nanoseconds(trace_info.end_time),
attributes={
GEN_AI_USER_ID: str(user_id),
GEN_AI_SPAN_KIND: GenAISpanKind.CHAIN.value,
GEN_AI_FRAMEWORK: "dify",
INPUT_VALUE: json.dumps(trace_info.workflow_run_inputs, ensure_ascii=False),
OUTPUT_VALUE: json.dumps(trace_info.workflow_run_outputs, ensure_ascii=False),
},
attributes=create_common_span_attributes(
session_id=trace_metadata.session_id,
user_id=trace_metadata.user_id,
span_kind=GenAISpanKind.CHAIN,
inputs=inputs_json,
outputs=outputs_json,
),
status=status,
links=links,
links=trace_metadata.links,
)
self.trace_client.add_span(workflow_span)
def suggested_question_trace(self, trace_info: SuggestedQuestionTraceInfo):
message_id = trace_info.message_id
status: Status = Status(StatusCode.OK)
if trace_info.error:
status = Status(StatusCode.ERROR, trace_info.error)
status = create_status_from_error(trace_info.error)
trace_id = convert_to_trace_id(message_id)
links = []
if trace_info.trace_id:
links.append(create_link(trace_id_str=trace_info.trace_id))
trace_metadata = TraceMetadata(
trace_id=convert_to_trace_id(message_id),
workflow_span_id=0,
session_id=trace_info.metadata.get("conversation_id") or "",
user_id=str(trace_info.metadata.get("user_id") or ""),
links=create_links_from_trace_id(trace_info.trace_id),
)
inputs_json = serialize_json_data(trace_info.inputs)
suggested_question_json = serialize_json_data(trace_info.suggested_question)
suggested_question_span = SpanData(
trace_id=trace_id,
trace_id=trace_metadata.trace_id,
parent_span_id=convert_to_span_id(message_id, "message"),
span_id=convert_to_span_id(message_id, "suggested_question"),
name="suggested_question",
start_time=convert_datetime_to_nanoseconds(trace_info.start_time),
end_time=convert_datetime_to_nanoseconds(trace_info.end_time),
attributes={
GEN_AI_SPAN_KIND: GenAISpanKind.LLM.value,
GEN_AI_FRAMEWORK: "dify",
GEN_AI_MODEL_NAME: trace_info.metadata.get("ls_model_name") or "",
GEN_AI_SYSTEM: trace_info.metadata.get("ls_provider") or "",
GEN_AI_PROMPT: json.dumps(trace_info.inputs, ensure_ascii=False),
GEN_AI_COMPLETION: json.dumps(trace_info.suggested_question, ensure_ascii=False),
INPUT_VALUE: json.dumps(trace_info.inputs, ensure_ascii=False),
OUTPUT_VALUE: json.dumps(trace_info.suggested_question, ensure_ascii=False),
**create_common_span_attributes(
session_id=trace_metadata.session_id,
user_id=trace_metadata.user_id,
span_kind=GenAISpanKind.LLM,
inputs=inputs_json,
outputs=suggested_question_json,
),
GEN_AI_REQUEST_MODEL: trace_info.metadata.get("ls_model_name") or "",
GEN_AI_PROVIDER_NAME: trace_info.metadata.get("ls_provider") or "",
GEN_AI_PROMPT: inputs_json,
GEN_AI_COMPLETION: suggested_question_json,
},
status=status,
links=links,
links=trace_metadata.links,
)
self.trace_client.add_span(suggested_question_span)
def extract_retrieval_documents(documents: list[Document]):
documents_data = []
for document in documents:
document_data = {
"content": document.page_content,
"metadata": {
"dataset_id": document.metadata.get("dataset_id"),
"doc_id": document.metadata.get("doc_id"),
"document_id": document.metadata.get("document_id"),
},
"score": document.metadata.get("score"),
}
documents_data.append(document_data)
return documents_data
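The refactor above threads a single TraceMetadata value through every span builder and derives the shared attributes from one helper instead of repeating them per span. A condensed sketch of that shape, not the module's real API, with attribute keys taken from the semconv constants shown further below:

from dataclasses import dataclass, field

@dataclass
class TraceMeta:
    trace_id: int
    workflow_span_id: int
    session_id: str = ""
    user_id: str = ""
    links: list = field(default_factory=list)

def common_attributes(meta: TraceMeta, span_kind: str, inputs: str, outputs: str) -> dict:
    # every span carries the same session/user/framework attributes plus its own I/O
    return {
        "gen_ai.session.id": meta.session_id,
        "gen_ai.user.id": meta.user_id,
        "gen_ai.span.kind": span_kind,
        "gen_ai.framework": "dify",
        "input.value": inputs,
        "output.value": outputs,
    }

meta = TraceMeta(trace_id=1, workflow_span_id=2, session_id="conv-1", user_id="u-1")
attrs = common_attributes(meta, "CHAIN", inputs="{}", outputs="ok")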

View File

@ -7,8 +7,10 @@ import uuid
from collections import deque
from collections.abc import Sequence
from datetime import datetime
from typing import Final
from urllib.parse import urljoin
import requests
import httpx
from opentelemetry import trace as trace_api
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.resources import Resource
@ -20,8 +22,12 @@ from opentelemetry.trace import Link, SpanContext, TraceFlags
from configs import dify_config
from core.ops.aliyun_trace.entities.aliyun_trace_entity import SpanData
INVALID_SPAN_ID = 0x0000000000000000
INVALID_TRACE_ID = 0x00000000000000000000000000000000
INVALID_SPAN_ID: Final[int] = 0x0000000000000000
INVALID_TRACE_ID: Final[int] = 0x00000000000000000000000000000000
DEFAULT_TIMEOUT: Final[int] = 5
DEFAULT_MAX_QUEUE_SIZE: Final[int] = 1000
DEFAULT_SCHEDULE_DELAY_SEC: Final[int] = 5
DEFAULT_MAX_EXPORT_BATCH_SIZE: Final[int] = 50
logger = logging.getLogger(__name__)
@ -31,9 +37,9 @@ class TraceClient:
self,
service_name: str,
endpoint: str,
max_queue_size: int = 1000,
schedule_delay_sec: int = 5,
max_export_batch_size: int = 50,
max_queue_size: int = DEFAULT_MAX_QUEUE_SIZE,
schedule_delay_sec: int = DEFAULT_SCHEDULE_DELAY_SEC,
max_export_batch_size: int = DEFAULT_MAX_EXPORT_BATCH_SIZE,
):
self.endpoint = endpoint
self.resource = Resource(
@ -63,24 +69,25 @@ class TraceClient:
def export(self, spans: Sequence[ReadableSpan]):
self.exporter.export(spans)
def api_check(self):
def api_check(self) -> bool:
try:
response = requests.head(self.endpoint, timeout=5)
response = httpx.head(self.endpoint, timeout=DEFAULT_TIMEOUT)
if response.status_code == 405:
return True
else:
logger.debug("AliyunTrace API check failed: Unexpected status code: %s", response.status_code)
return False
except requests.RequestException as e:
except httpx.RequestError as e:
logger.debug("AliyunTrace API check failed: %s", str(e))
raise ValueError(f"AliyunTrace API check failed: {str(e)}")
def get_project_url(self):
def get_project_url(self) -> str:
return "https://arms.console.aliyun.com/#/llm"
def add_span(self, span_data: SpanData):
def add_span(self, span_data: SpanData | None) -> None:
if span_data is None:
return
span: ReadableSpan = self.span_builder.build_span(span_data)
with self.condition:
if len(self.queue) == self.max_queue_size:
@ -92,14 +99,14 @@ class TraceClient:
if len(self.queue) >= self.max_export_batch_size:
self.condition.notify()
def _worker(self):
def _worker(self) -> None:
while not self.done:
with self.condition:
if len(self.queue) < self.max_export_batch_size and not self.done:
self.condition.wait(timeout=self.schedule_delay_sec)
self._export_batch()
def _export_batch(self):
def _export_batch(self) -> None:
spans_to_export: list[ReadableSpan] = []
with self.condition:
while len(spans_to_export) < self.max_export_batch_size and self.queue:
@ -111,7 +118,7 @@ class TraceClient:
except Exception as e:
logger.debug("Error exporting spans: %s", e)
def shutdown(self):
def shutdown(self) -> None:
with self.condition:
self.done = True
self.condition.notify_all()
@ -121,7 +128,7 @@ class TraceClient:
class SpanBuilder:
def __init__(self, resource):
def __init__(self, resource: Resource) -> None:
self.resource = resource
self.instrumentation_scope = InstrumentationScope(
__name__,
@ -167,8 +174,12 @@ class SpanBuilder:
def create_link(trace_id_str: str) -> Link:
placeholder_span_id = 0x0000000000000000
trace_id = int(trace_id_str, 16)
placeholder_span_id = INVALID_SPAN_ID
try:
trace_id = int(trace_id_str, 16)
except ValueError as e:
raise ValueError(f"Invalid trace ID format: {trace_id_str}") from e
span_context = SpanContext(
trace_id=trace_id, span_id=placeholder_span_id, is_remote=False, trace_flags=TraceFlags(TraceFlags.SAMPLED)
)
@ -184,26 +195,29 @@ def generate_span_id() -> int:
def convert_to_trace_id(uuid_v4: str | None) -> int:
if uuid_v4 is None:
raise ValueError("UUID cannot be None")
try:
uuid_obj = uuid.UUID(uuid_v4)
return uuid_obj.int
except Exception as e:
raise ValueError(f"Invalid UUID input: {e}")
except ValueError as e:
raise ValueError(f"Invalid UUID input: {uuid_v4}") from e
def convert_string_to_id(string: str | None) -> int:
if not string:
return generate_span_id()
hash_bytes = hashlib.sha256(string.encode("utf-8")).digest()
id = int.from_bytes(hash_bytes[:8], byteorder="big", signed=False)
return id
return int.from_bytes(hash_bytes[:8], byteorder="big", signed=False)
def convert_to_span_id(uuid_v4: str | None, span_type: str) -> int:
if uuid_v4 is None:
raise ValueError("UUID cannot be None")
try:
uuid_obj = uuid.UUID(uuid_v4)
except Exception as e:
raise ValueError(f"Invalid UUID input: {e}")
except ValueError as e:
raise ValueError(f"Invalid UUID input: {uuid_v4}") from e
combined_key = f"{uuid_obj.hex}-{span_type}"
return convert_string_to_id(combined_key)
@ -212,5 +226,11 @@ def convert_datetime_to_nanoseconds(start_time_a: datetime | None) -> int | None
if start_time_a is None:
return None
timestamp_in_seconds = start_time_a.timestamp()
timestamp_in_nanoseconds = int(timestamp_in_seconds * 1e9)
return timestamp_in_nanoseconds
return int(timestamp_in_seconds * 1e9)
def build_endpoint(base_url: str, license_key: str) -> str:
if "log.aliyuncs.com" in base_url: # cms2.0 endpoint
return urljoin(base_url, f"adapt_{license_key}/api/v1/traces")
else: # xtrace endpoint
return urljoin(base_url, f"adapt_{license_key}/api/otlp/traces")
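convert_to_span_id above derives a stable 64-bit span id by hashing the run UUID together with the span type, so the same run always maps to the same span ids across exports. A stand-alone sketch of that derivation (assumed function name span_id_for):

import hashlib
import uuid

def span_id_for(run_id: str, span_type: str) -> int:
    # deterministic: the same run id and span type always map to the same 64-bit id
    key = f"{uuid.UUID(run_id).hex}-{span_type}"
    digest = hashlib.sha256(key.encode("utf-8")).digest()
    return int.from_bytes(digest[:8], byteorder="big", signed=False)

rid = str(uuid.uuid4())
assert span_id_for(rid, "workflow") == span_id_for(rid, "workflow")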

View File

@ -1,18 +1,34 @@
from collections.abc import Sequence
from dataclasses import dataclass
from typing import Any
from opentelemetry import trace as trace_api
from opentelemetry.sdk.trace import Event, Status, StatusCode
from opentelemetry.sdk.trace import Event
from opentelemetry.trace import Status, StatusCode
from pydantic import BaseModel, Field
@dataclass
class TraceMetadata:
"""Metadata for trace operations, containing common attributes for all spans in a trace."""
trace_id: int
workflow_span_id: int
session_id: str
user_id: str
links: list[trace_api.Link]
class SpanData(BaseModel):
"""Data model for span information in Aliyun trace system."""
model_config = {"arbitrary_types_allowed": True}
trace_id: int = Field(..., description="The unique identifier for the trace.")
parent_span_id: int | None = Field(None, description="The ID of the parent span, if any.")
span_id: int = Field(..., description="The unique identifier for this span.")
name: str = Field(..., description="The name of the span.")
attributes: dict[str, str] = Field(default_factory=dict, description="Attributes associated with the span.")
attributes: dict[str, Any] = Field(default_factory=dict, description="Attributes associated with the span.")
events: Sequence[Event] = Field(default_factory=list, description="Events recorded in the span.")
links: Sequence[trace_api.Link] = Field(default_factory=list, description="Links to other spans.")
status: Status = Field(default=Status(StatusCode.UNSET), description="The status of the span.")

View File

@ -1,56 +1,38 @@
from enum import StrEnum
from typing import Final
# public
GEN_AI_SESSION_ID = "gen_ai.session.id"
# Public attributes
GEN_AI_SESSION_ID: Final[str] = "gen_ai.session.id"
GEN_AI_USER_ID: Final[str] = "gen_ai.user.id"
GEN_AI_USER_NAME: Final[str] = "gen_ai.user.name"
GEN_AI_SPAN_KIND: Final[str] = "gen_ai.span.kind"
GEN_AI_FRAMEWORK: Final[str] = "gen_ai.framework"
GEN_AI_USER_ID = "gen_ai.user.id"
# Chain attributes
INPUT_VALUE: Final[str] = "input.value"
OUTPUT_VALUE: Final[str] = "output.value"
GEN_AI_USER_NAME = "gen_ai.user.name"
# Retriever attributes
RETRIEVAL_QUERY: Final[str] = "retrieval.query"
RETRIEVAL_DOCUMENT: Final[str] = "retrieval.document"
GEN_AI_SPAN_KIND = "gen_ai.span.kind"
# LLM attributes
GEN_AI_REQUEST_MODEL: Final[str] = "gen_ai.request.model"
GEN_AI_PROVIDER_NAME: Final[str] = "gen_ai.provider.name"
GEN_AI_USAGE_INPUT_TOKENS: Final[str] = "gen_ai.usage.input_tokens"
GEN_AI_USAGE_OUTPUT_TOKENS: Final[str] = "gen_ai.usage.output_tokens"
GEN_AI_USAGE_TOTAL_TOKENS: Final[str] = "gen_ai.usage.total_tokens"
GEN_AI_PROMPT: Final[str] = "gen_ai.prompt"
GEN_AI_COMPLETION: Final[str] = "gen_ai.completion"
GEN_AI_RESPONSE_FINISH_REASON: Final[str] = "gen_ai.response.finish_reason"
GEN_AI_FRAMEWORK = "gen_ai.framework"
GEN_AI_INPUT_MESSAGE: Final[str] = "gen_ai.input.messages"
GEN_AI_OUTPUT_MESSAGE: Final[str] = "gen_ai.output.messages"
# Chain
INPUT_VALUE = "input.value"
OUTPUT_VALUE = "output.value"
# Retriever
RETRIEVAL_QUERY = "retrieval.query"
RETRIEVAL_DOCUMENT = "retrieval.document"
# LLM
GEN_AI_MODEL_NAME = "gen_ai.model_name"
GEN_AI_SYSTEM = "gen_ai.system"
GEN_AI_USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens"
GEN_AI_USAGE_OUTPUT_TOKENS = "gen_ai.usage.output_tokens"
GEN_AI_USAGE_TOTAL_TOKENS = "gen_ai.usage.total_tokens"
GEN_AI_PROMPT_TEMPLATE_TEMPLATE = "gen_ai.prompt_template.template"
GEN_AI_PROMPT_TEMPLATE_VARIABLE = "gen_ai.prompt_template.variable"
GEN_AI_PROMPT = "gen_ai.prompt"
GEN_AI_COMPLETION = "gen_ai.completion"
GEN_AI_RESPONSE_FINISH_REASON = "gen_ai.response.finish_reason"
# Tool
TOOL_NAME = "tool.name"
TOOL_DESCRIPTION = "tool.description"
TOOL_PARAMETERS = "tool.parameters"
# Tool attributes
TOOL_NAME: Final[str] = "tool.name"
TOOL_DESCRIPTION: Final[str] = "tool.description"
TOOL_PARAMETERS: Final[str] = "tool.parameters"
class GenAISpanKind(StrEnum):

View File

@ -0,0 +1,190 @@
import json
from collections.abc import Mapping
from typing import Any
from opentelemetry.trace import Link, Status, StatusCode
from core.ops.aliyun_trace.entities.semconv import (
GEN_AI_FRAMEWORK,
GEN_AI_SESSION_ID,
GEN_AI_SPAN_KIND,
GEN_AI_USER_ID,
INPUT_VALUE,
OUTPUT_VALUE,
GenAISpanKind,
)
from core.rag.models.document import Document
from core.workflow.entities import WorkflowNodeExecution
from core.workflow.enums import WorkflowNodeExecutionStatus
from extensions.ext_database import db
from models import EndUser
# Constants
DEFAULT_JSON_ENSURE_ASCII = False
DEFAULT_FRAMEWORK_NAME = "dify"
def get_user_id_from_message_data(message_data) -> str:
user_id = message_data.from_account_id
if message_data.from_end_user_id:
end_user_data: EndUser | None = (
db.session.query(EndUser).where(EndUser.id == message_data.from_end_user_id).first()
)
if end_user_data is not None:
user_id = end_user_data.session_id
return user_id
def create_status_from_error(error: str | None) -> Status:
if error:
return Status(StatusCode.ERROR, error)
return Status(StatusCode.OK)
def get_workflow_node_status(node_execution: WorkflowNodeExecution) -> Status:
if node_execution.status == WorkflowNodeExecutionStatus.SUCCEEDED:
return Status(StatusCode.OK)
if node_execution.status in [WorkflowNodeExecutionStatus.FAILED, WorkflowNodeExecutionStatus.EXCEPTION]:
return Status(StatusCode.ERROR, str(node_execution.error))
return Status(StatusCode.UNSET)
def create_links_from_trace_id(trace_id: str | None) -> list[Link]:
from core.ops.aliyun_trace.data_exporter.traceclient import create_link
links = []
if trace_id:
links.append(create_link(trace_id_str=trace_id))
return links
def extract_retrieval_documents(documents: list[Document]) -> list[dict[str, Any]]:
documents_data = []
for document in documents:
document_data = {
"content": document.page_content,
"metadata": {
"dataset_id": document.metadata.get("dataset_id"),
"doc_id": document.metadata.get("doc_id"),
"document_id": document.metadata.get("document_id"),
},
"score": document.metadata.get("score"),
}
documents_data.append(document_data)
return documents_data
def serialize_json_data(data: Any, ensure_ascii: bool = DEFAULT_JSON_ENSURE_ASCII) -> str:
return json.dumps(data, ensure_ascii=ensure_ascii)
def create_common_span_attributes(
session_id: str = "",
user_id: str = "",
span_kind: str = GenAISpanKind.CHAIN,
framework: str = DEFAULT_FRAMEWORK_NAME,
inputs: str = "",
outputs: str = "",
) -> dict[str, Any]:
return {
GEN_AI_SESSION_ID: session_id,
GEN_AI_USER_ID: user_id,
GEN_AI_SPAN_KIND: span_kind,
GEN_AI_FRAMEWORK: framework,
INPUT_VALUE: inputs,
OUTPUT_VALUE: outputs,
}
def format_retrieval_documents(retrieval_documents: list) -> list:
try:
if not isinstance(retrieval_documents, list):
return []
semantic_documents = []
for doc in retrieval_documents:
if not isinstance(doc, dict):
continue
metadata = doc.get("metadata", {})
content = doc.get("content", "")
title = doc.get("title", "")
score = metadata.get("score", 0.0)
document_id = metadata.get("document_id", "")
semantic_metadata = {}
if title:
semantic_metadata["title"] = title
if metadata.get("source"):
semantic_metadata["source"] = metadata["source"]
elif metadata.get("_source"):
semantic_metadata["source"] = metadata["_source"]
if metadata.get("doc_metadata"):
doc_metadata = metadata["doc_metadata"]
if isinstance(doc_metadata, dict):
semantic_metadata.update(doc_metadata)
semantic_doc = {
"document": {"content": content, "metadata": semantic_metadata, "score": score, "id": document_id}
}
semantic_documents.append(semantic_doc)
return semantic_documents
except Exception:
return []
def format_input_messages(process_data: Mapping[str, Any]) -> str:
try:
if not isinstance(process_data, dict):
return serialize_json_data([])
prompts = process_data.get("prompts", [])
if not prompts:
return serialize_json_data([])
valid_roles = {"system", "user", "assistant", "tool"}
input_messages = []
for prompt in prompts:
if not isinstance(prompt, dict):
continue
role = prompt.get("role", "")
text = prompt.get("text", "")
if not role or role not in valid_roles:
continue
if text:
message = {"role": role, "parts": [{"type": "text", "content": text}]}
input_messages.append(message)
return serialize_json_data(input_messages)
except Exception:
return serialize_json_data([])
def format_output_messages(outputs: Mapping[str, Any]) -> str:
try:
if not isinstance(outputs, dict):
return serialize_json_data([])
text = outputs.get("text", "")
finish_reason = outputs.get("finish_reason", "")
if not text:
return serialize_json_data([])
valid_finish_reasons = {"stop", "length", "content_filter", "tool_call", "error"}
if finish_reason not in valid_finish_reasons:
finish_reason = "stop"
output_message = {
"role": "assistant",
"parts": [{"type": "text", "content": text}],
"finish_reason": finish_reason,
}
return serialize_json_data([output_message])
except Exception:
return serialize_json_data([])
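For context, a short usage example of the formatting helpers defined above; the input values are invented for illustration:

sample_process_data = {
    "prompts": [
        {"role": "system", "text": "You are a helpful assistant."},
        {"role": "user", "text": "Summarize the release notes."},
    ]
}
sample_outputs = {"text": "Here is the summary...", "finish_reason": "stop"}

# both helpers return JSON strings in the gen_ai.*.messages shape:
# [{"role": ..., "parts": [{"type": "text", "content": ...}], ...}]
print(format_input_messages(sample_process_data))
print(format_output_messages(sample_outputs))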

View File

@ -191,7 +191,8 @@ class AliyunConfig(BaseTracingConfig):
@field_validator("endpoint")
@classmethod
def endpoint_validator(cls, v, info: ValidationInfo):
return cls.validate_endpoint_url(v, "https://tracing-analysis-dc-hz.aliyuncs.com")
# Aliyun uses two endpoint URL formats, which may include a URL path
return validate_url_with_path(v, "https://tracing-analysis-dc-hz.aliyuncs.com")
OPS_FILE_PATH = "ops_trace/"

View File

@ -136,3 +136,4 @@ class TraceTaskName(StrEnum):
DATASET_RETRIEVAL_TRACE = "dataset_retrieval"
TOOL_TRACE = "tool"
GENERATE_NAME_TRACE = "generate_conversation_name"
DATASOURCE_TRACE = "datasource"

View File

@ -28,7 +28,7 @@ from core.ops.langfuse_trace.entities.langfuse_trace_entity import (
)
from core.ops.utils import filter_none_values
from core.repositories import DifyCoreRepositoryFactory
from core.workflow.nodes.enums import NodeType
from core.workflow.enums import NodeType
from extensions.ext_database import db
from models import EndUser, WorkflowNodeExecutionTriggeredFrom
from models.enums import MessageStatus

View File

@ -28,8 +28,7 @@ from core.ops.langsmith_trace.entities.langsmith_trace_entity import (
)
from core.ops.utils import filter_none_values, generate_dotted_order
from core.repositories import DifyCoreRepositoryFactory
from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionMetadataKey
from core.workflow.nodes.enums import NodeType
from core.workflow.enums import NodeType, WorkflowNodeExecutionMetadataKey
from extensions.ext_database import db
from models import EndUser, MessageFile, WorkflowNodeExecutionTriggeredFrom

View File

@ -22,8 +22,7 @@ from core.ops.entities.trace_entity import (
WorkflowTraceInfo,
)
from core.repositories import DifyCoreRepositoryFactory
from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionMetadataKey
from core.workflow.nodes.enums import NodeType
from core.workflow.enums import NodeType, WorkflowNodeExecutionMetadataKey
from extensions.ext_database import db
from models import EndUser, MessageFile, WorkflowNodeExecutionTriggeredFrom

View File

@ -6,7 +6,7 @@ import queue
import threading
import time
from datetime import timedelta
from typing import Any, Union
from typing import TYPE_CHECKING, Any, Optional, Union
from uuid import UUID, uuid4
from cachetools import LRUCache
@ -31,13 +31,15 @@ from core.ops.entities.trace_entity import (
WorkflowTraceInfo,
)
from core.ops.utils import get_message_data
from core.workflow.entities.workflow_execution import WorkflowExecution
from extensions.ext_database import db
from extensions.ext_storage import storage
from models.model import App, AppModelConfig, Conversation, Message, MessageFile, TraceAppConfig
from models.workflow import WorkflowAppLog, WorkflowRun
from tasks.ops_trace_task import process_trace_tasks
if TYPE_CHECKING:
from core.workflow.entities import WorkflowExecution
logger = logging.getLogger(__name__)
@ -153,7 +155,10 @@ class OpsTraceManager:
if key in tracing_config:
if "*" in tracing_config[key]:
# If the key contains '*', retain the original value from the current config
new_config[key] = current_trace_config.get(key, tracing_config[key])
if current_trace_config:
new_config[key] = current_trace_config.get(key, tracing_config[key])
else:
new_config[key] = tracing_config[key]
else:
# Otherwise, encrypt the key
new_config[key] = encrypt_token(tenant_id, tracing_config[key])
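The branch above merges an incoming tracing config that may contain masked secrets (values containing '*') with the stored config: masked keys keep the previously saved value when one exists, everything else is re-encrypted. A self-contained sketch of that merge rule, with encrypt_token stubbed out:

def merge_masked_config(tracing_config, current_trace_config, secret_keys):
    def encrypt(value):
        # Hypothetical stand-in for encrypt_token(tenant_id, value).
        return f"encrypted({value})"

    new_config = dict(tracing_config)
    for key in secret_keys:
        if key not in tracing_config:
            continue
        if "*" in tracing_config[key]:
            # Masked in the UI: keep the stored value if we have one.
            new_config[key] = (current_trace_config or {}).get(key, tracing_config[key])
        else:
            new_config[key] = encrypt(tracing_config[key])
    return new_config

merged = merge_masked_config({"api_key": "sk-***"}, {"api_key": "ciphertext"}, {"api_key"})
assert merged["api_key"] == "ciphertext"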
@ -407,7 +412,7 @@ class TraceTask:
self,
trace_type: Any,
message_id: str | None = None,
workflow_execution: WorkflowExecution | None = None,
workflow_execution: Optional["WorkflowExecution"] = None,
conversation_id: str | None = None,
user_id: str | None = None,
timer: Any | None = None,

View File

@ -23,8 +23,7 @@ from core.ops.entities.trace_entity import (
)
from core.ops.weave_trace.entities.weave_trace_entity import WeaveTraceModel
from core.repositories import DifyCoreRepositoryFactory
from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionMetadataKey
from core.workflow.nodes.enums import NodeType
from core.workflow.enums import NodeType, WorkflowNodeExecutionMetadataKey
from extensions.ext_database import db
from models import EndUser, MessageFile, WorkflowNodeExecutionTriggeredFrom
@ -63,7 +62,8 @@ class WeaveDataTrace(BaseTraceInstance):
self,
):
try:
project_url = f"https://wandb.ai/{self.weave_client._project_id()}"
project_identifier = f"{self.entity}/{self.project_name}" if self.entity else self.project_name
project_url = f"https://wandb.ai/{project_identifier}"
return project_url
except Exception as e:
logger.debug("Weave get run url failed: %s", str(e))
@ -418,14 +418,30 @@ class WeaveDataTrace(BaseTraceInstance):
if not login_status:
raise ValueError("Weave login failed")
else:
print("Weave login successful")
logger.info("Weave login successful")
return True
except Exception as e:
logger.debug("Weave API check failed: %s", str(e))
raise ValueError(f"Weave API check failed: {str(e)}")
def start_call(self, run_data: WeaveTraceModel, parent_run_id: str | None = None):
call = self.weave_client.create_call(op=run_data.op, inputs=run_data.inputs, attributes=run_data.attributes)
inputs = run_data.inputs
if inputs is None:
inputs = {}
elif not isinstance(inputs, dict):
inputs = {"inputs": str(inputs)}
attributes = run_data.attributes
if attributes is None:
attributes = {}
elif not isinstance(attributes, dict):
attributes = {"attributes": str(attributes)}
call = self.weave_client.create_call(
op=run_data.op,
inputs=inputs,
attributes=attributes,
)
self.calls[run_data.id] = call
if parent_run_id:
self.calls[run_data.id].parent_id = parent_run_id
@ -433,6 +449,7 @@ class WeaveDataTrace(BaseTraceInstance):
def finish_call(self, run_data: WeaveTraceModel):
call = self.calls.get(run_data.id)
if call:
self.weave_client.finish_call(call=call, output=run_data.outputs, exception=run_data.exception)
exception = Exception(run_data.exception) if run_data.exception else None
self.weave_client.finish_call(call=call, output=run_data.outputs, exception=exception)
else:
raise ValueError(f"Call with id {run_data.id} not found")

View File

@ -167,7 +167,6 @@ class PluginAppBackwardsInvocation(BaseBackwardsInvocation):
invoke_from=InvokeFrom.SERVICE_API,
streaming=stream,
call_depth=1,
workflow_thread_pool_id=None,
)
@classmethod

View File

@ -1,5 +1,5 @@
from core.plugin.backwards_invocation.base import BaseBackwardsInvocation
from core.workflow.nodes.enums import NodeType
from core.workflow.enums import NodeType
from core.workflow.nodes.parameter_extractor.entities import (
ModelConfig as ParameterExtractorModelConfig,
)

View File

@ -0,0 +1,21 @@
from collections.abc import Sequence
from pydantic import BaseModel, Field
from core.entities.provider_entities import ProviderConfig
class OAuthSchema(BaseModel):
"""
OAuth schema
"""
client_schema: Sequence[ProviderConfig] = Field(
default_factory=list,
description="client schema like client_id, client_secret, etc.",
)
credentials_schema: Sequence[ProviderConfig] = Field(
default_factory=list,
description="credentials schema like access_token, refresh_token, etc.",
)
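A minimal usage sketch of the new OAuthSchema; by default both schemas are empty lists, and a real configuration would pass ProviderConfig entries (whose exact fields are defined in core.entities.provider_entities):

schema = OAuthSchema()  # both fields default to empty sequences
assert list(schema.client_schema) == []
assert list(schema.credentials_schema) == []
# A populated schema would pass ProviderConfig instances, e.g.
# OAuthSchema(client_schema=[...client_id/client_secret configs...],
#             credentials_schema=[...access_token/refresh_token configs...]).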

View File

@ -6,7 +6,6 @@ from pydantic import BaseModel, Field, field_validator
from core.entities.parameter_entities import CommonParameterType
from core.tools.entities.common_entities import I18nObject
from core.workflow.nodes.base.entities import NumberType
class PluginParameterOption(BaseModel):
@ -153,7 +152,7 @@ def cast_parameter_value(typ: StrEnum, value: Any, /):
raise ValueError("The tools selector must be a list.")
return value
case PluginParameterType.ANY:
if value and not isinstance(value, str | dict | list | NumberType):
if value and not isinstance(value, str | dict | list | int | float):
raise ValueError("The var selector must be a string, dictionary, list or number.")
return value
case PluginParameterType.ARRAY:
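The ANY branch now accepts plain int/float in place of the removed NumberType alias. A standalone sketch of the check, outside the surrounding match statement:

def cast_any(value):
    # Mirrors the ANY case above: falsy values, strings, dicts, lists and
    # numbers pass through; anything else is rejected.
    if value and not isinstance(value, str | dict | list | int | float):
        raise ValueError("The var selector must be a string, dictionary, list or number.")
    return value

assert cast_any(3.14) == 3.14
assert cast_any({"key": "value"}) == {"key": "value"}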

View File

@ -1,14 +1,13 @@
import datetime
import re
from collections.abc import Mapping
from enum import StrEnum, auto
from typing import Any
from packaging.version import InvalidVersion, Version
from pydantic import BaseModel, Field, field_validator, model_validator
from werkzeug.exceptions import NotFound
from core.agent.plugin_entities import AgentStrategyProviderEntity
from core.datasource.entities.datasource_entities import DatasourceProviderEntity
from core.model_runtime.entities.provider_entities import ProviderEntity
from core.plugin.entities.base import BasePluginEntity
from core.plugin.entities.endpoint import EndpointProviderDeclaration
@ -63,6 +62,7 @@ class PluginCategory(StrEnum):
Model = auto()
Extension = auto()
AgentStrategy = "agent-strategy"
Datasource = "datasource"
class PluginDeclaration(BaseModel):
@ -70,6 +70,7 @@ class PluginDeclaration(BaseModel):
tools: list[str] | None = Field(default_factory=list[str])
models: list[str] | None = Field(default_factory=list[str])
endpoints: list[str] | None = Field(default_factory=list[str])
datasources: list[str] | None = Field(default_factory=list[str])
class Meta(BaseModel):
minimum_dify_version: str | None = Field(default=None)
@ -104,6 +105,7 @@ class PluginDeclaration(BaseModel):
model: ProviderEntity | None = None
endpoint: EndpointProviderDeclaration | None = None
agent_strategy: AgentStrategyProviderEntity | None = None
datasource: DatasourceProviderEntity | None = None
meta: Meta
@field_validator("version")
@ -123,6 +125,8 @@ class PluginDeclaration(BaseModel):
values["category"] = PluginCategory.Tool
elif values.get("model"):
values["category"] = PluginCategory.Model
elif values.get("datasource"):
values["category"] = PluginCategory.Datasource
elif values.get("agent_strategy"):
values["category"] = PluginCategory.AgentStrategy
else:
@ -156,55 +160,6 @@ class PluginEntity(PluginInstallation):
return self
class GenericProviderID:
organization: str
plugin_name: str
provider_name: str
is_hardcoded: bool
def to_string(self) -> str:
return str(self)
def __str__(self) -> str:
return f"{self.organization}/{self.plugin_name}/{self.provider_name}"
def __init__(self, value: str, is_hardcoded: bool = False):
if not value:
raise NotFound("plugin not found, please add plugin")
# check if the value is a valid plugin id with format: $organization/$plugin_name/$provider_name
if not re.match(r"^[a-z0-9_-]+\/[a-z0-9_-]+\/[a-z0-9_-]+$", value):
# check if matches [a-z0-9_-]+, if yes, append with langgenius/$value/$value
if re.match(r"^[a-z0-9_-]+$", value):
value = f"langgenius/{value}/{value}"
else:
raise ValueError(f"Invalid plugin id {value}")
self.organization, self.plugin_name, self.provider_name = value.split("/")
self.is_hardcoded = is_hardcoded
def is_langgenius(self) -> bool:
return self.organization == "langgenius"
@property
def plugin_id(self) -> str:
return f"{self.organization}/{self.plugin_name}"
class ModelProviderID(GenericProviderID):
def __init__(self, value: str, is_hardcoded: bool = False):
super().__init__(value, is_hardcoded)
if self.organization == "langgenius" and self.provider_name == "google":
self.plugin_name = "gemini"
class ToolProviderID(GenericProviderID):
def __init__(self, value: str, is_hardcoded: bool = False):
super().__init__(value, is_hardcoded)
if self.organization == "langgenius":
if self.provider_name in ["jina", "siliconflow", "stepfun", "gitee_ai"]:
self.plugin_name = f"{self.provider_name}_tool"
class PluginDependency(BaseModel):
class Type(StrEnum):
Github = PluginInstallationSource.Github
@ -223,6 +178,7 @@ class PluginDependency(BaseModel):
class Marketplace(BaseModel):
marketplace_plugin_unique_identifier: str
version: str | None = None
@property
def plugin_unique_identifier(self) -> str:
@ -230,6 +186,7 @@ class PluginDependency(BaseModel):
class Package(BaseModel):
plugin_unique_identifier: str
version: str | None = None
type: Type
value: Github | Marketplace | Package
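The GenericProviderID, ModelProviderID and ToolProviderID classes removed above now live in models.provider_ids (see the updated imports later in this diff). For reference, a standalone sketch of the id parsing they implement: a full organization/plugin/provider triple is accepted as-is, while a bare lowercase name is expanded under the langgenius namespace:

import re

def parse_provider_id(value):
    # Mirrors GenericProviderID.__init__; the class itself now lives in models.provider_ids.
    if not re.match(r"^[a-z0-9_-]+\/[a-z0-9_-]+\/[a-z0-9_-]+$", value):
        if re.match(r"^[a-z0-9_-]+$", value):
            value = f"langgenius/{value}/{value}"
        else:
            raise ValueError(f"Invalid plugin id {value}")
    organization, plugin_name, provider_name = value.split("/")
    return organization, plugin_name, provider_name

assert parse_provider_id("google") == ("langgenius", "google", "google")
assert parse_provider_id("langgenius/notion/notion") == ("langgenius", "notion", "notion")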

View File

@ -6,6 +6,7 @@ from typing import Any, Generic, TypeVar
from pydantic import BaseModel, ConfigDict, Field
from core.agent.plugin_entities import AgentProviderEntityWithPlugin
from core.datasource.entities.datasource_entities import DatasourceProviderEntityWithPlugin
from core.model_runtime.entities.model_entities import AIModelEntity
from core.model_runtime.entities.provider_entities import ProviderEntity
from core.plugin.entities.base import BasePluginEntity
@ -48,6 +49,14 @@ class PluginToolProviderEntity(BaseModel):
declaration: ToolProviderEntityWithPlugin
class PluginDatasourceProviderEntity(BaseModel):
provider: str
plugin_unique_identifier: str
plugin_id: str
is_authorized: bool = False
declaration: DatasourceProviderEntityWithPlugin
class PluginAgentProviderEntity(BaseModel):
provider: str
plugin_unique_identifier: str

View File

@ -2,13 +2,13 @@ from collections.abc import Generator
from typing import Any
from core.agent.entities import AgentInvokeMessage
from core.plugin.entities.plugin import GenericProviderID
from core.plugin.entities.plugin_daemon import (
PluginAgentProviderEntity,
)
from core.plugin.entities.request import PluginInvokeContext
from core.plugin.impl.base import BasePluginClient
from core.plugin.utils.chunk_merger import merge_blob_chunks
from models.provider_ids import GenericProviderID
class PluginAgentClient(BasePluginClient):

View File

@ -0,0 +1,372 @@
from collections.abc import Generator, Mapping
from typing import Any
from core.datasource.entities.datasource_entities import (
DatasourceMessage,
GetOnlineDocumentPageContentRequest,
OnlineDocumentPagesMessage,
OnlineDriveBrowseFilesRequest,
OnlineDriveBrowseFilesResponse,
OnlineDriveDownloadFileRequest,
WebsiteCrawlMessage,
)
from core.plugin.entities.plugin_daemon import (
PluginBasicBooleanResponse,
PluginDatasourceProviderEntity,
)
from core.plugin.impl.base import BasePluginClient
from core.schemas.resolver import resolve_dify_schema_refs
from models.provider_ids import DatasourceProviderID, GenericProviderID
from services.tools.tools_transform_service import ToolTransformService
class PluginDatasourceManager(BasePluginClient):
def fetch_datasource_providers(self, tenant_id: str) -> list[PluginDatasourceProviderEntity]:
"""
Fetch all datasource providers for the given tenant, including the built-in local file provider.
"""
def transformer(json_response: dict[str, Any]) -> dict:
if json_response.get("data"):
for provider in json_response.get("data", []):
declaration = provider.get("declaration", {}) or {}
provider_name = declaration.get("identity", {}).get("name")
for datasource in declaration.get("datasources", []):
datasource["identity"]["provider"] = provider_name
# resolve refs
if datasource.get("output_schema"):
datasource["output_schema"] = resolve_dify_schema_refs(datasource["output_schema"])
return json_response
response = self._request_with_plugin_daemon_response(
"GET",
f"plugin/{tenant_id}/management/datasources",
list[PluginDatasourceProviderEntity],
params={"page": 1, "page_size": 256},
transformer=transformer,
)
local_file_datasource_provider = PluginDatasourceProviderEntity(**self._get_local_file_datasource_provider())
for provider in response:
ToolTransformService.repack_provider(tenant_id=tenant_id, provider=provider)
all_response = [local_file_datasource_provider] + response
for provider in all_response:
provider.declaration.identity.name = f"{provider.plugin_id}/{provider.declaration.identity.name}"
# override the provider name for each tool to plugin_id/provider_name
for tool in provider.declaration.datasources:
tool.identity.provider = provider.declaration.identity.name
return all_response
def fetch_installed_datasource_providers(self, tenant_id: str) -> list[PluginDatasourceProviderEntity]:
"""
Fetch the installed datasource providers for the given tenant (without the built-in local file provider).
"""
def transformer(json_response: dict[str, Any]) -> dict:
if json_response.get("data"):
for provider in json_response.get("data", []):
declaration = provider.get("declaration", {}) or {}
provider_name = declaration.get("identity", {}).get("name")
for datasource in declaration.get("datasources", []):
datasource["identity"]["provider"] = provider_name
# resolve refs
if datasource.get("output_schema"):
datasource["output_schema"] = resolve_dify_schema_refs(datasource["output_schema"])
return json_response
response = self._request_with_plugin_daemon_response(
"GET",
f"plugin/{tenant_id}/management/datasources",
list[PluginDatasourceProviderEntity],
params={"page": 1, "page_size": 256},
transformer=transformer,
)
for provider in response:
ToolTransformService.repack_provider(tenant_id=tenant_id, provider=provider)
for provider in response:
provider.declaration.identity.name = f"{provider.plugin_id}/{provider.declaration.identity.name}"
# override the provider name for each tool to plugin_id/provider_name
for tool in provider.declaration.datasources:
tool.identity.provider = provider.declaration.identity.name
return response
def fetch_datasource_provider(self, tenant_id: str, provider_id: str) -> PluginDatasourceProviderEntity:
"""
Fetch datasource provider for the given tenant and plugin.
"""
if provider_id == "langgenius/file/file":
return PluginDatasourceProviderEntity(**self._get_local_file_datasource_provider())
tool_provider_id = DatasourceProviderID(provider_id)
def transformer(json_response: dict[str, Any]) -> dict:
data = json_response.get("data")
if data:
for datasource in data.get("declaration", {}).get("datasources", []):
datasource["identity"]["provider"] = tool_provider_id.provider_name
if datasource.get("output_schema"):
datasource["output_schema"] = resolve_dify_schema_refs(datasource["output_schema"])
return json_response
response = self._request_with_plugin_daemon_response(
"GET",
f"plugin/{tenant_id}/management/datasource",
PluginDatasourceProviderEntity,
params={"provider": tool_provider_id.provider_name, "plugin_id": tool_provider_id.plugin_id},
transformer=transformer,
)
response.declaration.identity.name = f"{response.plugin_id}/{response.declaration.identity.name}"
# override the provider name for each tool to plugin_id/provider_name
for datasource in response.declaration.datasources:
datasource.identity.provider = response.declaration.identity.name
return response
def get_website_crawl(
self,
tenant_id: str,
user_id: str,
datasource_provider: str,
datasource_name: str,
credentials: dict[str, Any],
datasource_parameters: Mapping[str, Any],
provider_type: str,
) -> Generator[WebsiteCrawlMessage, None, None]:
"""
Run the datasource's website crawl with the given tenant, user, provider, datasource name, credentials and parameters.
"""
datasource_provider_id = GenericProviderID(datasource_provider)
return self._request_with_plugin_daemon_response_stream(
"POST",
f"plugin/{tenant_id}/dispatch/datasource/get_website_crawl",
WebsiteCrawlMessage,
data={
"user_id": user_id,
"data": {
"provider": datasource_provider_id.provider_name,
"datasource": datasource_name,
"credentials": credentials,
"datasource_parameters": datasource_parameters,
},
},
headers={
"X-Plugin-ID": datasource_provider_id.plugin_id,
"Content-Type": "application/json",
},
)
def get_online_document_pages(
self,
tenant_id: str,
user_id: str,
datasource_provider: str,
datasource_name: str,
credentials: dict[str, Any],
datasource_parameters: Mapping[str, Any],
provider_type: str,
) -> Generator[OnlineDocumentPagesMessage, None, None]:
"""
Fetch online document pages from the datasource with the given tenant, user, provider, datasource name, credentials and parameters.
"""
datasource_provider_id = GenericProviderID(datasource_provider)
return self._request_with_plugin_daemon_response_stream(
"POST",
f"plugin/{tenant_id}/dispatch/datasource/get_online_document_pages",
OnlineDocumentPagesMessage,
data={
"user_id": user_id,
"data": {
"provider": datasource_provider_id.provider_name,
"datasource": datasource_name,
"credentials": credentials,
"datasource_parameters": datasource_parameters,
},
},
headers={
"X-Plugin-ID": datasource_provider_id.plugin_id,
"Content-Type": "application/json",
},
)
def get_online_document_page_content(
self,
tenant_id: str,
user_id: str,
datasource_provider: str,
datasource_name: str,
credentials: dict[str, Any],
datasource_parameters: GetOnlineDocumentPageContentRequest,
provider_type: str,
) -> Generator[DatasourceMessage, None, None]:
"""
Fetch the content of a single online document page with the given tenant, user, provider, datasource name, credentials and page request.
"""
datasource_provider_id = GenericProviderID(datasource_provider)
return self._request_with_plugin_daemon_response_stream(
"POST",
f"plugin/{tenant_id}/dispatch/datasource/get_online_document_page_content",
DatasourceMessage,
data={
"user_id": user_id,
"data": {
"provider": datasource_provider_id.provider_name,
"datasource": datasource_name,
"credentials": credentials,
"page": datasource_parameters.model_dump(),
},
},
headers={
"X-Plugin-ID": datasource_provider_id.plugin_id,
"Content-Type": "application/json",
},
)
def online_drive_browse_files(
self,
tenant_id: str,
user_id: str,
datasource_provider: str,
datasource_name: str,
credentials: dict[str, Any],
request: OnlineDriveBrowseFilesRequest,
provider_type: str,
) -> Generator[OnlineDriveBrowseFilesResponse, None, None]:
"""
Browse files in an online drive datasource with the given tenant, user, provider, datasource name, credentials and browse request.
"""
datasource_provider_id = GenericProviderID(datasource_provider)
response = self._request_with_plugin_daemon_response_stream(
"POST",
f"plugin/{tenant_id}/dispatch/datasource/online_drive_browse_files",
OnlineDriveBrowseFilesResponse,
data={
"user_id": user_id,
"data": {
"provider": datasource_provider_id.provider_name,
"datasource": datasource_name,
"credentials": credentials,
"request": request.model_dump(),
},
},
headers={
"X-Plugin-ID": datasource_provider_id.plugin_id,
"Content-Type": "application/json",
},
)
yield from response
def online_drive_download_file(
self,
tenant_id: str,
user_id: str,
datasource_provider: str,
datasource_name: str,
credentials: dict[str, Any],
request: OnlineDriveDownloadFileRequest,
provider_type: str,
) -> Generator[DatasourceMessage, None, None]:
"""
Download a file from an online drive datasource with the given tenant, user, provider, datasource name, credentials and download request.
"""
datasource_provider_id = GenericProviderID(datasource_provider)
response = self._request_with_plugin_daemon_response_stream(
"POST",
f"plugin/{tenant_id}/dispatch/datasource/online_drive_download_file",
DatasourceMessage,
data={
"user_id": user_id,
"data": {
"provider": datasource_provider_id.provider_name,
"datasource": datasource_name,
"credentials": credentials,
"request": request.model_dump(),
},
},
headers={
"X-Plugin-ID": datasource_provider_id.plugin_id,
"Content-Type": "application/json",
},
)
yield from response
def validate_provider_credentials(
self, tenant_id: str, user_id: str, provider: str, plugin_id: str, credentials: dict[str, Any]
) -> bool:
"""
Validate the credentials of the datasource provider.
"""
# datasource_provider_id = GenericProviderID(provider_id)
response = self._request_with_plugin_daemon_response_stream(
"POST",
f"plugin/{tenant_id}/dispatch/datasource/validate_credentials",
PluginBasicBooleanResponse,
data={
"user_id": user_id,
"data": {
"provider": provider,
"credentials": credentials,
},
},
headers={
"X-Plugin-ID": plugin_id,
"Content-Type": "application/json",
},
)
for resp in response:
return resp.result
return False
def _get_local_file_datasource_provider(self) -> dict[str, Any]:
return {
"id": "langgenius/file/file",
"plugin_id": "langgenius/file",
"provider": "file",
"plugin_unique_identifier": "langgenius/file:0.0.1@dify",
"declaration": {
"identity": {
"author": "langgenius",
"name": "file",
"label": {"zh_Hans": "File", "en_US": "File", "pt_BR": "File", "ja_JP": "File"},
"icon": "https://assets.dify.ai/images/File%20Upload.svg",
"description": {"zh_Hans": "File", "en_US": "File", "pt_BR": "File", "ja_JP": "File"},
},
"credentials_schema": [],
"provider_type": "local_file",
"datasources": [
{
"identity": {
"author": "langgenius",
"name": "upload-file",
"provider": "file",
"label": {"zh_Hans": "File", "en_US": "File", "pt_BR": "File", "ja_JP": "File"},
},
"parameters": [],
"description": {"zh_Hans": "File", "en_US": "File", "pt_BR": "File", "ja_JP": "File"},
}
],
},
}
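Two hedged usage sketches for the PluginDatasourceManager above; the tenant/user ids and provider names are placeholders, and a reachable plugin daemon is assumed:

# fetch_datasource_providers always prepends the built-in local file provider and
# rewrites every provider name to "<plugin_id>/<provider_name>".
manager = PluginDatasourceManager()
providers = manager.fetch_datasource_providers("tenant-123")
for provider in providers:
    print(provider.plugin_id, provider.declaration.identity.name)

# validate_provider_credentials consumes a stream of boolean responses from the
# daemon: the first streamed result wins, and an empty stream counts as failure.
ok = manager.validate_provider_credentials(
    tenant_id="tenant-123",
    user_id="user-456",
    provider="notion",
    plugin_id="langgenius/notion",
    credentials={"integration_secret": "***"},
)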

View File

@ -1,9 +1,9 @@
from collections.abc import Mapping
from typing import Any
from core.plugin.entities.plugin import GenericProviderID
from core.plugin.entities.plugin_daemon import PluginDynamicSelectOptionsResponse
from core.plugin.impl.base import BasePluginClient
from models.provider_ids import GenericProviderID
class DynamicSelectClient(BasePluginClient):

View File

@ -2,7 +2,6 @@ from collections.abc import Sequence
from core.plugin.entities.bundle import PluginBundleDependency
from core.plugin.entities.plugin import (
GenericProviderID,
MissingPluginDependency,
PluginDeclaration,
PluginEntity,
@ -16,6 +15,7 @@ from core.plugin.entities.plugin_daemon import (
PluginListResponse,
)
from core.plugin.impl.base import BasePluginClient
from models.provider_ids import GenericProviderID
class PluginInstaller(BasePluginClient):

View File

@ -3,11 +3,15 @@ from typing import Any
from pydantic import BaseModel
from core.plugin.entities.plugin import GenericProviderID, ToolProviderID
from core.plugin.entities.plugin_daemon import PluginBasicBooleanResponse, PluginToolProviderEntity
from core.plugin.entities.plugin_daemon import (
PluginBasicBooleanResponse,
PluginToolProviderEntity,
)
from core.plugin.impl.base import BasePluginClient
from core.plugin.utils.chunk_merger import merge_blob_chunks
from core.schemas.resolver import resolve_dify_schema_refs
from core.tools.entities.tool_entities import CredentialType, ToolInvokeMessage, ToolParameter
from models.provider_ids import GenericProviderID, ToolProviderID
class PluginToolManager(BasePluginClient):
@ -22,6 +26,9 @@ class PluginToolManager(BasePluginClient):
provider_name = declaration.get("identity", {}).get("name")
for tool in declaration.get("tools", []):
tool["identity"]["provider"] = provider_name
# resolve refs
if tool.get("output_schema"):
tool["output_schema"] = resolve_dify_schema_refs(tool["output_schema"])
return json_response
@ -53,6 +60,9 @@ class PluginToolManager(BasePluginClient):
if data:
for tool in data.get("declaration", {}).get("tools", []):
tool["identity"]["provider"] = tool_provider_id.provider_name
# resolve refs
if tool.get("output_schema"):
tool["output_schema"] = resolve_dify_schema_refs(tool["output_schema"])
return json_response
@ -146,6 +156,36 @@ class PluginToolManager(BasePluginClient):
return False
def validate_datasource_credentials(
self, tenant_id: str, user_id: str, provider: str, credentials: dict[str, Any]
) -> bool:
"""
Validate the credentials of the datasource provider.
"""
tool_provider_id = GenericProviderID(provider)
response = self._request_with_plugin_daemon_response_stream(
"POST",
f"plugin/{tenant_id}/dispatch/datasource/validate_credentials",
PluginBasicBooleanResponse,
data={
"user_id": user_id,
"data": {
"provider": tool_provider_id.provider_name,
"credentials": credentials,
},
},
headers={
"X-Plugin-ID": tool_provider_id.plugin_id,
"Content-Type": "application/json",
},
)
for resp in response:
return resp.result
return False
def get_runtime_parameters(
self,
tenant_id: str,

Some files were not shown because too many files have changed in this diff.