Merge branch 'main' into feat/r2

# Conflicts:
#	docker/docker-compose.middleware.yaml
#	web/app/components/workflow-app/components/workflow-main.tsx
#	web/app/components/workflow-app/hooks/index.ts
#	web/app/components/workflow/hooks-store/store.ts
#	web/app/components/workflow/hooks/index.ts
#	web/app/components/workflow/nodes/_base/components/variable/var-reference-picker.tsx
This commit is contained in:
jyong
2025-07-02 18:20:05 +08:00
201 changed files with 7572 additions and 5289 deletions

View File

@ -889,7 +889,7 @@ class RegisterService:
TenantService.create_owner_tenant_if_not_exist(account=account, is_setup=True)
dify_setup = DifySetup(version=dify_config.CURRENT_VERSION)
dify_setup = DifySetup(version=dify_config.project.version)
db.session.add(dify_setup)
db.session.commit()
except Exception as e:

View File

@ -323,6 +323,23 @@ class DatasetService:
except ProviderTokenNotInitError as ex:
raise ValueError(ex.description)
@staticmethod
def check_reranking_model_setting(tenant_id: str, reranking_model_provider: str, reranking_model: str):
try:
model_manager = ModelManager()
model_manager.get_model_instance(
tenant_id=tenant_id,
provider=reranking_model_provider,
model_type=ModelType.RERANK,
model=reranking_model,
)
except LLMBadRequestError:
raise ValueError(
"No Rerank Model available. Please configure a valid provider in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ValueError(ex.description)
@staticmethod
def update_dataset(dataset_id, data, user):
"""
@ -645,6 +662,10 @@ class DatasetService:
)
except ProviderTokenNotInitError:
# If we can't get the embedding model, preserve existing settings
logging.warning(
f"Failed to initialize embedding model {data['embedding_model_provider']}/{data['embedding_model']}, "
f"preserving existing settings"
)
if dataset.embedding_model_provider and dataset.embedding_model:
filtered_data["embedding_model_provider"] = dataset.embedding_model_provider
filtered_data["embedding_model"] = dataset.embedding_model
@ -2661,6 +2682,7 @@ class SegmentService:
# calc embedding use tokens
if document.doc_form == "qa_model":
segment.answer = args.answer
tokens = embedding_model.get_text_embedding_num_tokens(texts=[content + segment.answer])[0]
else:
tokens = embedding_model.get_text_embedding_num_tokens(texts=[content])[0]

View File

@ -1,23 +0,0 @@
from typing import Optional
from core.moderation.factory import ModerationFactory, ModerationOutputsResult
from extensions.ext_database import db
from models.model import App, AppModelConfig
class ModerationService:
def moderation_for_outputs(self, app_id: str, app_model: App, text: str) -> ModerationOutputsResult:
app_model_config: Optional[AppModelConfig] = None
app_model_config = (
db.session.query(AppModelConfig).filter(AppModelConfig.id == app_model.app_model_config_id).first()
)
if not app_model_config:
raise ValueError("app model config not found")
name = app_model_config.sensitive_word_avoidance_dict["type"]
config = app_model_config.sensitive_word_avoidance_dict["config"]
moderation = ModerationFactory(name, app_id, app_model.tenant_id, config)
return moderation.moderation_for_outputs(text)

View File

@ -8,9 +8,10 @@ from extensions.ext_redis import redis_client
class OAuthProxyService(BasePluginClient):
# Default max age for proxy context parameter in seconds
__MAX_AGE__ = 5 * 60 # 5 minutes
__KEY_PREFIX__ = "oauth_proxy_context:"
@staticmethod
def create_proxy_context(user_id, tenant_id, plugin_id, provider):
def create_proxy_context(user_id: str, tenant_id: str, plugin_id: str, provider: str):
"""
Create a proxy context for an OAuth 2.0 authorization request.
@ -23,26 +24,22 @@ class OAuthProxyService(BasePluginClient):
is used to verify the state, ensuring the request's integrity and authenticity,
and mitigating replay attacks.
"""
seconds, _ = redis_client.time()
context_id = str(uuid.uuid4())
data = {
"user_id": user_id,
"plugin_id": plugin_id,
"tenant_id": tenant_id,
"provider": provider,
# encode redis time to avoid distribution time skew
"timestamp": seconds,
}
# ignore nonce collision
redis_client.setex(
f"oauth_proxy_context:{context_id}",
f"{OAuthProxyService.__KEY_PREFIX__}{context_id}",
OAuthProxyService.__MAX_AGE__,
json.dumps(data),
)
return context_id
@staticmethod
def use_proxy_context(context_id, max_age=__MAX_AGE__):
def use_proxy_context(context_id: str):
"""
Validate the proxy context parameter.
This checks if the context_id is valid and not expired.
@ -50,12 +47,7 @@ class OAuthProxyService(BasePluginClient):
if not context_id:
raise ValueError("context_id is required")
# get data from redis
data = redis_client.getdel(f"oauth_proxy_context:{context_id}")
data = redis_client.getdel(f"{OAuthProxyService.__KEY_PREFIX__}{context_id}")
if not data:
raise ValueError("context_id is invalid")
# check if data is expired
seconds, _ = redis_client.time()
state = json.loads(data)
if state.get("timestamp") < seconds - max_age:
raise ValueError("context_id is expired")
return state
return json.loads(data)

View File

@ -0,0 +1,74 @@
from collections.abc import Mapping, Sequence
from typing import Any, Literal
from sqlalchemy.orm import Session
from core.plugin.entities.parameters import PluginParameterOption
from core.plugin.impl.dynamic_select import DynamicSelectClient
from core.tools.tool_manager import ToolManager
from core.tools.utils.configuration import ProviderConfigEncrypter
from extensions.ext_database import db
from models.tools import BuiltinToolProvider
class PluginParameterService:
@staticmethod
def get_dynamic_select_options(
tenant_id: str,
user_id: str,
plugin_id: str,
provider: str,
action: str,
parameter: str,
provider_type: Literal["tool"],
) -> Sequence[PluginParameterOption]:
"""
Get dynamic select options for a plugin parameter.
Args:
tenant_id: The tenant ID.
plugin_id: The plugin ID.
provider: The provider name.
action: The action name.
parameter: The parameter name.
"""
credentials: Mapping[str, Any] = {}
match provider_type:
case "tool":
provider_controller = ToolManager.get_builtin_provider(provider, tenant_id)
# init tool configuration
tool_configuration = ProviderConfigEncrypter(
tenant_id=tenant_id,
config=[x.to_basic_provider_config() for x in provider_controller.get_credentials_schema()],
provider_type=provider_controller.provider_type.value,
provider_identity=provider_controller.entity.identity.name,
)
# check if credentials are required
if not provider_controller.need_credentials:
credentials = {}
else:
# fetch credentials from db
with Session(db.engine) as session:
db_record = (
session.query(BuiltinToolProvider)
.filter(
BuiltinToolProvider.tenant_id == tenant_id,
BuiltinToolProvider.provider == provider,
)
.first()
)
if db_record is None:
raise ValueError(f"Builtin provider {provider} not found when fetching credentials")
credentials = tool_configuration.decrypt(db_record.credentials)
case _:
raise ValueError(f"Invalid provider type: {provider_type}")
return (
DynamicSelectClient()
.fetch_dynamic_select_options(tenant_id, user_id, plugin_id, provider, action, credentials, parameter)
.options
)

View File

@ -97,16 +97,16 @@ class VectorService:
vector = Vector(dataset=dataset)
vector.delete_by_ids([segment.index_node_id])
vector.add_texts([document], duplicate_check=True)
# update keyword index
keyword = Keyword(dataset)
keyword.delete_by_ids([segment.index_node_id])
# save keyword index
if keywords and len(keywords) > 0:
keyword.add_texts([document], keywords_list=[keywords])
else:
keyword.add_texts([document])
# update keyword index
keyword = Keyword(dataset)
keyword.delete_by_ids([segment.index_node_id])
# save keyword index
if keywords and len(keywords) > 0:
keyword.add_texts([document], keywords_list=[keywords])
else:
keyword.add_texts([document])
@classmethod
def generate_child_chunks(

View File

@ -154,7 +154,7 @@ class WorkflowDraftVariableService:
variables = (
# Do not load the `value` field.
query.options(orm.defer(WorkflowDraftVariable.value))
.order_by(WorkflowDraftVariable.id.desc())
.order_by(WorkflowDraftVariable.created_at.desc())
.limit(limit)
.offset((page - 1) * limit)
.all()
@ -168,7 +168,7 @@ class WorkflowDraftVariableService:
WorkflowDraftVariable.node_id == node_id,
)
query = self._session.query(WorkflowDraftVariable).filter(*criteria)
variables = query.order_by(WorkflowDraftVariable.id.desc()).all()
variables = query.order_by(WorkflowDraftVariable.created_at.desc()).all()
return WorkflowDraftVariableList(variables=variables)
def list_node_variables(self, app_id: str, node_id: str) -> WorkflowDraftVariableList:
@ -235,7 +235,9 @@ class WorkflowDraftVariableService:
self._session.flush()
return variable
def _reset_node_var(self, workflow: Workflow, variable: WorkflowDraftVariable) -> WorkflowDraftVariable | None:
def _reset_node_var_or_sys_var(
self, workflow: Workflow, variable: WorkflowDraftVariable
) -> WorkflowDraftVariable | None:
# If a variable does not allow updating, it makes no sense to reset it.
if not variable.editable:
return variable
@ -259,28 +261,35 @@ class WorkflowDraftVariableService:
self._session.flush()
return None
# Get node type for proper value extraction
node_config = workflow.get_node_config_by_id(variable.node_id)
node_type = workflow.get_node_type_from_node_config(node_config)
outputs_dict = node_exec.outputs_dict or {}
# a sentinel value used to check the absence of the output variable key.
absent = object()
# Note: Based on the implementation in `_build_from_variable_assigner_mapping`,
# VariableAssignerNode (both v1 and v2) can only create conversation draft variables.
# For consistency, we should simply return when processing VARIABLE_ASSIGNER nodes.
#
# This implementation must remain synchronized with the `_build_from_variable_assigner_mapping`
# and `save` methods.
if node_type == NodeType.VARIABLE_ASSIGNER:
return variable
if variable.get_variable_type() == DraftVariableType.NODE:
# Get node type for proper value extraction
node_config = workflow.get_node_config_by_id(variable.node_id)
node_type = workflow.get_node_type_from_node_config(node_config)
if variable.name not in outputs_dict:
# Note: Based on the implementation in `_build_from_variable_assigner_mapping`,
# VariableAssignerNode (both v1 and v2) can only create conversation draft variables.
# For consistency, we should simply return when processing VARIABLE_ASSIGNER nodes.
#
# This implementation must remain synchronized with the `_build_from_variable_assigner_mapping`
# and `save` methods.
if node_type == NodeType.VARIABLE_ASSIGNER:
return variable
output_value = outputs_dict.get(variable.name, absent)
else:
output_value = outputs_dict.get(f"sys.{variable.name}", absent)
# We cannot use `is None` to check the existence of an output variable here as
# the value of the output may be `None`.
if output_value is absent:
# If variable not found in execution data, delete the variable
self._session.delete(instance=variable)
self._session.flush()
return None
value = outputs_dict[variable.name]
value_seg = WorkflowDraftVariable.build_segment_with_type(variable.value_type, value)
value_seg = WorkflowDraftVariable.build_segment_with_type(variable.value_type, output_value)
# Extract variable value using unified logic
variable.set_value(value_seg)
variable.last_edited_at = None # Reset to indicate this is a reset operation
@ -291,10 +300,8 @@ class WorkflowDraftVariableService:
variable_type = variable.get_variable_type()
if variable_type == DraftVariableType.CONVERSATION:
return self._reset_conv_var(workflow, variable)
elif variable_type == DraftVariableType.NODE:
return self._reset_node_var(workflow, variable)
else:
raise VariableResetError(f"cannot reset system variable, variable_id={variable.id}")
return self._reset_node_var_or_sys_var(workflow, variable)
def delete_variable(self, variable: WorkflowDraftVariable):
self._session.delete(variable)
@ -439,6 +446,9 @@ def _batch_upsert_draft_varaible(
stmt = stmt.on_conflict_do_update(
index_elements=WorkflowDraftVariable.unique_app_id_node_id_name(),
set_={
# Refresh creation timestamp to ensure updated variables
# appear first in chronologically sorted result sets.
"created_at": stmt.excluded.created_at,
"updated_at": stmt.excluded.updated_at,
"last_edited_at": stmt.excluded.last_edited_at,
"description": stmt.excluded.description,
@ -525,9 +535,6 @@ class DraftVariableSaver:
# The type of the current node (see NodeType).
_node_type: NodeType
# Indicates how the workflow execution was triggered (see InvokeFrom).
_invoke_from: InvokeFrom
#
_node_execution_id: str
@ -546,15 +553,16 @@ class DraftVariableSaver:
app_id: str,
node_id: str,
node_type: NodeType,
invoke_from: InvokeFrom,
node_execution_id: str,
enclosing_node_id: str | None = None,
):
# Important: `node_execution_id` parameter refers to the primary key (`id`) of the
# WorkflowNodeExecutionModel/WorkflowNodeExecution, not their `node_execution_id`
# field. These are distinct database fields with different purposes.
self._session = session
self._app_id = app_id
self._node_id = node_id
self._node_type = node_type
self._invoke_from = invoke_from
self._node_execution_id = node_execution_id
self._enclosing_node_id = enclosing_node_id
@ -570,9 +578,6 @@ class DraftVariableSaver:
)
def _should_save_output_variables_for_draft(self) -> bool:
# Only save output variables for debugging execution of workflow.
if self._invoke_from != InvokeFrom.DEBUGGER:
return False
if self._enclosing_node_id is not None and self._node_type != NodeType.VARIABLE_ASSIGNER:
# Currently we do not save output variables for nodes inside loop or iteration.
return False

View File

@ -12,7 +12,6 @@ from sqlalchemy.orm import Session
from core.app.app_config.entities import VariableEntityType
from core.app.apps.advanced_chat.app_config_manager import AdvancedChatAppConfigManager
from core.app.apps.workflow.app_config_manager import WorkflowAppConfigManager
from core.app.entities.app_invoke_entities import InvokeFrom
from core.file import File
from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository
from core.variables import Variable
@ -413,7 +412,6 @@ class WorkflowService:
app_id=app_model.id,
node_id=workflow_node_execution.node_id,
node_type=NodeType(workflow_node_execution.node_type),
invoke_from=InvokeFrom.DEBUGGER,
enclosing_node_id=enclosing_node_id,
node_execution_id=node_execution.id,
)