Merge remote-tracking branch 'origin/main' into feat/support-agent-sandbox

2026-05-03 08:58:09 +08:00 · 2026-01-09 15:37:29 +08:00
parent 5a4f5f54a7 d4432ed80f
commit d2f41ae9ef
161 changed files with 10562 additions and 4905 deletions
--- a/api/core/app/apps/advanced_chat/app_runner.py
+++ b/api/core/app/apps/advanced_chat/app_runner.py
@ -39,7 +39,6 @@ from extensions.ext_database import db
 from extensions.ext_redis import redis_client
 from extensions.otel import WorkflowAppRunnerHandler, trace_span
 from models import Workflow
-from models.enums import UserFrom
 from models.model import App, Conversation, Message, MessageAnnotation
 from models.workflow import ConversationVariable
 from services.conversation_variable_updater import ConversationVariableUpdater
@ -106,6 +105,11 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner):
        if not app_record:
            raise ValueError("App not found")

+        invoke_from = self.application_generate_entity.invoke_from
+        if self.application_generate_entity.single_iteration_run or self.application_generate_entity.single_loop_run:
+            invoke_from = InvokeFrom.DEBUGGER
+        user_from = self._resolve_user_from(invoke_from)
+
        if self.application_generate_entity.single_iteration_run or self.application_generate_entity.single_loop_run:
            # Handle single iteration or single loop run
            graph, variable_pool, graph_runtime_state = self._prepare_single_node_execution(
@ -158,6 +162,8 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner):
                workflow_id=self._workflow.id,
                tenant_id=self._workflow.tenant_id,
                user_id=self.application_generate_entity.user_id,
+                user_from=user_from,
+                invoke_from=invoke_from,
            )

        db.session.close()
@ -175,12 +181,8 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner):
            graph=graph,
            graph_config=self._workflow.graph_dict,
            user_id=self.application_generate_entity.user_id,
-            user_from=(
-                UserFrom.ACCOUNT
-                if self.application_generate_entity.invoke_from in {InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER}
-                else UserFrom.END_USER
-            ),
-            invoke_from=self.application_generate_entity.invoke_from,
+            user_from=user_from,
+            invoke_from=invoke_from,
            call_depth=self.application_generate_entity.call_depth,
            variable_pool=variable_pool,
            graph_runtime_state=graph_runtime_state,
--- a/api/core/app/apps/pipeline/pipeline_runner.py
+++ b/api/core/app/apps/pipeline/pipeline_runner.py
@ -73,9 +73,15 @@ class PipelineRunner(WorkflowBasedAppRunner):
        """
        app_config = self.application_generate_entity.app_config
        app_config = cast(PipelineConfig, app_config)
+        invoke_from = self.application_generate_entity.invoke_from
+
+        if self.application_generate_entity.single_iteration_run or self.application_generate_entity.single_loop_run:
+            invoke_from = InvokeFrom.DEBUGGER
+
+        user_from = self._resolve_user_from(invoke_from)

        user_id = None
-        if self.application_generate_entity.invoke_from in {InvokeFrom.WEB_APP, InvokeFrom.SERVICE_API}:
+        if invoke_from in {InvokeFrom.WEB_APP, InvokeFrom.SERVICE_API}:
            end_user = db.session.query(EndUser).where(EndUser.id == self.application_generate_entity.user_id).first()
            if end_user:
                user_id = end_user.session_id
@ -117,7 +123,7 @@ class PipelineRunner(WorkflowBasedAppRunner):
                dataset_id=self.application_generate_entity.dataset_id,
                datasource_type=self.application_generate_entity.datasource_type,
                datasource_info=self.application_generate_entity.datasource_info,
-                invoke_from=self.application_generate_entity.invoke_from.value,
+                invoke_from=invoke_from.value,
            )

            rag_pipeline_variables = []
@ -149,6 +155,8 @@ class PipelineRunner(WorkflowBasedAppRunner):
                graph_runtime_state=graph_runtime_state,
                start_node_id=self.application_generate_entity.start_node_id,
                workflow=workflow,
+                user_from=user_from,
+                invoke_from=invoke_from,
            )

        # RUN WORKFLOW
@ -159,12 +167,8 @@ class PipelineRunner(WorkflowBasedAppRunner):
            graph=graph,
            graph_config=workflow.graph_dict,
            user_id=self.application_generate_entity.user_id,
-            user_from=(
-                UserFrom.ACCOUNT
-                if self.application_generate_entity.invoke_from in {InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER}
-                else UserFrom.END_USER
-            ),
-            invoke_from=self.application_generate_entity.invoke_from,
+            user_from=user_from,
+            invoke_from=invoke_from,
            call_depth=self.application_generate_entity.call_depth,
            graph_runtime_state=graph_runtime_state,
            variable_pool=variable_pool,
@ -210,7 +214,12 @@ class PipelineRunner(WorkflowBasedAppRunner):
        return workflow

    def _init_rag_pipeline_graph(
-        self, workflow: Workflow, graph_runtime_state: GraphRuntimeState, start_node_id: str | None = None
+        self,
+        workflow: Workflow,
+        graph_runtime_state: GraphRuntimeState,
+        start_node_id: str | None = None,
+        user_from: UserFrom = UserFrom.ACCOUNT,
+        invoke_from: InvokeFrom = InvokeFrom.SERVICE_API,
    ) -> Graph:
        """
        Init pipeline graph
@ -253,8 +262,8 @@ class PipelineRunner(WorkflowBasedAppRunner):
            workflow_id=workflow.id,
            graph_config=graph_config,
            user_id=self.application_generate_entity.user_id,
-            user_from=UserFrom.ACCOUNT,
-            invoke_from=InvokeFrom.SERVICE_API,
+            user_from=user_from,
+            invoke_from=invoke_from,
            call_depth=0,
        )

--- a/api/core/app/apps/workflow/app_runner.py
+++ b/api/core/app/apps/workflow/app_runner.py
@ -20,7 +20,6 @@ from core.workflow.workflow_entry import WorkflowEntry
 from extensions.ext_redis import redis_client
 from extensions.otel import WorkflowAppRunnerHandler, trace_span
 from libs.datetime_utils import naive_utc_now
-from models.enums import UserFrom
 from models.workflow import Workflow

 logger = logging.getLogger(__name__)
@ -74,7 +73,12 @@ class WorkflowAppRunner(WorkflowBasedAppRunner):
            workflow_execution_id=self.application_generate_entity.workflow_execution_id,
        )

+        invoke_from = self.application_generate_entity.invoke_from
        # if only single iteration or single loop run is requested
+        if self.application_generate_entity.single_iteration_run or self.application_generate_entity.single_loop_run:
+            invoke_from = InvokeFrom.DEBUGGER
+        user_from = self._resolve_user_from(invoke_from)
+
        if self.application_generate_entity.single_iteration_run or self.application_generate_entity.single_loop_run:
            graph, variable_pool, graph_runtime_state = self._prepare_single_node_execution(
                workflow=self._workflow,
@ -102,6 +106,8 @@ class WorkflowAppRunner(WorkflowBasedAppRunner):
                workflow_id=self._workflow.id,
                tenant_id=self._workflow.tenant_id,
                user_id=self.application_generate_entity.user_id,
+                user_from=user_from,
+                invoke_from=invoke_from,
                root_node_id=self._root_node_id,
            )

@ -120,12 +126,8 @@ class WorkflowAppRunner(WorkflowBasedAppRunner):
            graph=graph,
            graph_config=self._workflow.graph_dict,
            user_id=self.application_generate_entity.user_id,
-            user_from=(
-                UserFrom.ACCOUNT
-                if self.application_generate_entity.invoke_from in {InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER}
-                else UserFrom.END_USER
-            ),
-            invoke_from=self.application_generate_entity.invoke_from,
+            user_from=user_from,
+            invoke_from=invoke_from,
            call_depth=self.application_generate_entity.call_depth,
            variable_pool=variable_pool,
            graph_runtime_state=graph_runtime_state,
--- a/api/core/app/apps/workflow_app_runner.py
+++ b/api/core/app/apps/workflow_app_runner.py
@ -77,10 +77,18 @@ class WorkflowBasedAppRunner:
        self._app_id = app_id
        self._graph_engine_layers = graph_engine_layers

+    @staticmethod
+    def _resolve_user_from(invoke_from: InvokeFrom) -> UserFrom:
+        if invoke_from in {InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER}:
+            return UserFrom.ACCOUNT
+        return UserFrom.END_USER
+
    def _init_graph(
        self,
        graph_config: Mapping[str, Any],
        graph_runtime_state: GraphRuntimeState,
+        user_from: UserFrom,
+        invoke_from: InvokeFrom,
        workflow_id: str = "",
        tenant_id: str = "",
        user_id: str = "",
@ -105,8 +113,8 @@ class WorkflowBasedAppRunner:
            workflow_id=workflow_id,
            graph_config=graph_config,
            user_id=user_id,
-            user_from=UserFrom.ACCOUNT,
-            invoke_from=InvokeFrom.SERVICE_API,
+            user_from=user_from,
+            invoke_from=invoke_from,
            call_depth=0,
        )

@ -250,7 +258,7 @@ class WorkflowBasedAppRunner:
            graph_config=graph_config,
            user_id="",
            user_from=UserFrom.ACCOUNT,
-            invoke_from=InvokeFrom.SERVICE_API,
+            invoke_from=InvokeFrom.DEBUGGER,
            call_depth=0,
        )

--- a/api/core/hosting_configuration.py
+++ b/api/core/hosting_configuration.py
@ -56,6 +56,10 @@ class HostingConfiguration:
        self.provider_map[f"{DEFAULT_PLUGIN_ID}/minimax/minimax"] = self.init_minimax()
        self.provider_map[f"{DEFAULT_PLUGIN_ID}/spark/spark"] = self.init_spark()
        self.provider_map[f"{DEFAULT_PLUGIN_ID}/zhipuai/zhipuai"] = self.init_zhipuai()
+        self.provider_map[f"{DEFAULT_PLUGIN_ID}/gemini/google"] = self.init_gemini()
+        self.provider_map[f"{DEFAULT_PLUGIN_ID}/x/x"] = self.init_xai()
+        self.provider_map[f"{DEFAULT_PLUGIN_ID}/deepseek/deepseek"] = self.init_deepseek()
+        self.provider_map[f"{DEFAULT_PLUGIN_ID}/tongyi/tongyi"] = self.init_tongyi()

        self.moderation_config = self.init_moderation_config()

@ -128,7 +132,7 @@ class HostingConfiguration:
        quotas: list[HostingQuota] = []

        if dify_config.HOSTED_OPENAI_TRIAL_ENABLED:
-            hosted_quota_limit = dify_config.HOSTED_OPENAI_QUOTA_LIMIT
+            hosted_quota_limit = 0
            trial_models = self.parse_restrict_models_from_env("HOSTED_OPENAI_TRIAL_MODELS")
            trial_quota = TrialHostingQuota(quota_limit=hosted_quota_limit, restrict_models=trial_models)
            quotas.append(trial_quota)
@ -156,18 +160,49 @@ class HostingConfiguration:
            quota_unit=quota_unit,
        )

-    @staticmethod
-    def init_anthropic() -> HostingProvider:
-        quota_unit = QuotaUnit.TOKENS
+    def init_gemini(self) -> HostingProvider:
+        quota_unit = QuotaUnit.CREDITS
+        quotas: list[HostingQuota] = []
+
+        if dify_config.HOSTED_GEMINI_TRIAL_ENABLED:
+            hosted_quota_limit = 0
+            trial_models = self.parse_restrict_models_from_env("HOSTED_GEMINI_TRIAL_MODELS")
+            trial_quota = TrialHostingQuota(quota_limit=hosted_quota_limit, restrict_models=trial_models)
+            quotas.append(trial_quota)
+
+        if dify_config.HOSTED_GEMINI_PAID_ENABLED:
+            paid_models = self.parse_restrict_models_from_env("HOSTED_GEMINI_PAID_MODELS")
+            paid_quota = PaidHostingQuota(restrict_models=paid_models)
+            quotas.append(paid_quota)
+
+        if len(quotas) > 0:
+            credentials = {
+                "google_api_key": dify_config.HOSTED_GEMINI_API_KEY,
+            }
+
+            if dify_config.HOSTED_GEMINI_API_BASE:
+                credentials["google_base_url"] = dify_config.HOSTED_GEMINI_API_BASE
+
+            return HostingProvider(enabled=True, credentials=credentials, quota_unit=quota_unit, quotas=quotas)
+
+        return HostingProvider(
+            enabled=False,
+            quota_unit=quota_unit,
+        )
+
+    def init_anthropic(self) -> HostingProvider:
+        quota_unit = QuotaUnit.CREDITS
        quotas: list[HostingQuota] = []

        if dify_config.HOSTED_ANTHROPIC_TRIAL_ENABLED:
-            hosted_quota_limit = dify_config.HOSTED_ANTHROPIC_QUOTA_LIMIT
-            trial_quota = TrialHostingQuota(quota_limit=hosted_quota_limit)
+            hosted_quota_limit = 0
+            trail_models = self.parse_restrict_models_from_env("HOSTED_ANTHROPIC_TRIAL_MODELS")
+            trial_quota = TrialHostingQuota(quota_limit=hosted_quota_limit, restrict_models=trail_models)
            quotas.append(trial_quota)

        if dify_config.HOSTED_ANTHROPIC_PAID_ENABLED:
-            paid_quota = PaidHostingQuota()
+            paid_models = self.parse_restrict_models_from_env("HOSTED_ANTHROPIC_PAID_MODELS")
+            paid_quota = PaidHostingQuota(restrict_models=paid_models)
            quotas.append(paid_quota)

        if len(quotas) > 0:
@ -185,6 +220,94 @@ class HostingConfiguration:
            quota_unit=quota_unit,
        )

+    def init_tongyi(self) -> HostingProvider:
+        quota_unit = QuotaUnit.CREDITS
+        quotas: list[HostingQuota] = []
+
+        if dify_config.HOSTED_TONGYI_TRIAL_ENABLED:
+            hosted_quota_limit = 0
+            trail_models = self.parse_restrict_models_from_env("HOSTED_TONGYI_TRIAL_MODELS")
+            trial_quota = TrialHostingQuota(quota_limit=hosted_quota_limit, restrict_models=trail_models)
+            quotas.append(trial_quota)
+
+        if dify_config.HOSTED_TONGYI_PAID_ENABLED:
+            paid_models = self.parse_restrict_models_from_env("HOSTED_TONGYI_PAID_MODELS")
+            paid_quota = PaidHostingQuota(restrict_models=paid_models)
+            quotas.append(paid_quota)
+
+        if len(quotas) > 0:
+            credentials = {
+                "dashscope_api_key": dify_config.HOSTED_TONGYI_API_KEY,
+                "use_international_endpoint": dify_config.HOSTED_TONGYI_USE_INTERNATIONAL_ENDPOINT,
+            }
+
+            return HostingProvider(enabled=True, credentials=credentials, quota_unit=quota_unit, quotas=quotas)
+
+        return HostingProvider(
+            enabled=False,
+            quota_unit=quota_unit,
+        )
+
+    def init_xai(self) -> HostingProvider:
+        quota_unit = QuotaUnit.CREDITS
+        quotas: list[HostingQuota] = []
+
+        if dify_config.HOSTED_XAI_TRIAL_ENABLED:
+            hosted_quota_limit = 0
+            trail_models = self.parse_restrict_models_from_env("HOSTED_XAI_TRIAL_MODELS")
+            trial_quota = TrialHostingQuota(quota_limit=hosted_quota_limit, restrict_models=trail_models)
+            quotas.append(trial_quota)
+
+        if dify_config.HOSTED_XAI_PAID_ENABLED:
+            paid_models = self.parse_restrict_models_from_env("HOSTED_XAI_PAID_MODELS")
+            paid_quota = PaidHostingQuota(restrict_models=paid_models)
+            quotas.append(paid_quota)
+
+        if len(quotas) > 0:
+            credentials = {
+                "api_key": dify_config.HOSTED_XAI_API_KEY,
+            }
+
+            if dify_config.HOSTED_XAI_API_BASE:
+                credentials["endpoint_url"] = dify_config.HOSTED_XAI_API_BASE
+
+            return HostingProvider(enabled=True, credentials=credentials, quota_unit=quota_unit, quotas=quotas)
+
+        return HostingProvider(
+            enabled=False,
+            quota_unit=quota_unit,
+        )
+
+    def init_deepseek(self) -> HostingProvider:
+        quota_unit = QuotaUnit.CREDITS
+        quotas: list[HostingQuota] = []
+
+        if dify_config.HOSTED_DEEPSEEK_TRIAL_ENABLED:
+            hosted_quota_limit = 0
+            trail_models = self.parse_restrict_models_from_env("HOSTED_DEEPSEEK_TRIAL_MODELS")
+            trial_quota = TrialHostingQuota(quota_limit=hosted_quota_limit, restrict_models=trail_models)
+            quotas.append(trial_quota)
+
+        if dify_config.HOSTED_DEEPSEEK_PAID_ENABLED:
+            paid_models = self.parse_restrict_models_from_env("HOSTED_DEEPSEEK_PAID_MODELS")
+            paid_quota = PaidHostingQuota(restrict_models=paid_models)
+            quotas.append(paid_quota)
+
+        if len(quotas) > 0:
+            credentials = {
+                "api_key": dify_config.HOSTED_DEEPSEEK_API_KEY,
+            }
+
+            if dify_config.HOSTED_DEEPSEEK_API_BASE:
+                credentials["endpoint_url"] = dify_config.HOSTED_DEEPSEEK_API_BASE
+
+            return HostingProvider(enabled=True, credentials=credentials, quota_unit=quota_unit, quotas=quotas)
+
+        return HostingProvider(
+            enabled=False,
+            quota_unit=quota_unit,
+        )
+
    @staticmethod
    def init_minimax() -> HostingProvider:
        quota_unit = QuotaUnit.TOKENS
--- a/api/core/provider_manager.py
+++ b/api/core/provider_manager.py
@ -618,18 +618,18 @@ class ProviderManager:
                )

            for quota in configuration.quotas:
-                if quota.quota_type == ProviderQuotaType.TRIAL:
+                if quota.quota_type in (ProviderQuotaType.TRIAL, ProviderQuotaType.PAID):
                    # Init trial provider records if not exists
-                    if ProviderQuotaType.TRIAL not in provider_quota_to_provider_record_dict:
+                    if quota.quota_type not in provider_quota_to_provider_record_dict:
                        try:
                            # FIXME ignore the type error, only TrialHostingQuota has limit need to change the logic
                            new_provider_record = Provider(
                                tenant_id=tenant_id,
                                # TODO: Use provider name with prefix after the data migration.
                                provider_name=ModelProviderID(provider_name).provider_name,
-                                provider_type=ProviderType.SYSTEM,
-                                quota_type=ProviderQuotaType.TRIAL,
-                                quota_limit=quota.quota_limit,  # type: ignore
+                                provider_type=ProviderType.SYSTEM.value,
+                                quota_type=quota.quota_type,
+                                quota_limit=0,  # type: ignore
                                quota_used=0,
                                is_valid=True,
                            )
@ -641,8 +641,8 @@ class ProviderManager:
                            stmt = select(Provider).where(
                                Provider.tenant_id == tenant_id,
                                Provider.provider_name == ModelProviderID(provider_name).provider_name,
-                                Provider.provider_type == ProviderType.SYSTEM,
-                                Provider.quota_type == ProviderQuotaType.TRIAL,
+                                Provider.provider_type == ProviderType.SYSTEM.value,
+                                Provider.quota_type == quota.quota_type,
                            )
                            existed_provider_record = db.session.scalar(stmt)
                            if not existed_provider_record:
@ -912,6 +912,22 @@ class ProviderManager:
                provider_record
            )
        quota_configurations = []
+
+        if dify_config.EDITION == "CLOUD":
+            from services.credit_pool_service import CreditPoolService
+
+            trail_pool = CreditPoolService.get_pool(
+                tenant_id=tenant_id,
+                pool_type=ProviderQuotaType.TRIAL.value,
+            )
+            paid_pool = CreditPoolService.get_pool(
+                tenant_id=tenant_id,
+                pool_type=ProviderQuotaType.PAID.value,
+            )
+        else:
+            trail_pool = None
+            paid_pool = None
+
        for provider_quota in provider_hosting_configuration.quotas:
            if provider_quota.quota_type not in quota_type_to_provider_records_dict:
                if provider_quota.quota_type == ProviderQuotaType.FREE:
@ -932,16 +948,36 @@ class ProviderManager:
                    raise ValueError("quota_used is None")
                if provider_record.quota_limit is None:
                    raise ValueError("quota_limit is None")
+                if provider_quota.quota_type == ProviderQuotaType.TRIAL and trail_pool is not None:
+                    quota_configuration = QuotaConfiguration(
+                        quota_type=provider_quota.quota_type,
+                        quota_unit=provider_hosting_configuration.quota_unit or QuotaUnit.TOKENS,
+                        quota_used=trail_pool.quota_used,
+                        quota_limit=trail_pool.quota_limit,
+                        is_valid=trail_pool.quota_limit > trail_pool.quota_used or trail_pool.quota_limit == -1,
+                        restrict_models=provider_quota.restrict_models,
+                    )

-                quota_configuration = QuotaConfiguration(
-                    quota_type=provider_quota.quota_type,
-                    quota_unit=provider_hosting_configuration.quota_unit or QuotaUnit.TOKENS,
-                    quota_used=provider_record.quota_used,
-                    quota_limit=provider_record.quota_limit,
-                    is_valid=provider_record.quota_limit > provider_record.quota_used
-                    or provider_record.quota_limit == -1,
-                    restrict_models=provider_quota.restrict_models,
-                )
+                elif provider_quota.quota_type == ProviderQuotaType.PAID and paid_pool is not None:
+                    quota_configuration = QuotaConfiguration(
+                        quota_type=provider_quota.quota_type,
+                        quota_unit=provider_hosting_configuration.quota_unit or QuotaUnit.TOKENS,
+                        quota_used=paid_pool.quota_used,
+                        quota_limit=paid_pool.quota_limit,
+                        is_valid=paid_pool.quota_limit > paid_pool.quota_used or paid_pool.quota_limit == -1,
+                        restrict_models=provider_quota.restrict_models,
+                    )
+
+                else:
+                    quota_configuration = QuotaConfiguration(
+                        quota_type=provider_quota.quota_type,
+                        quota_unit=provider_hosting_configuration.quota_unit or QuotaUnit.TOKENS,
+                        quota_used=provider_record.quota_used,
+                        quota_limit=provider_record.quota_limit,
+                        is_valid=provider_record.quota_limit > provider_record.quota_used
+                        or provider_record.quota_limit == -1,
+                        restrict_models=provider_quota.restrict_models,
+                    )

            quota_configurations.append(quota_configuration)

--- a/api/core/workflow/nodes/llm/llm_utils.py
+++ b/api/core/workflow/nodes/llm/llm_utils.py
@ -6,7 +6,7 @@ from sqlalchemy.orm import Session

 from configs import dify_config
 from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity
-from core.entities.provider_entities import QuotaUnit
+from core.entities.provider_entities import ProviderQuotaType, QuotaUnit
 from core.file.models import File
 from core.memory.token_buffer_memory import TokenBufferMemory
 from core.model_manager import ModelInstance, ModelManager
@ -136,21 +136,37 @@ def deduct_llm_quota(tenant_id: str, model_instance: ModelInstance, usage: LLMUs
            used_quota = 1

    if used_quota is not None and system_configuration.current_quota_type is not None:
-        with Session(db.engine) as session:
-            stmt = (
-                update(Provider)
-                .where(
-                    Provider.tenant_id == tenant_id,
-                    # TODO: Use provider name with prefix after the data migration.
-                    Provider.provider_name == ModelProviderID(model_instance.provider).provider_name,
-                    Provider.provider_type == ProviderType.SYSTEM,
-                    Provider.quota_type == system_configuration.current_quota_type.value,
-                    Provider.quota_limit > Provider.quota_used,
-                )
-                .values(
-                    quota_used=Provider.quota_used + used_quota,
-                    last_used=naive_utc_now(),
-                )
+        if system_configuration.current_quota_type == ProviderQuotaType.TRIAL:
+            from services.credit_pool_service import CreditPoolService
+
+            CreditPoolService.check_and_deduct_credits(
+                tenant_id=tenant_id,
+                credits_required=used_quota,
            )
-            session.execute(stmt)
-            session.commit()
+        elif system_configuration.current_quota_type == ProviderQuotaType.PAID:
+            from services.credit_pool_service import CreditPoolService
+
+            CreditPoolService.check_and_deduct_credits(
+                tenant_id=tenant_id,
+                credits_required=used_quota,
+                pool_type="paid",
+            )
+        else:
+            with Session(db.engine) as session:
+                stmt = (
+                    update(Provider)
+                    .where(
+                        Provider.tenant_id == tenant_id,
+                        # TODO: Use provider name with prefix after the data migration.
+                        Provider.provider_name == ModelProviderID(model_instance.provider).provider_name,
+                        Provider.provider_type == ProviderType.SYSTEM.value,
+                        Provider.quota_type == system_configuration.current_quota_type.value,
+                        Provider.quota_limit > Provider.quota_used,
+                    )
+                    .values(
+                        quota_used=Provider.quota_used + used_quota,
+                        last_used=naive_utc_now(),
+                    )
+                )
+                session.execute(stmt)
+                session.commit()