Files
dify/api/controllers/openapi/completion_messages.py
GareArc 591048d7c2 feat(openapi): bearer auth pipeline + Layer 0 + per-token rate limit (CE)
Bearer auth surface for /openapi/v1/* run-routes:

- OAUTH_BEARER_PIPELINE (renamed from APP_PIPELINE for clarity outside this
  module) composes BearerCheck → ScopeCheck → AppResolver →
  WorkspaceMembershipCheck → AppAuthzCheck → CallerMount.
- BearerAuthenticator.authenticate() is the single source of identity +
  rate-limit. Both pipeline (BearerCheck) and decorator (validate_bearer)
  delegate to it, so per-token rate limit fires exactly once per request.
- Layer 0 (workspace membership) is CE-only; on EE the gateway owns
  tenant isolation. Verdicts are cached on the AuthContext entry as
  verified_tenants: dict[str, bool] (legacy "ok"/"denied" strings tolerated
  by from_cache for one TTL cycle, then removed).
- check_workspace_membership(...) is the shared core; the pipeline step
  and the inline require_workspace_member helper both delegate to it.
- Per-token rate limit: 60/min sliding window, RFC-7231-compliant 429
  with Retry-After header + JSON body { error, retry_after_ms }. Bucket
  key is sha256(token) so all replicas share state via Redis.

API hygiene:
- Scope StrEnum (FULL, APPS_READ, APPS_RUN) replaces bare string literals.
- /openapi/v1/apps/<id>/info: scope flipped from apps:run to apps:read.
- /info migrates off the pipeline to validate_bearer + require_scope +
  require_workspace_member (no AppAuthzCheck/CallerMount needed for reads).
- ResolvedRow gains to_cache() / from_cache() classmethods.
- AuthContext gains token_hash + verified_tenants, dropping the per-route
  re-hash and per-request Redis read on the cache hit path.

OPENAPI_RATE_LIMIT_PER_TOKEN config (default 60).
2026-05-05 18:07:47 -07:00

133 lines
4.4 KiB
Python

"""POST /openapi/v1/apps/<app_id>/completion-messages — port of
service_api/app/completion.py:CompletionApi."""
from __future__ import annotations
import logging
from collections.abc import Mapping
from typing import Any, Literal
from flask import request
from flask_restx import Resource
from pydantic import BaseModel, Field, ValidationError
from werkzeug.exceptions import BadRequest, InternalServerError, NotFound
import services
from controllers.openapi import openapi_ns
from controllers.openapi._audit import emit_app_run
from controllers.openapi._models import MessageMetadata
from controllers.openapi.auth.composition import OAUTH_BEARER_PIPELINE
from controllers.service_api.app.error import (
AppUnavailableError,
CompletionRequestError,
ConversationCompletedError,
ProviderModelCurrentlyNotSupportError,
ProviderNotInitializeError,
ProviderQuotaExceededError,
)
from core.app.entities.app_invoke_entities import InvokeFrom
from core.errors.error import (
ModelCurrentlyNotSupportError,
ProviderTokenNotInitError,
QuotaExceededError,
)
from graphon.model_runtime.errors.invoke import InvokeError
from libs import helper
from libs.oauth_bearer import Scope
from models.model import App, AppMode
from services.app_generate_service import AppGenerateService
logger = logging.getLogger(__name__)
class CompletionMessageRequest(BaseModel):
inputs: dict[str, Any]
query: str = Field(default="")
files: list[dict[str, Any]] | None = None
response_mode: Literal["blocking", "streaming"] | None = None
class CompletionMessageResponse(BaseModel):
event: str
task_id: str
id: str
message_id: str
mode: str
answer: str
metadata: MessageMetadata = Field(default_factory=MessageMetadata)
created_at: int
def _unpack_app(app_model):
return app_model
def _unpack_caller(caller):
return caller
@openapi_ns.route("/apps/<string:app_id>/completion-messages")
class CompletionMessagesApi(Resource):
@OAUTH_BEARER_PIPELINE.guard(scope=Scope.APPS_RUN)
def post(self, app_id: str, app_model: App, caller, caller_kind: str):
app = _unpack_app(app_model)
if AppMode.value_of(app.mode) != AppMode.COMPLETION:
raise AppUnavailableError()
body = request.get_json(silent=True) or {}
body.pop("user", None)
try:
payload = CompletionMessageRequest.model_validate(body)
except ValidationError as exc:
raise BadRequest(str(exc))
args = payload.model_dump(exclude_none=True)
args["auto_generate_name"] = False
streaming = payload.response_mode == "streaming"
try:
response = AppGenerateService.generate(
app_model=app,
user=_unpack_caller(caller),
args=args,
invoke_from=InvokeFrom.OPENAPI,
streaming=streaming,
)
except services.errors.conversation.ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")
except services.errors.conversation.ConversationCompletedError:
raise ConversationCompletedError()
except services.errors.app_model_config.AppModelConfigBrokenError:
logger.exception("App model config broken.")
raise AppUnavailableError()
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
except QuotaExceededError:
raise ProviderQuotaExceededError()
except ModelCurrentlyNotSupportError:
raise ProviderModelCurrentlyNotSupportError()
except InvokeError as e:
raise CompletionRequestError(e.description)
except ValueError:
raise
except Exception:
logger.exception("internal server error.")
raise InternalServerError()
emit_app_run(
app_id=app.id,
tenant_id=app.tenant_id,
caller_kind=caller_kind,
mode=str(app.mode),
)
if streaming:
return helper.compact_generate_response(response)
if isinstance(response, tuple):
body_dict: Any = response[0] # pyright: ignore[reportArgumentType]
else:
body_dict = response
if not isinstance(body_dict, Mapping):
raise InternalServerError("blocking generate returned non-mapping response")
return CompletionMessageResponse.model_validate(dict(body_dict)).model_dump(mode="json"), 200