From af17e20f9908d5943f7a1fbd6b166ea810a623f7 Mon Sep 17 00:00:00 2001 From: Harry Date: Mon, 26 Jan 2026 01:11:53 +0800 Subject: [PATCH] feat(sandbox): implement sandbox archive upload/download endpoints and security enhancements - Added sandbox archive upload and download proxy endpoints with signed URL verification. - Introduced security helpers for generating and verifying signed URLs. - Updated file-related API routes to include sandbox archive functionality. - Refactored app asset storage methods to streamline download/upload URL generation. --- .../controllers/files/__init__.py.md | 2 +- .../controllers/files/sandbox_archive.py.md | 14 ++ api/agent-notes/core/sandbox/sandbox.py.md | 9 ++ .../core/sandbox/security/__init__.py.md | 2 + .../sandbox/security/archive_signer.py.md | 13 ++ .../sandbox/storage/archive_storage.py.md | 12 ++ api/controllers/files/__init__.py | 11 +- api/controllers/files/sandbox_archive.py | 76 +++++++++ api/core/app_assets/storage.py | 27 +--- api/core/sandbox/builder.py | 2 +- .../initializer/app_assets_initializer.py | 2 +- .../draft_app_assets_initializer.py | 2 +- api/core/sandbox/sandbox.py | 5 +- api/core/sandbox/security/__init__.py | 1 + api/core/sandbox/security/archive_signer.py | 152 ++++++++++++++++++ api/core/sandbox/storage/archive_storage.py | 19 ++- .../storage/cached_presign_storage.py | 4 +- 17 files changed, 322 insertions(+), 31 deletions(-) create mode 100644 api/agent-notes/controllers/files/sandbox_archive.py.md create mode 100644 api/agent-notes/core/sandbox/sandbox.py.md create mode 100644 api/agent-notes/core/sandbox/security/__init__.py.md create mode 100644 api/agent-notes/core/sandbox/security/archive_signer.py.md create mode 100644 api/agent-notes/core/sandbox/storage/archive_storage.py.md create mode 100644 api/controllers/files/sandbox_archive.py create mode 100644 api/core/sandbox/security/__init__.py create mode 100644 api/core/sandbox/security/archive_signer.py diff --git 
a/api/agent-notes/controllers/files/__init__.py.md b/api/agent-notes/controllers/files/__init__.py.md index bbe9598183..4de964db3b 100644 --- a/api/agent-notes/controllers/files/__init__.py.md +++ b/api/agent-notes/controllers/files/__init__.py.md @@ -1,6 +1,6 @@ Summary: - Registers file-related API namespaces and routes for files service. -- Includes app-assets download/upload proxy controllers. +- Includes app-assets and sandbox archive proxy controllers. Invariants: - files_ns must include all file controller modules to register routes. diff --git a/api/agent-notes/controllers/files/sandbox_archive.py.md b/api/agent-notes/controllers/files/sandbox_archive.py.md new file mode 100644 index 0000000000..be9002ac73 --- /dev/null +++ b/api/agent-notes/controllers/files/sandbox_archive.py.md @@ -0,0 +1,14 @@ +Summary: +- Sandbox archive upload/download proxy endpoints (signed URL verification, stream to storage). + +Invariants: +- Validates tenant_id and sandbox_id UUIDs. +- Verifies tenant-scoped signature and expiration before storage access. +- URL uses expires_at/nonce/sign query params. + +Edge Cases: +- Missing archive returns NotFound. +- Invalid signature or expired link returns Forbidden. + +Tests: +- Add unit tests for signature validation if needed. diff --git a/api/agent-notes/core/sandbox/sandbox.py.md b/api/agent-notes/core/sandbox/sandbox.py.md new file mode 100644 index 0000000000..11f3e1acc2 --- /dev/null +++ b/api/agent-notes/core/sandbox/sandbox.py.md @@ -0,0 +1,9 @@ +Summary: +- Sandbox lifecycle wrapper (ready/cancel/fail signals, mount/unmount, release). + +Invariants: +- wait_ready raises with the original initialization error as the cause. +- release always attempts unmount and environment release, logging failures. + +Tests: +- Covered by sandbox lifecycle/unit tests and workflow execution error handling. 
diff --git a/api/agent-notes/core/sandbox/security/__init__.py.md b/api/agent-notes/core/sandbox/security/__init__.py.md new file mode 100644 index 0000000000..fc53f73c1a --- /dev/null +++ b/api/agent-notes/core/sandbox/security/__init__.py.md @@ -0,0 +1,2 @@ +Summary: +- Sandbox security helper modules. diff --git a/api/agent-notes/core/sandbox/security/archive_signer.py.md b/api/agent-notes/core/sandbox/security/archive_signer.py.md new file mode 100644 index 0000000000..dcb91db061 --- /dev/null +++ b/api/agent-notes/core/sandbox/security/archive_signer.py.md @@ -0,0 +1,13 @@ +Summary: +- Generates and verifies signed URLs for sandbox archive upload/download. + +Invariants: +- tenant_id and sandbox_id must be UUIDs. +- Signatures are tenant-scoped and include operation, expiry, and nonce. + +Edge Cases: +- Missing tenant private key raises ValueError. +- Expired or tampered signatures are rejected. + +Tests: +- Add unit tests if sandbox archive signature behavior expands. diff --git a/api/agent-notes/core/sandbox/storage/archive_storage.py.md b/api/agent-notes/core/sandbox/storage/archive_storage.py.md new file mode 100644 index 0000000000..a05e67159f --- /dev/null +++ b/api/agent-notes/core/sandbox/storage/archive_storage.py.md @@ -0,0 +1,12 @@ +Summary: +- Manages sandbox archive uploads/downloads for workspace persistence. + +Invariants: +- Archive storage key is sandbox/<tenant_id>/<sandbox_id>.tar.gz. +- Signed URLs are tenant-scoped and use external files URL. + +Edge Cases: +- Missing archive skips mount. + +Tests: +- Covered indirectly via sandbox integration tests. diff --git a/api/controllers/files/__init__.py b/api/controllers/files/__init__.py index c2e5e52bec..abf9026b9c 100644 --- a/api/controllers/files/__init__.py +++ b/api/controllers/files/__init__.py @@ -14,7 +14,15 @@ api = ExternalApi( files_ns = Namespace("files", description="File operations", path="/") -from . 
import app_assets_download, app_assets_upload, image_preview, storage_download, tool_files, upload +from . import ( + app_assets_download, + app_assets_upload, + image_preview, + sandbox_archive, + storage_download, + tool_files, + upload, +) api.add_namespace(files_ns) @@ -25,6 +33,7 @@ __all__ = [ "bp", "files_ns", "image_preview", + "sandbox_archive", "storage_download", "tool_files", "upload", diff --git a/api/controllers/files/sandbox_archive.py b/api/controllers/files/sandbox_archive.py new file mode 100644 index 0000000000..4f5e591a08 --- /dev/null +++ b/api/controllers/files/sandbox_archive.py @@ -0,0 +1,76 @@ +from uuid import UUID + +from flask import Response, request +from flask_restx import Resource +from pydantic import BaseModel, Field +from werkzeug.exceptions import Forbidden, NotFound + +from controllers.files import files_ns +from core.sandbox.security.archive_signer import SandboxArchivePath, SandboxArchiveSigner +from extensions.ext_storage import storage + +DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}" + + +class SandboxArchiveQuery(BaseModel): + expires_at: int = Field(..., description="Unix timestamp when the link expires") + nonce: str = Field(..., description="Random string for signature") + sign: str = Field(..., description="HMAC signature") + + +files_ns.schema_model( + SandboxArchiveQuery.__name__, + SandboxArchiveQuery.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0), +) + + +@files_ns.route("/sandbox-archives///download") +class SandboxArchiveDownloadApi(Resource): + def get(self, tenant_id: str, sandbox_id: str): + args = SandboxArchiveQuery.model_validate(request.args.to_dict(flat=True)) + + try: + archive_path = SandboxArchivePath(tenant_id=UUID(tenant_id), sandbox_id=UUID(sandbox_id)) + except ValueError as exc: + raise Forbidden(str(exc)) from exc + + if not SandboxArchiveSigner.verify_download_signature( + archive_path=archive_path, + expires_at=args.expires_at, + nonce=args.nonce, + sign=args.sign, 
+ ): + raise Forbidden("Invalid or expired download link") + + try: + generator = storage.load_stream(archive_path.get_storage_key()) + except FileNotFoundError as exc: + raise NotFound("Archive not found") from exc + + return Response( + generator, + mimetype="application/gzip", + direct_passthrough=True, + ) + + +@files_ns.route("/sandbox-archives///upload") +class SandboxArchiveUploadApi(Resource): + def put(self, tenant_id: str, sandbox_id: str): + args = SandboxArchiveQuery.model_validate(request.args.to_dict(flat=True)) + + try: + archive_path = SandboxArchivePath(tenant_id=UUID(tenant_id), sandbox_id=UUID(sandbox_id)) + except ValueError as exc: + raise Forbidden(str(exc)) from exc + + if not SandboxArchiveSigner.verify_upload_signature( + archive_path=archive_path, + expires_at=args.expires_at, + nonce=args.nonce, + sign=args.sign, + ): + raise Forbidden("Invalid or expired upload link") + + storage.save(archive_path.get_storage_key(), request.get_data()) + return Response(status=204) diff --git a/api/core/app_assets/storage.py b/api/core/app_assets/storage.py index d37d4cf1b8..2f656f1776 100644 --- a/api/core/app_assets/storage.py +++ b/api/core/app_assets/storage.py @@ -301,21 +301,19 @@ class AppAssetStorage: def get_storage_key(self, asset_path: AssetPathBase) -> str: return asset_path.get_storage_key() - def get_download_url(self, asset_path: AssetPathBase, expires_in: int = 3600, *, for_external: bool = True) -> str: + def get_download_url(self, asset_path: AssetPathBase, expires_in: int = 3600) -> str: storage_key = self.get_storage_key(asset_path) try: return self._storage.get_download_url(storage_key, expires_in) except NotImplementedError: pass - return self._generate_signed_proxy_download_url(asset_path, expires_in, for_external=for_external) + return self._generate_signed_proxy_download_url(asset_path, expires_in) def get_download_urls( self, asset_paths: Iterable[AssetPathBase], expires_in: int = 3600, - *, - for_external: bool = True, ) -> 
list[str]: asset_paths_list = list(asset_paths) storage_keys = [self.get_storage_key(asset_path) for asset_path in asset_paths_list] @@ -325,17 +323,12 @@ class AppAssetStorage: except NotImplementedError: pass - return [ - self._generate_signed_proxy_download_url(asset_path, expires_in, for_external=for_external) - for asset_path in asset_paths_list - ] + return [self._generate_signed_proxy_download_url(asset_path, expires_in) for asset_path in asset_paths_list] def get_upload_url( self, asset_path: AssetPathBase, expires_in: int = 3600, - *, - for_external: bool = True, ) -> str: storage_key = self.get_storage_key(asset_path) try: @@ -343,30 +336,26 @@ class AppAssetStorage: except NotImplementedError: pass - return self._generate_signed_proxy_upload_url(asset_path, expires_in, for_external=for_external) + return self._generate_signed_proxy_upload_url(asset_path, expires_in) - def _generate_signed_proxy_download_url( - self, asset_path: AssetPathBase, expires_in: int, *, for_external: bool - ) -> str: + def _generate_signed_proxy_download_url(self, asset_path: AssetPathBase, expires_in: int) -> str: expires_in = min(expires_in, dify_config.FILES_ACCESS_TIMEOUT) expires_at = int(time.time()) + max(expires_in, 1) nonce = os.urandom(16).hex() sign = AppAssetSigner.create_download_signature(asset_path=asset_path, expires_at=expires_at, nonce=nonce) - base_url = dify_config.FILES_URL if for_external else (dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL) + base_url = dify_config.FILES_URL url = self._build_proxy_url(base_url=base_url, asset_path=asset_path, action="download") query = urllib.parse.urlencode({"expires_at": expires_at, "nonce": nonce, "sign": sign}) return f"{url}?{query}" - def _generate_signed_proxy_upload_url( - self, asset_path: AssetPathBase, expires_in: int, *, for_external: bool - ) -> str: + def _generate_signed_proxy_upload_url(self, asset_path: AssetPathBase, expires_in: int) -> str: expires_in = min(expires_in, 
dify_config.FILES_ACCESS_TIMEOUT) expires_at = int(time.time()) + max(expires_in, 1) nonce = os.urandom(16).hex() sign = AppAssetSigner.create_upload_signature(asset_path=asset_path, expires_at=expires_at, nonce=nonce) - base_url = dify_config.FILES_URL if for_external else (dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL) + base_url = dify_config.FILES_URL url = self._build_proxy_url(base_url=base_url, asset_path=asset_path, action="upload") query = urllib.parse.urlencode({"expires_at": expires_at, "nonce": nonce, "sign": sign}) return f"{url}?{query}" diff --git a/api/core/sandbox/builder.py b/api/core/sandbox/builder.py index 28d8b75a18..d035c4861b 100644 --- a/api/core/sandbox/builder.py +++ b/api/core/sandbox/builder.py @@ -155,7 +155,7 @@ class SandboxBuilder: @classmethod def draft_id(cls, user_id: str) -> str: - return f"sandbox_draft_{user_id}" + return user_id class VMConfig: diff --git a/api/core/sandbox/initializer/app_assets_initializer.py b/api/core/sandbox/initializer/app_assets_initializer.py index f9a88b42e9..34a55d9fc8 100644 --- a/api/core/sandbox/initializer/app_assets_initializer.py +++ b/api/core/sandbox/initializer/app_assets_initializer.py @@ -28,7 +28,7 @@ class AppAssetsInitializer(AsyncSandboxInitializer): vm = sandbox.vm asset_storage = app_asset_storage zip_ref = AssetPath.build_zip(self._tenant_id, self._app_id, self._assets_id) - download_url = asset_storage.get_download_url(zip_ref, for_external=False) + download_url = asset_storage.get_download_url(zip_ref) ( pipeline(vm) diff --git a/api/core/sandbox/initializer/draft_app_assets_initializer.py b/api/core/sandbox/initializer/draft_app_assets_initializer.py index 30b4cd0462..0f30db02fa 100644 --- a/api/core/sandbox/initializer/draft_app_assets_initializer.py +++ b/api/core/sandbox/initializer/draft_app_assets_initializer.py @@ -43,7 +43,7 @@ class DraftAppAssetsInitializer(AsyncSandboxInitializer): else AssetPath.draft(self._tenant_id, self._app_id, node.id) for node in nodes 
] - urls = storage.get_download_urls(refs, DRAFT_ASSETS_EXPIRES_IN, for_external=False) + urls = storage.get_download_urls(refs, DRAFT_ASSETS_EXPIRES_IN) items = [AssetDownloadItem(path=tree.get_path(node.id).lstrip("/"), url=url) for node, url in zip(nodes, urls)] script = AssetDownloadService.build_download_script(items, AppAssets.PATH) pipeline(vm).add( diff --git a/api/core/sandbox/sandbox.py b/api/core/sandbox/sandbox.py index 5813e9a093..24c93e4741 100644 --- a/api/core/sandbox/sandbox.py +++ b/api/core/sandbox/sandbox.py @@ -87,7 +87,10 @@ class Sandbox: if self._cancel_event.is_set(): raise RuntimeError("Sandbox initialization was cancelled") if self._init_error is not None: - raise RuntimeError("Sandbox initialization failed") from self._init_error + if isinstance(self._init_error, ValueError): + raise RuntimeError(f"Sandbox initialization failed: {self._init_error}") from self._init_error + else: + raise RuntimeError("Sandbox initialization failed") from self._init_error def mount(self) -> bool: return self._storage.mount(self._vm) diff --git a/api/core/sandbox/security/__init__.py b/api/core/sandbox/security/__init__.py new file mode 100644 index 0000000000..c8c4ebefae --- /dev/null +++ b/api/core/sandbox/security/__init__.py @@ -0,0 +1 @@ +"""Sandbox security helpers.""" diff --git a/api/core/sandbox/security/archive_signer.py b/api/core/sandbox/security/archive_signer.py new file mode 100644 index 0000000000..5fd48b73db --- /dev/null +++ b/api/core/sandbox/security/archive_signer.py @@ -0,0 +1,152 @@ +from __future__ import annotations + +import base64 +import hashlib +import hmac +import os +import time +import urllib.parse +from dataclasses import dataclass +from uuid import UUID + +from configs import dify_config +from libs import rsa + + +@dataclass(frozen=True) +class SandboxArchivePath: + tenant_id: UUID + sandbox_id: UUID + + def get_storage_key(self) -> str: + return f"sandbox/{self.tenant_id}/{self.sandbox_id}.tar.gz" + + def proxy_path(self) 
-> str: + return f"{self.tenant_id}/{self.sandbox_id}" + + +class SandboxArchiveSigner: + SIGNATURE_PREFIX = "sandbox-archive" + SIGNATURE_VERSION = "v1" + OPERATION_DOWNLOAD = "download" + OPERATION_UPLOAD = "upload" + + @classmethod + def create_download_signature(cls, archive_path: SandboxArchivePath, expires_at: int, nonce: str) -> str: + return cls._create_signature( + archive_path=archive_path, + operation=cls.OPERATION_DOWNLOAD, + expires_at=expires_at, + nonce=nonce, + ) + + @classmethod + def create_upload_signature(cls, archive_path: SandboxArchivePath, expires_at: int, nonce: str) -> str: + return cls._create_signature( + archive_path=archive_path, + operation=cls.OPERATION_UPLOAD, + expires_at=expires_at, + nonce=nonce, + ) + + @classmethod + def verify_download_signature( + cls, archive_path: SandboxArchivePath, expires_at: int, nonce: str, sign: str + ) -> bool: + return cls._verify_signature( + archive_path=archive_path, + operation=cls.OPERATION_DOWNLOAD, + expires_at=expires_at, + nonce=nonce, + sign=sign, + ) + + @classmethod + def verify_upload_signature(cls, archive_path: SandboxArchivePath, expires_at: int, nonce: str, sign: str) -> bool: + return cls._verify_signature( + archive_path=archive_path, + operation=cls.OPERATION_UPLOAD, + expires_at=expires_at, + nonce=nonce, + sign=sign, + ) + + @classmethod + def _verify_signature( + cls, + *, + archive_path: SandboxArchivePath, + operation: str, + expires_at: int, + nonce: str, + sign: str, + ) -> bool: + if expires_at <= 0: + return False + + expected_sign = cls._create_signature( + archive_path=archive_path, + operation=operation, + expires_at=expires_at, + nonce=nonce, + ) + if not hmac.compare_digest(sign, expected_sign): + return False + + current_time = int(time.time()) + if expires_at < current_time: + return False + + if expires_at - current_time > dify_config.FILES_ACCESS_TIMEOUT: + return False + + return True + + @classmethod + def build_signed_url( + cls, + *, + archive_path: 
SandboxArchivePath, + expires_in: int, + action: str, + ) -> str: + expires_in = min(expires_in, dify_config.FILES_ACCESS_TIMEOUT) + expires_at = int(time.time()) + max(expires_in, 1) + nonce = os.urandom(16).hex() + sign = cls._create_signature( + archive_path=archive_path, + operation=action, + expires_at=expires_at, + nonce=nonce, + ) + + base_url = dify_config.FILES_URL + url = f"{base_url}/files/sandbox-archives/{archive_path.proxy_path()}/{action}" + query = urllib.parse.urlencode({"expires_at": expires_at, "nonce": nonce, "sign": sign}) + return f"{url}?{query}" + + @classmethod + def _create_signature( + cls, + *, + archive_path: SandboxArchivePath, + operation: str, + expires_at: int, + nonce: str, + ) -> str: + key = cls._tenant_key(str(archive_path.tenant_id)) + message = ( + f"{cls.SIGNATURE_PREFIX}|{cls.SIGNATURE_VERSION}|{operation}|" + f"{archive_path.tenant_id}|{archive_path.sandbox_id}|{expires_at}|{nonce}" + ) + sign = hmac.new(key, message.encode(), hashlib.sha256).digest() + return base64.urlsafe_b64encode(sign).decode() + + @classmethod + def _tenant_key(cls, tenant_id: str) -> bytes: + try: + rsa_key, _ = rsa.get_decrypt_decoding(tenant_id) + except rsa.PrivkeyNotFoundError as exc: + raise ValueError(f"Tenant private key missing for tenant_id={tenant_id}") from exc + private_key = rsa_key.export_key() + return hashlib.sha256(private_key).digest() diff --git a/api/core/sandbox/storage/archive_storage.py b/api/core/sandbox/storage/archive_storage.py index f7a74f515f..4230ca9d2c 100644 --- a/api/core/sandbox/storage/archive_storage.py +++ b/api/core/sandbox/storage/archive_storage.py @@ -1,10 +1,11 @@ import logging +from uuid import UUID +from core.sandbox.security.archive_signer import SandboxArchivePath, SandboxArchiveSigner from core.virtual_environment.__base.exec import PipelineExecutionError from core.virtual_environment.__base.helpers import pipeline from core.virtual_environment.__base.virtual_environment import VirtualEnvironment from 
extensions.ext_storage import storage -from extensions.storage.file_presign_storage import FilePresignStorage from .sandbox_storage import SandboxStorage @@ -34,14 +35,19 @@ class ArchiveSandboxStorage(SandboxStorage): @property def _storage_key(self) -> str: - return f"sandbox/{self._tenant_id}/{self._sandbox_id}.tar.gz" + return SandboxArchivePath(UUID(self._tenant_id), UUID(self._sandbox_id)).get_storage_key() def mount(self, sandbox: VirtualEnvironment) -> bool: if not self.exists(): logger.debug("No archive found for sandbox %s, skipping mount", self._sandbox_id) return False - download_url = FilePresignStorage(storage.storage_runner).get_download_url(self._storage_key) + archive_path = SandboxArchivePath(UUID(self._tenant_id), UUID(self._sandbox_id)) + download_url = SandboxArchiveSigner.build_signed_url( + archive_path=archive_path, + expires_in=ARCHIVE_DOWNLOAD_TIMEOUT, + action=SandboxArchiveSigner.OPERATION_DOWNLOAD, + ) try: ( pipeline(sandbox) @@ -58,7 +64,12 @@ class ArchiveSandboxStorage(SandboxStorage): return True def unmount(self, sandbox: VirtualEnvironment) -> bool: - upload_url = FilePresignStorage(storage.storage_runner).get_upload_url(self._storage_key) + archive_path = SandboxArchivePath(UUID(self._tenant_id), UUID(self._sandbox_id)) + upload_url = SandboxArchiveSigner.build_signed_url( + archive_path=archive_path, + expires_in=ARCHIVE_UPLOAD_TIMEOUT, + action=SandboxArchiveSigner.OPERATION_UPLOAD, + ) ( pipeline(sandbox) .add( diff --git a/api/extensions/storage/cached_presign_storage.py b/api/extensions/storage/cached_presign_storage.py index d636b4f117..f9f2ba08c0 100644 --- a/api/extensions/storage/cached_presign_storage.py +++ b/api/extensions/storage/cached_presign_storage.py @@ -57,7 +57,7 @@ class CachedPresignStorage(StorageWrapper): if cached: return cached - url = super().get_download_url(filename, expires_in) + url = self._storage.get_download_url(filename, expires_in) self._set_cached(cache_key, url, expires_in) return url @@ 
-94,7 +94,7 @@ class CachedPresignStorage(StorageWrapper): # Batch fetch uncached URLs from storage if uncached_filenames: - uncached_urls = [super().get_download_url(f, expires_in) for f in uncached_filenames] + uncached_urls = [self._storage.get_download_url(f, expires_in) for f in uncached_filenames] # Fill results at correct positions for idx, url in zip(uncached_indices, uncached_urls):