mirror of
https://github.com/langgenius/dify.git
synced 2026-05-04 17:38:04 +08:00
refactor(sandbox): async init and draft downloads
Reduce startup latency by deferring sandbox setup and downloading draft assets directly with cached presigned URLs.
This commit is contained in:
@ -1,19 +1,25 @@
|
||||
from .bash.dify_cli import (
|
||||
DifyCliBinary,
|
||||
DifyCliConfig,
|
||||
DifyCliEnvConfig,
|
||||
DifyCliLocator,
|
||||
DifyCliToolConfig,
|
||||
)
|
||||
from .bash.session import SandboxBashSession
|
||||
from .builder import SandboxBuilder, VMConfig
|
||||
from .entities import AppAssets, DifyCli, SandboxProviderApiEntity, SandboxType
|
||||
from .initializer import AppAssetsInitializer, DifyCliInitializer, SandboxInitializer
|
||||
from .manager import SandboxManager
|
||||
from .sandbox import Sandbox
|
||||
from .storage import ArchiveSandboxStorage, SandboxStorage
|
||||
from .utils.debug import sandbox_debug
|
||||
from .utils.encryption import create_sandbox_config_encrypter, masked_config
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .bash.dify_cli import (
|
||||
DifyCliBinary,
|
||||
DifyCliConfig,
|
||||
DifyCliEnvConfig,
|
||||
DifyCliLocator,
|
||||
DifyCliToolConfig,
|
||||
)
|
||||
from .bash.session import SandboxBashSession
|
||||
from .builder import SandboxBuilder, VMConfig
|
||||
from .entities import AppAssets, DifyCli, SandboxProviderApiEntity, SandboxType
|
||||
from .initializer import AppAssetsInitializer, DifyCliInitializer, SandboxInitializer
|
||||
from .manager import SandboxManager
|
||||
from .sandbox import Sandbox
|
||||
from .storage import ArchiveSandboxStorage, SandboxStorage
|
||||
from .utils.debug import sandbox_debug
|
||||
from .utils.encryption import create_sandbox_config_encrypter, masked_config
|
||||
|
||||
__all__ = [
|
||||
"AppAssets",
|
||||
@ -39,3 +45,38 @@ __all__ = [
|
||||
"masked_config",
|
||||
"sandbox_debug",
|
||||
]
|
||||
|
||||
# Public name -> (module path, attribute name). Entries are resolved on first
# attribute access by the module-level ``__getattr__`` below rather than at
# package import time.
_LAZY_IMPORTS: dict[str, tuple[str, str]] = {
    "AppAssets": ("core.sandbox.entities", "AppAssets"),
    "AppAssetsInitializer": ("core.sandbox.initializer", "AppAssetsInitializer"),
    "ArchiveSandboxStorage": ("core.sandbox.storage", "ArchiveSandboxStorage"),
    "DifyCli": ("core.sandbox.entities", "DifyCli"),
    "DifyCliBinary": ("core.sandbox.bash.dify_cli", "DifyCliBinary"),
    "DifyCliConfig": ("core.sandbox.bash.dify_cli", "DifyCliConfig"),
    "DifyCliEnvConfig": ("core.sandbox.bash.dify_cli", "DifyCliEnvConfig"),
    "DifyCliInitializer": ("core.sandbox.initializer", "DifyCliInitializer"),
    "DifyCliLocator": ("core.sandbox.bash.dify_cli", "DifyCliLocator"),
    "DifyCliToolConfig": ("core.sandbox.bash.dify_cli", "DifyCliToolConfig"),
    "Sandbox": ("core.sandbox.sandbox", "Sandbox"),
    "SandboxBashSession": ("core.sandbox.bash.session", "SandboxBashSession"),
    "SandboxBuilder": ("core.sandbox.builder", "SandboxBuilder"),
    "SandboxInitializer": ("core.sandbox.initializer", "SandboxInitializer"),
    "SandboxManager": ("core.sandbox.manager", "SandboxManager"),
    "SandboxProviderApiEntity": ("core.sandbox.entities", "SandboxProviderApiEntity"),
    "SandboxStorage": ("core.sandbox.storage", "SandboxStorage"),
    "SandboxType": ("core.sandbox.entities", "SandboxType"),
    "VMConfig": ("core.sandbox.builder", "VMConfig"),
    "create_sandbox_config_encrypter": ("core.sandbox.utils.encryption", "create_sandbox_config_encrypter"),
    "masked_config": ("core.sandbox.utils.encryption", "masked_config"),
    "sandbox_debug": ("core.sandbox.utils.debug", "sandbox_debug"),
}


def __getattr__(name: str):
    """Resolve a lazily exported attribute the first time it is accessed.

    Python calls this module-level hook only when normal attribute lookup on
    the module fails. The target module listed in ``_LAZY_IMPORTS`` is
    imported, the attribute is fetched from it, and the result is stored in
    this module's globals so that later lookups no longer reach this hook.

    Args:
        name: Attribute name being looked up on this module.

    Returns:
        The object exported under ``name``.

    Raises:
        AttributeError: If ``name`` is not a registered lazy export.
    """
    try:
        module_path, attr_name = _LAZY_IMPORTS[name]
    except KeyError:
        # Mirror the interpreter's own wording (quoted names) so tracebacks and
        # tools that parse the message behave as for a regular module.
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}") from None
    value = getattr(importlib.import_module(module_path), attr_name)
    # Cache the resolved object; subsequent access is a plain globals lookup.
    globals()[name] = value
    return value
|
||||
|
||||
@ -16,6 +16,8 @@ from .bash_tool import SandboxBashTool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Upper bound, in seconds (10 minutes), passed to ``Sandbox.wait_ready`` before a
# bash session gives up waiting for sandbox initialization.
SANDBOX_READY_TIMEOUT = 60 * 10
|
||||
|
||||
|
||||
class SandboxBashSession:
|
||||
def __init__(self, *, sandbox: Sandbox, node_id: str, tools: ToolArtifact | None) -> None:
|
||||
@ -30,6 +32,8 @@ class SandboxBashSession:
|
||||
self._assets_id = sandbox.assets_id
|
||||
|
||||
def __enter__(self) -> SandboxBashSession:
|
||||
# Ensure sandbox initialization completes before any bash commands run.
|
||||
self._sandbox.wait_ready(timeout=SANDBOX_READY_TIMEOUT)
|
||||
self._cli_api_session = CliApiSessionManager().create(
|
||||
tenant_id=self._tenant_id,
|
||||
user_id=self._user_id,
|
||||
|
||||
@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
from collections.abc import Mapping, Sequence
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
@ -13,6 +15,8 @@ from .sandbox import Sandbox
|
||||
if TYPE_CHECKING:
|
||||
from .storage.sandbox_storage import SandboxStorage
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _get_sandbox_class(sandbox_type: SandboxType) -> type[VirtualEnvironment]:
|
||||
match sandbox_type:
|
||||
@ -108,10 +112,24 @@ class SandboxBuilder:
|
||||
app_id=self._app_id,
|
||||
assets_id=self._assets_id,
|
||||
)
|
||||
for init in self._initializers:
|
||||
init.initialize(sandbox)
|
||||
|
||||
sandbox.mount()
|
||||
# Run sandbox setup asynchronously so workflow execution can proceed.
|
||||
def initialize() -> None:
|
||||
try:
|
||||
for init in self._initializers:
|
||||
if sandbox.is_cancelled():
|
||||
return
|
||||
init.initialize(sandbox)
|
||||
if sandbox.is_cancelled():
|
||||
return
|
||||
sandbox.mount()
|
||||
sandbox.mark_ready()
|
||||
except Exception as exc:
|
||||
logger.exception("Failed to initialize sandbox: tenant_id=%s, app_id=%s", self._tenant_id, self._app_id)
|
||||
sandbox.mark_failed(exc)
|
||||
|
||||
# Background init completes or signals failure via sandbox state.
|
||||
threading.Thread(target=initialize, daemon=True).start()
|
||||
return sandbox
|
||||
|
||||
@staticmethod
|
||||
|
||||
@ -1,9 +1,11 @@
|
||||
from .app_assets_initializer import AppAssetsInitializer
|
||||
from .draft_app_assets_initializer import DraftAppAssetsInitializer
|
||||
from .base import SandboxInitializer
|
||||
from .dify_cli_initializer import DifyCliInitializer
|
||||
|
||||
# Public exports of the initializer package, kept in alphabetical order.
__all__ = [
    "AppAssetsInitializer",
    "DifyCliInitializer",
    "DraftAppAssetsInitializer",
    "SandboxInitializer",
]
|
||||
|
||||
@ -22,18 +22,21 @@ class AppAssetsInitializer(SandboxInitializer):
|
||||
self._app_id = app_id
|
||||
self._assets_id = assets_id
|
||||
|
||||
def initialize(self, sandbox: Sandbox) -> None:
|
||||
vm = sandbox.vm
|
||||
# load app assets
|
||||
def initialize(self, env: Sandbox) -> None:
|
||||
vm = env.vm
|
||||
# Load published app assets and unzip the artifact bundle.
|
||||
app_assets = AppAssetService.get_tenant_app_assets(self._tenant_id, self._assets_id)
|
||||
sandbox.attrs.set(AppAssetsAttrs.FILE_TREE, app_assets.asset_tree)
|
||||
env.attrs.set(AppAssetsAttrs.FILE_TREE, app_assets.asset_tree)
|
||||
|
||||
zip_key = AssetPaths.build_zip(self._tenant_id, self._app_id, self._assets_id)
|
||||
download_url = FilePresignStorage(storage.storage_runner).get_download_url(zip_key)
|
||||
|
||||
(
|
||||
pipeline(vm)
|
||||
.add(["wget", "-q", download_url, "-O", AppAssets.ZIP_PATH], error_message="Failed to download assets zip")
|
||||
.add(
|
||||
["wget", "-q", download_url, "-O", AppAssets.ZIP_PATH],
|
||||
error_message="Failed to download assets zip",
|
||||
)
|
||||
# unzip with silent error and return 1 if the zip is empty
|
||||
# FIXME(Mairuis): should use a more robust way to check if the zip is empty
|
||||
.add(
|
||||
|
||||
44
api/core/sandbox/initializer/draft_app_assets_initializer.py
Normal file
44
api/core/sandbox/initializer/draft_app_assets_initializer.py
Normal file
@ -0,0 +1,44 @@
|
||||
import logging
|
||||
|
||||
from core.app_assets.constants import AppAssetsAttrs
|
||||
from core.sandbox.entities import AppAssets
|
||||
from core.sandbox.sandbox import Sandbox
|
||||
from core.sandbox.services import AssetDownloadService
|
||||
from core.sandbox.services.asset_download_service import AssetDownloadItem
|
||||
from core.virtual_environment.__base.helpers import pipeline
|
||||
from services.app_asset_service import AppAssetService
|
||||
|
||||
from .base import SandboxInitializer
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Timeout handed to the download pipeline's ``execute`` call for draft assets
# (presumably seconds, i.e. 10 minutes; confirm against the pipeline helper).
DRAFT_ASSETS_DOWNLOAD_TIMEOUT = 60 * 10
|
||||
|
||||
|
||||
class DraftAppAssetsInitializer(SandboxInitializer):
    """Sandbox initializer that downloads draft app assets file by file.

    Instead of fetching a prebuilt zip bundle, it asks ``AppAssetService`` for
    per-file download URLs and runs a generated shell script inside the
    sandbox VM to fetch them under ``AppAssets.PATH``.
    """

    def __init__(self, tenant_id: str, app_id: str, assets_id: str) -> None:
        """Store the identifiers used to look up and log the draft assets."""
        self._tenant_id = tenant_id
        self._app_id = app_id
        self._assets_id = assets_id

    def initialize(self, env: Sandbox) -> None:
        """Record the asset tree on the sandbox and download every asset.

        Args:
            env: Sandbox whose VM runs the download script and whose attribute
                map receives the asset file tree.

        The pipeline is executed with ``raise_on_error=True``, so a failing
        download script surfaces as an exception from the pipeline helper.
        """
        vm = env.vm
        # Draft assets download via presigned URLs to avoid zip build overhead.
        app_assets = AppAssetService.get_tenant_app_assets(self._tenant_id, self._assets_id)
        env.attrs.set(AppAssetsAttrs.FILE_TREE, app_assets.asset_tree)

        items = [
            AssetDownloadItem(path=path, url=url)
            for path, url in AppAssetService.get_cached_draft_download_urls(app_assets)
        ]
        script = AssetDownloadService.build_download_script(items, AppAssets.PATH)
        pipeline(vm).add(
            ["sh", "-lc", script],
            error_message="Failed to download draft assets",
        ).execute(timeout=DRAFT_ASSETS_DOWNLOAD_TIMEOUT, raise_on_error=True)

        logger.info(
            "Draft app assets initialized for app_id=%s, assets_id=%s",
            self._app_id,
            self._assets_id,
        )
|
||||
@ -9,6 +9,7 @@ from core.sandbox.entities import AppAssets, SandboxType
|
||||
from core.sandbox.entities.providers import SandboxProviderEntity
|
||||
from core.sandbox.initializer.app_assets_initializer import AppAssetsInitializer
|
||||
from core.sandbox.initializer.dify_cli_initializer import DifyCliInitializer
|
||||
from core.sandbox.initializer.draft_app_assets_initializer import DraftAppAssetsInitializer
|
||||
from core.sandbox.initializer.skill_initializer import SkillInitializer
|
||||
from core.sandbox.sandbox import Sandbox
|
||||
from core.sandbox.storage.archive_storage import ArchiveSandboxStorage
|
||||
@ -158,7 +159,7 @@ class SandboxManager:
|
||||
.options(sandbox_provider.config)
|
||||
.user(user_id)
|
||||
.app(app_id)
|
||||
.initializer(AppAssetsInitializer(tenant_id, app_id, assets.id))
|
||||
.initializer(DraftAppAssetsInitializer(tenant_id, app_id, assets.id))
|
||||
.initializer(DifyCliInitializer(tenant_id, user_id, app_id, assets.id))
|
||||
.initializer(SkillInitializer(tenant_id, user_id, app_id, assets.id))
|
||||
.storage(storage, assets.id)
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from libs.attr_map import AttrMap
|
||||
@ -30,6 +31,9 @@ class Sandbox:
|
||||
self._app_id = app_id
|
||||
self._assets_id = assets_id
|
||||
self._attributes = AttrMap()
|
||||
self._ready_event = threading.Event()
|
||||
self._cancel_event = threading.Event()
|
||||
self._init_error: Exception | None = None
|
||||
|
||||
@property
|
||||
def attrs(self) -> AttrMap:
|
||||
@ -59,6 +63,32 @@ class Sandbox:
|
||||
def assets_id(self) -> str:
|
||||
return self._assets_id
|
||||
|
||||
def mark_ready(self) -> None:
    """Signal that sandbox initialization has completed successfully.

    Sets the ready event, which unblocks every caller of ``wait_ready``.
    """
    self._ready_event.set()
|
||||
|
||||
def mark_failed(self, error: Exception) -> None:
    """Capture an initialization error and unblock waiters.

    Args:
        error: The exception that aborted initialization; ``wait_ready``
            re-raises it as the cause of a ``RuntimeError``.
    """
    # Store the error before setting the event so a waiter that wakes up
    # always observes it.
    self._init_error = error
    self._ready_event.set()
|
||||
|
||||
def cancel_init(self) -> None:
    """Mark initialization as cancelled and unblock waiters.

    The cancel flag is what ``is_cancelled`` reports; the ready event is set
    as well so that callers blocked in ``wait_ready`` wake up and observe the
    cancellation instead of waiting for their timeout.
    """
    self._cancel_event.set()
    self._ready_event.set()
|
||||
|
||||
def is_cancelled(self) -> bool:
    """Return True once ``cancel_init`` has set the cancel flag."""
    return self._cancel_event.is_set()
|
||||
|
||||
def wait_ready(self, timeout: float | None = None) -> None:
    """Block until initialization completes, fails, or is cancelled.

    Args:
        timeout: Maximum number of seconds to wait; ``None`` waits
            indefinitely.

    Raises:
        TimeoutError: The ready event was not set within ``timeout``.
        RuntimeError: Initialization was cancelled, or it failed (the
            recorded error is attached as ``__cause__``).
    """
    if not self._ready_event.wait(timeout=timeout):
        raise TimeoutError("Sandbox initialization timed out")
    # Cancellation is checked before failure, so a cancelled sandbox reports
    # cancellation even if an error was also recorded.
    if self._cancel_event.is_set():
        raise RuntimeError("Sandbox initialization was cancelled")
    if self._init_error is not None:
        raise RuntimeError("Sandbox initialization failed") from self._init_error
|
||||
|
||||
def mount(self) -> bool:
    """Mount the sandbox storage onto the VM and return the storage's result."""
    return self._storage.mount(self._vm)
|
||||
|
||||
@ -66,6 +96,7 @@ class Sandbox:
|
||||
return self._storage.unmount(self._vm)
|
||||
|
||||
def release(self) -> None:
|
||||
self.cancel_init()
|
||||
sandbox_id = self._vm.metadata.id
|
||||
try:
|
||||
self._storage.unmount(self._vm)
|
||||
|
||||
3
api/core/sandbox/services/__init__.py
Normal file
3
api/core/sandbox/services/__init__.py
Normal file
@ -0,0 +1,3 @@
|
||||
from .asset_download_service import AssetDownloadService
|
||||
|
||||
__all__ = ["AssetDownloadService"]
|
||||
77
api/core/sandbox/services/asset_download_service.py
Normal file
77
api/core/sandbox/services/asset_download_service.py
Normal file
@ -0,0 +1,77 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import shlex
|
||||
import textwrap
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
def _render_download_script(root_path: str, download_commands: str) -> str:
    """Render the POSIX shell script that downloads assets under ``root_path``.

    The script picks the first available downloader (curl, wget, then a small
    python3 program), defines a ``download_one <path> <url>`` shell function,
    runs ``download_commands`` (expected to be ``download_one ... &`` lines),
    waits for all background jobs, and exits with status 1 after listing any
    paths whose download failed.

    Args:
        root_path: Directory the assets are written under; shell-quoted here.
        download_commands: Newline-separated shell commands inserted verbatim
            after the ``download_one`` definition. Callers must quote them.

    Returns:
        The script text without leading or trailing whitespace.
    """
    # python3 fallback: the program is fed on stdin through a quoted heredoc
    # (no shell expansion inside), with url/dest passed as argv.
    python_download_cmd = (
        "python3 - \"${url}\" \"${dest}\" <<\"PY\"\n"
        "import sys\n"
        "import urllib.request\n"
        "url = sys.argv[1]\n"
        "dest = sys.argv[2]\n"
        "with urllib.request.urlopen(url) as resp:\n"
        "    data = resp.read()\n"
        "with open(dest, \"wb\") as f:\n"
        "    f.write(data)\n"
        "PY"
    )
    # Assemble the script line by line. Dedenting an interpolated template is
    # unreliable here because the quoted heredoc above and the caller's
    # commands contain column-0 lines, which defeat textwrap.dedent.
    script_lines = [
        f"download_root={shlex.quote(root_path)}",
        "",
        "if command -v curl >/dev/null 2>&1; then",
        "    download_cmd='curl -fsSL \"${url}\" -o \"${dest}\"'",
        "elif command -v wget >/dev/null 2>&1; then",
        "    download_cmd='wget -q \"${url}\" -O \"${dest}\"'",
        "elif command -v python3 >/dev/null 2>&1; then",
        f"    download_cmd={shlex.quote(python_download_cmd)}",
        "else",
        "    echo 'No downloader found (curl/wget/python3)' >&2",
        "    exit 1",
        "fi",
        "",
        'mkdir -p "${download_root}"',
        'fail_log="$(mktemp)"',
        "",
        "download_one() {",
        '    file_path="$1"',
        '    url="$2"',
        # Join root and path with exactly one slash, whether or not the asset
        # path starts with "/" or the root ends with one.
        '    dest="${download_root%/}/${file_path#/}"',
        '    mkdir -p "$(dirname "${dest}")"',
        # download_cmd is a command template; eval expands url/dest inside its
        # double quotes. Failures are recorded rather than aborting the batch.
        '    eval "${download_cmd}" || echo "${file_path}" >> "${fail_log}"',
        "}",
        "",
        download_commands,
        "",
        "wait",
        "",
        'if [ -s "${fail_log}" ]; then',
        "    echo 'Failed downloads:' >&2",
        '    cat "${fail_log}" >&2',
        '    rm -f "${fail_log}"',
        "    exit 1",
        "fi",
        'rm -f "${fail_log}"',
    ]
    return "\n".join(script_lines).strip()
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class AssetDownloadItem:
    """One asset to download: where to write it and where to fetch it from."""

    # Destination path of the asset; the download script appends it to the
    # download root directory.
    path: str
    # URL the asset is fetched from.
    url: str
|
||||
|
||||
|
||||
class AssetDownloadService:
    """Builds shell scripts that download a set of assets inside a sandbox."""

    @staticmethod
    def build_download_script(items: list[AssetDownloadItem], root_path: str) -> str:
        """Build a portable shell script that downloads ``items`` in parallel.

        Args:
            items: Assets to fetch; each path and URL is shell-quoted.
            root_path: Directory under which the assets are written.

        Returns:
            The script text produced by ``_render_download_script``.
        """
        # One backgrounded ``download_one`` call per asset; the rendered
        # script waits for all of them before checking for failures.
        commands = [
            f"download_one {shlex.quote(item.path)} {shlex.quote(item.url)} &"
            for item in items
        ]
        return _render_download_script(root_path, "\n".join(commands))
|
||||
Reference in New Issue
Block a user