mirror of https://github.com/langgenius/dify.git (synced 2026-05-06 02:18:08 +08:00)
refactor: unify download item types and eliminate extension-based branching
Merge AssetDownloadItem and AssetInlineItem into SandboxDownloadItem with an optional 'content' field. All consumers now follow a clean pipeline: get items → accessor.resolve_items() → AppAssetService.to_download_items() → download.

Key changes:
- SandboxDownloadItem gains content: bytes | None (entities.py)
- ZipSandbox.download_items() handles both inline (base64 heredoc) and remote (curl) via a single pipeline — no structural branching
- AssetDownloadService.build_download_script() takes the unified list
- CachedContentAccessor.resolve_items() batch-enriches items from the DB (extension-agnostic, no 'if md' checks needed)
- AppAssetService.to_download_items() converts AssetItem → SandboxDownloadItem
- DraftAppAssetsInitializer, package_and_upload, and export_bundle simplified
- file_upload/node.py switched to SandboxDownloadItem
- Deleted the AssetDownloadItem and AssetInlineItem classes
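To make the unified type concrete, here is a minimal sketch of the merged item and a mixed-mode call. It assumes only the field names visible in the diff below (path, url, content); the dataclass is an illustrative stand-in for the real SandboxDownloadItem in core.zip_sandbox.entities, not its actual definition.

from dataclasses import dataclass

@dataclass(frozen=True)
class SandboxDownloadItem:  # stand-in; the real class lives in core.zip_sandbox.entities
    path: str                     # destination path relative to the download root
    url: str = ""                 # presigned URL, used only when content is None
    content: bytes | None = None  # inline payload; when set, no download happens

# One inline item (written via base64 heredoc) and one remote item (fetched
# via curl/wget/python3) travel through the same pipeline, with no branching
# on file extension. The paths and URL are made up for illustration.
items = [
    SandboxDownloadItem(path="skills/intro.md", content=b"# compiled skill\n"),
    SandboxDownloadItem(path="assets/logo.png", url="https://example.com/presigned"),
]
# script = AssetDownloadService.build_download_script(items, "/workspace/app")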
@@ -13,12 +13,12 @@ from core.virtual_environment.__base.helpers import pipeline
 from ..bash.dify_cli import DifyCliConfig, DifyCliLocator
 from ..entities import DifyCli
-from .base import AsyncSandboxInitializer
+from .base import SyncSandboxInitializer
 
 logger = logging.getLogger(__name__)
 
 
-class DifyCliInitializer(AsyncSandboxInitializer):
+class DifyCliInitializer(SyncSandboxInitializer):
     def __init__(
         self,
         tenant_id: str,
@@ -1,23 +1,46 @@
+"""Async initializer that populates a draft sandbox with app asset files.
+
+Unlike ``AppAssetsInitializer`` (which downloads a pre-built ZIP for
+published assets), this initializer runs the build pipeline on the fly
+so that ``.md`` skill documents are compiled and their resolved content
+is embedded directly into the download script — avoiding the S3
+round-trip that was previously required for resolved keys.
+
+Execution order guarantee:
+    This runs as an ``AsyncSandboxInitializer`` in the background thread.
+    By the time it finishes, ``SkillManager.save_bundle()`` has been
+    called (inside ``SkillBuilder.build()``), so subsequent initializers
+    like ``DifyCliInitializer`` can safely load the bundle from Redis/S3.
+"""
+
 import logging
 
-from core.app_assets.accessor import should_mirror
+from core.app_assets.builder.base import BuildContext
+from core.app_assets.builder.file_builder import FileBuilder
+from core.app_assets.builder.pipeline import AssetBuildPipeline
+from core.app_assets.builder.skill_builder import SkillBuilder
 from core.app_assets.constants import AppAssetsAttrs
-from core.app_assets.storage import AssetPaths
 from core.sandbox.entities import AppAssets
 from core.sandbox.sandbox import Sandbox
 from core.sandbox.services import AssetDownloadService
-from core.sandbox.services.asset_download_service import AssetDownloadItem
 from core.virtual_environment.__base.helpers import pipeline
 from services.app_asset_service import AppAssetService
 
-from .base import AsyncSandboxInitializer
+from .base import SyncSandboxInitializer
 
 logger = logging.getLogger(__name__)
 
 _TIMEOUT = 600  # 10 minutes
 
 
-class DraftAppAssetsInitializer(AsyncSandboxInitializer):
+class DraftAppAssetsInitializer(SyncSandboxInitializer):
+    """Compile draft assets and push them into the sandbox VM.
+
+    ``.md`` (skill) files are compiled in-process and their resolved
+    content is embedded as base64 heredocs in the download script.
+    All other files are fetched from S3 via presigned URLs.
+    """
 
     def __init__(self, tenant_id: str, app_id: str, assets_id: str) -> None:
         self._tenant_id = tenant_id
         self._app_id = app_id
@@ -25,22 +48,22 @@ class DraftAppAssetsInitializer(AsyncSandboxInitializer):
 
     def initialize(self, sandbox: Sandbox) -> None:
         vm = sandbox.vm
-        build_id = self._assets_id
         tree = sandbox.attrs.get(AppAssetsAttrs.FILE_TREE)
-        asset_storage = AppAssetService.get_storage()
-        nodes = list(tree.walk_files())
-        if not nodes:
+        if tree.empty():
             return
-        # Inline-mirror nodes use the resolved (compiled) key; others use draft.
-        keys = [
-            AssetPaths.resolved(self._tenant_id, self._app_id, build_id, node.id)
-            if should_mirror(node.extension)
-            else AssetPaths.draft(self._tenant_id, self._app_id, node.id)
-            for node in nodes
-        ]
-        urls = asset_storage.get_download_urls(keys, _TIMEOUT)
-        items = [AssetDownloadItem(path=tree.get_path(node.id).lstrip("/"), url=url) for node, url in zip(nodes, urls)]
-        script = AssetDownloadService.build_download_script(items, AppAssets.PATH)
+
+        # --- 1. Run the build pipeline (SkillBuilder compiles .md inline) ---
+        accessor = AppAssetService.get_accessor(self._tenant_id, self._app_id)
+        build_pipeline = AssetBuildPipeline([SkillBuilder(accessor=accessor), FileBuilder()])
+        ctx = BuildContext(tenant_id=self._tenant_id, app_id=self._app_id, build_id=self._assets_id)
+        built_assets = build_pipeline.build_all(tree, ctx)
+
+        if not built_assets:
+            return
+
+        # --- 2. Convert to unified download items and execute ---
+        download_items = AppAssetService.to_download_items(built_assets)
+        script = AssetDownloadService.build_download_script(download_items, AppAssets.PATH)
         pipeline(vm).add(
             ["sh", "-c", script],
             error_message="Failed to download draft assets",
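The to_download_items() conversion used above is not part of the hunks shown here. A plausible sketch of the extension-agnostic mapping it performs, reusing the stand-in item class from earlier (the BuiltAsset fields are assumptions, not the real builder output type):

@dataclass(frozen=True)
class BuiltAsset:  # hypothetical stand-in for the build pipeline's output
    path: str
    content: bytes | None = None  # set when the builder resolved bytes in-process
    url: str = ""                 # presigned URL for assets left in storage

def to_download_items(built: list[BuiltAsset]) -> list[SandboxDownloadItem]:
    # The only question is whether we already hold the bytes; no 'if md' checks.
    return [
        SandboxDownloadItem(path=a.path, content=a.content)
        if a.content is not None
        else SandboxDownloadItem(path=a.path, url=a.url)
        for a in built
    ]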
@@ -1,11 +1,46 @@
+"""Shell script builder for downloading / writing assets into a sandbox VM.
+
+Generates a self-contained POSIX shell script that handles two kinds of
+``SandboxDownloadItem``:
+
+- Items with *content* — written via base64 heredoc (sequential).
+- Items with *url* — fetched via ``curl``/``wget``/``python3`` with
+  auto-detection, run as parallel background jobs.
+
+Both kinds can be mixed freely in a single call.
+"""
+
 from __future__ import annotations
 
+import base64
 import shlex
 import textwrap
-from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from core.zip_sandbox.entities import SandboxDownloadItem
 
 
-def _render_download_script(root_path: str, download_commands: str) -> str:
+def _build_inline_commands(items: list[SandboxDownloadItem], root_var: str) -> str:
+    """Generate shell commands that write base64-encoded content to files."""
+    lines: list[str] = []
+    for idx, item in enumerate(items):
+        assert item.content is not None
+        dest = f"${{{root_var}}}/{shlex.quote(item.path)}"
+        encoded = base64.b64encode(item.content).decode("ascii")
+        lines.append(f'mkdir -p "$(dirname "{dest}")"')
+        lines.append(f"base64 -d <<'_INLINE_{idx}' > \"{dest}\"")
+        lines.append(encoded)
+        lines.append(f"_INLINE_{idx}")
+    return "\n".join(lines)
+
+
+def _render_download_script(
+    root_path: str,
+    inline_commands: str,
+    download_commands: str,
+    need_downloader: bool,
+) -> str:
     python_download_cmd = (
         'python3 - "${url}" "${dest}" <<"PY"\n'
         "import sys\n"
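For a concrete feel of the heredoc mechanics, the following call shows what _build_inline_commands emits for a single item. The output is traced by hand from the function body above, using the stand-in item class from earlier:

item = SandboxDownloadItem(path="skills/intro.md", content=b"# hello\n")
print(_build_inline_commands([item], "download_root"))
# Emits (base64.b64encode(b"# hello\n") == b"IyBoZWxsbwo="):
#   mkdir -p "$(dirname "${download_root}/skills/intro.md")"
#   base64 -d <<'_INLINE_0' > "${download_root}/skills/intro.md"
#   IyBoZWxsbwo=
#   _INLINE_0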
@@ -18,59 +53,88 @@ def _render_download_script(root_path: str, download_commands: str) -> str:
         "    f.write(data)\n"
         "PY"
     )
-    script = f"""
-download_root={shlex.quote(root_path)}
-
-if command -v curl >/dev/null 2>&1; then
-  download_cmd='curl -fsSL "${{url}}" -o "${{dest}}"'
-elif command -v wget >/dev/null 2>&1; then
-  download_cmd='wget -q "${{url}}" -O "${{dest}}"'
-elif command -v python3 >/dev/null 2>&1; then
-  download_cmd={shlex.quote(python_download_cmd)}
-else
-  echo 'No downloader found (curl/wget/python3)' >&2
-  exit 1
-fi
-
-mkdir -p "${{download_root}}"
-fail_log="$(mktemp)"
-
-download_one() {{
-  file_path="$1"
-  url="$2"
-  dest="${{download_root}}/${{file_path}}"
-  mkdir -p "$(dirname "${{dest}}")"
-  eval "${{download_cmd}}" 2>/dev/null || echo "${{file_path}}" >> "${{fail_log}}"
-}}
-
-{download_commands}
-
-wait
-
-if [ -s "${{fail_log}}" ]; then
-  mv "${{fail_log}}" "${{download_root}}/DOWNLOAD_FAILURES.txt"
-else
-  rm -f "${{fail_log}}"
-fi
-exit 0
-"""
-    return textwrap.dedent(script).strip()
+    # Only emit the downloader-detection block when there are remote items.
+    if need_downloader:
+        downloader_block = f"""\
+if command -v curl >/dev/null 2>&1; then
+  download_cmd='curl -fsSL "${{url}}" -o "${{dest}}"'
+elif command -v wget >/dev/null 2>&1; then
+  download_cmd='wget -q "${{url}}" -O "${{dest}}"'
+elif command -v python3 >/dev/null 2>&1; then
+  download_cmd={shlex.quote(python_download_cmd)}
+else
+  echo 'No downloader found (curl/wget/python3)' >&2
+  exit 1
+fi
+
+fail_log="$(mktemp)"
+
+download_one() {{
+  file_path="$1"
+  url="$2"
+  dest="${{download_root}}/${{file_path}}"
+  mkdir -p "$(dirname "${{dest}}")"
+  eval "${{download_cmd}}" 2>/dev/null || echo "${{file_path}}" >> "${{fail_log}}"
+}}"""
+    else:
+        downloader_block = ""
+
+    # The failure-check block is only meaningful when downloads occurred.
+    if need_downloader:
+        wait_block = textwrap.dedent("""\
+wait
+
+if [ -s "${fail_log}" ]; then
+  mv "${fail_log}" "${download_root}/DOWNLOAD_FAILURES.txt"
+else
+  rm -f "${fail_log}"
+fi""")
+    else:
+        wait_block = ""
+
+    script = f"""\
+download_root={shlex.quote(root_path)}
+mkdir -p "${{download_root}}"
+
+{downloader_block}
+
+{inline_commands}
+
+{download_commands}
+
+{wait_block}
+exit 0"""
+    return script
 
 
-@dataclass(frozen=True)
-class AssetDownloadItem:
-    path: str
-    url: str
-
-
 class AssetDownloadService:
     @staticmethod
-    def build_download_script(items: list[AssetDownloadItem], root_path: str) -> str:
-        # Build a portable shell script to download assets in parallel.
+    def build_download_script(
+        items: list[SandboxDownloadItem],
+        root_path: str,
+    ) -> str:
+        """Build a portable shell script to write inline assets and download remote ones.
+
+        Items with *content* are written first (sequential base64 decode),
+        then items with *url* are fetched in parallel background jobs.
+        The two kinds can be mixed freely in a single list.
+        """
+        inline = [item for item in items if item.content is not None]
+        remote = [item for item in items if item.content is None]
+
+        inline_commands = _build_inline_commands(inline, "download_root") if inline else ""
+
         commands: list[str] = []
-        for item in items:
+        for item in remote:
            path = shlex.quote(item.path)
            url = shlex.quote(item.url)
            commands.append(f"download_one {path} {url} &")
        download_commands = "\n".join(commands)
-        return _render_download_script(root_path, download_commands)
+        return _render_download_script(
+            root_path,
+            inline_commands,
+            download_commands,
+            need_downloader=bool(remote),
+        )
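End to end, a mixed call assembles the blocks in a fixed order. The sketch below traces build_download_script through the code above; the item values are illustrative:

script = AssetDownloadService.build_download_script(
    [
        SandboxDownloadItem(path="skills/intro.md", content=b"# hello\n"),
        SandboxDownloadItem(path="assets/logo.png", url="https://example.com/presigned"),
    ],
    "/workspace/app",
)
# Resulting script shape:
#   download_root=/workspace/app ; mkdir -p "${download_root}"
#   downloader detection block   (emitted because a remote item exists)
#   inline writes, sequential:   base64 -d <<'_INLINE_0' > ...
#   remote fetches, parallel:    download_one assets/logo.png https://example.com/presigned &
#   wait; move fail_log to DOWNLOAD_FAILURES.txt if non-empty
#   exit 0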