refactor: unify download item types and eliminate extension-based branching

Merge AssetDownloadItem and AssetInlineItem into SandboxDownloadItem with
optional 'content' field. All consumers now follow a clean pipeline:
  get items → accessor.resolve_items() → AppAssetService.to_download_items() → download

Key changes:
- SandboxDownloadItem gains content: bytes | None (entities.py)
- ZipSandbox.download_items() handles both inline (base64 heredoc) and
  remote (curl) via a single pipeline — no structural branching
- AssetDownloadService.build_download_script() takes unified list
- CachedContentAccessor.resolve_items() batch-enriches items from DB
  (extension-agnostic, no 'if md' checks needed)
- AppAssetService.to_download_items() converts AssetItem → SandboxDownloadItem
- DraftAppAssetsInitializer, package_and_upload, export_bundle simplified
- file_upload/node.py switched to SandboxDownloadItem
- Deleted AssetDownloadItem and AssetInlineItem classes
This commit is contained in:
Harry
2026-03-10 17:11:41 +08:00
parent 6ac730ec2e
commit 65e89520c0
19 changed files with 492 additions and 214 deletions

View File

@ -1,11 +1,46 @@
"""Shell script builder for downloading / writing assets into a sandbox VM.
Generates a self-contained POSIX shell script that handles two kinds of
``SandboxDownloadItem``:
- Items with *content* — written via base64 heredoc (sequential).
- Items with *url* — fetched via ``curl``/``wget``/``python3`` with
auto-detection, run as parallel background jobs.
Both kinds can be mixed freely in a single call.
"""
from __future__ import annotations
import base64
import shlex
import textwrap
from dataclasses import dataclass
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from core.zip_sandbox.entities import SandboxDownloadItem
def _render_download_script(root_path: str, download_commands: str) -> str:
def _build_inline_commands(items: list[SandboxDownloadItem], root_var: str) -> str:
"""Generate shell commands that write base64-encoded content to files."""
lines: list[str] = []
for idx, item in enumerate(items):
assert item.content is not None
dest = f"${{{root_var}}}/{shlex.quote(item.path)}"
encoded = base64.b64encode(item.content).decode("ascii")
lines.append(f'mkdir -p "$(dirname "{dest}")"')
lines.append(f"base64 -d <<'_INLINE_{idx}' > \"{dest}\"")
lines.append(encoded)
lines.append(f"_INLINE_{idx}")
return "\n".join(lines)
def _render_download_script(
root_path: str,
inline_commands: str,
download_commands: str,
need_downloader: bool,
) -> str:
python_download_cmd = (
'python3 - "${url}" "${dest}" <<"PY"\n'
"import sys\n"
@ -18,59 +53,88 @@ def _render_download_script(root_path: str, download_commands: str) -> str:
" f.write(data)\n"
"PY"
)
script = f"""
download_root={shlex.quote(root_path)}
if command -v curl >/dev/null 2>&1; then
download_cmd='curl -fsSL "${{url}}" -o "${{dest}}"'
elif command -v wget >/dev/null 2>&1; then
download_cmd='wget -q "${{url}}" -O "${{dest}}"'
elif command -v python3 >/dev/null 2>&1; then
download_cmd={shlex.quote(python_download_cmd)}
else
echo 'No downloader found (curl/wget/python3)' >&2
exit 1
fi
# Only emit the downloader-detection block when there are remote items.
if need_downloader:
downloader_block = f"""\
if command -v curl >/dev/null 2>&1; then
download_cmd='curl -fsSL "${{url}}" -o "${{dest}}"'
elif command -v wget >/dev/null 2>&1; then
download_cmd='wget -q "${{url}}" -O "${{dest}}"'
elif command -v python3 >/dev/null 2>&1; then
download_cmd={shlex.quote(python_download_cmd)}
else
echo 'No downloader found (curl/wget/python3)' >&2
exit 1
fi
mkdir -p "${{download_root}}"
fail_log="$(mktemp)"
fail_log="$(mktemp)"
download_one() {{
file_path="$1"
url="$2"
dest="${{download_root}}/${{file_path}}"
mkdir -p "$(dirname "${{dest}}")"
eval "${{download_cmd}}" 2>/dev/null || echo "${{file_path}}" >> "${{fail_log}}"
}}
download_one() {{
file_path="$1"
url="$2"
dest="${{download_root}}/${{file_path}}"
mkdir -p "$(dirname "${{dest}}")"
eval "${{download_cmd}}" 2>/dev/null || echo "${{file_path}}" >> "${{fail_log}}"
}}"""
else:
downloader_block = ""
{download_commands}
# The failure-check block is only meaningful when downloads occurred.
if need_downloader:
wait_block = textwrap.dedent("""\
wait
wait
if [ -s "${fail_log}" ]; then
mv "${fail_log}" "${download_root}/DOWNLOAD_FAILURES.txt"
else
rm -f "${fail_log}"
fi""")
else:
wait_block = ""
if [ -s "${{fail_log}}" ]; then
mv "${{fail_log}}" "${{download_root}}/DOWNLOAD_FAILURES.txt"
else
rm -f "${{fail_log}}"
fi
exit 0
"""
return textwrap.dedent(script).strip()
script = f"""\
download_root={shlex.quote(root_path)}
mkdir -p "${{download_root}}"
{downloader_block}
@dataclass(frozen=True)
class AssetDownloadItem:
path: str
url: str
{inline_commands}
{download_commands}
{wait_block}
exit 0"""
return script
class AssetDownloadService:
    """Builds POSIX shell scripts that materialise assets inside a sandbox VM."""

    @staticmethod
    def build_download_script(
        items: list[SandboxDownloadItem],
        root_path: str,
    ) -> str:
        """Build a portable shell script to write inline assets and download remote ones.

        Items with *content* are written first (sequential base64 decode),
        then items with *url* are fetched in parallel background jobs.
        The two kinds can be mixed freely in a single list.

        Args:
            items: Mixed inline/remote download items.
            root_path: Directory inside the sandbox to populate.

        Returns:
            A self-contained POSIX shell script as a single string.
        """
        # Partition once: the presence of `content` selects the strategy,
        # replacing the old extension-based branching.
        inline = [item for item in items if item.content is not None]
        remote = [item for item in items if item.content is None]
        inline_commands = _build_inline_commands(inline, "download_root") if inline else ""
        commands: list[str] = []
        for item in remote:
            # Quote both fields — paths and URLs may contain shell metacharacters.
            path = shlex.quote(item.path)
            url = shlex.quote(item.url)
            commands.append(f"download_one {path} {url} &")
        download_commands = "\n".join(commands)
        return _render_download_script(
            root_path,
            inline_commands,
            download_commands,
            # The curl/wget/python3 detection block is only emitted when
            # there is at least one remote fetch to perform.
            need_downloader=bool(remote),
        )