refactor: unify download item types and eliminate extension-based branching

Merge AssetDownloadItem and AssetInlineItem into SandboxDownloadItem with an
optional 'content' field. All consumers now follow a clean pipeline:
  get items → accessor.resolve_items() → AppAssetService.to_download_items() → download

Key changes:
- SandboxDownloadItem gains content: bytes | None (entities.py)
- ZipSandbox.download_items() handles both inline (base64 heredoc) and
  remote (curl) via a single pipeline — no structural branching
- AssetDownloadService.build_download_script() takes unified list
- CachedContentAccessor.resolve_items() batch-enriches items from DB
  (extension-agnostic, no 'if md' checks needed)
- AppAssetService.to_download_items() converts AssetItem → SandboxDownloadItem
- DraftAppAssetsInitializer, package_and_upload, export_bundle simplified
- file_upload/node.py switched to SandboxDownloadItem
- Deleted AssetDownloadItem and AssetInlineItem classes
This commit is contained in:
Harry
2026-03-10 17:11:41 +08:00
parent 6ac730ec2e
commit 65e89520c0
19 changed files with 492 additions and 214 deletions

View File

@ -6,12 +6,9 @@ All methods accept an AppAssetNode parameter to identify the target.
CachedContentAccessor is the primary entry point:
- Reads DB first, misses fall through to S3 with sync backfill.
- Writes go to both DB and S3 (dual-write).
- Wraps an internal StorageContentAccessor for S3 I/O.
Public helper:
- should_mirror(extension) — the ONLY place that maps file extensions to the
"should this node use DB mirror?" decision. All callers (presigned-upload
gating, etc.) should use this function instead of hard-coding extension checks.
- resolve_items() batch-enriches AssetItem lists with DB-cached content
(extension-agnostic), so callers never need to filter by extension.
- Wraps an internal _StorageAccessor for S3 I/O.
Collaborators:
- services.asset_content_service.AssetContentService (DB layer)
@ -24,29 +21,13 @@ from __future__ import annotations
import logging
from core.app.entities.app_asset_entities import AppAssetNode
from core.app_assets.entities.assets import AssetItem
from core.app_assets.storage import AssetPaths
from extensions.storage.cached_presign_storage import CachedPresignStorage
from services.asset_content_service import AssetContentService
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Extension-based policy — the single source of truth
# ---------------------------------------------------------------------------
_MIRROR_EXTENSIONS: frozenset[str] = frozenset({"md"})


def should_mirror(extension: str) -> bool:
    """Tell whether files with *extension* are cached (mirrored) in the DB.

    The lookup is case-insensitive: *extension* is lower-cased before it
    is tested against ``_MIRROR_EXTENSIONS``.

    This function is meant to be the single place where a file extension
    is mapped to the inline-mirror decision; other modules should call it
    rather than comparing extensions themselves.
    """
    normalized = extension.lower()
    return normalized in _MIRROR_EXTENSIONS
# ---------------------------------------------------------------------------
# S3-only implementation (internal, used as inner delegate)
# ---------------------------------------------------------------------------
@ -162,6 +143,38 @@ class CachedContentAccessor:
)
self._inner.save(node, content)
def resolve_items(self, items: list[AssetItem]) -> list[AssetItem]:
    """Batch-enrich asset items with DB-cached content.

    A single ``AssetContentService.get_many`` call is issued, keyed by
    ``asset_id`` only, so the outcome never depends on file extension.
    Items that have a cache entry are copied with *content* set to the
    cached text encoded as UTF-8; items without one are copied with
    their original *content* value (typically ``None``). Callers
    therefore do not need to filter by extension before consulting the
    DB cache.

    Args:
        items: Asset items to enrich. The input objects are not mutated.

    Returns:
        ``items`` itself when it is empty or when nothing is cached;
        otherwise a new list of copied items in the same order.
    """
    # Local import keeps this method self-contained with respect to the
    # module's top-level import block.
    from dataclasses import replace

    if not items:
        return items

    asset_ids = [item.asset_id for item in items]
    cached = AssetContentService.get_many(self._tenant_id, self._app_id, asset_ids)
    if not cached:
        return items

    # ``replace`` copies every dataclass field, so fields added to
    # AssetItem later are carried over instead of being silently dropped
    # by a hand-written field-by-field rebuild.
    return [
        replace(
            item,
            content=cached[item.asset_id].encode("utf-8") if item.asset_id in cached else item.content,
        )
        for item in items
    ]
def delete(self, node: AppAssetNode) -> None:
    """Remove the content stored for *node*.

    The cached row is deleted through ``AssetContentService`` first,
    then the wrapped inner accessor is asked to delete its copy.
    """
    node_id = node.id
    AssetContentService.delete(self._tenant_id, self._app_id, node_id)
    self._inner.delete(node)

View File

@ -1,14 +1,25 @@
"""Builder that compiles ``.md`` skill documents into resolved content.
The builder reads raw draft content from the DB-backed accessor, parses
each into a ``SkillDocument``, assembles a ``SkillBundle`` (with
transitive tool/file dependency resolution), and returns ``AssetItem``
objects whose *content* field carries the resolved bytes in-process.
No S3 writes happen here — the only persistence is the ``SkillBundle``
saved via ``SkillManager`` (S3 + Redis cache invalidation) so that
downstream consumers (``SkillInitializer``, ``DifyCliInitializer``) can
load it later.
"""
import json
import logging
from core.app.entities.app_asset_entities import AppAssetFileTree, AppAssetNode
from core.app_assets.accessor import CachedContentAccessor
from core.app_assets.entities import AssetItem
from core.app_assets.storage import AssetPaths
from core.skill.assembler import SkillBundleAssembler
from core.skill.entities.skill_bundle import SkillBundle
from core.skill.entities.skill_document import SkillDocument
from extensions.storage.base_storage import BaseStorage
from .base import BuildContext
@ -18,12 +29,10 @@ logger = logging.getLogger(__name__)
class SkillBuilder:
_nodes: list[tuple[AppAssetNode, str]]
_accessor: CachedContentAccessor
_storage: BaseStorage
def __init__(self, accessor: CachedContentAccessor, storage: BaseStorage) -> None:
def __init__(self, accessor: CachedContentAccessor) -> None:
self._nodes = []
self._accessor = accessor
self._storage = storage
def accept(self, node: AppAssetNode) -> bool:
return node.extension == "md"
@ -66,15 +75,14 @@ class SkillBuilder:
skill = bundle.get(node.id)
if skill is None:
continue
storage_key = AssetPaths.resolved(ctx.tenant_id, ctx.app_id, ctx.build_id, node.id)
self._storage.save(storage_key, skill.content.encode("utf-8"))
items.append(
AssetItem(
asset_id=node.id,
path=path,
file_name=node.name,
extension=node.extension or "",
storage_key=storage_key,
storage_key="",
content=skill.content.encode("utf-8"),
)
)
return items

View File

@ -1,10 +1,20 @@
from dataclasses import dataclass
from dataclasses import dataclass, field
@dataclass
class AssetItem:
"""A single asset file produced by the build pipeline.
When *content* is set the payload is available in-process and can be
written directly into a ZIP or uploaded to a sandbox VM without an
extra S3 round-trip. When *content* is ``None`` the caller should
fetch the bytes from *storage_key* (the traditional presigned-URL
path).
"""
asset_id: str
path: str
file_name: str
extension: str
storage_key: str
content: bytes | None = field(default=None, repr=False)

View File

@ -37,15 +37,6 @@ class AssetPaths:
_check_uuid(assets_id, "assets_id")
return f"{_BASE}/{tenant_id}/{app_id}/artifacts/{assets_id}.zip"
@staticmethod
def resolved(tenant_id: str, app_id: str, assets_id: str, node_id: str) -> str:
    """Build the storage key for a resolved node inside a build artifact.

    Layout: ``{_BASE}/{tenant}/{app}/artifacts/{assets_id}/resolved/{node_id}``
    (documented elsewhere in this class as starting with ``app_assets/``;
    the value of ``_BASE`` is not visible here).

    Each identifier is passed to ``_check_uuid`` together with its
    parameter name, in signature order, before the key is assembled.
    """
    labelled_ids = (
        (tenant_id, "tenant_id"),
        (app_id, "app_id"),
        (assets_id, "assets_id"),
        (node_id, "node_id"),
    )
    for value, label in labelled_ids:
        _check_uuid(value, label)
    return f"{_BASE}/{tenant_id}/{app_id}/artifacts/{assets_id}/resolved/{node_id}"
@staticmethod
def skill_bundle(tenant_id: str, app_id: str, assets_id: str) -> str:
"""app_assets/{tenant}/{app}/artifacts/{assets_id}/skill_artifact_set.json"""