mirror of
https://github.com/langgenius/dify.git
synced 2026-05-06 02:18:08 +08:00
refactor: unify download item types and eliminate extension-based branching
Merge AssetDownloadItem and AssetInlineItem into SandboxDownloadItem with an optional 'content' field. All consumers now follow a clean pipeline: get items → accessor.resolve_items() → AppAssetService.to_download_items() → download.

Key changes:
- SandboxDownloadItem gains content: bytes | None (entities.py)
- ZipSandbox.download_items() handles both inline (base64 heredoc) and remote (curl) items via a single pipeline — no structural branching
- AssetDownloadService.build_download_script() takes a unified list
- CachedContentAccessor.resolve_items() batch-enriches items from the DB (extension-agnostic, no 'if md' checks needed)
- AppAssetService.to_download_items() converts AssetItem → SandboxDownloadItem
- DraftAppAssetsInitializer, package_and_upload, and export_bundle are simplified
- file_upload/node.py switched to SandboxDownloadItem
- Deleted the AssetDownloadItem and AssetInlineItem classes
This commit is contained in:
@ -6,12 +6,9 @@ All methods accept an AppAssetNode parameter to identify the target.
|
||||
CachedContentAccessor is the primary entry point:
|
||||
- Reads DB first, misses fall through to S3 with sync backfill.
|
||||
- Writes go to both DB and S3 (dual-write).
|
||||
- Wraps an internal StorageContentAccessor for S3 I/O.
|
||||
|
||||
Public helper:
|
||||
- should_mirror(extension) — the ONLY place that maps file extensions to the
|
||||
"should this node use DB mirror?" decision. All callers (presigned-upload
|
||||
gating, etc.) should use this function instead of hard-coding extension checks.
|
||||
- resolve_items() batch-enriches AssetItem lists with DB-cached content
|
||||
(extension-agnostic), so callers never need to filter by extension.
|
||||
- Wraps an internal _StorageAccessor for S3 I/O.
|
||||
|
||||
Collaborators:
|
||||
- services.asset_content_service.AssetContentService (DB layer)
|
||||
@ -24,29 +21,13 @@ from __future__ import annotations
|
||||
import logging
|
||||
|
||||
from core.app.entities.app_asset_entities import AppAssetNode
|
||||
from core.app_assets.entities.assets import AssetItem
|
||||
from core.app_assets.storage import AssetPaths
|
||||
from extensions.storage.cached_presign_storage import CachedPresignStorage
|
||||
from services.asset_content_service import AssetContentService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Extension-based policy — the single source of truth
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Extensions (lower-case, no leading dot) whose files are mirrored into the DB.
_MIRROR_EXTENSIONS: frozenset[str] = frozenset({"md"})


def should_mirror(extension: str) -> bool:
    """Tell whether files with *extension* should be cached in the DB.

    The comparison is case-insensitive: *extension* is lower-cased before
    it is looked up in ``_MIRROR_EXTENSIONS``. No other normalisation is
    applied, so a value with a leading dot (``".md"``) does not match.

    This function is meant to be the single place where a file extension
    is mapped to the inline-mirror decision; other modules should call it
    rather than comparing extensions themselves.
    """
    normalized = extension.lower()
    return normalized in _MIRROR_EXTENSIONS
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# S3-only implementation (internal, used as inner delegate)
|
||||
# ---------------------------------------------------------------------------
|
||||
@ -162,6 +143,38 @@ class CachedContentAccessor:
|
||||
)
|
||||
self._inner.save(node, content)
|
||||
|
||||
def resolve_items(self, items: list[AssetItem]) -> list[AssetItem]:
    """Fill in *content* on asset items from the DB cache, in one batch.

    The lookup is keyed purely by ``asset_id`` and never inspects the
    file extension, so callers do not have to pre-filter by extension
    before asking for cached content.

    Returns:
        The very same list object when *items* is empty or when the
        cache lookup yields nothing. Otherwise a new list of freshly
        built ``AssetItem`` objects, in the input order: an item whose
        ``asset_id`` has a cache entry gets that entry encoded as UTF-8
        bytes as its *content*; every other item keeps the *content* it
        came in with (typically ``None``).
    """
    # Nothing to look up — hand the caller's list straight back.
    if not items:
        return items

    asset_ids = [item.asset_id for item in items]
    cached = AssetContentService.get_many(self._tenant_id, self._app_id, asset_ids)

    # No cache hits at all — the input list is returned untouched.
    if not cached:
        return items

    enriched: list[AssetItem] = []
    for item in items:
        if item.asset_id in cached:
            # Cached text is stored as str; items carry bytes.
            payload = cached[item.asset_id].encode("utf-8")
        else:
            payload = item.content
        enriched.append(
            AssetItem(
                asset_id=item.asset_id,
                path=item.path,
                file_name=item.file_name,
                extension=item.extension,
                storage_key=item.storage_key,
                content=payload,
            )
        )
    return enriched
|
||||
|
||||
def delete(self, node: AppAssetNode) -> None:
    """Remove the content of *node* from both backing stores.

    The DB row for ``node.id`` is deleted first via
    ``AssetContentService``; the inner accessor is then asked to delete
    the same node.
    """
    node_id = node.id
    AssetContentService.delete(self._tenant_id, self._app_id, node_id)
    self._inner.delete(node)
|
||||
|
||||
@ -1,14 +1,25 @@
|
||||
"""Builder that compiles ``.md`` skill documents into resolved content.
|
||||
|
||||
The builder reads raw draft content from the DB-backed accessor, parses
|
||||
each into a ``SkillDocument``, assembles a ``SkillBundle`` (with
|
||||
transitive tool/file dependency resolution), and returns ``AssetItem``
|
||||
objects whose *content* field carries the resolved bytes in-process.
|
||||
|
||||
No S3 writes happen here — the only persistence is the ``SkillBundle``
|
||||
saved via ``SkillManager`` (S3 + Redis cache invalidation) so that
|
||||
downstream consumers (``SkillInitializer``, ``DifyCliInitializer``) can
|
||||
load it later.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
|
||||
from core.app.entities.app_asset_entities import AppAssetFileTree, AppAssetNode
|
||||
from core.app_assets.accessor import CachedContentAccessor
|
||||
from core.app_assets.entities import AssetItem
|
||||
from core.app_assets.storage import AssetPaths
|
||||
from core.skill.assembler import SkillBundleAssembler
|
||||
from core.skill.entities.skill_bundle import SkillBundle
|
||||
from core.skill.entities.skill_document import SkillDocument
|
||||
from extensions.storage.base_storage import BaseStorage
|
||||
|
||||
from .base import BuildContext
|
||||
|
||||
@ -18,12 +29,10 @@ logger = logging.getLogger(__name__)
|
||||
class SkillBuilder:
|
||||
_nodes: list[tuple[AppAssetNode, str]]
|
||||
_accessor: CachedContentAccessor
|
||||
_storage: BaseStorage
|
||||
|
||||
def __init__(self, accessor: CachedContentAccessor, storage: BaseStorage) -> None:
|
||||
def __init__(self, accessor: CachedContentAccessor) -> None:
|
||||
self._nodes = []
|
||||
self._accessor = accessor
|
||||
self._storage = storage
|
||||
|
||||
def accept(self, node: AppAssetNode) -> bool:
    """Return True when *node* is a ``.md`` file this builder handles.

    The match is an exact, case-sensitive comparison of
    ``node.extension`` against ``"md"``.
    """
    is_markdown = node.extension == "md"
    return is_markdown
|
||||
@ -66,15 +75,14 @@ class SkillBuilder:
|
||||
skill = bundle.get(node.id)
|
||||
if skill is None:
|
||||
continue
|
||||
storage_key = AssetPaths.resolved(ctx.tenant_id, ctx.app_id, ctx.build_id, node.id)
|
||||
self._storage.save(storage_key, skill.content.encode("utf-8"))
|
||||
items.append(
|
||||
AssetItem(
|
||||
asset_id=node.id,
|
||||
path=path,
|
||||
file_name=node.name,
|
||||
extension=node.extension or "",
|
||||
storage_key=storage_key,
|
||||
storage_key="",
|
||||
content=skill.content.encode("utf-8"),
|
||||
)
|
||||
)
|
||||
return items
|
||||
|
||||
@ -1,10 +1,20 @@
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
@dataclass
|
||||
class AssetItem:
|
||||
"""A single asset file produced by the build pipeline.
|
||||
|
||||
When *content* is set the payload is available in-process and can be
|
||||
written directly into a ZIP or uploaded to a sandbox VM without an
|
||||
extra S3 round-trip. When *content* is ``None`` the caller should
|
||||
fetch the bytes from *storage_key* (the traditional presigned-URL
|
||||
path).
|
||||
"""
|
||||
|
||||
asset_id: str
|
||||
path: str
|
||||
file_name: str
|
||||
extension: str
|
||||
storage_key: str
|
||||
content: bytes | None = field(default=None, repr=False)
|
||||
|
||||
@ -37,15 +37,6 @@ class AssetPaths:
|
||||
_check_uuid(assets_id, "assets_id")
|
||||
return f"{_BASE}/{tenant_id}/{app_id}/artifacts/{assets_id}.zip"
|
||||
|
||||
@staticmethod
def resolved(tenant_id: str, app_id: str, assets_id: str, node_id: str) -> str:
    """app_assets/{tenant}/{app}/artifacts/{assets_id}/resolved/{node_id}

    Each identifier is passed through ``_check_uuid`` (in parameter
    order) before the storage key is assembled.
    """
    labelled_ids = (
        (tenant_id, "tenant_id"),
        (app_id, "app_id"),
        (assets_id, "assets_id"),
        (node_id, "node_id"),
    )
    for value, label in labelled_ids:
        _check_uuid(value, label)

    key = f"{_BASE}/{tenant_id}/{app_id}/artifacts/{assets_id}/resolved/{node_id}"
    return key
|
||||
|
||||
@staticmethod
|
||||
def skill_bundle(tenant_id: str, app_id: str, assets_id: str) -> str:
|
||||
"""app_assets/{tenant}/{app}/artifacts/{assets_id}/skill_artifact_set.json"""
|
||||
|
||||
Reference in New Issue
Block a user