"""Unified content accessor for app asset nodes. Accessor is scoped to a single app (tenant_id + app_id), not a single node. All methods accept an AppAssetNode parameter to identify the target. CachedContentAccessor is the primary entry point: - Reads DB first, misses fall through to S3 with sync backfill. - Writes go to both DB and S3 (dual-write). - resolve_items() batch-enriches AssetItem lists with DB-cached content (extension-agnostic), so callers never need to filter by extension. - Wraps an internal _StorageAccessor for S3 I/O. Collaborators: - services.asset_content_service.AssetContentService (DB layer) - core.app_assets.storage.AssetPaths (S3 key generation) - extensions.storage.cached_presign_storage.CachedPresignStorage (S3 I/O) """ from __future__ import annotations import logging from core.app.entities.app_asset_entities import AppAssetNode from core.app_assets.entities.assets import AssetItem from core.app_assets.storage import AssetPaths from extensions.storage.cached_presign_storage import CachedPresignStorage from services.asset_content_service import AssetContentService logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # S3-only implementation (internal, used as inner delegate) # --------------------------------------------------------------------------- class _StorageAccessor: """Reads/writes draft content via object storage (S3) only.""" _storage: CachedPresignStorage _tenant_id: str _app_id: str def __init__(self, storage: CachedPresignStorage, tenant_id: str, app_id: str) -> None: self._storage = storage self._tenant_id = tenant_id self._app_id = app_id def _key(self, node: AppAssetNode) -> str: return AssetPaths.draft(self._tenant_id, self._app_id, node.id) def load(self, node: AppAssetNode) -> bytes: return self._storage.load_once(self._key(node)) def save(self, node: AppAssetNode, content: bytes) -> None: self._storage.save(self._key(node), content) def delete(self, node: AppAssetNode) -> None: try: self._storage.delete(self._key(node)) except Exception: logger.warning("Failed to delete storage key %s", self._key(node), exc_info=True) # --------------------------------------------------------------------------- # DB-cached implementation (the public API) # --------------------------------------------------------------------------- class CachedContentAccessor: """App-level content accessor with DB read-through cache over S3. Read path: DB first -> miss -> S3 fallback -> sync backfill DB Write path: DB upsert + S3 save (dual-write) Delete path: DB delete + S3 delete bulk_load uses a single SQL query for all nodes, with S3 fallback per miss. Usage: accessor = CachedContentAccessor(storage, tenant_id, app_id) content = accessor.load(node) accessor.save(node, content) results = accessor.bulk_load(nodes) """ _inner: _StorageAccessor _tenant_id: str _app_id: str def __init__(self, storage: CachedPresignStorage, tenant_id: str, app_id: str) -> None: self._inner = _StorageAccessor(storage, tenant_id, app_id) self._tenant_id = tenant_id self._app_id = app_id def load(self, node: AppAssetNode) -> bytes: # 1. Try DB cached = AssetContentService.get(self._tenant_id, self._app_id, node.id) if cached is not None: return cached.encode("utf-8") # 2. Fallback to S3 data = self._inner.load(node) # 3. Sync backfill DB AssetContentService.upsert( tenant_id=self._tenant_id, app_id=self._app_id, node_id=node.id, content=data.decode("utf-8"), size=len(data), ) return data def bulk_load(self, nodes: list[AppAssetNode]) -> dict[str, bytes]: """Single SQL for all nodes, S3 fallback + backfill per miss.""" result: dict[str, bytes] = {} node_ids = [n.id for n in nodes] cached = AssetContentService.get_many(self._tenant_id, self._app_id, node_ids) for node in nodes: if node.id in cached: result[node.id] = cached[node.id].encode("utf-8") else: # S3 fallback + sync backfill data = self._inner.load(node) AssetContentService.upsert( tenant_id=self._tenant_id, app_id=self._app_id, node_id=node.id, content=data.decode("utf-8"), size=len(data), ) result[node.id] = data return result def save(self, node: AppAssetNode, content: bytes) -> None: # Dual-write: DB + S3 AssetContentService.upsert( tenant_id=self._tenant_id, app_id=self._app_id, node_id=node.id, content=content.decode("utf-8"), size=len(content), ) self._inner.save(node, content) def resolve_items(self, items: list[AssetItem]) -> list[AssetItem]: """Batch-enrich asset items with DB-cached content. Queries by ``asset_id`` only — extension-agnostic. Items without a DB cache row keep their original *content* value (typically ``None``), so only genuinely cached assets (e.g. ``.md`` skill documents) get populated. This eliminates the need for callers to filter by file extension before deciding whether to read from the DB cache. """ if not items: return items node_ids = [a.asset_id for a in items] cached = AssetContentService.get_many(self._tenant_id, self._app_id, node_ids) if not cached: return items return [ AssetItem( asset_id=a.asset_id, path=a.path, file_name=a.file_name, extension=a.extension, storage_key=a.storage_key, content=cached[a.asset_id].encode("utf-8") if a.asset_id in cached else a.content, ) for a in items ] def delete(self, node: AppAssetNode) -> None: AssetContentService.delete(self._tenant_id, self._app_id, node.id) self._inner.delete(node)