feat: add DB inline content cache for app asset draft files

Introduce app_asset_contents table as a read-through cache over S3 for
text-like asset files (e.g. .md skill documents). This eliminates N
individual S3 fetches during SkillBuilder builds — bulk_load pulls all
content in a single SQL query with S3 fallback on miss.

Key components:
- CachedContentAccessor: DB-first read / dual-write / S3 fallback
- AssetContentService: static DB operations (get, get_many, upsert, delete)
- should_mirror(): single source of truth for extension-based policy
- Alembic migration for app_asset_contents table

Modified callers:
- SkillBuilder uses accessor.bulk_load() instead of per-node S3 reads
- AppAssetService.get/update_file_content route through accessor
- delete_node cleans both DB cache and S3
- draft_app_assets_initializer uses should_mirror() instead of hardcoded .md
This commit is contained in:
Harry
2026-03-09 14:44:21 +08:00
parent 53f76a20c2
commit a8074f4f4a
11 changed files with 391 additions and 52 deletions

View File

@@ -2,12 +2,13 @@ import json
import logging
from core.app.entities.app_asset_entities import AppAssetFileTree, AppAssetNode
from core.app_assets.accessor import CachedContentAccessor
from core.app_assets.entities import AssetItem
from core.app_assets.storage import AssetPaths
from core.skill.assembler import SkillBundleAssembler
from core.skill.entities.skill_bundle import SkillBundle
from core.skill.entities.skill_document import SkillDocument
from extensions.storage.cached_presign_storage import CachedPresignStorage
from extensions.storage.base_storage import BaseStorage
from .base import BuildContext
@@ -16,10 +17,12 @@ logger = logging.getLogger(__name__)
class SkillBuilder:
_nodes: list[tuple[AppAssetNode, str]]
_storage: CachedPresignStorage
_accessor: CachedContentAccessor
_storage: BaseStorage
def __init__(self, storage: CachedPresignStorage) -> None:
def __init__(self, accessor: CachedContentAccessor, storage: BaseStorage) -> None:
self._nodes = []
self._accessor = accessor
self._storage = storage
def accept(self, node: AppAssetNode) -> bool:
@@ -37,15 +40,16 @@ class SkillBuilder:
)
return []
# load documents; skip nodes whose draft content is still the empty
# placeholder written at creation time (the front-end has not uploaded
# the actual skill document yet).
# Batch-load all skill draft content in one DB query (with S3 fallback on miss).
nodes_only = [node for node, _ in self._nodes]
raw_contents = self._accessor.bulk_load(nodes_only)
# Parse documents — skip nodes whose draft content is still the empty
# placeholder written at creation time.
documents: dict[str, SkillDocument] = {}
for node, _ in self._nodes:
try:
key = AssetPaths.draft(ctx.tenant_id, ctx.app_id, node.id)
raw = self._storage.load_once(key)
# skip empty content
raw = raw_contents.get(node.id)
if not raw:
continue
data = {"skill_id": node.id, **json.loads(raw)}