feat(skill): skill parser & packager

This commit is contained in:
Harry
2026-01-19 12:40:44 +08:00
parent 245567118c
commit 0de32f682a
18 changed files with 535 additions and 28 deletions

View File

@ -0,0 +1,10 @@
from .asset_parser import AssetParser
from .base import AssetItemParser, FileAssetParser
from .skill_parser import SkillAssetParser
# Names re-exported as the public API of this package: the abstract per-item
# parser interface, the tree-walking AssetParser, and the concrete item parsers
# imported above.
__all__ = [
"AssetItemParser",
"AssetParser",
"FileAssetParser",
"SkillAssetParser",
]

View File

@ -0,0 +1,51 @@
from typing import TYPE_CHECKING
from core.app.entities.app_asset_entities import AppAssetFileTree
from core.app_assets.assets import AssetItem
from core.app_assets.paths import AssetPaths
from .base import AssetItemParser, FileAssetParser
if TYPE_CHECKING:
from extensions.ext_storage import Storage
class AssetParser:
    """Convert every file node of an app asset tree into an ``AssetItem``.

    For each file yielded by the tree, the draft copy is loaded from storage
    and handed to the item parser registered for the file's extension. Files
    whose extension has no registered parser are handled by a
    ``FileAssetParser``.
    """

    _tree: AppAssetFileTree
    _tenant_id: str
    _app_id: str
    _storage: "Storage"
    _parsers: dict[str, AssetItemParser]
    _default_parser: AssetItemParser

    def __init__(
        self,
        tree: AppAssetFileTree,
        tenant_id: str,
        app_id: str,
        storage: "Storage",
    ) -> None:
        """Bind the parser to one asset tree and the storage it is read from.

        Args:
            tree: File tree whose file nodes will be parsed.
            tenant_id: Tenant identifier used to build draft storage keys.
            app_id: App identifier used to build draft storage keys.
            storage: Storage backend the draft file bytes are loaded from.
        """
        self._storage = storage
        self._tree = tree
        self._tenant_id = tenant_id
        self._app_id = app_id
        # Extension -> parser registry; starts empty, so until register() is
        # called every file goes through the default parser.
        self._parsers = {}
        self._default_parser = FileAssetParser()

    def register(self, extension: str, parser: AssetItemParser) -> None:
        """Use ``parser`` for files whose extension equals ``extension``.

        Registering the same extension again replaces the previous parser.
        """
        self._parsers[extension] = parser

    def _parser_for(self, extension: str) -> AssetItemParser:
        """Return the parser registered for ``extension``, else the default."""
        if extension in self._parsers:
            return self._parsers[extension]
        return self._default_parser

    def parse(self) -> list[AssetItem]:
        """Parse all file nodes of the tree.

        Returns:
            One ``AssetItem`` per file node, in the order the tree yields them.
        """
        parsed_items: list[AssetItem] = []
        for file_node in self._tree.walk_files():
            # Asset paths are passed on without their leading slash(es).
            relative_path = self._tree.get_path(file_node.id).lstrip("/")
            draft_key = AssetPaths.draft_file(self._tenant_id, self._app_id, file_node.id)
            payload = self._storage.load_once(draft_key)
            # A missing/empty extension is normalised to "" for the lookup.
            ext = file_node.extension or ""
            item = self._parser_for(ext).parse(
                file_node.id,
                relative_path,
                file_node.name,
                ext,
                draft_key,
                payload,
            )
            parsed_items.append(item)
        return parsed_items

View File

@ -0,0 +1,36 @@
from abc import ABC, abstractmethod
from core.app_assets.assets import AssetItem, FileAsset
class AssetItemParser(ABC):
    """Interface for turning a single stored asset file into an ``AssetItem``."""

    @abstractmethod
    def parse(
        self,
        node_id: str,
        path: str,
        file_name: str,
        extension: str,
        storage_key: str,
        raw_bytes: bytes,
    ) -> AssetItem:
        """Build the asset item for one file.

        Args:
            node_id: Identifier of the file's node in the asset tree.
            path: Path of the file within the asset tree.
            file_name: Name of the file.
            extension: File extension (may be an empty string).
            storage_key: Storage key the file's bytes were loaded from.
            raw_bytes: Raw content of the file.

        Returns:
            The parsed asset item.
        """
        raise NotImplementedError
class FileAssetParser(AssetItemParser):
    """Parser that records a file's identifying fields without reading it."""

    def parse(
        self,
        node_id: str,
        path: str,
        file_name: str,
        extension: str,
        storage_key: str,
        raw_bytes: bytes,
    ) -> FileAsset:
        """Wrap the given file description in a ``FileAsset``.

        ``raw_bytes`` is accepted to satisfy the ``AssetItemParser`` interface
        but is not inspected; the asset keeps pointing at ``storage_key``.

        Returns:
            A ``FileAsset`` carrying the node id, path, file name, extension
            and storage key exactly as passed in.
        """
        asset = FileAsset(
            node_id=node_id,
            path=path,
            file_name=file_name,
            extension=extension,
            storage_key=storage_key,
        )
        return asset

View File

@ -0,0 +1,120 @@
import json
import re
from typing import TYPE_CHECKING, Any
from core.app_assets.paths import AssetPaths
from core.app_assets.skill import (
FileReference,
SkillAsset,
SkillMetadata,
ToolReference,
)
from .base import AssetItemParser
if TYPE_CHECKING:
from extensions.ext_storage import Storage
# Inline reference markers embedded in skill content:
#   tool: §[tool].[<provider>].[<tool_name>].[<uuid>]
#   file: §[file].[<source>].[<uuid>]
# Each bracketed segment is captured as one group and must be non-empty.
#
# The previous literals ended in `\"`, which does not terminate a raw string
# and left the last bracket group unclosed; the final `\]` is restored here.
# NOTE(review): confirm against the marker producer whether the marker also
# carries a trailing `§` delimiter; if it does, append `§` to both patterns.
TOOL_REFERENCE_PATTERN = re.compile(r"§\[tool\]\.\[([^\]]+)\]\.\[([^\]]+)\]\.\[([^\]]+)\]")
FILE_REFERENCE_PATTERN = re.compile(r"§\[file\]\.\[([^\]]+)\]\.\[([^\]]+)\]")
class SkillAssetParser(AssetItemParser):
    """Item parser for skill documents stored as JSON.

    A skill document is a JSON object with optional ``metadata`` and
    ``content`` entries. Parsing extracts the tool and file reference markers
    found in ``content``, rewrites them to their resolved text form, saves the
    resolved content to storage under the published "resolved" key, and
    returns a ``SkillAsset`` describing the result.
    """

    _tenant_id: str
    _app_id: str
    _publish_id: str
    _storage: "Storage"

    def __init__(
        self,
        tenant_id: str,
        app_id: str,
        publish_id: str,
        storage: "Storage",
    ) -> None:
        """Remember the identifiers and storage used for resolved output.

        Args:
            tenant_id: Tenant identifier used to build resolved storage keys.
            app_id: App identifier used to build resolved storage keys.
            publish_id: Publish identifier used to build resolved storage keys.
            storage: Storage backend the resolved content is saved to.
        """
        self._storage = storage
        self._tenant_id = tenant_id
        self._app_id = app_id
        self._publish_id = publish_id

    def _get_resolved_key(self, node_id: str) -> str:
        """Return the storage key for the resolved content of ``node_id``."""
        return AssetPaths.published_resolved_file(
            self._tenant_id,
            self._app_id,
            self._publish_id,
            node_id,
        )

    @staticmethod
    def _load_document(node_id: str, raw_bytes: bytes) -> dict[str, Any]:
        """Decode ``raw_bytes`` as UTF-8 JSON and require a JSON object."""
        try:
            decoded = json.loads(raw_bytes.decode("utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError) as exc:
            raise ValueError(f"Invalid skill document JSON for {node_id}: {exc}") from exc
        if isinstance(decoded, dict):
            return decoded
        raise ValueError(f"Skill document {node_id} must be a JSON object")

    def parse(
        self,
        node_id: str,
        path: str,
        file_name: str,
        extension: str,
        storage_key: str,
        raw_bytes: bytes,
    ) -> SkillAsset:
        """Parse one skill document and persist its resolved content.

        Args:
            node_id: Identifier of the file's node in the asset tree.
            path: Path of the file within the asset tree.
            file_name: Name of the file.
            extension: File extension.
            storage_key: Key the raw document was loaded from. It is not used
                here; the returned asset points at the resolved key instead.
            raw_bytes: UTF-8 encoded JSON skill document.

        Returns:
            A ``SkillAsset`` holding the validated metadata, the resolved
            content, the extracted references, and the storage key the
            resolved content was saved under.

        Raises:
            ValueError: If the bytes are not valid UTF-8 JSON, the document is
                not a JSON object, or ``content`` is not a string. Errors
                raised by ``SkillMetadata.model_validate`` propagate unchanged.
        """
        document = self._load_document(node_id, raw_bytes)

        # The content type is checked before the metadata is validated.
        body = document.get("content", "")
        if not isinstance(body, str):
            raise ValueError(f"Skill document {node_id} 'content' must be a string")
        metadata = SkillMetadata.model_validate(document.get("metadata", {}))

        # Group 0 is the whole marker; it is kept as ``raw`` so the marker can
        # later be substituted verbatim.
        tool_references: list[ToolReference] = [
            ToolReference(
                provider=found[1],
                tool_name=found[2],
                uuid=found[3],
                raw=found[0],
            )
            for found in TOOL_REFERENCE_PATTERN.finditer(body)
        ]
        file_references: list[FileReference] = [
            FileReference(
                source=found[1],
                uuid=found[2],
                raw=found[0],
            )
            for found in FILE_REFERENCE_PATTERN.finditer(body)
        ]

        resolved_content = self._resolve_content(body, tool_references, file_references)
        resolved_key = self._get_resolved_key(node_id)
        self._storage.save(resolved_key, resolved_content.encode("utf-8"))

        return SkillAsset(
            node_id=node_id,
            path=path,
            file_name=file_name,
            extension=extension,
            storage_key=resolved_key,
            metadata=metadata,
            content=resolved_content,
            tool_references=tool_references,
            file_references=file_references,
        )

    def _resolve_content(
        self,
        content: str,
        tool_references: list[ToolReference],
        file_references: list[FileReference],
    ) -> str:
        """Replace every reference marker in ``content`` with its text form.

        Tool markers become ``<provider>/<tool_name>`` and file markers become
        ``[file:<uuid>]``. Tool markers are substituted first, then file
        markers, each via plain string replacement of the marker's raw text.
        """
        substitutions: list[tuple[str, str]] = [
            (tool_ref.raw, f"{tool_ref.provider}/{tool_ref.tool_name}") for tool_ref in tool_references
        ]
        substitutions.extend((file_ref.raw, f"[file:{file_ref.uuid}]") for file_ref in file_references)

        resolved = content
        for marker, rendered in substitutions:
            resolved = resolved.replace(marker, rendered)
        return resolved