feat(skill-compiler): skill compiler

This commit is contained in:
Harry
2026-01-22 03:06:41 +08:00
parent 5cb8d4cc11
commit 5565546295
27 changed files with 1952 additions and 291 deletions

View File

@ -1,12 +1,7 @@
from .entities import (
AssetItem,
FileAsset,
FileReference,
SkillAsset,
SkillMetadata,
ToolConfiguration,
ToolFieldConfig,
ToolReference,
)
from .packager import AssetPackager, ZipPackager
from .parser import AssetItemParser, AssetParser, FileAssetParser, SkillAssetParser
@ -20,12 +15,7 @@ __all__ = [
"AssetPaths",
"FileAsset",
"FileAssetParser",
"FileReference",
"SkillAsset",
"SkillAssetParser",
"SkillMetadata",
"ToolConfiguration",
"ToolFieldConfig",
"ToolReference",
"ZipPackager",
]

View File

@ -0,0 +1,12 @@
from .base import AssetBuilder, BuildContext
from .file_builder import FileBuilder
from .pipeline import AssetBuildPipeline
from .skill_builder import SkillBuilder
# Public API of this package: the names imported above from the
# ``base``, ``file_builder``, ``pipeline`` and ``skill_builder`` submodules.
__all__ = [
"AssetBuildPipeline",
"AssetBuilder",
"BuildContext",
"FileBuilder",
"SkillBuilder",
]

View File

@ -0,0 +1,20 @@
from dataclasses import dataclass
from typing import Protocol
from core.app.entities.app_asset_entities import AppAssetFileTree, AppAssetNode
from core.app_assets.entities import AssetItem
@dataclass
class BuildContext:
    """Identifiers handed to every builder for a single asset build.

    Attributes:
        tenant_id: Tenant identifier; builders pass it to ``AssetPaths``
            when deriving storage keys.
        app_id: Application identifier; also passed to ``AssetPaths``.
        build_id: Identifier of the build being produced; builders use it
            for build-scoped storage keys and artifacts.
    """

    tenant_id: str
    app_id: str
    build_id: str
class AssetBuilder(Protocol):
    """Structural interface every asset builder has to satisfy.

    A builder is first asked whether it wants a node (``accept``), is then
    handed the nodes it accepted (``collect``), and finally turns everything
    it collected into asset items (``build``).
    """

    def accept(self, node: AppAssetNode) -> bool:
        """Return ``True`` when this builder wants to handle ``node``."""
        ...

    def collect(self, node: AppAssetNode, path: str, ctx: BuildContext) -> None:
        """Register ``node`` (located at ``path``) for a later ``build`` call."""
        ...

    def build(self, tree: AppAssetFileTree, ctx: BuildContext) -> list[AssetItem]:
        """Produce the asset items for the nodes registered so far."""
        ...

View File

@ -0,0 +1,30 @@
from core.app.entities.app_asset_entities import AppAssetFileTree, AppAssetNode
from core.app_assets.entities import AssetItem, FileAsset
from core.app_assets.paths import AssetPaths
from .base import BuildContext
class FileBuilder:
    """Catch-all builder: accepts any node and emits one ``FileAsset`` per node.

    The produced assets point at the draft storage key of their node.
    """

    # (node, path) pairs registered through ``collect``.
    _nodes: list[tuple[AppAssetNode, str]]

    def __init__(self) -> None:
        self._nodes = []

    def accept(self, node: AppAssetNode) -> bool:
        """Accept every node unconditionally."""
        return True

    def collect(self, node: AppAssetNode, path: str, ctx: BuildContext) -> None:
        """Remember ``node`` together with its ``path`` for ``build``."""
        self._nodes.append((node, path))

    def build(self, tree: AppAssetFileTree, ctx: BuildContext) -> list[AssetItem]:
        """Return a ``FileAsset`` for each collected node, in collection order."""
        assets: list[AssetItem] = []
        for collected, asset_path in self._nodes:
            assets.append(
                FileAsset(
                    asset_id=collected.id,
                    path=asset_path,
                    file_name=collected.name,
                    # A node without an extension is stored with an empty one.
                    extension=collected.extension or "",
                    storage_key=AssetPaths.draft_file(ctx.tenant_id, ctx.app_id, collected.id),
                )
            )
        return assets

View File

@ -0,0 +1,29 @@
from core.app.entities.app_asset_entities import AppAssetFileTree
from core.app_assets.builder.file_builder import FileBuilder
from core.app_assets.builder.skill_builder import SkillBuilder
from core.app_assets.entities import AssetItem
from .base import AssetBuilder, BuildContext
class AssetBuildPipeline:
    """Routes file nodes of an asset tree to builders and gathers their output.

    Builders are consulted in list order; a node is given to the first builder
    whose ``accept`` returns true. When no builders are supplied (``None`` or
    an empty list) the defaults ``[SkillBuilder(), FileBuilder()]`` are used.
    """

    _builders: list[AssetBuilder]

    def __init__(self, builders: list[AssetBuilder] | None = None) -> None:
        if builders:
            self._builders = builders
        else:
            self._builders = [SkillBuilder(), FileBuilder()]

    def build_all(self, tree: AppAssetFileTree, ctx: BuildContext) -> list[AssetItem]:
        """Distribute every file node of ``tree`` and build all assets.

        Returns the concatenated results of each builder's ``build`` call,
        in builder order.
        """
        # Phase 1: hand each file node to the first builder that accepts it.
        for file_node in tree.walk_files():
            node_path = tree.get_path(file_node.id)
            chosen = next(
                (candidate for candidate in self._builders if candidate.accept(file_node)),
                None,
            )
            if chosen is not None:
                chosen.collect(file_node, node_path, ctx)

        # Phase 2: let every builder process what it collected.
        return [item for worker in self._builders for item in worker.build(tree, ctx)]

View File

@ -0,0 +1,85 @@
import json
from core.app.entities.app_asset_entities import AppAssetFileTree, AppAssetNode
from core.app_assets.entities import AssetItem, FileAsset
from core.app_assets.paths import AssetPaths
from core.skill.entities.skill_document import SkillDocument
from core.skill.skill_compiler import SkillCompiler
from core.skill.skill_manager import SkillManager
from extensions.ext_storage import storage
from .base import BuildContext
class SkillBuilder:
    """Builder for ``md`` nodes: compiles skill drafts and stores the results.

    For every collected node the draft is loaded from storage, all drafts are
    compiled together with ``SkillCompiler``, the resulting tool artifact is
    saved through ``SkillManager``, and each compiled document is written to
    its build-resolved storage key.
    """

    # (node, path) pairs registered through ``collect``.
    _nodes: list[tuple[AppAssetNode, str]]

    def __init__(self) -> None:
        self._nodes = []

    def accept(self, node: AppAssetNode) -> bool:
        """Accept only nodes whose extension is ``md``."""
        return node.extension == "md"

    def collect(self, node: AppAssetNode, path: str, ctx: BuildContext) -> None:
        """Remember ``node`` together with its ``path`` for ``build``."""
        self._nodes.append((node, path))

    def build(self, tree: AppAssetFileTree, ctx: BuildContext) -> list[AssetItem]:
        """Compile all collected skills and return their compiled file assets.

        Returns:
            One ``FileAsset`` per collected node for which the compiler
            produced an artifact; its ``storage_key`` is the build-resolved
            key the compiled content was written to. Nodes without an
            artifact are omitted. An empty list when nothing was collected.
        """
        if not self._nodes:
            return []

        # 1. Load every draft and wrap it in a SkillDocument.
        documents: list[SkillDocument] = [self._load_document(node, ctx) for node, _ in self._nodes]

        # 2. Compile all skills together.
        compiler = SkillCompiler()
        artifact_set = compiler.compile_all(documents, tree, ctx.build_id)

        # 3. Persist the tool artifact of this build.
        SkillManager.save_tool_artifact(
            ctx.tenant_id,
            ctx.app_id,
            ctx.build_id,
            artifact_set.get_tool_artifact(),
        )

        # 4. Write compiled content to storage and describe it as FileAssets.
        results: list[AssetItem] = []
        for node, path in self._nodes:
            artifact = artifact_set.get(node.id)
            if artifact is None:
                continue

            resolved_key = AssetPaths.build_resolved_file(
                ctx.tenant_id, ctx.app_id, ctx.build_id, node.id
            )
            storage.save(resolved_key, artifact.content.encode("utf-8"))

            results.append(
                FileAsset(
                    asset_id=node.id,
                    path=path,
                    file_name=node.name,
                    extension=node.extension or "",
                    storage_key=resolved_key,
                )
            )
        return results

    @staticmethod
    def _load_document(node: AppAssetNode, ctx: BuildContext) -> SkillDocument:
        """Load the draft of ``node`` from storage as a ``SkillDocument``.

        The draft is read as JSON; ``content`` and ``metadata`` are taken from
        it when it is a JSON object. If loading or decoding fails, or the
        payload is not an object, the document falls back to empty content
        and empty metadata so one broken draft does not abort the build.
        """
        import logging

        draft_key = AssetPaths.draft_file(ctx.tenant_id, ctx.app_id, node.id)
        content = ""
        metadata = {}
        try:
            data = json.loads(storage.load_once(draft_key))
        except Exception:
            # Deliberately best-effort, but leave a trace: without this log a
            # storage or JSON failure would silently compile an empty skill.
            logging.getLogger(__name__).exception(
                "Failed to load skill draft %s; compiling it as an empty document", node.id
            )
        else:
            if isinstance(data, dict):
                content = data.get("content", "")
                metadata = data.get("metadata", {})

        return SkillDocument(
            skill_id=node.id,
            content=content,
            metadata=metadata,
        )

View File

@ -1,20 +1,8 @@
from .assets import AssetItem, FileAsset
from .skill import (
FileReference,
SkillAsset,
SkillMetadata,
ToolConfiguration,
ToolFieldConfig,
ToolReference,
)
from .skill import SkillAsset
__all__ = [
"AssetItem",
"FileAsset",
"FileReference",
"SkillAsset",
"SkillMetadata",
"ToolConfiguration",
"ToolFieldConfig",
"ToolReference",
]

View File

@ -4,7 +4,7 @@ from dataclasses import dataclass
@dataclass
class AssetItem(ABC):
node_id: str
asset_id: str
path: str
file_name: str
extension: str

View File

@ -1,59 +1,14 @@
from dataclasses import dataclass
from collections.abc import Mapping
from dataclasses import dataclass, field
from typing import Any
from pydantic import BaseModel, ConfigDict, Field
from core.tools.entities.tool_entities import ToolProviderType
from .assets import AssetItem
class ToolFieldConfig(BaseModel):
model_config = ConfigDict(extra="forbid")
id: str
value: Any
auto: bool = False
class ToolConfiguration(BaseModel):
model_config = ConfigDict(extra="forbid")
fields: list[ToolFieldConfig] = Field(default_factory=list)
def default_values(self) -> dict[str, Any]:
return {field.id: field.value for field in self.fields if field.value is not None}
class ToolReference(BaseModel):
model_config = ConfigDict(extra="forbid")
uuid: str = Field(description="Unique identifier for this tool reference")
type: ToolProviderType = Field(description="Tool provider type")
provider: str = Field(description="Tool provider")
tool_name: str = Field(description="Tool name")
credential_id: str | None = Field(default=None, description="Credential ID")
configuration: ToolConfiguration | None = Field(default=None, description="Tool configuration")
class FileReference(BaseModel):
model_config = ConfigDict(extra="forbid")
source: str = Field(description="Source location or identifier of the file")
uuid: str = Field(description="Unique identifier for this file reference")
class SkillMetadata(BaseModel):
model_config = ConfigDict(extra="allow")
tools: dict[str, ToolReference] = Field(default_factory=dict, description="Map of tool references by UUID")
files: list[FileReference] = Field(default_factory=list, description="List of file references")
@dataclass
class SkillAsset(AssetItem):
storage_key: str
metadata: SkillMetadata
metadata: Mapping[str, Any] = field(default_factory=dict)
def get_storage_key(self) -> str:
return self.storage_key

View File

@ -7,7 +7,7 @@ class AssetItemParser(ABC):
@abstractmethod
def parse(
self,
node_id: str,
asset_id: str,
path: str,
file_name: str,
extension: str,
@ -19,14 +19,14 @@ class AssetItemParser(ABC):
class FileAssetParser(AssetItemParser):
def parse(
self,
node_id: str,
asset_id: str,
path: str,
file_name: str,
extension: str,
storage_key: str,
) -> FileAsset:
return FileAsset(
node_id=node_id,
asset_id=asset_id,
path=path,
file_name=file_name,
extension=extension,

View File

@ -1,161 +1,57 @@
import json
import logging
import re
from typing import Any
from core.app.entities.app_asset_entities import AppAssetFileTree, AppAssetNode
from core.app_assets.entities import (
SkillAsset,
SkillMetadata,
)
from core.app_assets.entities.skill import FileReference, ToolConfiguration, ToolReference
from core.app_assets.paths import AssetPaths
from core.tools.entities.tool_entities import ToolProviderType
from core.app_assets.entities import SkillAsset
from core.app_assets.entities.assets import AssetItem, FileAsset
from extensions.ext_storage import storage
from .base import AssetItemParser
TOOL_REFERENCE_PATTERN = re.compile(r"§\[tool\]\.\[([^\]]+)\]\.\[([^\]]+)\]\.\[([^\]]+)\")
FILE_REFERENCE_PATTERN = re.compile(r"§\[file\]\.\[([^\]]+)\]\.\[([^\]]+)\")
logger = logging.getLogger(__name__)
class SkillAssetParser(AssetItemParser):
def __init__(
self,
tenant_id: str,
app_id: str,
assets_id: str,
tree: AppAssetFileTree,
) -> None:
self._tenant_id = tenant_id
self._app_id = app_id
self._assets_id = assets_id
self._tree = tree
"""
Parser for skill assets.
Responsibilities:
- Read file from storage
- Parse JSON structure
- Return SkillAsset with raw metadata (no parsing/resolution)
Metadata parsing and content resolution are handled by SkillCompiler.
"""
def parse(
self,
node_id: str,
asset_id: str,
path: str,
file_name: str,
extension: str,
storage_key: str,
) -> SkillAsset:
try:
return self._parse_skill_asset(node_id, path, file_name, extension, storage_key)
except Exception:
logger.exception("Failed to parse skill asset %s", node_id)
# handle as plain text
return SkillAsset(
node_id=node_id,
path=path,
file_name=file_name,
extension=extension,
storage_key=storage_key,
metadata=SkillMetadata(),
)
def _parse_skill_asset(
self, node_id: str, path: str, file_name: str, extension: str, storage_key: str
) -> SkillAsset:
) -> AssetItem:
try:
data = json.loads(storage.load_once(storage_key))
except (json.JSONDecodeError, UnicodeDecodeError):
# handle as plain text
if not isinstance(data, dict):
raise ValueError(f"Skill document {asset_id} must be a JSON object")
metadata_raw: dict[str, Any] = data.get("metadata", {})
return SkillAsset(
node_id=node_id,
asset_id=asset_id,
path=path,
file_name=file_name,
extension=extension,
storage_key=storage_key,
metadata=SkillMetadata(),
metadata=metadata_raw,
)
if not isinstance(data, dict):
raise ValueError(f"Skill document {node_id} must be a JSON object")
data_dict: dict[str, Any] = data
metadata_raw = data_dict.get("metadata", {})
content = data_dict.get("content", "")
if not isinstance(content, str):
raise ValueError(f"Skill document {node_id} 'content' must be a string")
resolved_key = AssetPaths.build_resolved_file(self._tenant_id, self._app_id, self._assets_id, node_id)
current_file = self._tree.get(node_id)
if current_file is None:
raise ValueError(f"File not found for id={node_id}")
metadata = self._resolve_metadata(content, metadata_raw)
storage.save(resolved_key, self._resolve_content(current_file, content, metadata).encode("utf-8"))
return SkillAsset(
node_id=node_id,
path=path,
file_name=file_name,
extension=extension,
storage_key=resolved_key,
metadata=metadata,
)
def _resolve_content(self, current_file: AppAssetNode, content: str, metadata: SkillMetadata) -> str:
for match in FILE_REFERENCE_PATTERN.finditer(content):
# replace with file relative path
file_id = match.group(2)
file = self._tree.get(file_id)
if file is None:
logger.warning("File not found for id=%s, skipping", file_id)
# replace with file not found placeholder
content = content.replace(match.group(0), "[File not found]")
continue
content = content.replace(match.group(0), self._tree.relative_path(current_file, file))
for match in TOOL_REFERENCE_PATTERN.finditer(content):
tool_id = match.group(3)
tool = metadata.tools.get(tool_id)
if tool is None:
logger.warning("Tool not found for id=%s, skipping", tool_id)
# replace with tool not found placeholder
content = content.replace(match.group(0), f"[Tool not found: {tool_id}]")
continue
content = content.replace(match.group(0), f"[Bash Command: {tool.tool_name}_{tool_id}]")
return content
def _resolve_file_references(self, content: str) -> list[FileReference]:
file_references: list[FileReference] = []
for match in FILE_REFERENCE_PATTERN.finditer(content):
file_references.append(
FileReference(
source=match.group(1),
uuid=match.group(2),
)
except Exception:
logger.exception("Failed to parse skill asset %s", asset_id)
return FileAsset(
asset_id=asset_id,
path=path,
file_name=file_name,
extension=extension,
storage_key=storage_key,
)
return file_references
def _resolve_tool_references(self, content: str, tools: dict[str, Any]) -> dict[str, ToolReference]:
tool_references: dict[str, ToolReference] = {}
for match in TOOL_REFERENCE_PATTERN.finditer(content):
tool_id = match.group(3)
tool_name = match.group(2)
tool_provider = match.group(1)
metadata = tools.get(tool_id)
if metadata is None:
raise ValueError(f"Tool metadata for {tool_id} not found")
configuration = ToolConfiguration.model_validate(metadata.get("configuration", {}))
tool_references[tool_id] = ToolReference(
uuid=tool_id,
type=ToolProviderType.value_of(metadata.get("type", None)),
provider=tool_provider,
tool_name=tool_name,
credential_id=metadata.get("credential_id", None),
configuration=configuration,
)
return tool_references
def _resolve_metadata(self, content: str, metadata: dict[str, Any]) -> SkillMetadata:
return SkillMetadata(
files=self._resolve_file_references(content=content),
tools=self._resolve_tool_references(content=content, tools=metadata.get("tools", {})),
)

View File

@ -16,3 +16,7 @@ class AssetPaths:
@staticmethod
def build_tool_artifact(tenant_id: str, app_id: str, assets_id: str) -> str:
    """Build the path of ``tool_artifact.json`` under the artifacts directory
    for the given tenant, app and assets id."""
    artifact_key = f"{AssetPaths._BASE}/{tenant_id}/{app_id}/artifacts/{assets_id}/tool_artifact.json"
    return artifact_key
@staticmethod
def build_skill_artifact_set(tenant_id: str, app_id: str, assets_id: str) -> str:
    """Build the path of ``skill_artifact_set.json`` under the artifacts
    directory for the given tenant, app and assets id."""
    artifact_set_key = f"{AssetPaths._BASE}/{tenant_id}/{app_id}/artifacts/{assets_id}/skill_artifact_set.json"
    return artifact_set_key