feat(skill-compiler): skill compiler

This commit is contained in:
Harry
2026-01-22 03:06:41 +08:00
parent 5cb8d4cc11
commit 5565546295
27 changed files with 1952 additions and 291 deletions

View File

@ -1,12 +1,7 @@
from .entities import (
AssetItem,
FileAsset,
FileReference,
SkillAsset,
SkillMetadata,
ToolConfiguration,
ToolFieldConfig,
ToolReference,
)
from .packager import AssetPackager, ZipPackager
from .parser import AssetItemParser, AssetParser, FileAssetParser, SkillAssetParser
@ -20,12 +15,7 @@ __all__ = [
"AssetPaths",
"FileAsset",
"FileAssetParser",
"FileReference",
"SkillAsset",
"SkillAssetParser",
"SkillMetadata",
"ToolConfiguration",
"ToolFieldConfig",
"ToolReference",
"ZipPackager",
]

View File

@ -0,0 +1,12 @@
from .base import AssetBuilder, BuildContext
from .file_builder import FileBuilder
from .pipeline import AssetBuildPipeline
from .skill_builder import SkillBuilder
__all__ = [
"AssetBuildPipeline",
"AssetBuilder",
"BuildContext",
"FileBuilder",
"SkillBuilder",
]

View File

@ -0,0 +1,20 @@
from dataclasses import dataclass
from typing import Protocol
from core.app.entities.app_asset_entities import AppAssetFileTree, AppAssetNode
from core.app_assets.entities import AssetItem
@dataclass
class BuildContext:
    """Identifiers shared by every builder during a single asset build run."""

    tenant_id: str  # owning tenant
    app_id: str  # application the assets belong to
    build_id: str  # id of this particular build
class AssetBuilder(Protocol):
    """Structural (duck-typed) interface for asset builders.

    Contract: accept() is called to test whether a node belongs to this
    builder, collect() hands over each accepted node with its tree path,
    and build() is called once at the end to produce the assets for
    everything collected.
    """

    def accept(self, node: AppAssetNode) -> bool: ...

    def collect(self, node: AppAssetNode, path: str, ctx: BuildContext) -> None: ...

    def build(self, tree: AppAssetFileTree, ctx: BuildContext) -> list[AssetItem]: ...

View File

@ -0,0 +1,30 @@
from core.app.entities.app_asset_entities import AppAssetFileTree, AppAssetNode
from core.app_assets.entities import AssetItem, FileAsset
from core.app_assets.paths import AssetPaths
from .base import BuildContext
class FileBuilder:
    """Catch-all builder that wraps every collected node in a plain FileAsset.

    accept() never rejects, so when this builder is placed last in the
    pipeline it picks up any node no earlier builder claimed.
    """

    # (node, tree path) pairs gathered during the distribute phase
    _nodes: list[tuple[AppAssetNode, str]]

    def __init__(self) -> None:
        self._nodes = []

    def accept(self, node: AppAssetNode) -> bool:
        # Fallback builder: every node is acceptable.
        return True

    def collect(self, node: AppAssetNode, path: str, ctx: BuildContext) -> None:
        self._nodes.append((node, path))

    def build(self, tree: AppAssetFileTree, ctx: BuildContext) -> list[AssetItem]:
        """Turn each collected node into a FileAsset pointing at its draft file."""
        assets: list[AssetItem] = []
        for node, node_path in self._nodes:
            draft_key = AssetPaths.draft_file(ctx.tenant_id, ctx.app_id, node.id)
            assets.append(
                FileAsset(
                    asset_id=node.id,
                    path=node_path,
                    file_name=node.name,
                    extension=node.extension or "",
                    storage_key=draft_key,
                )
            )
        return assets

View File

@ -0,0 +1,29 @@
from core.app.entities.app_asset_entities import AppAssetFileTree
from core.app_assets.builder.file_builder import FileBuilder
from core.app_assets.builder.skill_builder import SkillBuilder
from core.app_assets.entities import AssetItem
from .base import AssetBuilder, BuildContext
class AssetBuildPipeline:
    """Distributes file-tree nodes to builders, then collects the built assets.

    Each node is offered to the builders in order and handed to the FIRST
    builder whose accept() returns True; the accept-all FileBuilder is
    last in the default list, so it acts as the fallback.
    """

    _builders: list[AssetBuilder]

    def __init__(self, builders: list[AssetBuilder] | None = None) -> None:
        # Explicit `is not None` check (not truthiness): an explicitly
        # passed empty list must mean "no builders", not "use defaults".
        self._builders = builders if builders is not None else [SkillBuilder(), FileBuilder()]

    def build_all(self, tree: AppAssetFileTree, ctx: BuildContext) -> list[AssetItem]:
        """Run one full build over the tree.

        Returns the concatenation of every builder's assets, in builder order.
        """
        # 1. Distribute: each node goes to the first accepting builder
        for node in tree.walk_files():
            path = tree.get_path(node.id)
            for builder in self._builders:
                if builder.accept(node):
                    builder.collect(node, path, ctx)
                    break
        # 2. Each builder builds its collected nodes
        results: list[AssetItem] = []
        for builder in self._builders:
            results.extend(builder.build(tree, ctx))
        return results

View File

@ -0,0 +1,85 @@
import json
from core.app.entities.app_asset_entities import AppAssetFileTree, AppAssetNode
from core.app_assets.entities import AssetItem, FileAsset
from core.app_assets.paths import AssetPaths
from core.skill.entities.skill_document import SkillDocument
from core.skill.skill_compiler import SkillCompiler
from core.skill.skill_manager import SkillManager
from extensions.ext_storage import storage
from .base import BuildContext
class SkillBuilder:
    """Builder for markdown skill files: compiles them and stores resolved output.

    Collects every ``.md`` node, loads each draft document from storage,
    compiles all of them together via SkillCompiler, persists the combined
    tool artifact through SkillManager, then writes each skill's resolved
    content back to storage and returns the corresponding FileAssets.
    """

    # (node, tree path) pairs gathered during the distribute phase
    _nodes: list[tuple[AppAssetNode, str]]

    def __init__(self) -> None:
        self._nodes = []

    def accept(self, node: AppAssetNode) -> bool:
        # Only markdown files are treated as skills.
        return node.extension == "md"

    def collect(self, node: AppAssetNode, path: str, ctx: BuildContext) -> None:
        self._nodes.append((node, path))

    def build(self, tree: AppAssetFileTree, ctx: BuildContext) -> list[AssetItem]:
        """Compile all collected skill nodes and return their built assets.

        Returns an empty list when nothing was collected. Nodes for which
        compilation produced no artifact are silently skipped.
        """
        if not self._nodes:
            return []
        # 1. Load and create documents
        documents: list[SkillDocument] = []
        for node, _ in self._nodes:
            draft_key = AssetPaths.draft_file(ctx.tenant_id, ctx.app_id, node.id)
            try:
                data = json.loads(storage.load_once(draft_key))
                content = data.get("content", "") if isinstance(data, dict) else ""
                metadata = data.get("metadata", {}) if isinstance(data, dict) else {}
            except Exception:
                # Best-effort: missing/unreadable drafts compile as empty documents.
                content = ""
                metadata = {}
            documents.append(
                SkillDocument(
                    skill_id=node.id,
                    content=content,
                    metadata=metadata,
                )
            )
        # 2. Compile all skills
        compiler = SkillCompiler()
        artifact_set = compiler.compile_all(documents, tree, ctx.build_id)
        # 3. Save tool artifact
        SkillManager.save_tool_artifact(
            ctx.tenant_id,
            ctx.app_id,
            ctx.build_id,
            artifact_set.get_tool_artifact(),
        )
        # 4. Save compiled content to storage and return FileAssets
        results: list[AssetItem] = []
        for node, path in self._nodes:
            artifact = artifact_set.get(node.id)
            if artifact is None:
                # No artifact produced for this node; skip it.
                continue
            # Write compiled content to storage
            resolved_key = AssetPaths.build_resolved_file(
                ctx.tenant_id, ctx.app_id, ctx.build_id, node.id
            )
            storage.save(resolved_key, artifact.content.encode("utf-8"))
            results.append(
                FileAsset(
                    asset_id=node.id,
                    path=path,
                    file_name=node.name,
                    extension=node.extension or "",
                    storage_key=resolved_key,
                )
            )
        return results

View File

@ -1,20 +1,8 @@
from .assets import AssetItem, FileAsset
from .skill import (
FileReference,
SkillAsset,
SkillMetadata,
ToolConfiguration,
ToolFieldConfig,
ToolReference,
)
from .skill import SkillAsset
__all__ = [
"AssetItem",
"FileAsset",
"FileReference",
"SkillAsset",
"SkillMetadata",
"ToolConfiguration",
"ToolFieldConfig",
"ToolReference",
]

View File

@ -4,7 +4,7 @@ from dataclasses import dataclass
@dataclass
class AssetItem(ABC):
node_id: str
asset_id: str
path: str
file_name: str
extension: str

View File

@ -1,59 +1,14 @@
from dataclasses import dataclass
from collections.abc import Mapping
from dataclasses import dataclass, field
from typing import Any
from pydantic import BaseModel, ConfigDict, Field
from core.tools.entities.tool_entities import ToolProviderType
from .assets import AssetItem
class ToolFieldConfig(BaseModel):
model_config = ConfigDict(extra="forbid")
id: str
value: Any
auto: bool = False
class ToolConfiguration(BaseModel):
model_config = ConfigDict(extra="forbid")
fields: list[ToolFieldConfig] = Field(default_factory=list)
def default_values(self) -> dict[str, Any]:
return {field.id: field.value for field in self.fields if field.value is not None}
class ToolReference(BaseModel):
model_config = ConfigDict(extra="forbid")
uuid: str = Field(description="Unique identifier for this tool reference")
type: ToolProviderType = Field(description="Tool provider type")
provider: str = Field(description="Tool provider")
tool_name: str = Field(description="Tool name")
credential_id: str | None = Field(default=None, description="Credential ID")
configuration: ToolConfiguration | None = Field(default=None, description="Tool configuration")
class FileReference(BaseModel):
model_config = ConfigDict(extra="forbid")
source: str = Field(description="Source location or identifier of the file")
uuid: str = Field(description="Unique identifier for this file reference")
class SkillMetadata(BaseModel):
model_config = ConfigDict(extra="allow")
tools: dict[str, ToolReference] = Field(default_factory=dict, description="Map of tool references by UUID")
files: list[FileReference] = Field(default_factory=list, description="List of file references")
@dataclass
class SkillAsset(AssetItem):
storage_key: str
metadata: SkillMetadata
metadata: Mapping[str, Any] = field(default_factory=dict)
def get_storage_key(self) -> str:
return self.storage_key

View File

@ -7,7 +7,7 @@ class AssetItemParser(ABC):
@abstractmethod
def parse(
self,
node_id: str,
asset_id: str,
path: str,
file_name: str,
extension: str,
@ -19,14 +19,14 @@ class AssetItemParser(ABC):
class FileAssetParser(AssetItemParser):
def parse(
self,
node_id: str,
asset_id: str,
path: str,
file_name: str,
extension: str,
storage_key: str,
) -> FileAsset:
return FileAsset(
node_id=node_id,
asset_id=asset_id,
path=path,
file_name=file_name,
extension=extension,

View File

@ -1,161 +1,57 @@
import json
import logging
import re
from typing import Any
from core.app.entities.app_asset_entities import AppAssetFileTree, AppAssetNode
from core.app_assets.entities import (
SkillAsset,
SkillMetadata,
)
from core.app_assets.entities.skill import FileReference, ToolConfiguration, ToolReference
from core.app_assets.paths import AssetPaths
from core.tools.entities.tool_entities import ToolProviderType
from core.app_assets.entities import SkillAsset
from core.app_assets.entities.assets import AssetItem, FileAsset
from extensions.ext_storage import storage
from .base import AssetItemParser
TOOL_REFERENCE_PATTERN = re.compile(r"§\[tool\]\.\[([^\]]+)\]\.\[([^\]]+)\]\.\[([^\]]+)\]")
FILE_REFERENCE_PATTERN = re.compile(r"§\[file\]\.\[([^\]]+)\]\.\[([^\]]+)\]")
logger = logging.getLogger(__name__)
class SkillAssetParser(AssetItemParser):
def __init__(
self,
tenant_id: str,
app_id: str,
assets_id: str,
tree: AppAssetFileTree,
) -> None:
self._tenant_id = tenant_id
self._app_id = app_id
self._assets_id = assets_id
self._tree = tree
"""
Parser for skill assets.
Responsibilities:
- Read file from storage
- Parse JSON structure
- Return SkillAsset with raw metadata (no parsing/resolution)
Metadata parsing and content resolution are handled by SkillCompiler.
"""
def parse(
self,
node_id: str,
asset_id: str,
path: str,
file_name: str,
extension: str,
storage_key: str,
) -> SkillAsset:
try:
return self._parse_skill_asset(node_id, path, file_name, extension, storage_key)
except Exception:
logger.exception("Failed to parse skill asset %s", node_id)
# handle as plain text
return SkillAsset(
node_id=node_id,
path=path,
file_name=file_name,
extension=extension,
storage_key=storage_key,
metadata=SkillMetadata(),
)
def _parse_skill_asset(
self, node_id: str, path: str, file_name: str, extension: str, storage_key: str
) -> SkillAsset:
) -> AssetItem:
try:
data = json.loads(storage.load_once(storage_key))
except (json.JSONDecodeError, UnicodeDecodeError):
# handle as plain text
if not isinstance(data, dict):
raise ValueError(f"Skill document {asset_id} must be a JSON object")
metadata_raw: dict[str, Any] = data.get("metadata", {})
return SkillAsset(
node_id=node_id,
asset_id=asset_id,
path=path,
file_name=file_name,
extension=extension,
storage_key=storage_key,
metadata=SkillMetadata(),
metadata=metadata_raw,
)
if not isinstance(data, dict):
raise ValueError(f"Skill document {node_id} must be a JSON object")
data_dict: dict[str, Any] = data
metadata_raw = data_dict.get("metadata", {})
content = data_dict.get("content", "")
if not isinstance(content, str):
raise ValueError(f"Skill document {node_id} 'content' must be a string")
resolved_key = AssetPaths.build_resolved_file(self._tenant_id, self._app_id, self._assets_id, node_id)
current_file = self._tree.get(node_id)
if current_file is None:
raise ValueError(f"File not found for id={node_id}")
metadata = self._resolve_metadata(content, metadata_raw)
storage.save(resolved_key, self._resolve_content(current_file, content, metadata).encode("utf-8"))
return SkillAsset(
node_id=node_id,
path=path,
file_name=file_name,
extension=extension,
storage_key=resolved_key,
metadata=metadata,
)
def _resolve_content(self, current_file: AppAssetNode, content: str, metadata: SkillMetadata) -> str:
for match in FILE_REFERENCE_PATTERN.finditer(content):
# replace with file relative path
file_id = match.group(2)
file = self._tree.get(file_id)
if file is None:
logger.warning("File not found for id=%s, skipping", file_id)
# replace with file not found placeholder
content = content.replace(match.group(0), "[File not found]")
continue
content = content.replace(match.group(0), self._tree.relative_path(current_file, file))
for match in TOOL_REFERENCE_PATTERN.finditer(content):
tool_id = match.group(3)
tool = metadata.tools.get(tool_id)
if tool is None:
logger.warning("Tool not found for id=%s, skipping", tool_id)
# replace with tool not found placeholder
content = content.replace(match.group(0), f"[Tool not found: {tool_id}]")
continue
content = content.replace(match.group(0), f"[Bash Command: {tool.tool_name}_{tool_id}]")
return content
def _resolve_file_references(self, content: str) -> list[FileReference]:
file_references: list[FileReference] = []
for match in FILE_REFERENCE_PATTERN.finditer(content):
file_references.append(
FileReference(
source=match.group(1),
uuid=match.group(2),
)
except Exception:
logger.exception("Failed to parse skill asset %s", asset_id)
return FileAsset(
asset_id=asset_id,
path=path,
file_name=file_name,
extension=extension,
storage_key=storage_key,
)
return file_references
def _resolve_tool_references(self, content: str, tools: dict[str, Any]) -> dict[str, ToolReference]:
tool_references: dict[str, ToolReference] = {}
for match in TOOL_REFERENCE_PATTERN.finditer(content):
tool_id = match.group(3)
tool_name = match.group(2)
tool_provider = match.group(1)
metadata = tools.get(tool_id)
if metadata is None:
raise ValueError(f"Tool metadata for {tool_id} not found")
configuration = ToolConfiguration.model_validate(metadata.get("configuration", {}))
tool_references[tool_id] = ToolReference(
uuid=tool_id,
type=ToolProviderType.value_of(metadata.get("type", None)),
provider=tool_provider,
tool_name=tool_name,
credential_id=metadata.get("credential_id", None),
configuration=configuration,
)
return tool_references
def _resolve_metadata(self, content: str, metadata: dict[str, Any]) -> SkillMetadata:
return SkillMetadata(
files=self._resolve_file_references(content=content),
tools=self._resolve_tool_references(content=content, tools=metadata.get("tools", {})),
)

View File

@ -16,3 +16,7 @@ class AssetPaths:
@staticmethod
def build_tool_artifact(tenant_id: str, app_id: str, assets_id: str) -> str:
return f"{AssetPaths._BASE}/{tenant_id}/{app_id}/artifacts/{assets_id}/tool_artifact.json"
@staticmethod
def build_skill_artifact_set(tenant_id: str, app_id: str, assets_id: str) -> str:
return f"{AssetPaths._BASE}/{tenant_id}/{app_id}/artifacts/{assets_id}/skill_artifact_set.json"

View File

@ -6,10 +6,9 @@ from typing import TYPE_CHECKING, Any
from pydantic import BaseModel, Field
from core.app.entities.app_invoke_entities import InvokeFrom
from core.app_assets.entities import ToolReference
from core.model_runtime.utils.encoders import jsonable_encoder
from core.session.cli_api import CliApiSession
from core.skill.entities import ToolArtifact
from core.skill.entities import ToolArtifact, ToolReference
from core.tools.entities.tool_entities import ToolParameter, ToolProviderType
from core.tools.tool_manager import ToolManager
from core.virtual_environment.__base.entities import Arch, OperatingSystem

View File

@ -1,6 +1,4 @@
from core.app_assets.entities import ToolReference
from .entities import ToolArtifact, ToolDependency
from .entities import ToolArtifact, ToolDependency, ToolReference
from .skill_manager import SkillManager
__all__ = [

View File

@ -1,6 +1,27 @@
from .file_artifact import FilesArtifact
from .skill_artifact import SkillArtifact, SkillSourceInfo
from .skill_artifact_set import SkillArtifactSet
from .skill_document import SkillDocument
from .skill_metadata import (
FileReference,
SkillMetadata,
ToolConfiguration,
ToolFieldConfig,
ToolReference,
)
from .tool_artifact import ToolArtifact, ToolDependency
__all__ = [
"FileReference",
"FilesArtifact",
"SkillArtifact",
"SkillArtifactSet",
"SkillDocument",
"SkillMetadata",
"SkillSourceInfo",
"ToolArtifact",
"ToolConfiguration",
"ToolDependency",
"ToolFieldConfig",
"ToolReference",
]

View File

@ -0,0 +1,13 @@
from pydantic import BaseModel, ConfigDict, Field
from core.skill.entities.skill_metadata import FileReference
class FilesArtifact(BaseModel):
    """
    File artifact - contains all file references (transitive closure)
    """

    model_config = ConfigDict(extra="forbid")

    # All file references reachable from a skill, direct and indirect
    references: list[FileReference] = Field(default_factory=list, description="All file references")

View File

@ -0,0 +1,30 @@
from pydantic import BaseModel, ConfigDict, Field
from core.skill.entities.file_artifact import FilesArtifact
from core.skill.entities.tool_artifact import ToolArtifact
class SkillSourceInfo(BaseModel):
    """Source file information for change detection."""

    model_config = ConfigDict(extra="forbid")

    asset_id: str = Field(description="Asset ID of the source skill file")
    content_digest: str = Field(description="Hash of the original content for change detection")


class SkillArtifact(BaseModel):
    """
    Compiled artifact for a single skill.

    Contains the transitive closure of all tool and file dependencies,
    plus the resolved content with all references replaced.
    """

    model_config = ConfigDict(extra="forbid")

    skill_id: str = Field(description="Unique identifier for this skill")
    source: SkillSourceInfo = Field(description="Source file information")
    tools: ToolArtifact = Field(description="All tool dependencies (transitive closure)")
    files: FilesArtifact = Field(description="All file references (transitive closure)")
    content: str = Field(description="Resolved content with all references replaced")

View File

@ -0,0 +1,105 @@
from collections.abc import Iterable
from datetime import datetime
from pydantic import BaseModel, ConfigDict, Field
from core.skill.entities.skill_artifact import SkillArtifact
from core.skill.entities.skill_metadata import ToolReference
from core.skill.entities.tool_artifact import ToolArtifact, ToolDependency
class SkillArtifactSet(BaseModel):
    """
    Compiled index for an entire skill project.

    - Corresponds to a single JSON file in S3
    - Load once, query multiple times
    - All persistence operations handled by SkillManager
    """

    model_config = ConfigDict(extra="forbid")

    assets_id: str = Field(description="Assets ID this artifact set belongs to")
    schema_version: int = Field(default=1, description="Schema version for forward compatibility")
    built_at: datetime | None = Field(default=None, description="Build timestamp")
    items: dict[str, SkillArtifact] = Field(default_factory=dict, description="skill_id -> SkillArtifact")
    dependency_graph: dict[str, list[str]] = Field(
        default_factory=dict,
        description="skill_id -> list of skill_ids it depends on",
    )
    reverse_graph: dict[str, list[str]] = Field(
        default_factory=dict,
        description="skill_id -> list of skill_ids that depend on it",
    )

    def get(self, skill_id: str) -> SkillArtifact | None:
        """Look up one compiled artifact, or None when absent."""
        return self.items.get(skill_id)

    def upsert(self, artifact: SkillArtifact) -> None:
        """Insert or replace the artifact keyed by its skill id."""
        self.items[artifact.skill_id] = artifact

    def remove(self, skill_id: str) -> None:
        """Drop a skill and scrub every mention of it from both graphs."""
        self.items.pop(skill_id, None)
        for graph in (self.dependency_graph, self.reverse_graph):
            graph.pop(skill_id, None)
            # Mutate edge lists in place so existing references stay valid.
            for edges in graph.values():
                if skill_id in edges:
                    edges.remove(skill_id)

    def referenced_skill_ids(self, skill_id: str) -> set[str]:
        """Direct (non-transitive) dependencies of the given skill."""
        return set(self.dependency_graph.get(skill_id, []))

    def recompile_group_ids(self, skill_id: str) -> set[str]:
        """Skills that must be recompiled when this one changes.

        Walks the reverse graph, so the result is the skill itself plus
        every transitive dependent.
        """
        group: set[str] = {skill_id}
        stack = [skill_id]
        while stack:
            current = stack.pop()
            pending = [d for d in self.reverse_graph.get(current, []) if d not in group]
            group.update(pending)
            stack.extend(pending)
        return group

    def subset(self, skill_ids: Iterable[str]) -> "SkillArtifactSet":
        """Project this set onto the given skills, pruning edges that leave it."""
        keep = set(skill_ids)

        def _prune(graph: dict[str, list[str]]) -> dict[str, list[str]]:
            # Keep only retained nodes; within each edge list, only retained targets.
            return {
                sid: [edge for edge in edges if edge in keep]
                for sid, edges in graph.items()
                if sid in keep
            }

        return SkillArtifactSet(
            assets_id=self.assets_id,
            schema_version=self.schema_version,
            built_at=self.built_at,
            items={sid: art for sid, art in self.items.items() if sid in keep},
            dependency_graph=_prune(self.dependency_graph),
            reverse_graph=_prune(self.reverse_graph),
        )

    def get_tool_artifact(self) -> ToolArtifact:
        """Union of every skill's tool dependencies/references; first seen wins."""
        dependencies: dict[str, ToolDependency] = {}
        references: dict[str, ToolReference] = {}
        for artifact in self.items.values():
            for dep in artifact.tools.dependencies:
                dependencies.setdefault(f"{dep.provider}.{dep.tool_name}", dep)
            for ref in artifact.tools.references:
                references.setdefault(ref.uuid, ref)
        return ToolArtifact(
            dependencies=list(dependencies.values()),
            references=list(references.values()),
        )

View File

@ -0,0 +1,14 @@
from collections.abc import Mapping
from typing import Any
from pydantic import BaseModel, ConfigDict, Field
class SkillDocument(BaseModel):
    """Input document for skill compilation."""

    model_config = ConfigDict(extra="forbid")

    skill_id: str = Field(description="Unique identifier, must match SkillAsset.asset_id")
    content: str = Field(description="Raw content with reference placeholders")
    metadata: Mapping[str, Any] = Field(default_factory=dict, description="Raw metadata dict")

View File

@ -0,0 +1,47 @@
from typing import Any
from pydantic import BaseModel, ConfigDict, Field
from core.tools.entities.tool_entities import ToolProviderType
class ToolFieldConfig(BaseModel):
    """A single configuration field of a tool (id, configured value, auto flag)."""

    model_config = ConfigDict(extra="forbid")

    id: str
    value: Any
    auto: bool = False


class ToolConfiguration(BaseModel):
    """Collection of configured tool fields."""

    model_config = ConfigDict(extra="forbid")

    fields: list[ToolFieldConfig] = Field(default_factory=list)

    def default_values(self) -> dict[str, Any]:
        """Map field id -> configured value, skipping fields with no value."""
        return {field.id: field.value for field in self.fields if field.value is not None}


class ToolReference(BaseModel):
    """A tool referenced from skill content, keyed by its uuid placeholder."""

    model_config = ConfigDict(extra="forbid")

    uuid: str
    type: ToolProviderType
    provider: str
    tool_name: str
    credential_id: str | None = None
    configuration: ToolConfiguration | None = None


class FileReference(BaseModel):
    """A file referenced from skill content (source location + asset id)."""

    model_config = ConfigDict(extra="forbid")

    source: str
    asset_id: str


class SkillMetadata(BaseModel):
    """Typed metadata parsed from a skill document; extra keys are preserved."""

    model_config = ConfigDict(extra="allow")

    tools: dict[str, ToolReference] = Field(default_factory=dict)
    files: list[FileReference] = Field(default_factory=list)

View File

@ -1,6 +1,6 @@
from pydantic import BaseModel, ConfigDict, Field
from core.app_assets.entities import ToolReference
from core.skill.entities.skill_metadata import ToolReference
from core.tools.entities.tool_entities import ToolProviderType
@ -15,16 +15,8 @@ class ToolDependency(BaseModel):
class ToolArtifact(BaseModel):
model_config = ConfigDict(extra="forbid")
dependencies: list[ToolDependency] = Field(default_factory=list, description="List of tool dependencies")
references: list[ToolReference] = Field(default_factory=list, description="List of tool references")
"""
Filter the tool artifact to only include the given tools
:param tools: Tuple of (provider, tool_name)
:return: Filtered tool artifact
"""
dependencies: list[ToolDependency] = Field(default_factory=list)
references: list[ToolReference] = Field(default_factory=list)
def is_empty(self) -> bool:
return not self.dependencies and not self.references

View File

@ -0,0 +1,259 @@
import hashlib
import logging
import re
from collections.abc import Mapping
from datetime import UTC, datetime
from typing import Any
from core.app.entities.app_asset_entities import AppAssetFileTree
from core.skill.entities.file_artifact import FilesArtifact
from core.skill.entities.skill_artifact import SkillArtifact, SkillSourceInfo
from core.skill.entities.skill_artifact_set import SkillArtifactSet
from core.skill.entities.skill_document import SkillDocument
from core.skill.entities.skill_metadata import (
FileReference,
SkillMetadata,
ToolConfiguration,
ToolReference,
)
from core.skill.entities.tool_artifact import ToolArtifact, ToolDependency
from core.tools.entities.tool_entities import ToolProviderType
logger = logging.getLogger(__name__)

# Reference placeholders embedded in skill content:
#   §[tool].[<provider>].[<tool_name>].[<uuid>]
#   §[file].[<source>].[<asset_id>]
# Each bracketed segment is one capture group. Both patterns previously
# ended with a stray `\"` instead of `\]`, which left the final capture
# group unclosed and the raw-string literal unterminated.
TOOL_REFERENCE_PATTERN = re.compile(r"§\[tool\]\.\[([^\]]+)\]\.\[([^\]]+)\]\.\[([^\]]+)\]")
FILE_REFERENCE_PATTERN = re.compile(r"§\[file\]\.\[([^\]]+)\]\.\[([^\]]+)\]")
class SkillCompiler:
    """
    Stateless skill compiler.

    Responsibilities:
    - Parse raw metadata dict into SkillMetadata
    - Parse direct dependencies from skill content
    - Compute transitive closure based on existing artifact set
    - Resolve content by replacing references
    - Generate SkillArtifact
    """

    def _parse_metadata(self, content: str, raw_metadata: Mapping[str, Any]) -> SkillMetadata:
        """Build typed SkillMetadata from content placeholders and raw metadata.

        Tool references come from TOOL_REFERENCE_PATTERN matches in the
        content, enriched by the per-uuid entries under raw_metadata["tools"];
        matches without a metadata entry are skipped. File references come
        directly from FILE_REFERENCE_PATTERN matches.
        """
        tools_raw: dict[str, Any] = dict(raw_metadata.get("tools", {}))
        tools: dict[str, ToolReference] = {}
        files: list[FileReference] = []
        for match in TOOL_REFERENCE_PATTERN.finditer(content):
            tool_id = match.group(3)
            tool_name = match.group(2)
            tool_provider = match.group(1)
            tool_meta = tools_raw.get(tool_id)
            if tool_meta is None:
                # Placeholder with no matching metadata entry: ignore it.
                continue
            config_raw = tool_meta.get("configuration", {})
            configuration = ToolConfiguration.model_validate(config_raw) if config_raw else None
            tools[tool_id] = ToolReference(
                uuid=tool_id,
                type=ToolProviderType.value_of(tool_meta.get("type")),
                provider=tool_provider,
                tool_name=tool_name,
                credential_id=tool_meta.get("credential_id"),
                configuration=configuration,
            )
        for match in FILE_REFERENCE_PATTERN.finditer(content):
            files.append(
                FileReference(
                    source=match.group(1),
                    asset_id=match.group(2),
                )
            )
        return SkillMetadata(tools=tools, files=files)

    def compile_all(
        self,
        documents: list[SkillDocument],
        file_tree: AppAssetFileTree,
        assets_id: str,
    ) -> SkillArtifactSet:
        """Compile every document into a fresh SkillArtifactSet.

        Two passes: first parse each document's metadata and record the
        dependency / reverse-dependency graphs, then compile each document
        (so closures can see every skill's parsed metadata).
        """
        artifact_set = SkillArtifactSet(
            assets_id=assets_id,
            built_at=datetime.now(UTC),
        )
        doc_map: dict[str, SkillDocument] = {doc.skill_id: doc for doc in documents}
        parsed_metadata: dict[str, SkillMetadata] = {}
        for doc in documents:
            metadata = self._parse_metadata(doc.content, doc.metadata)
            parsed_metadata[doc.skill_id] = metadata
            direct_skill_refs = self._extract_skill_refs(metadata, doc_map)
            artifact_set.dependency_graph[doc.skill_id] = list(direct_skill_refs)
            for ref_id in direct_skill_refs:
                if ref_id not in artifact_set.reverse_graph:
                    artifact_set.reverse_graph[ref_id] = []
                artifact_set.reverse_graph[ref_id].append(doc.skill_id)
        for doc in documents:
            metadata = parsed_metadata[doc.skill_id]
            artifact = self._compile_single(doc, metadata, artifact_set, parsed_metadata, file_tree)
            artifact_set.upsert(artifact)
        return artifact_set

    def compile_one(
        self,
        artifact_set: SkillArtifactSet,
        document: SkillDocument,
        file_tree: AppAssetFileTree,
        all_documents: dict[str, SkillDocument] | None = None,
    ) -> SkillArtifact:
        """Incrementally compile one document against an existing set.

        Updates the set's dependency/reverse graphs in place for this
        document and returns the new artifact; the caller decides whether
        to upsert it.

        NOTE(review): when `all_documents` is supplied it is mutated in
        place (the document is added to it) — confirm callers expect this.
        """
        doc_map = all_documents or {}
        if document.skill_id not in doc_map:
            doc_map[document.skill_id] = document
        parsed_metadata: dict[str, SkillMetadata] = {}
        for skill_id, doc in doc_map.items():
            parsed_metadata[skill_id] = self._parse_metadata(doc.content, doc.metadata)
        metadata = parsed_metadata[document.skill_id]
        direct_skill_refs = self._extract_skill_refs(metadata, doc_map)
        artifact_set.dependency_graph[document.skill_id] = list(direct_skill_refs)
        for ref_id in direct_skill_refs:
            if ref_id not in artifact_set.reverse_graph:
                artifact_set.reverse_graph[ref_id] = []
            if document.skill_id not in artifact_set.reverse_graph[ref_id]:
                artifact_set.reverse_graph[ref_id].append(document.skill_id)
        return self._compile_single(document, metadata, artifact_set, parsed_metadata, file_tree)

    def _compile_single(
        self,
        document: SkillDocument,
        metadata: SkillMetadata,
        artifact_set: SkillArtifactSet,
        parsed_metadata: dict[str, SkillMetadata],
        file_tree: AppAssetFileTree,
    ) -> SkillArtifact:
        """Produce the SkillArtifact for one document.

        Combines the transitive tool/file closure, the resolved content,
        and a sha256 digest of the ORIGINAL (unresolved) content for
        change detection.
        """
        all_tools, all_files = self._compute_transitive_closure(
            document.skill_id, artifact_set, parsed_metadata
        )
        current_node = file_tree.get(document.skill_id)
        resolved_content = self._resolve_content(
            document.content, metadata, current_node, file_tree
        )
        content_digest = hashlib.sha256(document.content.encode("utf-8")).hexdigest()
        return SkillArtifact(
            skill_id=document.skill_id,
            source=SkillSourceInfo(
                asset_id=document.skill_id,
                content_digest=content_digest,
            ),
            tools=ToolArtifact(
                dependencies=list(all_tools.values()),
                # Only the skill's OWN references here; the closure feeds dependencies.
                references=list(metadata.tools.values()),
            ),
            files=FilesArtifact(
                references=list(all_files.values()),
            ),
            content=resolved_content,
        )

    def _extract_skill_refs(
        self,
        metadata: SkillMetadata,
        doc_map: dict[str, SkillDocument],
    ) -> set[str]:
        """File references whose asset_id is itself a known skill document."""
        skill_refs: set[str] = set()
        for file_ref in metadata.files:
            if file_ref.asset_id in doc_map:
                skill_refs.add(file_ref.asset_id)
        return skill_refs

    def _compute_transitive_closure(
        self,
        skill_id: str,
        artifact_set: SkillArtifactSet,
        parsed_metadata: dict[str, SkillMetadata],
    ) -> tuple[dict[str, ToolDependency], dict[str, FileReference]]:
        """BFS over the dependency graph collecting all tools and files.

        Skills with freshly parsed metadata contribute from that metadata;
        otherwise the previously compiled artifact (if any) is reused.
        Tools are deduplicated by "provider.tool_name", files by asset_id;
        the first occurrence wins.
        """
        all_tools: dict[str, ToolDependency] = {}
        all_files: dict[str, FileReference] = {}
        visited: set[str] = set()
        queue = [skill_id]
        while queue:
            current_id = queue.pop(0)
            if current_id in visited:
                continue
            visited.add(current_id)
            metadata = parsed_metadata.get(current_id)
            if metadata is None:
                # Not recompiled in this pass: fall back to the existing artifact.
                existing_artifact = artifact_set.get(current_id)
                if existing_artifact:
                    for dep in existing_artifact.tools.dependencies:
                        key = f"{dep.provider}.{dep.tool_name}"
                        if key not in all_tools:
                            all_tools[key] = dep
                    for file_ref in existing_artifact.files.references:
                        if file_ref.asset_id not in all_files:
                            all_files[file_ref.asset_id] = file_ref
                continue
            for tool_ref in metadata.tools.values():
                key = f"{tool_ref.provider}.{tool_ref.tool_name}"
                if key not in all_tools:
                    all_tools[key] = ToolDependency(
                        type=tool_ref.type,
                        provider=tool_ref.provider,
                        tool_name=tool_ref.tool_name,
                    )
            for file_ref in metadata.files:
                if file_ref.asset_id not in all_files:
                    all_files[file_ref.asset_id] = file_ref
            for dep_id in artifact_set.dependency_graph.get(current_id, []):
                if dep_id not in visited:
                    queue.append(dep_id)
        return all_tools, all_files

    def _resolve_content(
        self,
        content: str,
        metadata: SkillMetadata,
        current_node: Any,
        file_tree: AppAssetFileTree,
    ) -> str:
        """Replace reference placeholders in content with resolved text.

        File placeholders become paths relative to `current_node` (or just
        the file name when the current node is unknown); tool placeholders
        become bash-command markers. Unresolvable references are replaced
        with explicit "not found" placeholders rather than raising.
        """
        if not content:
            return content
        for match in FILE_REFERENCE_PATTERN.finditer(content):
            file_id = match.group(2)
            file_node = file_tree.get(file_id)
            if file_node is None:
                logger.warning("File not found for id=%s, skipping", file_id)
                content = content.replace(match.group(0), "[File not found]")
                continue
            if current_node is not None:
                content = content.replace(match.group(0), file_tree.relative_path(current_node, file_node))
            else:
                content = content.replace(match.group(0), f"[{file_node.name}]")
        for match in TOOL_REFERENCE_PATTERN.finditer(content):
            tool_id = match.group(3)
            tool = metadata.tools.get(tool_id)
            if tool is None:
                logger.warning("Tool not found for id=%s, skipping", tool_id)
                content = content.replace(match.group(0), f"[Tool not found: {tool_id}]")
                continue
            content = content.replace(match.group(0), f"[Bash Command: {tool.tool_name}_{tool_id}]")
        return content

View File

@ -1,7 +1,9 @@
from core.app.entities.app_asset_entities import AppAssetFileTree
from core.app_assets.entities import SkillAsset
from core.app_assets.entities.skill import ToolReference
from core.app_assets.paths import AssetPaths
from core.skill.entities.tool_artifact import ToolDependency
from core.skill.entities.skill_artifact_set import SkillArtifactSet
from core.skill.entities.skill_document import SkillDocument
from core.skill.skill_compiler import SkillCompiler
from extensions.ext_storage import storage
from .entities import ToolArtifact
@ -9,29 +11,14 @@ from .entities import ToolArtifact
class SkillManager:
@staticmethod
def generate_tool_artifact(assets: list[SkillAsset]) -> ToolArtifact:
# provider + tool_name -> ToolDependency
dependencies: dict[str, ToolDependency] = {}
references: list[ToolReference] = []
def _load_content(storage_key: str) -> str:
import json
for asset in assets:
for id, tool in asset.metadata.tools.items():
dependencies[f"{tool.provider}.{tool.tool_name}"] = ToolDependency(
type=tool.type,
provider=tool.provider,
tool_name=tool.tool_name,
)
references.append(
ToolReference(
uuid=id,
type=tool.type,
provider=tool.provider,
tool_name=tool.tool_name,
)
)
return ToolArtifact(dependencies=list(dependencies.values()), references=references)
try:
data = json.loads(storage.load_once(storage_key))
return data.get("content", "") if isinstance(data, dict) else ""
except Exception:
return ""
@staticmethod
def save_tool_artifact(
@ -55,3 +42,49 @@ class SkillManager:
return ToolArtifact.model_validate_json(data)
except Exception:
return None
@staticmethod
def compile_all(
documents: list[SkillDocument],
file_tree: AppAssetFileTree,
assets_id: str,
) -> SkillArtifactSet:
compiler = SkillCompiler()
return compiler.compile_all(documents, file_tree, assets_id)
@staticmethod
def assets_to_documents(assets: list[SkillAsset]) -> list[SkillDocument]:
documents: list[SkillDocument] = []
for asset in assets:
content = SkillManager._load_content(asset.storage_key)
documents.append(
SkillDocument(
skill_id=asset.asset_id,
content=content,
metadata=asset.metadata,
)
)
return documents
@staticmethod
def load_artifact(
tenant_id: str,
app_id: str,
assets_id: str,
) -> SkillArtifactSet | None:
key = AssetPaths.build_skill_artifact_set(tenant_id, app_id, assets_id)
try:
data = storage.load_once(key)
return SkillArtifactSet.model_validate_json(data)
except Exception:
return None
@staticmethod
def save_artifact(
tenant_id: str,
app_id: str,
assets_id: str,
artifact_set: SkillArtifactSet,
) -> None:
key = AssetPaths.build_skill_artifact_set(tenant_id, app_id, assets_id)
storage.save(key, artifact_set.model_dump_json(indent=2).encode("utf-8"))