refactor: redesign skill compilation and document assembly process

This commit is contained in:
Harry
2026-03-06 15:02:44 +08:00
parent 52dd4b82e6
commit 53f76a20c2
16 changed files with 569 additions and 1423 deletions

View File

@ -1,44 +1,25 @@
import json
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from typing import Any, cast
import logging
from core.app.entities.app_asset_entities import AppAssetFileTree, AppAssetNode
from core.app_assets.entities import AssetItem
from core.app_assets.storage import AssetPaths
from core.skill.assembler import SkillBundleAssembler
from core.skill.entities.skill_bundle import SkillBundle
from core.skill.entities.skill_document import SkillDocument
from core.skill.skill_compiler import SkillCompiler
from extensions.storage.cached_presign_storage import CachedPresignStorage
from .base import BuildContext
@dataclass
class _LoadedSkill:
node: AppAssetNode
path: str
content: str
metadata: dict[str, Any]
logger = logging.getLogger(__name__)
@dataclass
class _CompiledSkill:
node: AppAssetNode
path: str
storage_key: str
content_bytes: bytes
# FIXME(Mairuis): move the logic into sandbox
class SkillBuilder:
_nodes: list[tuple[AppAssetNode, str]]
_max_workers: int
_storage: CachedPresignStorage
def __init__(self, storage: CachedPresignStorage, max_workers: int = 8) -> None:
def __init__(self, storage: CachedPresignStorage) -> None:
self._nodes = []
self._max_workers = max_workers
self._storage = storage
def accept(self, node: AppAssetNode) -> bool:
@ -51,77 +32,45 @@ class SkillBuilder:
from core.skill.skill_manager import SkillManager
if not self._nodes:
bundle = SkillBundle(assets_id=ctx.build_id)
SkillManager.save_bundle(ctx.tenant_id, ctx.app_id, ctx.build_id, bundle)
SkillManager.save_bundle(
ctx.tenant_id, ctx.app_id, ctx.build_id, SkillBundle(assets_id=ctx.build_id, asset_tree=tree)
)
return []
# 1. Load all skills (parallel IO)
loaded = self._load_all(ctx)
# 2. Compile all skills (CPU-bound, single thread)
documents = [SkillDocument(skill_id=s.node.id, content=s.content, metadata=s.metadata) for s in loaded]
artifact_set = SkillCompiler().compile_bundle(documents, tree, ctx.build_id)
SkillManager.save_bundle(ctx.tenant_id, ctx.app_id, ctx.build_id, artifact_set)
# 4. Prepare compiled skills for upload
to_upload: list[_CompiledSkill] = []
for skill in loaded:
artifact = artifact_set.get(skill.node.id)
if artifact is None:
continue
to_upload.append(
_CompiledSkill(
node=skill.node,
path=skill.path,
storage_key=AssetPaths.resolved(ctx.tenant_id, ctx.app_id, ctx.build_id, skill.node.id),
content_bytes=artifact.content.encode("utf-8"),
)
)
# 5. Upload all compiled skills (parallel IO)
self._upload_all(to_upload)
# 6. Return AssetItems
return [
AssetItem(
asset_id=s.node.id,
path=s.path,
file_name=s.node.name,
extension=s.node.extension or "",
storage_key=s.storage_key,
)
for s in to_upload
]
def _load_all(self, ctx: BuildContext) -> list[_LoadedSkill]:
def load_one(node: AppAssetNode, path: str) -> _LoadedSkill:
# load documents skip nodes whose draft content is still the empty
# placeholder written at creation time (the front-end has not uploaded
# the actual skill document yet).
documents: dict[str, SkillDocument] = {}
for node, _ in self._nodes:
try:
key = AssetPaths.draft(ctx.tenant_id, ctx.app_id, node.id)
data = json.loads(self._storage.load_once(key))
content = ""
metadata: dict[str, Any] = {}
if isinstance(data, dict):
data_dict = cast(dict[str, Any], data)
content_value = data_dict.get("content", "")
content = content_value if isinstance(content_value, str) else str(content_value)
metadata_value = data_dict.get("metadata", {})
if isinstance(metadata_value, dict):
metadata = cast(dict[str, Any], metadata_value)
except (FileNotFoundError, json.JSONDecodeError, TypeError, ValueError):
content = ""
metadata = {}
return _LoadedSkill(node=node, path=path, content=content, metadata=metadata)
raw = self._storage.load_once(key)
# skip empty content
if not raw:
continue
data = {"skill_id": node.id, **json.loads(raw)}
documents[node.id] = SkillDocument.model_validate(data)
except (FileNotFoundError, json.JSONDecodeError, TypeError, ValueError) as e:
logger.exception("Failed to load or parse skill document for node %s", node.id)
raise ValueError(f"Failed to load or parse skill document for node {node.id}") from e
with ThreadPoolExecutor(max_workers=self._max_workers) as executor:
futures = [executor.submit(load_one, node, path) for node, path in self._nodes]
return [f.result() for f in futures]
bundle = SkillBundleAssembler(tree).assemble_bundle(documents, ctx.build_id)
SkillManager.save_bundle(ctx.tenant_id, ctx.app_id, ctx.build_id, bundle)
def _upload_all(self, skills: list[_CompiledSkill]) -> None:
def upload_one(skill: _CompiledSkill) -> None:
self._storage.save(skill.storage_key, skill.content_bytes)
with ThreadPoolExecutor(max_workers=self._max_workers) as executor:
futures = [executor.submit(upload_one, skill) for skill in skills]
for f in futures:
f.result()
items: list[AssetItem] = []
for node, path in self._nodes:
skill = bundle.get(node.id)
if skill is None:
continue
storage_key = AssetPaths.resolved(ctx.tenant_id, ctx.app_id, ctx.build_id, node.id)
self._storage.save(storage_key, skill.content.encode("utf-8"))
items.append(
AssetItem(
asset_id=node.id,
path=path,
file_name=node.name,
extension=node.extension or "",
storage_key=storage_key,
)
)
return items

View File

@ -0,0 +1,6 @@
from core.skill.assembler.assemblers import SkillBundleAssembler, SkillDocumentAssembler
__all__ = [
"SkillBundleAssembler",
"SkillDocumentAssembler",
]

View File

@ -0,0 +1,80 @@
from collections.abc import Mapping
from core.app.entities.app_asset_entities import AppAssetFileTree
from core.skill.assembler.common import (
build_skill_graph,
compute_transitive_dependance,
expand_referenced_skill_ids,
get_metadata,
process_skill_content,
)
from core.skill.entities.skill_bundle import Skill, SkillBundle, SkillDependance
from core.skill.entities.skill_document import SkillDocument
class SkillBundleAssembler:
    """Compiles a full set of skill documents into a ``SkillBundle``.

    Pass 1 resolves each document's content and direct dependencies; pass 2
    widens every skill's dependencies to the transitive closure over the
    skill reference graph.
    """

    _file_tree: AppAssetFileTree

    def __init__(self, file_tree: AppAssetFileTree) -> None:
        self._file_tree = file_tree

    def assemble_bundle(
        self,
        documents: Mapping[str, SkillDocument],
        assets_id: str,
    ) -> SkillBundle:
        """Assemble every document into a compiled bundle keyed by skill id."""
        # Pass 1: per-document compilation (placeholder resolution + direct deps).
        skills_by_id: dict[str, Skill] = {}
        for skill_id, document in documents.items():
            effective_metadata = get_metadata(document.content, document.metadata)
            direct = SkillDependance.from_metadata(effective_metadata)
            skills_by_id[skill_id] = Skill(
                skill_id=skill_id,
                direct_dependance=direct,
                dependance=direct,
                content=process_skill_content(document.content, effective_metadata, self._file_tree, skill_id),
            )
        # Pass 2: propagate dependencies through the reference graph to a fixed point.
        graph = build_skill_graph(skills_by_id, self._file_tree)
        closure = compute_transitive_dependance(skills_by_id, graph)
        finalized = {
            skill_id: skill.model_copy(update={"dependance": closure[skill_id]})
            for skill_id, skill in skills_by_id.items()
        }
        return SkillBundle(asset_tree=self._file_tree, assets_id=assets_id, skills=finalized)
class SkillDocumentAssembler:
    """Assembles a single skill document against an already-compiled bundle.

    Direct dependencies come from the document's own placeholders; transitive
    dependencies are folded in from the compiled skills that the document
    references (whose ``dependance`` is already a transitive closure).
    """

    _bundle: SkillBundle

    def __init__(self, bundle: SkillBundle) -> None:
        self._bundle = bundle

    def assemble_document(self, document: SkillDocument, base_path: str = "") -> Skill:
        """Compile one document into a ``Skill`` with resolved content and merged deps."""
        effective_metadata = get_metadata(document.content, document.metadata)
        direct = SkillDependance.from_metadata(effective_metadata)
        content = process_skill_content(
            document.content,
            effective_metadata,
            self._bundle.asset_tree,
            document.skill_id,
            base_path,
        )
        # Fold in the dependencies of every referenced skill known to the bundle.
        combined = direct
        referenced_ids = expand_referenced_skill_ids(
            direct.files, set(self._bundle.skills.keys()), self._bundle.asset_tree
        )
        for referenced_id in sorted(referenced_ids):
            known = self._bundle.get(referenced_id)
            if known is not None:
                combined = combined | known.dependance
        return Skill(
            skill_id=document.skill_id,
            direct_dependance=direct,
            dependance=combined,
            content=content,
        )

View File

@ -0,0 +1,136 @@
from collections import deque
from collections.abc import Mapping
from core.app.entities.app_asset_entities import AppAssetFileTree, AssetNodeType
from core.skill.assembler.replacers import (
FILE_PATTERN,
TOOL_METADATA_PATTERN,
FileReplacer,
Replacer,
ToolGroupReplacer,
ToolReplacer,
)
from core.skill.entities.skill_bundle import Skill, SkillDependance
from core.skill.entities.skill_metadata import FileReference, SkillMetadata, ToolReference
def process_skill_content(
    content: str,
    metadata: SkillMetadata,
    file_tree: AppAssetFileTree,
    current_id: str,
    base_path: str = "",
) -> str:
    """Resolve all placeholders in content through the ordered replacer pipeline.

    Order matters: groups must be collapsed before individual tool placeholders,
    otherwise single-tool replacement would destroy the group brackets.
    """
    pipeline: tuple[Replacer, ...] = (
        FileReplacer(file_tree, current_id, base_path),
        ToolGroupReplacer(metadata),
        ToolReplacer(metadata),
    )
    resolved = content
    for step in pipeline:
        resolved = step.resolve(resolved)
    return resolved
def get_metadata(content: str, metadata: SkillMetadata) -> SkillMetadata:
    """Parse effective metadata from content placeholders and raw metadata.

    Only tool references actually used in ``content`` are kept; each kept
    reference has its uuid/name/provider filled in from the placeholder text.
    Raises ``ValueError`` when a placeholder names a uuid absent from metadata.
    """
    used_tools: dict[str, ToolReference] = {}
    for tool_match in TOOL_METADATA_PATTERN.finditer(content):
        provider, name, uuid = tool_match.groups()
        ref = metadata.tools.get(uuid)
        if ref is None:
            raise ValueError(f"Tool reference with UUID {uuid} not found in metadata")
        # Back-fill identity fields from the placeholder (NB: mutates the input ref).
        ref.uuid = uuid
        ref.tool_name = name
        ref.provider = provider
        used_tools[uuid] = ref
    # Collect every file placeholder as a (source, asset_id) reference.
    file_refs: set[FileReference] = {
        FileReference(source=file_match.group(1), asset_id=file_match.group(2))
        for file_match in FILE_PATTERN.finditer(content)
    }
    return SkillMetadata(tools=used_tools, files=file_refs)
def build_skill_graph(skills: Mapping[str, Skill], file_tree: AppAssetFileTree) -> dict[str, set[str]]:
    """Build adjacency list: skill_id -> referenced skill IDs.

    Edges only point at skills present in ``skills``; folder references are
    expanded to their file descendants by ``expand_referenced_skill_ids``.
    """
    known_ids = set(skills)
    return {
        skill_id: expand_referenced_skill_ids(skill.direct_dependance.files, known_ids, file_tree)
        for skill_id, skill in skills.items()
    }
def compute_transitive_dependance(
    skills: Mapping[str, Skill],
    graph: Mapping[str, set[str]],
) -> dict[str, SkillDependance]:
    """Compute transitive dependency closure with fixed-point iteration.

    Repeatedly merges each skill's dependance with its neighbours' until no
    entry changes; sorted iteration keeps the result deterministic. Handles
    cycles naturally because merging is monotone and idempotent.
    """
    closure = {skill_id: skill.direct_dependance for skill_id, skill in skills.items()}
    dirty = True
    while dirty:
        dirty = False
        for skill_id in sorted(closure):
            combined = closure[skill_id]
            for neighbour_id in sorted(graph.get(skill_id, set())):
                if neighbour_id == skill_id:
                    continue  # ignore self-loops
                combined = combined | closure[neighbour_id]
            if combined != closure[skill_id]:
                closure[skill_id] = combined
                dirty = True
    return closure
def expand_referenced_skill_ids(
    refs: set[FileReference],
    known_skill_ids: set[str],
    file_tree: AppAssetFileTree,
) -> set[str]:
    """Resolve file/folder references to concrete known skill IDs.

    A file reference contributes its own id; a folder reference contributes
    every descendant file id. Unknown nodes and non-skill files are dropped.
    """
    result: set[str] = set()
    for ref in refs:
        node = file_tree.get(ref.asset_id)
        if node is None:
            continue  # dangling reference — nothing to resolve
        if node.node_type == AssetNodeType.FILE:
            if node.id in known_skill_ids:
                result.add(node.id)
            continue
        # Folder: expand to descendant files that are known skills.
        for child_id in file_tree.get_descendant_ids(node.id):
            child = file_tree.get(child_id)
            if child is None or child.node_type != AssetNodeType.FILE:
                continue
            if child_id in known_skill_ids:
                result.add(child_id)
    return result
def collect_transitive_skill_ids(
    root_skill_ids: set[str],
    graph: Mapping[str, set[str]],
) -> set[str]:
    """Collect all transitively reachable skill IDs from roots via BFS.

    Roots are included in the result; sorted expansion keeps traversal
    order deterministic. Cycles are handled by the visited set.
    """
    seen: set[str] = set()
    pending = deque(sorted(root_skill_ids))
    while pending:
        skill_id = pending.popleft()
        if skill_id in seen:
            continue
        seen.add(skill_id)
        pending.extend(
            neighbour
            for neighbour in sorted(graph.get(skill_id, set()))
            if neighbour not in seen
        )
    return seen

View File

@ -0,0 +1,108 @@
"""Placeholder replacers for skill content.
Each replacer handles one category of ``§[...]§`` placeholder via the unified
``Replacer`` protocol. The shared ``process_skill_content`` pipeline in
``core.skill.assembler.common`` builds a ``list[Replacer]`` and applies them
in order:
``FileReplacer`` → ``ToolGroupReplacer`` → ``ToolReplacer``
``ToolGroupReplacer`` MUST run before ``ToolReplacer`` so that group brackets
``[§[tool]...§, §[tool]...§]`` are resolved atomically; otherwise individual
tool replacement would destroy the group structure.
"""
import re
from typing import Protocol
from core.app.entities.app_asset_entities import AppAssetFileTree
from core.skill.entities.skill_metadata import SkillMetadata
# Placeholder grammar:
#   tool: §[tool].[provider].[tool_name].[uuid]§
#   file: §[file].[source].[asset_id]§
# The raw patterns below were truncated (trailing "\]§" lost), which left the
# string literals unterminated; restored to close each placeholder with "]§".
TOOL_METADATA_PATTERN: re.Pattern[str] = re.compile(r"§\[tool\]\.\[([^\]]+)\]\.\[([^\]]+)\]\.\[([^\]]+)\]§")
TOOL_PATTERN: re.Pattern[str] = re.compile(r"§\[tool\]\.\[.*?\]\.\[.*?\]\.\[(.*?)\]§")
# A bracketed group of tool placeholders, e.g. "[§[tool]...§, §[tool]...§]".
TOOL_GROUP_PATTERN: re.Pattern[str] = re.compile(
    r"\[\s*§\[tool\]\.\[[^\]]+\]\.\[[^\]]+\]\.\[[^\]]+\]§"
    r"(?:\s*,\s*§\[tool\]\.\[[^\]]+\]\.\[[^\]]+\]\.\[[^\]]+\]§)*\s*\]"
)
FILE_PATTERN: re.Pattern[str] = re.compile(r"§\[file\]\.\[([^\]]+)\]\.\[([^\]]+)\]§")
class Replacer(Protocol):
    """Structural interface for one stage of the placeholder-resolution pipeline."""

    # Returns the content with this replacer's placeholder category resolved.
    def resolve(self, content: str) -> str: ...
class FileReplacer:
    """Rewrites ``§[file]...§`` placeholders into paths.

    Prefers a path relative to the current skill's node; falls back to the
    tree-absolute path (optionally prefixed with ``base_path``) when the
    current skill is not in the tree.
    """

    _tree: AppAssetFileTree
    _current_id: str
    _base_path: str

    def __init__(self, tree: AppAssetFileTree, current_id: str, base_path: str = "") -> None:
        self._tree = tree
        self._current_id = current_id
        self._base_path = base_path.rstrip("/")

    def resolve(self, content: str) -> str:
        return FILE_PATTERN.sub(self._replace_match, content)

    def _replace_match(self, match: re.Match[str]) -> str:
        source = self._tree.get(self._current_id)
        target = self._tree.get(match.group(2))
        if target is None:
            return "[File not found]"
        if source is not None:
            return self._tree.relative_path(source, target)
        absolute = self._tree.get_path(target.id)
        return f"{self._base_path}/{absolute}" if self._base_path else absolute
class ToolReplacer:
    """Rewrites individual ``§[tool]...§`` placeholders into executable hints.

    Unknown uuids render an explicit "not found" marker; disabled tools
    render as the empty string.
    """

    _metadata: SkillMetadata

    def __init__(self, metadata: SkillMetadata) -> None:
        self._metadata = metadata

    def resolve(self, content: str) -> str:
        return TOOL_PATTERN.sub(self._replace_match, content)

    def _replace_match(self, match: re.Match[str]) -> str:
        uuid = match.group(1)
        ref = self._metadata.tools.get(uuid)
        if ref is None:
            return f"[Tool not found or disabled: {uuid}]"
        return f"[Executable: {ref.tool_name}_{ref.uuid} --help command]" if ref.enabled else ""
class ToolGroupReplacer:
    """Rewrites bracketed groups of tool placeholders as one unit.

    Disabled tools are silently dropped from the group; if every member is
    dropped the whole group (brackets included) vanishes. Must run before
    ``ToolReplacer`` so group structure survives.
    """

    _metadata: SkillMetadata

    def __init__(self, metadata: SkillMetadata) -> None:
        self._metadata = metadata

    def resolve(self, content: str) -> str:
        return TOOL_GROUP_PATTERN.sub(self._replace_match, content)

    def _replace_match(self, match: re.Match[str]) -> str:
        rendered: list[str] = []
        for tool_match in TOOL_PATTERN.finditer(match.group(0)):
            uuid = tool_match.group(1)
            ref = self._metadata.tools.get(uuid)
            if ref is None:
                rendered.append(f"[Tool not found or disabled: {uuid}]")
            elif ref.enabled:
                rendered.append(f"[Executable: {ref.tool_name}_{ref.uuid} --help command]")
        if not rendered:
            return ""
        return "[" + ", ".join(rendered) + "]"

View File

@ -1,6 +1,4 @@
from .asset_references import AssetReferences
from .skill_bundle import SkillBundle
from .skill_bundle_entry import SkillBundleEntry, SourceInfo
from .skill_bundle import Skill, SkillBundle, SkillDependance
from .skill_document import SkillDocument
from .skill_metadata import (
FileReference,
@ -13,13 +11,12 @@ from .tool_access_policy import ToolAccessPolicy, ToolDescription, ToolInvocatio
from .tool_dependencies import ToolDependencies, ToolDependency
__all__ = [
"AssetReferences",
"FileReference",
"Skill",
"SkillBundle",
"SkillBundleEntry",
"SkillDependance",
"SkillDocument",
"SkillMetadata",
"SourceInfo",
"ToolAccessPolicy",
"ToolConfiguration",
"ToolDependencies",

View File

@ -1,9 +0,0 @@
from pydantic import BaseModel, ConfigDict, Field
from core.skill.entities.skill_metadata import FileReference
class AssetReferences(BaseModel):
model_config = ConfigDict(extra="forbid")
references: list[FileReference] = Field(default_factory=list)

View File

@ -1,196 +1,94 @@
from collections.abc import Iterable
from datetime import datetime
from typing import TYPE_CHECKING
from pydantic import BaseModel, ConfigDict, Field
from core.skill.entities.asset_references import AssetReferences
from core.skill.entities.skill_bundle_entry import SkillBundleEntry
from core.skill.entities.skill_metadata import ToolReference
from core.app.entities.app_asset_entities import AppAssetFileTree
from core.skill.entities.skill_metadata import FileReference
from core.skill.entities.tool_dependencies import ToolDependencies, ToolDependency
from core.skill.graph_utils import collect_reachable, invert_dependency_map
if TYPE_CHECKING:
from core.skill.entities.skill_metadata import SkillMetadata
class SkillDependance(BaseModel):
model_config = ConfigDict(extra="forbid")
tools: ToolDependencies = Field(description="Direct tool dependencies parsed from this skill only")
files: set[FileReference] = Field(
default_factory=set,
description="Direct file references parsed from this skill only",
)
def __or__(self, other: "SkillDependance") -> "SkillDependance":
return SkillDependance(tools=self.tools.merge(other.tools), files=self.files | other.files)
@staticmethod
def from_metadata(metadata: "SkillMetadata") -> "SkillDependance":
"""Convert parsed metadata into direct tool/file dependency model."""
from core.skill.entities.skill_metadata import ToolReference
dep_map: dict[str, ToolDependency] = {}
ref_map: dict[str, ToolReference] = {}
for tool_ref in metadata.tools.values():
dep_map.setdefault(
tool_ref.tool_id(),
ToolDependency(
type=tool_ref.type,
provider=tool_ref.provider,
tool_name=tool_ref.tool_name,
enabled=tool_ref.enabled,
),
)
ref_map.setdefault(tool_ref.uuid, tool_ref)
return SkillDependance(
tools=ToolDependencies(
dependencies=[dep_map[key] for key in sorted(dep_map.keys())],
references=[ref_map[key] for key in sorted(ref_map.keys())],
),
files=metadata.files,
)
class Skill(BaseModel):
model_config = ConfigDict(extra="forbid")
skill_id: str = Field(description="Unique identifier for this skill, same with skill_id")
direct_dependance: SkillDependance = Field(description="Direct dependencies parsed from this skill only")
dependance: SkillDependance = Field(description="All dependencies including transitive closure")
content: str = Field(description="Resolved content with all references replaced")
@property
def tools(self) -> ToolDependencies:
return self.dependance.tools
class SkillBundle(BaseModel):
"""Persisted skill compilation snapshot with graph metadata and merge support."""
model_config = ConfigDict(extra="forbid")
asset_tree: AppAssetFileTree = Field(description="Asset tree for this bundle")
assets_id: str = Field(description="Assets ID this bundle belongs to")
schema_version: int = Field(default=2, description="Schema version for forward compatibility")
built_at: datetime | None = Field(default=None, description="Build timestamp")
entries: dict[str, SkillBundleEntry] = Field(default_factory=dict, description="skill_id -> SkillBundleEntry")
skills: dict[str, Skill] = Field(default_factory=dict)
depends_on_map: dict[str, list[str]] = Field(
default_factory=dict,
description="skill_id -> list of skill_ids it depends on",
)
@property
def entries(self) -> dict[str, Skill]:
return self.skills
reference_map: dict[str, list[str]] = Field(
default_factory=dict,
description="skill_id -> list of skill_ids that depend on it",
)
def get(self, skill_id: str) -> SkillBundleEntry | None:
return self.entries.get(skill_id)
def upsert(self, entry: SkillBundleEntry) -> None:
self.entries[entry.skill_id] = entry
def remove(self, skill_id: str) -> None:
self.entries.pop(skill_id, None)
self.depends_on_map.pop(skill_id, None)
self.reference_map.pop(skill_id, None)
for deps in self.reference_map.values():
if skill_id in deps:
deps.remove(skill_id)
for deps in self.depends_on_map.values():
if skill_id in deps:
deps.remove(skill_id)
def referenced_skill_ids(self, skill_id: str) -> set[str]:
return set(self.depends_on_map.get(skill_id, []))
def recompile_group_ids(self, skill_id: str) -> set[str]:
return collect_reachable([skill_id], self.reference_map)
def merge(self, patch: "SkillBundle") -> "SkillBundle":
"""Return a new bundle with patch entries merged and affected closure recomputed."""
if self.assets_id != patch.assets_id:
raise ValueError("bundle assets_id mismatch")
changed_skill_ids = set(patch.entries.keys())
if not changed_skill_ids:
return self.model_copy(deep=True)
merged_entries = dict(self.entries)
merged_entries.update(patch.entries)
merged_depends_on_map: dict[str, list[str]] = {
skill_id: [dep for dep in deps if dep in merged_entries]
for skill_id, deps in self.depends_on_map.items()
if skill_id in merged_entries
}
for skill_id in changed_skill_ids:
deps = patch.depends_on_map.get(skill_id)
if deps is None:
entry = patch.entries[skill_id]
deps = [f.asset_id for f in entry.direct_files.references]
merged_depends_on_map[skill_id] = [dep for dep in _dedupe(deps) if dep in merged_entries]
for skill_id in merged_entries:
merged_depends_on_map.setdefault(skill_id, [])
reference_map = {
skill_id: sorted(referrers)
for skill_id, referrers in invert_dependency_map(merged_depends_on_map, merged_entries.keys()).items()
}
affected_skill_ids = collect_reachable(changed_skill_ids, reference_map)
recomputed_entries = _recompute_affected_entries(merged_entries, merged_depends_on_map, affected_skill_ids)
merged_entries.update(recomputed_entries)
return SkillBundle(
assets_id=self.assets_id,
schema_version=max(self.schema_version, patch.schema_version),
built_at=patch.built_at or self.built_at,
entries=merged_entries,
depends_on_map=dict(merged_depends_on_map),
reference_map=reference_map,
)
def subset(self, skill_ids: Iterable[str]) -> "SkillBundle":
skill_id_set = set(skill_ids)
return SkillBundle(
assets_id=self.assets_id,
schema_version=self.schema_version,
built_at=self.built_at,
entries={sid: self.entries[sid] for sid in skill_id_set if sid in self.entries},
depends_on_map={
sid: [dep for dep in deps if dep in skill_id_set]
for sid, deps in self.depends_on_map.items()
if sid in skill_id_set
},
reference_map={
sid: [dep for dep in deps if dep in skill_id_set]
for sid, deps in self.reference_map.items()
if sid in skill_id_set
},
)
def get(self, skill_id: str) -> Skill | None:
return self.skills.get(skill_id)
def get_tool_dependencies(self) -> ToolDependencies:
dependencies: dict[str, ToolDependency] = {}
references: dict[str, ToolReference] = {}
merged = ToolDependencies()
for skill in self.skills.values():
merged = merged.merge(skill.dependance.tools)
return merged
for entry in self.entries.values():
for dep in entry.tools.dependencies:
key = f"{dep.provider}.{dep.tool_name}"
if key not in dependencies:
dependencies[key] = dep
for ref in entry.tools.references:
if ref.uuid not in references:
references[ref.uuid] = ref
return ToolDependencies(
dependencies=list(dependencies.values()),
references=list(references.values()),
)
def _dedupe(values: Iterable[str]) -> list[str]:
return list(dict.fromkeys(values))
def _recompute_affected_entries(
entries: dict[str, SkillBundleEntry],
depends_on_map: dict[str, list[str]],
affected_skill_ids: set[str],
) -> dict[str, SkillBundleEntry]:
recomputed_entries = {skill_id: entries[skill_id] for skill_id in affected_skill_ids if skill_id in entries}
changed = True
while changed:
changed = False
for skill_id in affected_skill_ids:
current_entry = recomputed_entries.get(skill_id)
if current_entry is None:
continue
merged_tool_deps: dict[str, ToolDependency] = {
dep.tool_id(): dep for dep in current_entry.direct_tools.dependencies
}
merged_tool_refs: dict[str, ToolReference] = {
ref.uuid: ref for ref in current_entry.direct_tools.references
}
merged_files = {f.asset_id: f for f in current_entry.direct_files.references}
for dep_id in depends_on_map.get(skill_id, []):
dep_entry = recomputed_entries.get(dep_id) or entries.get(dep_id)
if dep_entry is None:
continue
for dep in dep_entry.tools.dependencies:
merged_tool_deps.setdefault(dep.tool_id(), dep)
for ref in dep_entry.tools.references:
merged_tool_refs.setdefault(ref.uuid, ref)
for file_ref in dep_entry.files.references:
merged_files.setdefault(file_ref.asset_id, file_ref)
merged_tools = ToolDependencies(
dependencies=[merged_tool_deps[key] for key in sorted(merged_tool_deps.keys())],
references=[merged_tool_refs[key] for key in sorted(merged_tool_refs.keys())],
)
merged_asset_refs = AssetReferences(references=[merged_files[key] for key in sorted(merged_files.keys())])
if merged_tools != current_entry.tools or merged_asset_refs != current_entry.files:
recomputed_entries[skill_id] = current_entry.model_copy(
update={
"tools": merged_tools,
"files": merged_asset_refs,
}
)
changed = True
return recomputed_entries
def put(self, skill: Skill) -> None:
self.skills[skill.skill_id] = skill

View File

@ -1,23 +0,0 @@
from pydantic import BaseModel, ConfigDict, Field
from core.skill.entities.asset_references import AssetReferences
from core.skill.entities.tool_dependencies import ToolDependencies
class SourceInfo(BaseModel):
model_config = ConfigDict(extra="forbid")
asset_id: str = Field(description="Asset ID of the source skill file")
content_digest: str = Field(description="Hash of the original content for change detection")
class SkillBundleEntry(BaseModel):
model_config = ConfigDict(extra="forbid")
skill_id: str = Field(description="Unique identifier for this skill")
source: SourceInfo = Field(description="Source file information")
direct_tools: ToolDependencies = Field(description="Direct tool dependencies parsed from this skill only")
direct_files: AssetReferences = Field(description="Direct file references parsed from this skill only")
tools: ToolDependencies = Field(description="All tool dependencies (transitive closure)")
files: AssetReferences = Field(description="All file references (transitive closure)")
content: str = Field(description="Resolved content with all references replaced")

View File

@ -1,8 +1,11 @@
from collections.abc import Mapping
from typing import Any
from pydantic import BaseModel, ConfigDict, Field
from core.skill.entities.skill_metadata import SkillMetadata
class SkillFile(BaseModel):
model_config = ConfigDict(extra="forbid")
class SkillDocument(BaseModel):
"""Input document for skill compilation."""
@ -11,4 +14,4 @@ class SkillDocument(BaseModel):
skill_id: str = Field(description="Unique identifier, must match SkillAsset.asset_id")
content: str = Field(description="Raw content with reference placeholders")
metadata: Mapping[str, Any] = Field(default_factory=dict, description="Raw metadata dict")
metadata: SkillMetadata = Field(default_factory=SkillMetadata, description="Additional metadata for this skill")

View File

@ -1,6 +1,6 @@
from typing import Any
from pydantic import BaseModel, ConfigDict, Field
from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
from core.tools.entities.tool_entities import ToolProviderType
@ -16,7 +16,9 @@ class ToolFieldConfig(BaseModel):
class ToolConfiguration(BaseModel):
model_config = ConfigDict(extra="forbid")
fields: list[ToolFieldConfig] = Field(default_factory=list)
fields: list[ToolFieldConfig] = Field(
default_factory=list, description="List of field configurations for this tool"
)
def default_values(self) -> dict[str, Any]:
return {field.id: field.value for field in self.fields if field.value is not None}
@ -29,13 +31,35 @@ def create_tool_id(provider: str, tool_name: str) -> str:
class ToolReference(BaseModel):
model_config = ConfigDict(extra="forbid")
uuid: str
type: ToolProviderType
provider: str
tool_name: str
enabled: bool = True
credential_id: str | None = None
configuration: ToolConfiguration | None = None
uuid: str = Field(
default="",
description=(
"Unique identifier for this tool reference, used to distinguish multiple references to the same tool"
),
)
type: ToolProviderType = Field(description="The provider type of the tool")
provider: str = Field(
default="",
description="The provider name of the tool plugin. Can be inferred from placeholders during compilation.",
)
tool_name: str = Field(
default="",
description=(
"The tool name defined in the provider plugin. Can be inferred from placeholders during compilation."
),
)
enabled: bool = Field(default=True, description="Whether this tool reference is enabled")
credential_id: str | None = Field(
default=None,
description="Credential ID used to resolve credentials when invoking the tool.",
)
configuration: ToolConfiguration | None = Field(
default=None,
description=(
"Optional configuration for this tool reference, used to provide "
"additional parameters when invoking the tool"
),
)
def reference_id(self) -> str:
return f"{self.provider}.{self.tool_name}.{self.uuid}"
@ -45,14 +69,45 @@ class ToolReference(BaseModel):
class FileReference(BaseModel):
model_config = ConfigDict(extra="forbid")
model_config = ConfigDict(frozen=True)
source: str
source: str = Field(default="app")
asset_id: str
@model_validator(mode="before")
@classmethod
def normalize_input(cls, data: Any) -> Any:
if not isinstance(data, dict):
return data
if "asset_id" in data and "source" in data:
return {"source": data.get("source", "app"), "asset_id": data["asset_id"]}
# front end support
if "id" in data:
return {"source": "app", "asset_id": data["id"]}
return data
class SkillMetadata(BaseModel):
model_config = ConfigDict(extra="allow")
model_config = ConfigDict(extra="forbid")
tools: dict[str, ToolReference] = Field(default_factory=dict)
files: list[FileReference] = Field(default_factory=list)
files: set[FileReference] = Field(default_factory=set)
@field_validator("files", mode="before")
@classmethod
def coerce_files_to_set(cls, v: Any) -> set[FileReference] | Any:
if isinstance(v, list):
refs: set[FileReference] = set()
for item in v:
if isinstance(item, dict):
refs.add(FileReference.model_validate(item))
elif isinstance(item, FileReference):
refs.add(item)
return refs
if isinstance(v, dict):
refs = set()
for item in v.values():
if isinstance(item, dict):
refs.add(FileReference.model_validate(item))
return refs
return v

View File

@ -1,29 +0,0 @@
from __future__ import annotations
from collections import deque
from collections.abc import Iterable, Mapping
def invert_dependency_map(depends_on_map: Mapping[str, Iterable[str]], all_nodes: Iterable[str]) -> dict[str, set[str]]:
    """Build a reverse lookup map: target_id -> direct referrer ids.

    Every node in ``all_nodes`` gets an entry (possibly empty); dependency
    targets not listed in ``all_nodes`` are ignored.
    """
    inverted: dict[str, set[str]] = {node: set() for node in all_nodes}
    for referrer, targets in depends_on_map.items():
        for target in targets:
            if target in inverted:
                inverted[target].add(referrer)
    return inverted
def collect_reachable(start_nodes: Iterable[str], adjacency_map: Mapping[str, Iterable[str]]) -> set[str]:
    """Return all nodes reachable from start nodes in adjacency map, inclusive.

    Plain BFS; cycles are handled by the visited set, and nodes missing from
    the adjacency map are treated as having no outgoing edges.
    """
    seen: set[str] = set()
    frontier = deque(start_nodes)
    while frontier:
        node = frontier.popleft()
        if node in seen:
            continue
        seen.add(node)
        frontier.extend(
            neighbour for neighbour in adjacency_map.get(node, []) if neighbour not in seen
        )
    return seen

View File

@ -1,359 +0,0 @@
import hashlib
import re
from collections.abc import Iterable, Mapping
from dataclasses import dataclass
from typing import Any, Protocol, cast
from core.app.entities.app_asset_entities import AppAssetFileTree
from core.skill.entities.asset_references import AssetReferences
from core.skill.entities.skill_bundle import SkillBundle
from core.skill.entities.skill_bundle_entry import SkillBundleEntry, SourceInfo
from core.skill.entities.skill_document import SkillDocument
from core.skill.entities.skill_metadata import (
FileReference,
SkillMetadata,
ToolConfiguration,
ToolReference,
create_tool_id,
)
from core.skill.entities.tool_dependencies import ToolDependencies, ToolDependency
from core.skill.graph_utils import invert_dependency_map
from core.tools.entities.tool_entities import ToolProviderType
class PathResolver(Protocol):
    """Structural interface for resolving file placeholders to path text.

    ``source_id`` is the id of the document containing the reference,
    ``target_id`` the referenced asset; returns the text substituted into
    the compiled document.
    """

    def resolve(self, source_id: str, target_id: str) -> str: ...
class ToolResolver(Protocol):
    """Structural interface for rendering a tool reference into display text."""

    def resolve(self, tool_ref: ToolReference) -> str: ...
@dataclass(frozen=True)
class CompilerConfig:
    """Regex patterns used to locate skill placeholders in document text.

    Placeholder grammar: ``§[tool].[provider].[name].[uuid]§`` for tools and
    ``§[file].[source].[asset_id]§`` for files.
    """

    # Single tool placeholder; group(1) captures the tool uuid.
    # FIX: the pattern previously ended with a stray `\"` instead of `\]§`,
    # so it could never match the documented terminator (compare the group
    # pattern below, which requires the closing `]§`).
    tool_pattern: re.Pattern[str] = re.compile(r"§\[tool\]\.\[.*?\]\.\[.*?\]\.\[(.*?)\]§")
    # Evolved format: a group of tool placeholders wrapped by "[...]".
    # Example: [§[tool].[provider].[name].[uuid-a]§, §[tool].[provider].[name].[uuid-b]§]
    tool_group_pattern: re.Pattern[str] = re.compile(
        r"\[\s*§\[tool\]\.\[[^\]]+\]\.\[[^\]]+\]\.\[[^\]]+\]§(?:\s*,\s*§\[tool\]\.\[[^\]]+\]\.\[[^\]]+\]\.\[[^\]]+\]§)*\s*\]"
    )
    # Single file placeholder; group(1) captures the referenced asset id.
    # FIX: same stray `\"` terminator repaired as for tool_pattern.
    file_pattern: re.Pattern[str] = re.compile(r"§\[file\]\.\[.*?\]\.\[(.*?)\]§")
class FileTreePathResolver:
    """Resolve file placeholders to paths using an asset file tree.

    Prefers a path relative to the referencing document; falls back to the
    absolute tree path, prefixed with the configured base path when one is set.
    """

    def __init__(self, tree: AppAssetFileTree, base_path: str = ""):
        self._tree = tree
        self._base_path = base_path.rstrip("/")

    def resolve(self, source_id: str, target_id: str) -> str:
        target = self._tree.get(target_id)
        if target is None:
            return "[File not found]"
        source = self._tree.get(source_id)
        if source is not None:
            # Both ends exist in the tree: emit a relative path.
            return self._tree.relative_path(source, target)
        absolute = self._tree.get_path(target.id)
        return f"{self._base_path}/{absolute}" if self._base_path else absolute
class DefaultToolResolver:
    """Render tool references as human-readable labels.

    The two common sandbox tools get dedicated wording; everything else is
    rendered as a generic executable hint.
    """

    def resolve(self, tool_ref: ToolReference) -> str:
        label = f"{tool_ref.tool_name}_{tool_ref.uuid}"
        # Keep outputs readable for the most common built-in tools.
        if tool_ref.provider == "sandbox":
            if tool_ref.tool_name == "bash":
                return f"[Bash Command: {label}]"
            if tool_ref.tool_name == "python":
                return f"[Python Code: {label}]"
        return f"[Executable: {label} --help command]"
class SkillCompiler:
    """Compile skill documents into full bundles or incremental patches.

    A "direct" entry records only what a document itself declares (its own
    tool and file references); merging entries through ``SkillBundle.merge``
    propagates transitive dependencies. Placeholder grammar:
    ``§[tool].[provider].[name].[uuid]§`` and ``§[file].[source].[asset_id]§``.
    """

    def __init__(
        self,
        path_resolver: PathResolver | None = None,
        tool_resolver: ToolResolver | None = None,
        config: CompilerConfig | None = None,
    ):
        # When no path resolver is injected, one is built per call from the
        # file tree passed to the compile methods.
        self._path_resolver = path_resolver
        self._tool_resolver = tool_resolver or DefaultToolResolver()
        self._config = config or CompilerConfig()

    def compile_bundle(
        self,
        documents: Iterable[SkillDocument],
        file_tree: AppAssetFileTree,
        assets_id: str,
    ) -> SkillBundle:
        """Compile all provided documents into a complete persisted bundle."""
        path_resolver = self._path_resolver or FileTreePathResolver(file_tree)
        doc_map = {doc.skill_id: doc for doc in documents}
        entries, metadata_cache = self._compile_documents_direct(doc_map.values(), path_resolver)
        depends_on_map = self._build_depends_on_map(metadata_cache, set(entries.keys()))
        direct_bundle = SkillBundle(
            assets_id=assets_id,
            entries=entries,
            depends_on_map=depends_on_map,
            reference_map=self._build_reference_map(depends_on_map, set(entries.keys())),
        )
        # Merge into an empty bundle so derived state is computed through the
        # same code path as incremental patches.
        return SkillBundle(assets_id=assets_id).merge(direct_bundle)

    def compile_increment(
        self,
        base_bundle: SkillBundle,
        documents: Iterable[SkillDocument],
        file_tree: AppAssetFileTree,
        base_path: str = "",
    ) -> SkillBundle:
        """Compile changed documents against base bundle and return a merge-ready patch."""
        doc_map = {doc.skill_id: doc for doc in documents}
        if not doc_map:
            # Nothing changed: return an empty patch for this asset set.
            return SkillBundle(assets_id=base_bundle.assets_id)
        path_resolver = self._path_resolver or FileTreePathResolver(file_tree, base_path)
        entries, metadata_cache = self._compile_documents_direct(doc_map.values(), path_resolver)
        # Dependencies may target skills that only exist in the base bundle.
        known_skill_ids = set(base_bundle.entries.keys()) | set(entries.keys())
        depends_on_map = self._build_depends_on_map(metadata_cache, known_skill_ids)
        direct_patch = SkillBundle(
            assets_id=base_bundle.assets_id,
            entries=entries,
            depends_on_map=depends_on_map,
            reference_map=self._build_reference_map(depends_on_map, set(entries.keys())),
        )
        merged_bundle = base_bundle.merge(direct_patch)
        # Keep only the recompiled entries so the caller receives a minimal
        # patch rather than a copy of the whole merged bundle.
        compiled_entries = {
            skill_id: merged_bundle.entries[skill_id] for skill_id in entries if skill_id in merged_bundle.entries
        }
        return SkillBundle(
            assets_id=base_bundle.assets_id,
            schema_version=merged_bundle.schema_version,
            built_at=merged_bundle.built_at,
            entries=compiled_entries,
            depends_on_map=depends_on_map,
            reference_map=self._build_reference_map(depends_on_map, set(compiled_entries.keys())),
        )

    def compile_document(
        self,
        bundle: SkillBundle,
        document: SkillDocument,
        file_tree: AppAssetFileTree,
        base_path: str = "",
    ) -> SkillBundleEntry:
        """Compile one document with bundle context without mutating the bundle."""
        patch = self.compile_increment(bundle, [document], file_tree, base_path)
        entry = patch.get(document.skill_id)
        if entry is not None:
            return entry
        # Fallback: the patch did not yield an entry (e.g. merge dropped it);
        # compile the document standalone from its direct references only.
        path_resolver = self._path_resolver or FileTreePathResolver(file_tree, base_path)
        metadata = self._parse_metadata(document.content, document.metadata)
        return self._build_direct_entry(document, metadata, path_resolver)

    def put(
        self,
        base_bundle: SkillBundle,
        document: SkillDocument,
        file_tree: AppAssetFileTree,
        base_path: str = "",
    ) -> SkillBundle:
        """Compile one document and merge it into a newly returned bundle."""
        patch = self.compile_increment(base_bundle, [document], file_tree, base_path)
        return base_bundle.merge(patch)

    # Backward-compatible aliases for older call sites.

    def compile_all(
        self,
        documents: Iterable[SkillDocument],
        file_tree: AppAssetFileTree,
        assets_id: str,
    ) -> SkillBundle:
        """Alias for :meth:`compile_bundle`."""
        return self.compile_bundle(documents, file_tree, assets_id)

    def compile_one(
        self,
        bundle: SkillBundle,
        document: SkillDocument,
        file_tree: AppAssetFileTree,
        base_path: str = "",
    ) -> SkillBundleEntry:
        """Alias for :meth:`compile_document`."""
        return self.compile_document(bundle, document, file_tree, base_path)

    def _compile_documents_direct(
        self,
        documents: Iterable[SkillDocument],
        path_resolver: PathResolver,
    ) -> tuple[dict[str, SkillBundleEntry], dict[str, SkillMetadata]]:
        """Build direct entries for all documents, keyed by skill id.

        Returns the entries plus the parsed metadata per skill, which the
        caller reuses to build the dependency maps.
        """
        entries: dict[str, SkillBundleEntry] = {}
        metadata_cache: dict[str, SkillMetadata] = {}
        for doc in documents:
            metadata = self._parse_metadata(doc.content, doc.metadata)
            metadata_cache[doc.skill_id] = metadata
            entries[doc.skill_id] = self._build_direct_entry(doc, metadata, path_resolver)
        return entries, metadata_cache

    def _build_depends_on_map(
        self,
        metadata_cache: Mapping[str, SkillMetadata],
        known_skill_ids: set[str],
    ) -> dict[str, list[str]]:
        """Map each skill to the known skills its file references point at.

        Order of first occurrence is preserved; file references to assets that
        are not known skills are ignored.
        """
        depends_on_map: dict[str, list[str]] = {}
        for skill_id, metadata in metadata_cache.items():
            deps: list[str] = []
            seen: set[str] = set()
            for file_ref in metadata.files:
                dep_id = file_ref.asset_id
                if dep_id in known_skill_ids and dep_id not in seen:
                    seen.add(dep_id)
                    deps.append(dep_id)
            depends_on_map[skill_id] = deps
        return depends_on_map

    def _build_reference_map(
        self,
        depends_on_map: Mapping[str, list[str]],
        all_skill_ids: set[str],
    ) -> dict[str, list[str]]:
        """Invert the dependency map: skill id -> sorted list of referrers."""
        return {
            skill_id: sorted(referrers)
            for skill_id, referrers in invert_dependency_map(depends_on_map, all_skill_ids).items()
        }

    def _build_direct_entry(
        self,
        doc: SkillDocument,
        metadata: SkillMetadata,
        path_resolver: PathResolver,
    ) -> SkillBundleEntry:
        """Build a bundle entry from a single document's own references."""
        direct_tool_deps: dict[str, ToolDependency] = {}
        direct_tool_refs: dict[str, ToolReference] = {}
        for tool_ref in metadata.tools.values():
            # Several placeholders may name the same provider/tool pair: keep
            # one dependency per tool id, but every concrete reference.
            direct_tool_deps.setdefault(
                tool_ref.tool_id(),
                ToolDependency(
                    type=tool_ref.type,
                    provider=tool_ref.provider,
                    tool_name=tool_ref.tool_name,
                    enabled=tool_ref.enabled,
                ),
            )
            direct_tool_refs[tool_ref.uuid] = tool_ref
        direct_files: dict[str, FileReference] = {f.asset_id: f for f in metadata.files}
        resolved_content = self._resolve_content(doc.content, metadata, path_resolver, doc.skill_id)
        direct_tools = ToolDependencies(
            dependencies=list(direct_tool_deps.values()),
            references=list(direct_tool_refs.values()),
        )
        direct_file_refs = AssetReferences(references=list(direct_files.values()))
        return SkillBundleEntry(
            skill_id=doc.skill_id,
            source=SourceInfo(
                asset_id=doc.skill_id,
                # Digest of the raw (unresolved) content for change detection.
                content_digest=hashlib.sha256(doc.content.encode("utf-8")).hexdigest(),
            ),
            direct_tools=direct_tools,
            direct_files=direct_file_refs,
            # Transitive fields start equal to the direct ones; merging into a
            # bundle adds dependencies inherited from referenced skills.
            tools=ToolDependencies(
                dependencies=list(direct_tool_deps.values()),
                references=list(direct_tool_refs.values()),
            ),
            files=AssetReferences(references=list(direct_files.values())),
            content=resolved_content,
        )

    def _resolve_content(
        self,
        content: str,
        metadata: SkillMetadata,
        path_resolver: PathResolver,
        current_id: str,
    ) -> str:
        """Substitute file and tool placeholders in *content* with rendered text."""

        def replace_file(match: re.Match[str]) -> str:
            target_id = match.group(1)
            try:
                return path_resolver.resolve(current_id, target_id)
            except Exception:
                # Best effort: leave the placeholder untouched rather than
                # failing the whole compilation.
                return match.group(0)

        def replace_tool(match: re.Match[str]) -> str:
            tool_id = match.group(1)
            tool_ref: ToolReference | None = metadata.tools.get(tool_id)
            if not tool_ref:
                return f"[Tool not found or disabled: {tool_id}]"
            if not tool_ref.enabled:
                return ""
            return self._tool_resolver.resolve(tool_ref)

        def replace_tool_group(match: re.Match[str]) -> str:
            group_text = match.group(0)
            enabled_renders: list[str] = []
            for tool_match in self._config.tool_pattern.finditer(group_text):
                tool_id = tool_match.group(1)
                tool_ref: ToolReference | None = metadata.tools.get(tool_id)
                if not tool_ref:
                    enabled_renders.append(f"[Tool not found or disabled: {tool_id}]")
                    continue
                if not tool_ref.enabled:
                    # Disabled members are silently dropped from the group.
                    continue
                enabled_renders.append(self._tool_resolver.resolve(tool_ref))
            if not enabled_renders:
                return ""
            return "[" + ", ".join(enabled_renders) + "]"

        # Order matters: groups must be rewritten before the single-tool
        # pattern consumes their members.
        content = self._config.file_pattern.sub(replace_file, content)
        content = self._config.tool_group_pattern.sub(replace_tool_group, content)
        content = self._config.tool_pattern.sub(replace_tool, content)
        return content

    def _parse_metadata(
        self,
        content: str,
        raw_metadata: Mapping[str, Any],
        disabled_tools: list[ToolDependency] | None = None,
    ) -> SkillMetadata:
        """Extract the tool and file references declared by a document.

        A tool placeholder is only kept when backing metadata exists under its
        uuid in ``raw_metadata["tools"]`` and the tool is not globally disabled.
        """
        tools_raw = dict(raw_metadata.get("tools", {}))
        tools: dict[str, ToolReference] = {}
        disabled_tools_set = {tool.tool_id() for tool in disabled_tools or []}
        # FIX: this pattern previously ended with a stray `\"` instead of
        # `\]§`, so it could never match the `...]§` placeholder terminator.
        tool_iter = re.finditer(r"§\[tool\]\.\[([^\]]+)\]\.\[([^\]]+)\]\.\[([^\]]+)\]§", content)
        for match in tool_iter:
            provider, name, uuid = match.group(1), match.group(2), match.group(3)
            if uuid not in tools_raw:
                # Placeholder without backing metadata: nothing to resolve.
                continue
            meta = tools_raw[uuid]
            meta_dict = cast(dict[str, Any], meta)
            provider_type = cast(str, meta_dict.get("type"))
            if create_tool_id(provider, name) in disabled_tools_set:
                continue
            tools[uuid] = ToolReference(
                uuid=uuid,
                type=ToolProviderType.value_of(provider_type),
                provider=provider,
                tool_name=name,
                enabled=cast(bool, meta_dict.get("enabled", True)),
                credential_id=cast(str | None, meta_dict.get("credential_id")),
                configuration=ToolConfiguration.model_validate(meta_dict.get("configuration", {}))
                if meta_dict.get("configuration")
                else None,
            )
        parsed_files: list[FileReference] = []
        # FIX: same stray `\"` terminator repaired as for the tool pattern.
        file_iter = re.finditer(r"§\[file\]\.\[([^\]]+)\]\.\[([^\]]+)\]§", content)
        for match in file_iter:
            source, asset_id = match.group(1), match.group(2)
            parsed_files.append(FileReference(source=source, asset_id=asset_id))
        return SkillMetadata(tools=tools, files=parsed_files)

View File

@ -17,9 +17,7 @@ from sqlalchemy import select
from core.agent.entities import AgentEntity, AgentLog, AgentResult, AgentToolEntity, ExecutionContext
from core.agent.patterns import StrategyFactory
from core.app.entities.app_asset_entities import AppAssetFileTree
from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity
from core.app_assets.constants import AppAssetsAttrs
from core.file import File, FileTransferMethod, FileType, file_manager
from core.helper.code_executor import CodeExecutor, CodeLanguage
from core.llm_generator.output_parser.errors import OutputParserError
@ -66,11 +64,11 @@ from core.rag.entities.citation_metadata import RetrievalSourceMetadata
from core.sandbox import Sandbox
from core.sandbox.bash.session import MAX_OUTPUT_FILE_SIZE, MAX_OUTPUT_FILES, SandboxBashSession
from core.sandbox.entities.config import AppAssets
from core.skill.assembler import SkillDocumentAssembler
from core.skill.constants import SkillAttrs
from core.skill.entities.skill_bundle import SkillBundle
from core.skill.entities.skill_document import SkillDocument
from core.skill.entities.tool_dependencies import ToolDependencies, ToolDependency
from core.skill.skill_compiler import SkillCompiler
from core.tools.__base.tool import Tool
from core.tools.signature import sign_tool_file, sign_upload_file
from core.tools.tool_file_manager import ToolFileManager
@ -1621,10 +1619,8 @@ class LLMNode(Node[LLMNodeData]):
prompt_messages: list[PromptMessage] = []
bundle: SkillBundle | None = None
file_tree: AppAssetFileTree | None = None
if sandbox:
bundle = sandbox.attrs.get(SkillAttrs.BUNDLE)
file_tree = sandbox.attrs.get(AppAssetsAttrs.FILE_TREE)
for message in messages:
if message.edition_type == "jinja2":
@ -1634,13 +1630,11 @@ class LLMNode(Node[LLMNodeData]):
variable_pool=variable_pool,
)
if bundle is not None and file_tree is not None:
skill_entry = SkillCompiler().compile_document(
bundle=bundle,
if bundle is not None:
skill_entry = SkillDocumentAssembler(bundle).assemble_document(
document=SkillDocument(
skill_id="anonymous", content=result_text, metadata=message.metadata or {}
),
file_tree=file_tree,
base_path=AppAssets.PATH,
)
result_text = skill_entry.content
@ -1675,13 +1669,11 @@ class LLMNode(Node[LLMNodeData]):
plain_text = segment_group.text
if plain_text and bundle is not None and file_tree is not None:
skill_entry = SkillCompiler().compile_document(
bundle=bundle,
if plain_text and bundle is not None:
skill_entry = SkillDocumentAssembler(bundle).assemble_document(
document=SkillDocument(
skill_id="anonymous", content=plain_text, metadata=message.metadata or {}
),
file_tree=file_tree,
base_path=AppAssets.PATH,
)
plain_text = skill_entry.content
@ -2036,14 +2028,11 @@ class LLMNode(Node[LLMNodeData]):
raise LLMNodeError("Sandbox not found")
bundle = sandbox.attrs.get(SkillAttrs.BUNDLE)
file_tree = sandbox.attrs.get(AppAssetsAttrs.FILE_TREE)
tool_deps_list: list[ToolDependencies] = []
for prompt in self.node_data.prompt_template:
if isinstance(prompt, LLMNodeChatModelMessage):
skill_entry = SkillCompiler().compile_document(
bundle=bundle,
skill_entry = SkillDocumentAssembler(bundle).assemble_document(
document=SkillDocument(skill_id="anonymous", content=prompt.text, metadata=prompt.metadata or {}),
file_tree=file_tree,
base_path=AppAssets.PATH,
)
tool_deps_list.append(skill_entry.tools)