refactor(skill): transition from artifact set to bundle structure

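- Replaced SkillArtifactSet with SkillBundle (and SkillArtifact with SkillBundleEntry) across various components, enhancing the organization of skill dependencies and references; the bundle's per-skill map is now named `entries` instead of `items`.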
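- Updated SkillManager methods to load and save bundles instead of artifacts (`load_bundle` / `save_bundle`), improving clarity in asset management; `load_bundle` now returns `None` when the stored bundle is missing or invalid instead of returning an empty set.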
- Refactored SkillCompiler to compile skills into bundles, streamlining the dependency resolution process.
- Adjusted DifyCli and SandboxBashSession to utilize ToolDependencies (renamed from ToolArtifact), ensuring consistent handling of tool references.
- Renamed FilesArtifact to AssetReferences for better management of file dependencies within skill bundles.
This commit is contained in:
Harry
2026-01-22 20:25:28 +08:00
parent 17404e0956
commit a43efef9f0
17 changed files with 158 additions and 194 deletions

View File

@ -1,11 +1,11 @@
from .constants import SkillAttrs
from .entities import ToolArtifact, ToolDependency, ToolReference
from .entities import ToolDependencies, ToolDependency, ToolReference
from .skill_manager import SkillManager
__all__ = [
"SkillAttrs",
"SkillManager",
"ToolArtifact",
"ToolDependencies",
"ToolDependency",
"ToolReference",
]

View File

@ -1,7 +1,6 @@
from core.skill.entities.skill_artifact_set import SkillArtifactSet
from core.skill.entities.skill_bundle import SkillBundle
from libs.attr_map import AttrKey
class SkillAttrs:
# Skill artifact set
ARTIFACT_SET = AttrKey("skill_artifact_set", SkillArtifactSet)
BUNDLE = AttrKey("skill_bundle", SkillBundle)

View File

@ -1,6 +1,6 @@
from .file_artifact import FilesArtifact
from .skill_artifact import SkillArtifact, SkillSourceInfo
from .skill_artifact_set import SkillArtifactSet
from .asset_references import AssetReferences
from .skill_bundle import SkillBundle
from .skill_bundle_entry import SkillBundleEntry, SourceInfo
from .skill_document import SkillDocument
from .skill_metadata import (
FileReference,
@ -9,18 +9,18 @@ from .skill_metadata import (
ToolFieldConfig,
ToolReference,
)
from .tool_artifact import ToolArtifact, ToolDependency
from .tool_dependencies import ToolDependencies, ToolDependency
__all__ = [
"AssetReferences",
"FileReference",
"FilesArtifact",
"SkillArtifact",
"SkillArtifactSet",
"SkillBundle",
"SkillBundleEntry",
"SkillDocument",
"SkillMetadata",
"SkillSourceInfo",
"ToolArtifact",
"SourceInfo",
"ToolConfiguration",
"ToolDependencies",
"ToolDependency",
"ToolFieldConfig",
"ToolReference",

View File

@ -3,11 +3,7 @@ from pydantic import BaseModel, ConfigDict, Field
from core.skill.entities.skill_metadata import FileReference
class FilesArtifact(BaseModel):
"""
File artifact - contains all file references (transitive closure)
"""
class AssetReferences(BaseModel):
model_config = ConfigDict(extra="forbid")
references: list[FileReference] = Field(default_factory=list, description="All file references")
references: list[FileReference] = Field(default_factory=list)

View File

@ -1,30 +0,0 @@
from pydantic import BaseModel, ConfigDict, Field
from core.skill.entities.file_artifact import FilesArtifact
from core.skill.entities.tool_artifact import ToolArtifact
class SkillSourceInfo(BaseModel):
"""Source file information for change detection."""
model_config = ConfigDict(extra="forbid")
asset_id: str = Field(description="Asset ID of the source skill file")
content_digest: str = Field(description="Hash of the original content for change detection")
class SkillArtifact(BaseModel):
"""
Compiled artifact for a single skill.
Contains the transitive closure of all tool and file dependencies,
plus the resolved content with all references replaced.
"""
model_config = ConfigDict(extra="forbid")
skill_id: str = Field(description="Unique identifier for this skill")
source: SkillSourceInfo = Field(description="Source file information")
tools: ToolArtifact = Field(description="All tool dependencies (transitive closure)")
files: FilesArtifact = Field(description="All file references (transitive closure)")
content: str = Field(description="Resolved content with all references replaced")

View File

@ -3,27 +3,19 @@ from datetime import datetime
from pydantic import BaseModel, ConfigDict, Field
from core.skill.entities.skill_artifact import SkillArtifact
from core.skill.entities.skill_bundle_entry import SkillBundleEntry
from core.skill.entities.skill_metadata import ToolReference
from core.skill.entities.tool_artifact import ToolArtifact, ToolDependency
from core.skill.entities.tool_dependencies import ToolDependencies, ToolDependency
class SkillArtifactSet(BaseModel):
"""
Compiled index for an entire skill project.
- Corresponds to a single JSON file in S3
- Load once, query multiple times
- All persistence operations handled by SkillManager
"""
class SkillBundle(BaseModel):
model_config = ConfigDict(extra="forbid")
assets_id: str = Field(description="Assets ID this artifact set belongs to")
assets_id: str = Field(description="Assets ID this bundle belongs to")
schema_version: int = Field(default=1, description="Schema version for forward compatibility")
built_at: datetime | None = Field(default=None, description="Build timestamp")
items: dict[str, SkillArtifact] = Field(default_factory=dict, description="skill_id -> SkillArtifact")
entries: dict[str, SkillBundleEntry] = Field(default_factory=dict, description="skill_id -> SkillBundleEntry")
dependency_graph: dict[str, list[str]] = Field(
default_factory=dict,
@ -35,14 +27,14 @@ class SkillArtifactSet(BaseModel):
description="skill_id -> list of skill_ids that depend on it",
)
def get(self, skill_id: str) -> SkillArtifact | None:
return self.items.get(skill_id)
def get(self, skill_id: str) -> SkillBundleEntry | None:
return self.entries.get(skill_id)
def upsert(self, artifact: SkillArtifact) -> None:
self.items[artifact.skill_id] = artifact
def upsert(self, entry: SkillBundleEntry) -> None:
self.entries[entry.skill_id] = entry
def remove(self, skill_id: str) -> None:
self.items.pop(skill_id, None)
self.entries.pop(skill_id, None)
self.dependency_graph.pop(skill_id, None)
self.reverse_graph.pop(skill_id, None)
for deps in self.reverse_graph.values():
@ -66,13 +58,13 @@ class SkillArtifactSet(BaseModel):
queue.append(dependent)
return result
def subset(self, skill_ids: Iterable[str]) -> "SkillArtifactSet":
def subset(self, skill_ids: Iterable[str]) -> "SkillBundle":
skill_id_set = set(skill_ids)
return SkillArtifactSet(
return SkillBundle(
assets_id=self.assets_id,
schema_version=self.schema_version,
built_at=self.built_at,
items={sid: self.items[sid] for sid in skill_id_set if sid in self.items},
entries={sid: self.entries[sid] for sid in skill_id_set if sid in self.entries},
dependency_graph={
sid: [dep for dep in deps if dep in skill_id_set]
for sid, deps in self.dependency_graph.items()
@ -85,21 +77,21 @@ class SkillArtifactSet(BaseModel):
},
)
def get_tool_artifact(self) -> ToolArtifact:
def get_tool_dependencies(self) -> ToolDependencies:
dependencies: dict[str, ToolDependency] = {}
references: dict[str, ToolReference] = {}
for artifact in self.items.values():
for dep in artifact.tools.dependencies:
for entry in self.entries.values():
for dep in entry.tools.dependencies:
key = f"{dep.provider}.{dep.tool_name}"
if key not in dependencies:
dependencies[key] = dep
for ref in artifact.tools.references:
for ref in entry.tools.references:
if ref.uuid not in references:
references[ref.uuid] = ref
return ToolArtifact(
return ToolDependencies(
dependencies=list(dependencies.values()),
references=list(references.values()),
)

View File

@ -0,0 +1,21 @@
from pydantic import BaseModel, ConfigDict, Field
from core.skill.entities.asset_references import AssetReferences
from core.skill.entities.tool_dependencies import ToolDependencies
# Source-file information stored on each bundle entry: which asset the skill
# was read from, plus a hash of its original content for change detection.
class SourceInfo(BaseModel):
# extra="forbid": unknown fields are rejected during validation rather than ignored.
model_config = ConfigDict(extra="forbid")
# ID of the asset that holds the skill's source file.
asset_id: str = Field(description="Asset ID of the source skill file")
# Hash of the original (pre-resolution) content; the hash algorithm is not fixed by this model.
content_digest: str = Field(description="Hash of the original content for change detection")
# A single skill's record inside a bundle: its identifier, source information,
# tool and file dependencies, and its resolved content.
class SkillBundleEntry(BaseModel):
# extra="forbid": unknown fields are rejected during validation rather than ignored.
model_config = ConfigDict(extra="forbid")
# Unique identifier of the skill this entry describes.
skill_id: str = Field(description="Unique identifier for this skill")
# Where the skill came from and a digest of its original content.
source: SourceInfo = Field(description="Source file information")
# Tool dependencies and tool references, including those reached transitively.
tools: ToolDependencies = Field(description="All tool dependencies (transitive closure)")
# File references, including those reached transitively.
files: AssetReferences = Field(description="All file references (transitive closure)")
# Skill content after reference placeholders have been replaced.
content: str = Field(description="Resolved content with all references replaced")

View File

@ -12,7 +12,7 @@ class ToolDependency(BaseModel):
tool_name: str
class ToolArtifact(BaseModel):
class ToolDependencies(BaseModel):
model_config = ConfigDict(extra="forbid")
dependencies: list[ToolDependency] = Field(default_factory=list)
@ -21,9 +21,9 @@ class ToolArtifact(BaseModel):
def is_empty(self) -> bool:
return not self.dependencies and not self.references
def filter(self, tools: list[tuple[str, str]]) -> "ToolArtifact":
def filter(self, tools: list[tuple[str, str]]) -> "ToolDependencies":
tool_names = {f"{provider}.{tool_name}" for provider, tool_name in tools}
return ToolArtifact(
return ToolDependencies(
dependencies=[
dependency
for dependency in self.dependencies
@ -36,7 +36,7 @@ class ToolArtifact(BaseModel):
],
)
def merge(self, other: "ToolArtifact") -> "ToolArtifact":
def merge(self, other: "ToolDependencies") -> "ToolDependencies":
dep_map: dict[str, ToolDependency] = {}
for dep in self.dependencies:
key = f"{dep.provider}.{dep.tool_name}"
@ -53,7 +53,7 @@ class ToolArtifact(BaseModel):
if ref.uuid not in ref_map:
ref_map[ref.uuid] = ref
return ToolArtifact(
return ToolDependencies(
dependencies=list(dep_map.values()),
references=list(ref_map.values()),
)
)

View File

@ -6,9 +6,9 @@ from datetime import UTC, datetime
from typing import Any
from core.app.entities.app_asset_entities import AppAssetFileTree
from core.skill.entities.file_artifact import FilesArtifact
from core.skill.entities.skill_artifact import SkillArtifact, SkillSourceInfo
from core.skill.entities.skill_artifact_set import SkillArtifactSet
from core.skill.entities.asset_references import AssetReferences
from core.skill.entities.skill_bundle import SkillBundle
from core.skill.entities.skill_bundle_entry import SkillBundleEntry, SourceInfo
from core.skill.entities.skill_document import SkillDocument
from core.skill.entities.skill_metadata import (
FileReference,
@ -16,7 +16,7 @@ from core.skill.entities.skill_metadata import (
ToolConfiguration,
ToolReference,
)
from core.skill.entities.tool_artifact import ToolArtifact, ToolDependency
from core.skill.entities.tool_dependencies import ToolDependencies, ToolDependency
from core.tools.entities.tool_entities import ToolProviderType
logger = logging.getLogger(__name__)
@ -26,17 +26,6 @@ FILE_REFERENCE_PATTERN = re.compile(r"§\[file\]\.\[([^\]]+)\]\.\[([^\]]+)\]§")
class SkillCompiler:
"""
Stateless skill compiler.
Responsibilities:
- Parse raw metadata dict into SkillMetadata
- Parse direct dependencies from skill content
- Compute transitive closure based on existing artifact set
- Resolve content by replacing references
- Generate SkillArtifact
"""
def _parse_metadata(self, content: str, raw_metadata: Mapping[str, Any]) -> SkillMetadata:
tools_raw: dict[str, Any] = dict(raw_metadata.get("tools", {}))
tools: dict[str, ToolReference] = {}
@ -76,8 +65,8 @@ class SkillCompiler:
documents: list[SkillDocument],
file_tree: AppAssetFileTree,
assets_id: str,
) -> SkillArtifactSet:
artifact_set = SkillArtifactSet(
) -> SkillBundle:
bundle = SkillBundle(
assets_id=assets_id,
built_at=datetime.now(UTC),
)
@ -89,26 +78,26 @@ class SkillCompiler:
metadata = self._parse_metadata(doc.content, doc.metadata)
parsed_metadata[doc.skill_id] = metadata
direct_skill_refs = self._extract_skill_refs(metadata, doc_map)
artifact_set.dependency_graph[doc.skill_id] = list(direct_skill_refs)
bundle.dependency_graph[doc.skill_id] = list(direct_skill_refs)
for ref_id in direct_skill_refs:
if ref_id not in artifact_set.reverse_graph:
artifact_set.reverse_graph[ref_id] = []
artifact_set.reverse_graph[ref_id].append(doc.skill_id)
if ref_id not in bundle.reverse_graph:
bundle.reverse_graph[ref_id] = []
bundle.reverse_graph[ref_id].append(doc.skill_id)
for doc in documents:
metadata = parsed_metadata[doc.skill_id]
artifact = self._compile_single(doc, metadata, artifact_set, parsed_metadata, file_tree)
artifact_set.upsert(artifact)
entry = self._compile_single(doc, metadata, bundle, parsed_metadata, file_tree)
bundle.upsert(entry)
return artifact_set
return bundle
def compile_one(
self,
artifact_set: SkillArtifactSet,
bundle: SkillBundle,
document: SkillDocument,
file_tree: AppAssetFileTree,
all_documents: dict[str, SkillDocument] | None = None,
) -> SkillArtifact:
) -> SkillBundleEntry:
doc_map = all_documents or {}
if document.skill_id not in doc_map:
doc_map[document.skill_id] = document
@ -119,25 +108,25 @@ class SkillCompiler:
metadata = parsed_metadata[document.skill_id]
direct_skill_refs = self._extract_skill_refs(metadata, doc_map)
artifact_set.dependency_graph[document.skill_id] = list(direct_skill_refs)
bundle.dependency_graph[document.skill_id] = list(direct_skill_refs)
for ref_id in direct_skill_refs:
if ref_id not in artifact_set.reverse_graph:
artifact_set.reverse_graph[ref_id] = []
if document.skill_id not in artifact_set.reverse_graph[ref_id]:
artifact_set.reverse_graph[ref_id].append(document.skill_id)
if ref_id not in bundle.reverse_graph:
bundle.reverse_graph[ref_id] = []
if document.skill_id not in bundle.reverse_graph[ref_id]:
bundle.reverse_graph[ref_id].append(document.skill_id)
return self._compile_single(document, metadata, artifact_set, parsed_metadata, file_tree)
return self._compile_single(document, metadata, bundle, parsed_metadata, file_tree)
def _compile_single(
self,
document: SkillDocument,
metadata: SkillMetadata,
artifact_set: SkillArtifactSet,
bundle: SkillBundle,
parsed_metadata: dict[str, SkillMetadata],
file_tree: AppAssetFileTree,
) -> SkillArtifact:
) -> SkillBundleEntry:
all_tools, all_files = self._compute_transitive_closure(
document.skill_id, artifact_set, parsed_metadata
document.skill_id, bundle, parsed_metadata
)
current_node = file_tree.get(document.skill_id)
@ -148,17 +137,17 @@ class SkillCompiler:
content_digest = hashlib.sha256(document.content.encode("utf-8")).hexdigest()
return SkillArtifact(
return SkillBundleEntry(
skill_id=document.skill_id,
source=SkillSourceInfo(
source=SourceInfo(
asset_id=document.skill_id,
content_digest=content_digest,
),
tools=ToolArtifact(
tools=ToolDependencies(
dependencies=list(all_tools.values()),
references=list(metadata.tools.values()),
),
files=FilesArtifact(
files=AssetReferences(
references=list(all_files.values()),
),
content=resolved_content,
@ -178,7 +167,7 @@ class SkillCompiler:
def _compute_transitive_closure(
self,
skill_id: str,
artifact_set: SkillArtifactSet,
bundle: SkillBundle,
parsed_metadata: dict[str, SkillMetadata],
) -> tuple[dict[str, ToolDependency], dict[str, FileReference]]:
all_tools: dict[str, ToolDependency] = {}
@ -195,13 +184,13 @@ class SkillCompiler:
metadata = parsed_metadata.get(current_id)
if metadata is None:
existing_artifact = artifact_set.get(current_id)
if existing_artifact:
for dep in existing_artifact.tools.dependencies:
existing_entry = bundle.get(current_id)
if existing_entry:
for dep in existing_entry.tools.dependencies:
key = f"{dep.provider}.{dep.tool_name}"
if key not in all_tools:
all_tools[key] = dep
for file_ref in existing_artifact.files.references:
for file_ref in existing_entry.files.references:
if file_ref.asset_id not in all_files:
all_files[file_ref.asset_id] = file_ref
continue
@ -219,7 +208,7 @@ class SkillCompiler:
if file_ref.asset_id not in all_files:
all_files[file_ref.asset_id] = file_ref
for dep_id in artifact_set.dependency_graph.get(current_id, []):
for dep_id in bundle.dependency_graph.get(current_id, []):
if dep_id not in visited:
queue.append(dep_id)

View File

@ -1,7 +1,7 @@
import logging
from core.app_assets.paths import AssetPaths
from core.skill.entities.skill_artifact_set import SkillArtifactSet
from core.skill.entities.skill_bundle import SkillBundle
from extensions.ext_storage import storage
logger = logging.getLogger(__name__)
@ -9,25 +9,25 @@ logger = logging.getLogger(__name__)
class SkillManager:
@staticmethod
def load_artifact(
def load_bundle(
tenant_id: str,
app_id: str,
assets_id: str,
) -> SkillArtifactSet:
) -> SkillBundle | None:
key = AssetPaths.build_skill_artifact_set(tenant_id, app_id, assets_id)
try:
data = storage.load_once(key)
return SkillArtifactSet.model_validate_json(data)
return SkillBundle.model_validate_json(data)
except Exception:
logger.info("Skill artifact set missing or invalid for assets_id=%s", assets_id)
return SkillArtifactSet(assets_id=assets_id)
logger.info("Skill bundle missing or invalid for assets_id=%s", assets_id)
return None
@staticmethod
def save_artifact(
def save_bundle(
tenant_id: str,
app_id: str,
assets_id: str,
artifact_set: SkillArtifactSet,
bundle: SkillBundle,
) -> None:
key = AssetPaths.build_skill_artifact_set(tenant_id, app_id, assets_id)
storage.save(key, artifact_set.model_dump_json(indent=2).encode("utf-8"))
storage.save(key, bundle.model_dump_json(indent=2).encode("utf-8"))