refactor: redesign skill compilation and document assembly process

This commit is contained in:
Harry
2026-03-06 15:02:44 +08:00
parent 52dd4b82e6
commit 53f76a20c2
16 changed files with 569 additions and 1423 deletions

View File

@ -1,6 +1,4 @@
from .asset_references import AssetReferences
from .skill_bundle import SkillBundle
from .skill_bundle_entry import SkillBundleEntry, SourceInfo
from .skill_bundle import Skill, SkillBundle, SkillDependance
from .skill_document import SkillDocument
from .skill_metadata import (
FileReference,
@ -13,13 +11,12 @@ from .tool_access_policy import ToolAccessPolicy, ToolDescription, ToolInvocatio
from .tool_dependencies import ToolDependencies, ToolDependency
__all__ = [
"AssetReferences",
"FileReference",
"Skill",
"SkillBundle",
"SkillBundleEntry",
"SkillDependance",
"SkillDocument",
"SkillMetadata",
"SourceInfo",
"ToolAccessPolicy",
"ToolConfiguration",
"ToolDependencies",

View File

@ -1,9 +0,0 @@
from pydantic import BaseModel, ConfigDict, Field
from core.skill.entities.skill_metadata import FileReference
# Container model that wraps a list of FileReference items.
class AssetReferences(BaseModel):
    # Unknown keys are rejected on input.
    model_config = ConfigDict(extra="forbid")

    # Starts out as an empty list when no value is supplied.
    references: list[FileReference] = Field(default_factory=list)

View File

@ -1,196 +1,94 @@
from collections.abc import Iterable
from datetime import datetime
from typing import TYPE_CHECKING
from pydantic import BaseModel, ConfigDict, Field
from core.skill.entities.asset_references import AssetReferences
from core.skill.entities.skill_bundle_entry import SkillBundleEntry
from core.skill.entities.skill_metadata import ToolReference
from core.app.entities.app_asset_entities import AppAssetFileTree
from core.skill.entities.skill_metadata import FileReference
from core.skill.entities.tool_dependencies import ToolDependencies, ToolDependency
from core.skill.graph_utils import collect_reachable, invert_dependency_map
if TYPE_CHECKING:
from core.skill.entities.skill_metadata import SkillMetadata
# Tool and file dependencies of a skill; two instances can be combined with ``|``.
class SkillDependance(BaseModel):
    model_config = ConfigDict(extra="forbid")

    tools: ToolDependencies = Field(description="Direct tool dependencies parsed from this skill only")
    files: set[FileReference] = Field(
        default_factory=set,
        description="Direct file references parsed from this skill only",
    )

    def __or__(self, other: "SkillDependance") -> "SkillDependance":
        """Return a new instance with both sides' tools merged and their files unioned."""
        combined_tools = self.tools.merge(other.tools)
        combined_files = self.files | other.files
        return SkillDependance(tools=combined_tools, files=combined_files)

    @staticmethod
    def from_metadata(metadata: "SkillMetadata") -> "SkillDependance":
        """Build the dependency model from the tools and files listed in ``metadata``.

        Tool dependencies are keyed by ``tool_id()`` and tool references by
        ``uuid``; for each key the first reference encountered is kept. Both
        lists are emitted in sorted-key order. ``metadata.files`` is passed
        through as the file set.
        """
        # Imported here rather than at module level, as in the original.
        from core.skill.entities.skill_metadata import ToolReference

        deps_by_tool_id: dict[str, ToolDependency] = {}
        refs_by_uuid: dict[str, ToolReference] = {}

        for tool_ref in metadata.tools.values():
            tool_key = tool_ref.tool_id()
            candidate = ToolDependency(
                type=tool_ref.type,
                provider=tool_ref.provider,
                tool_name=tool_ref.tool_name,
                enabled=tool_ref.enabled,
            )
            # First writer wins for both maps.
            deps_by_tool_id.setdefault(tool_key, candidate)
            refs_by_uuid.setdefault(tool_ref.uuid, tool_ref)

        ordered_deps = [deps_by_tool_id[tool_key] for tool_key in sorted(deps_by_tool_id)]
        ordered_refs = [refs_by_uuid[ref_uuid] for ref_uuid in sorted(refs_by_uuid)]
        return SkillDependance(
            tools=ToolDependencies(dependencies=ordered_deps, references=ordered_refs),
            files=metadata.files,
        )
# One compiled skill: its id, its direct and full dependency sets, and its resolved content.
class Skill(BaseModel):
    model_config = ConfigDict(extra="forbid")

    # NOTE(review): the description below says "same with skill_id", which refers
    # to the field itself; presumably another identifier was meant — confirm.
    skill_id: str = Field(description="Unique identifier for this skill, same with skill_id")
    direct_dependance: SkillDependance = Field(description="Direct dependencies parsed from this skill only")
    dependance: SkillDependance = Field(description="All dependencies including transitive closure")
    content: str = Field(description="Resolved content with all references replaced")

    @property
    def tools(self) -> ToolDependencies:
        """Shortcut for the tools of the full ``dependance`` set."""
        full_dependencies = self.dependance
        return full_dependencies.tools
# NOTE(review): this span appears to show removed and added lines of the same
# class together, without +/- markers. `entries` is declared below both as a
# field and as a property, which presumably belong to different revisions —
# confirm against the real file before relying on this listing.
class SkillBundle(BaseModel):
"""Persisted skill compilation snapshot with graph metadata and merge support."""
# Unknown keys are rejected on input.
model_config = ConfigDict(extra="forbid")
asset_tree: AppAssetFileTree = Field(description="Asset tree for this bundle")
assets_id: str = Field(description="Assets ID this bundle belongs to")
schema_version: int = Field(default=2, description="Schema version for forward compatibility")
built_at: datetime | None = Field(default=None, description="Build timestamp")
entries: dict[str, SkillBundleEntry] = Field(default_factory=dict, description="skill_id -> SkillBundleEntry")
# Keyed by skill_id (see `put`, which stores under `skill.skill_id`).
skills: dict[str, Skill] = Field(default_factory=dict)
# Forward edges of the skill graph.
depends_on_map: dict[str, list[str]] = Field(
default_factory=dict,
description="skill_id -> list of skill_ids it depends on",
)
# Read-only alias that returns the `skills` mapping itself (not a copy).
@property
def entries(self) -> dict[str, Skill]:
return self.skills
# Reverse edges of the skill graph.
reference_map: dict[str, list[str]] = Field(
default_factory=dict,
description="skill_id -> list of skill_ids that depend on it",
)
def get(self, skill_id: str) -> SkillBundleEntry | None:
    """Look up the entry stored under ``skill_id``; ``None`` when there is none."""
    entry_table = self.entries
    return entry_table.get(skill_id)
def upsert(self, entry: SkillBundleEntry) -> None:
    """Store ``entry`` under its ``skill_id``, replacing any entry already there."""
    key = entry.skill_id
    self.entries[key] = entry
def remove(self, skill_id: str) -> None:
    """Drop ``skill_id`` from the bundle and from both edge maps.

    The id is removed as a key from ``entries``, ``depends_on_map`` and
    ``reference_map`` (missing keys are ignored), and then taken out of every
    remaining edge list that still mentions it.
    """
    for keyed_table in (self.entries, self.depends_on_map, self.reference_map):
        keyed_table.pop(skill_id, None)

    # Same order as before: reverse edges first, then forward edges.
    for edge_map in (self.reference_map, self.depends_on_map):
        for linked_ids in edge_map.values():
            if skill_id in linked_ids:
                linked_ids.remove(skill_id)
def referenced_skill_ids(self, skill_id: str) -> set[str]:
    """Return the ids listed for ``skill_id`` in ``depends_on_map`` as a set (empty when absent)."""
    return {*self.depends_on_map.get(skill_id, ())}
def recompile_group_ids(self, skill_id: str) -> set[str]:
    """Return what ``collect_reachable`` yields from ``skill_id`` over ``reference_map``."""
    start_ids = [skill_id]
    return collect_reachable(start_ids, self.reference_map)
def merge(self, patch: "SkillBundle") -> "SkillBundle":
    """Combine this bundle with ``patch`` and return the result as a new bundle.

    Entries in ``patch`` replace entries with the same skill_id. Dependencies
    of a patched skill come from ``patch.depends_on_map``; when the patch has
    no list for that skill, the asset ids of the entry's direct file
    references are used instead. Edges that point at unknown skills are
    dropped, the reverse map is rebuilt, and every entry reachable from a
    patched skill through the reverse map is recomputed.

    Raises:
        ValueError: if the two bundles carry different ``assets_id`` values.
    """
    if self.assets_id != patch.assets_id:
        raise ValueError("bundle assets_id mismatch")

    patched_ids = set(patch.entries.keys())
    if len(patched_ids) == 0:
        # Nothing to apply: hand back an independent copy of this bundle.
        return self.model_copy(deep=True)

    all_entries = {**self.entries, **patch.entries}

    # Carry over existing edges, keeping only sources and targets that are known entries.
    dependency_graph: dict[str, list[str]] = {}
    for owner_id, targets in self.depends_on_map.items():
        if owner_id not in all_entries:
            continue
        dependency_graph[owner_id] = [target for target in targets if target in all_entries]

    # Patched skills get their edge list replaced wholesale.
    for patched_id in patched_ids:
        declared = patch.depends_on_map.get(patched_id)
        if declared is None:
            patched_entry = patch.entries[patched_id]
            declared = [file_ref.asset_id for file_ref in patched_entry.direct_files.references]
        dependency_graph[patched_id] = [target for target in _dedupe(declared) if target in all_entries]

    # Every entry gets a (possibly empty) edge list.
    for known_id in all_entries:
        if known_id not in dependency_graph:
            dependency_graph[known_id] = []

    inverted = invert_dependency_map(dependency_graph, all_entries.keys())
    referrers_by_id = {target_id: sorted(referrers) for target_id, referrers in inverted.items()}

    impacted_ids = collect_reachable(patched_ids, referrers_by_id)
    all_entries.update(_recompute_affected_entries(all_entries, dependency_graph, impacted_ids))

    return SkillBundle(
        assets_id=self.assets_id,
        schema_version=max(self.schema_version, patch.schema_version),
        built_at=patch.built_at or self.built_at,
        entries=all_entries,
        depends_on_map=dict(dependency_graph),
        reference_map=referrers_by_id,
    )
def subset(self, skill_ids: Iterable[str]) -> "SkillBundle":
    """Return a new bundle restricted to ``skill_ids``.

    Only entries whose id is requested and present are kept. Both edge maps
    are cut down to requested ids on either end of an edge. ``assets_id``,
    ``schema_version`` and ``built_at`` are carried over unchanged.
    """
    wanted = set(skill_ids)

    def _restrict(edge_map: dict[str, list[str]]) -> dict[str, list[str]]:
        # Keep a key only if requested, and within it only requested targets.
        return {
            owner_id: [linked_id for linked_id in linked_ids if linked_id in wanted]
            for owner_id, linked_ids in edge_map.items()
            if owner_id in wanted
        }

    kept_entries = {sid: self.entries[sid] for sid in wanted if sid in self.entries}
    return SkillBundle(
        assets_id=self.assets_id,
        schema_version=self.schema_version,
        built_at=self.built_at,
        entries=kept_entries,
        depends_on_map=_restrict(self.depends_on_map),
        reference_map=_restrict(self.reference_map),
    )
def get(self, skill_id: str) -> Skill | None:
    """Look up the skill stored under ``skill_id``; ``None`` when there is none."""
    skill_table = self.skills
    return skill_table.get(skill_id)
# NOTE(review): two implementations appear interleaved here (diff lines shown
# without +/- markers). One folds every skill's `dependance.tools` together with
# `ToolDependencies.merge`; the other de-duplicates dependencies and references
# by key while walking `entries`. Confirm which lines belong to the current file.
def get_tool_dependencies(self) -> ToolDependencies:
dependencies: dict[str, ToolDependency] = {}
references: dict[str, ToolReference] = {}
# Variant using `skills`: start from an empty ToolDependencies and merge each
# skill's tools into the running result.
merged = ToolDependencies()
for skill in self.skills.values():
merged = merged.merge(skill.dependance.tools)
return merged
# Variant using `entries`: keep the first dependency seen per
# "provider.tool_name" key and the first reference seen per uuid.
for entry in self.entries.values():
for dep in entry.tools.dependencies:
key = f"{dep.provider}.{dep.tool_name}"
if key not in dependencies:
dependencies[key] = dep
for ref in entry.tools.references:
if ref.uuid not in references:
references[ref.uuid] = ref
# Results are emitted in first-seen (dict insertion) order.
return ToolDependencies(
dependencies=list(dependencies.values()),
references=list(references.values()),
)
def _dedupe(values: Iterable[str]) -> list[str]:
return list(dict.fromkeys(values))
# Recompute the aggregated `tools` and `files` of each affected entry from its own
# direct tools/files plus the aggregated tools/files of the entries it depends
# on, repeating until a full pass changes nothing.
#
# entries: all known entries keyed by skill_id.
# depends_on_map: skill_id -> ids of the skills it depends on.
# affected_skill_ids: ids whose aggregated data should be recomputed.
# Returns: the recomputed entries, limited to affected ids that exist in `entries`.
def _recompute_affected_entries(
entries: dict[str, SkillBundleEntry],
depends_on_map: dict[str, list[str]],
affected_skill_ids: set[str],
) -> dict[str, SkillBundleEntry]:
# Affected ids without an entry are silently left out.
recomputed_entries = {skill_id: entries[skill_id] for skill_id in affected_skill_ids if skill_id in entries}
# Keep sweeping until no entry was replaced during a whole pass.
changed = True
while changed:
changed = False
for skill_id in affected_skill_ids:
current_entry = recomputed_entries.get(skill_id)
if current_entry is None:
continue
# Seed the aggregates with what this skill declares directly.
merged_tool_deps: dict[str, ToolDependency] = {
dep.tool_id(): dep for dep in current_entry.direct_tools.dependencies
}
merged_tool_refs: dict[str, ToolReference] = {
ref.uuid: ref for ref in current_entry.direct_tools.references
}
merged_files = {f.asset_id: f for f in current_entry.direct_files.references}
for dep_id in depends_on_map.get(skill_id, []):
# Prefer the version recomputed in this run; fall back to the input entry.
dep_entry = recomputed_entries.get(dep_id) or entries.get(dep_id)
if dep_entry is None:
continue
# setdefault keeps whatever is already stored under a key, so directly
# declared items are never overwritten by inherited ones.
for dep in dep_entry.tools.dependencies:
merged_tool_deps.setdefault(dep.tool_id(), dep)
for ref in dep_entry.tools.references:
merged_tool_refs.setdefault(ref.uuid, ref)
for file_ref in dep_entry.files.references:
merged_files.setdefault(file_ref.asset_id, file_ref)
# Aggregates are emitted in sorted-key order.
merged_tools = ToolDependencies(
dependencies=[merged_tool_deps[key] for key in sorted(merged_tool_deps.keys())],
references=[merged_tool_refs[key] for key in sorted(merged_tool_refs.keys())],
)
merged_asset_refs = AssetReferences(references=[merged_files[key] for key in sorted(merged_files.keys())])
# Replace the entry only when an aggregate differs; that also triggers another pass.
if merged_tools != current_entry.tools or merged_asset_refs != current_entry.files:
recomputed_entries[skill_id] = current_entry.model_copy(
update={
"tools": merged_tools,
"files": merged_asset_refs,
}
)
changed = True
return recomputed_entries
def put(self, skill: Skill) -> None:
    """Store ``skill`` under its ``skill_id``, replacing any skill already there."""
    key = skill.skill_id
    self.skills[key] = skill

View File

@ -1,23 +0,0 @@
from pydantic import BaseModel, ConfigDict, Field
from core.skill.entities.asset_references import AssetReferences
from core.skill.entities.tool_dependencies import ToolDependencies
# Identifies the source file of a skill and carries a digest of its content.
class SourceInfo(BaseModel):
    # Unknown keys are rejected on input.
    model_config = ConfigDict(extra="forbid")

    asset_id: str = Field(description="Asset ID of the source skill file")
    content_digest: str = Field(description="Hash of the original content for change detection")
# One skill inside a bundle: source info, direct and aggregated dependencies, and content.
class SkillBundleEntry(BaseModel):
    # Unknown keys are rejected on input.
    model_config = ConfigDict(extra="forbid")

    skill_id: str = Field(description="Unique identifier for this skill")
    source: SourceInfo = Field(description="Source file information")

    # What this skill declares itself.
    direct_tools: ToolDependencies = Field(description="Direct tool dependencies parsed from this skill only")
    direct_files: AssetReferences = Field(description="Direct file references parsed from this skill only")

    # Direct items plus everything inherited through dependencies.
    tools: ToolDependencies = Field(description="All tool dependencies (transitive closure)")
    files: AssetReferences = Field(description="All file references (transitive closure)")

    content: str = Field(description="Resolved content with all references replaced")

View File

@ -1,8 +1,11 @@
from collections.abc import Mapping
from typing import Any
from pydantic import BaseModel, ConfigDict, Field
from core.skill.entities.skill_metadata import SkillMetadata
# Model that declares no fields in the lines shown here and rejects unknown keys.
class SkillFile(BaseModel):
    model_config = ConfigDict(extra="forbid")
class SkillDocument(BaseModel):
"""Input document for skill compilation."""
@ -11,4 +14,4 @@ class SkillDocument(BaseModel):
skill_id: str = Field(description="Unique identifier, must match SkillAsset.asset_id")
content: str = Field(description="Raw content with reference placeholders")
metadata: Mapping[str, Any] = Field(default_factory=dict, description="Raw metadata dict")
metadata: SkillMetadata = Field(default_factory=SkillMetadata, description="Additional metadata for this skill")

View File

@ -1,6 +1,6 @@
from typing import Any
from pydantic import BaseModel, ConfigDict, Field
from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
from core.tools.entities.tool_entities import ToolProviderType
@ -16,7 +16,9 @@ class ToolFieldConfig(BaseModel):
class ToolConfiguration(BaseModel):
model_config = ConfigDict(extra="forbid")
fields: list[ToolFieldConfig] = Field(default_factory=list)
fields: list[ToolFieldConfig] = Field(
default_factory=list, description="List of field configurations for this tool"
)
def default_values(self) -> dict[str, Any]:
    """Map each field id to its value, leaving out fields whose value is ``None``."""
    defaults: dict[str, Any] = {}
    for field_config in self.fields:
        if field_config.value is None:
            continue
        defaults[field_config.id] = field_config.value
    return defaults
@ -29,13 +31,35 @@ def create_tool_id(provider: str, tool_name: str) -> str:
class ToolReference(BaseModel):
model_config = ConfigDict(extra="forbid")
uuid: str
type: ToolProviderType
provider: str
tool_name: str
enabled: bool = True
credential_id: str | None = None
configuration: ToolConfiguration | None = None
uuid: str = Field(
default="",
description=(
"Unique identifier for this tool reference, used to distinguish multiple references to the same tool"
),
)
type: ToolProviderType = Field(description="The provider type of the tool")
provider: str = Field(
default="",
description="The provider name of the tool plugin. Can be inferred from placeholders during compilation.",
)
tool_name: str = Field(
default="",
description=(
"The tool name defined in the provider plugin. Can be inferred from placeholders during compilation."
),
)
enabled: bool = Field(default=True, description="Whether this tool reference is enabled")
credential_id: str | None = Field(
default=None,
description="Credential ID used to resolve credentials when invoking the tool.",
)
configuration: ToolConfiguration | None = Field(
default=None,
description=(
"Optional configuration for this tool reference, used to provide "
"additional parameters when invoking the tool"
),
)
def reference_id(self) -> str:
    """Return ``provider``, ``tool_name`` and ``uuid`` joined by dots."""
    return "{}.{}.{}".format(self.provider, self.tool_name, self.uuid)
@ -45,14 +69,45 @@ class ToolReference(BaseModel):
class FileReference(BaseModel):
model_config = ConfigDict(extra="forbid")
model_config = ConfigDict(frozen=True)
source: str
source: str = Field(default="app")
asset_id: str
@model_validator(mode="before")
@classmethod
def normalize_input(cls, data: Any) -> Any:
    """Normalize raw input into the ``{"source", "asset_id"}`` shape before validation.

    Accepted dict shapes:

    * a dict with ``asset_id`` – reduced to ``source`` and ``asset_id``; a
      missing ``source`` falls back to ``"app"``.
    * a dict with ``id`` (and no ``asset_id``) – rewritten to
      ``{"source": "app", "asset_id": <id>}``.

    Non-dict input and dicts with neither key are returned unchanged, so the
    regular field validation reports any problem.
    """
    if not isinstance(data, dict):
        return data

    # An explicit asset_id always wins. The earlier check also required
    # "source" to be present, which made the "app" fallback unreachable and let
    # a stray "id" key override an explicit asset_id.
    if "asset_id" in data:
        return {"source": data.get("source", "app"), "asset_id": data["asset_id"]}

    # Front-end payloads carry the asset id under "id".
    if "id" in data:
        return {"source": "app", "asset_id": data["id"]}

    return data
class SkillMetadata(BaseModel):
model_config = ConfigDict(extra="allow")
model_config = ConfigDict(extra="forbid")
tools: dict[str, ToolReference] = Field(default_factory=dict)
files: list[FileReference] = Field(default_factory=list)
files: set[FileReference] = Field(default_factory=set)
@field_validator("files", mode="before")
@classmethod
def coerce_files_to_set(cls, v: Any) -> set[FileReference] | Any:
    """Turn a list or dict of file references into a set before validation.

    For a list, its items are used; for a dict, its values are used. Each
    item that is a ``FileReference`` is kept as is, and each item that is a
    dict is validated into a ``FileReference``. Items of any other type are
    skipped, as before. Any other input is returned unchanged for the regular
    field validation to handle.
    """
    if isinstance(v, list):
        candidates = v
    elif isinstance(v, dict):
        candidates = v.values()
    else:
        return v

    refs: set[FileReference] = set()
    for item in candidates:
        # Both input shapes now accept ready-made FileReference objects; the
        # dict shape used to drop them silently.
        if isinstance(item, FileReference):
            refs.add(item)
        elif isinstance(item, dict):
            refs.add(FileReference.model_validate(item))
    return refs