mirror of
https://github.com/langgenius/dify.git
synced 2026-05-04 09:28:04 +08:00
refactor: redesign skill compilation and document assembly process
This commit is contained in:
@ -1,196 +1,94 @@
|
||||
from collections.abc import Iterable
|
||||
from datetime import datetime
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
from core.skill.entities.asset_references import AssetReferences
|
||||
from core.skill.entities.skill_bundle_entry import SkillBundleEntry
|
||||
from core.skill.entities.skill_metadata import ToolReference
|
||||
from core.app.entities.app_asset_entities import AppAssetFileTree
|
||||
from core.skill.entities.skill_metadata import FileReference
|
||||
from core.skill.entities.tool_dependencies import ToolDependencies, ToolDependency
|
||||
from core.skill.graph_utils import collect_reachable, invert_dependency_map
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from core.skill.entities.skill_metadata import SkillMetadata
|
||||
|
||||
|
||||
# Pairing of tool dependencies and file references attached to a skill.
#
# Two instances can be combined with ``|``; ``from_metadata`` builds one from
# parsed skill metadata. (Documented with comments rather than a class
# docstring so the generated model schema is left untouched.)
class SkillDependance(BaseModel):
    model_config = ConfigDict(extra="forbid")

    # Required: tool dependencies carried by this dependency set.
    tools: ToolDependencies = Field(description="Direct tool dependencies parsed from this skill only")

    # Optional: file references; a fresh empty set is created when omitted.
    files: set[FileReference] = Field(
        default_factory=set,
        description="Direct file references parsed from this skill only",
    )

    def __or__(self, other: "SkillDependance") -> "SkillDependance":
        """Return a new instance combining ``self`` with ``other``.

        Tools are combined through ``ToolDependencies.merge``; files are
        combined with a plain set union. Neither operand is modified here.
        """
        combined_tools = self.tools.merge(other.tools)
        combined_files = self.files | other.files
        return SkillDependance(tools=combined_tools, files=combined_files)

    @staticmethod
    def from_metadata(metadata: "SkillMetadata") -> "SkillDependance":
        """Convert parsed metadata into a tool/file dependency model.

        Every value of ``metadata.tools`` contributes one ``ToolDependency``
        keyed by ``tool_id()`` and one reference keyed by ``uuid``. When a key
        repeats, the first occurrence is kept. Both resulting lists are
        ordered by their sorted keys; ``metadata.files`` is passed through.
        """
        # Imported locally, as in the original; presumably to avoid an import
        # cycle with skill_metadata -- TODO confirm.
        from core.skill.entities.skill_metadata import ToolReference

        dependencies_by_id: dict[str, ToolDependency] = {}
        references_by_uuid: dict[str, ToolReference] = {}

        for reference in metadata.tools.values():
            tool_key = reference.tool_id()
            dependency = ToolDependency(
                type=reference.type,
                provider=reference.provider,
                tool_name=reference.tool_name,
                enabled=reference.enabled,
            )
            # setdefault keeps whichever value was registered first.
            dependencies_by_id.setdefault(tool_key, dependency)
            references_by_uuid.setdefault(reference.uuid, reference)

        ordered_dependencies = [dependencies_by_id[key] for key in sorted(dependencies_by_id)]
        ordered_references = [references_by_uuid[key] for key in sorted(references_by_uuid)]

        return SkillDependance(
            tools=ToolDependencies(
                dependencies=ordered_dependencies,
                references=ordered_references,
            ),
            files=metadata.files,
        )
|
||||
|
||||
|
||||
# A skill record: identifier, direct and full dependency sets, and content.
class Skill(BaseModel):
    model_config = ConfigDict(extra="forbid")

    # NOTE(review): the description below is circular ("same with skill_id");
    # confirm which identifier it was meant to reference.
    skill_id: str = Field(
        description="Unique identifier for this skill, same with skill_id",
    )

    # Dependencies declared by this skill itself.
    direct_dependance: SkillDependance = Field(
        description="Direct dependencies parsed from this skill only",
    )

    # Dependencies of this skill plus those reached through other skills.
    dependance: SkillDependance = Field(
        description="All dependencies including transitive closure",
    )

    # Skill text after reference substitution.
    content: str = Field(
        description="Resolved content with all references replaced",
    )

    @property
    def tools(self) -> ToolDependencies:
        """Shortcut for ``self.dependance.tools`` (the full dependency set)."""
        full_dependencies = self.dependance
        return full_dependencies.tools
|
||||
|
||||
|
||||
class SkillBundle(BaseModel):
|
||||
"""Persisted skill compilation snapshot with graph metadata and merge support."""
|
||||
|
||||
model_config = ConfigDict(extra="forbid")
|
||||
|
||||
asset_tree: AppAssetFileTree = Field(description="Asset tree for this bundle")
|
||||
|
||||
assets_id: str = Field(description="Assets ID this bundle belongs to")
|
||||
schema_version: int = Field(default=2, description="Schema version for forward compatibility")
|
||||
built_at: datetime | None = Field(default=None, description="Build timestamp")
|
||||
|
||||
entries: dict[str, SkillBundleEntry] = Field(default_factory=dict, description="skill_id -> SkillBundleEntry")
|
||||
skills: dict[str, Skill] = Field(default_factory=dict)
|
||||
|
||||
depends_on_map: dict[str, list[str]] = Field(
|
||||
default_factory=dict,
|
||||
description="skill_id -> list of skill_ids it depends on",
|
||||
)
|
||||
@property
def entries(self) -> dict[str, Skill]:
    """Expose the ``skills`` mapping under the name ``entries``.

    The very same dict object is returned, so mutations made through this
    property are visible in ``self.skills``.
    """
    skills_by_id = self.skills
    return skills_by_id
|
||||
|
||||
reference_map: dict[str, list[str]] = Field(
|
||||
default_factory=dict,
|
||||
description="skill_id -> list of skill_ids that depend on it",
|
||||
)
|
||||
|
||||
def get(self, skill_id: str) -> SkillBundleEntry | None:
    """Return the entry stored under ``skill_id``, or ``None`` when absent."""
    found = self.entries.get(skill_id)
    return found
|
||||
|
||||
def upsert(self, entry: SkillBundleEntry) -> None:
    """Store ``entry`` under its own ``skill_id``, replacing any previous entry.

    Only ``self.entries`` is touched; the dependency maps are not updated here.
    """
    key = entry.skill_id
    self.entries[key] = entry
|
||||
|
||||
def remove(self, skill_id: str) -> None:
    """Delete a skill together with every graph edge that mentions it.

    The skill's entry and its own rows in ``depends_on_map`` and
    ``reference_map`` are dropped, then ``skill_id`` is stripped from every
    remaining adjacency list in both maps. Unknown ids are ignored.

    Args:
        skill_id: Identifier of the skill to remove.
    """
    self.entries.pop(skill_id, None)
    self.depends_on_map.pop(skill_id, None)
    self.reference_map.pop(skill_id, None)

    for graph in (self.reference_map, self.depends_on_map):
        for neighbours in graph.values():
            if skill_id in neighbours:
                # ``list.remove`` would drop only the first occurrence and
                # leave a dangling edge if the id is listed more than once.
                # Slice assignment filters all of them while keeping the same
                # list object, so outside references to it stay valid.
                neighbours[:] = [other for other in neighbours if other != skill_id]
|
||||
|
||||
def referenced_skill_ids(self, skill_id: str) -> set[str]:
    """Return the ids listed for ``skill_id`` in ``depends_on_map`` as a set.

    An id with no row in the map yields an empty set.
    """
    listed = self.depends_on_map.get(skill_id, [])
    return {*listed}
|
||||
|
||||
def recompile_group_ids(self, skill_id: str) -> set[str]:
    """Return ``collect_reachable`` seeded with ``skill_id`` over ``reference_map``.

    ``reference_map`` maps a skill to the skills that depend on it, so the
    result is presumably the skill plus all of its transitive dependents --
    confirm against ``collect_reachable``.
    """
    seeds = [skill_id]
    return collect_reachable(seeds, self.reference_map)
|
||||
|
||||
def merge(self, patch: "SkillBundle") -> "SkillBundle":
    """Return a new bundle with patch entries merged and affected closure recomputed.

    Steps:
      1. Overlay ``patch.entries`` on top of ``self.entries``.
      2. Rebuild ``depends_on_map``: keep existing rows, replace the rows of
         patched skills, and drop any id that has no entry in the merge.
      3. Invert that map into ``reference_map`` (referrers sorted).
      4. Recompute every entry reachable from the patched ids through
         ``reference_map``.

    An empty patch yields a deep copy of ``self``.

    Raises:
        ValueError: If ``patch`` has a different ``assets_id``.
    """
    if self.assets_id != patch.assets_id:
        raise ValueError("bundle assets_id mismatch")

    patched_ids = set(patch.entries.keys())
    if len(patched_ids) == 0:
        return self.model_copy(deep=True)

    # Patch entries win over existing ones with the same id.
    combined_entries = {**self.entries, **patch.entries}

    # Start from the current edges, keeping only ids that still have an entry.
    combined_depends_on: dict[str, list[str]] = {}
    for owner_id, owner_deps in self.depends_on_map.items():
        if owner_id not in combined_entries:
            continue
        combined_depends_on[owner_id] = [dep for dep in owner_deps if dep in combined_entries]

    for patched_id in patched_ids:
        declared = patch.depends_on_map.get(patched_id)
        if declared is None:
            # The patch carries no explicit row: fall back to the asset ids of
            # the entry's direct file references.
            patched_entry = patch.entries[patched_id]
            declared = [file_ref.asset_id for file_ref in patched_entry.direct_files.references]
        combined_depends_on[patched_id] = [dep for dep in _dedupe(declared) if dep in combined_entries]

    # Guarantee a (possibly empty) row for every known skill.
    for known_id in combined_entries:
        combined_depends_on.setdefault(known_id, [])

    inverted = invert_dependency_map(combined_depends_on, combined_entries.keys())
    reference_map = {target_id: sorted(referrers) for target_id, referrers in inverted.items()}

    impacted_ids = collect_reachable(patched_ids, reference_map)
    refreshed = _recompute_affected_entries(combined_entries, combined_depends_on, impacted_ids)
    combined_entries.update(refreshed)

    return SkillBundle(
        assets_id=self.assets_id,
        schema_version=max(self.schema_version, patch.schema_version),
        built_at=patch.built_at or self.built_at,
        entries=combined_entries,
        depends_on_map=dict(combined_depends_on),
        reference_map=reference_map,
    )
|
||||
|
||||
def subset(self, skill_ids: Iterable[str]) -> "SkillBundle":
    """Return a new bundle restricted to ``skill_ids``.

    Entries are kept only when the id is requested and present in
    ``self.entries``. Rows of ``depends_on_map`` / ``reference_map`` are kept
    when their key is requested, and each row is filtered down to requested
    ids. The maps are filtered against the requested ids, not against the
    entries that actually exist.
    """
    wanted = set(skill_ids)

    def _restrict(graph: dict[str, list[str]]) -> dict[str, list[str]]:
        # Keep requested rows only, and inside them requested neighbours only.
        return {
            sid: [neighbour for neighbour in neighbours if neighbour in wanted]
            for sid, neighbours in graph.items()
            if sid in wanted
        }

    kept_entries = {sid: self.entries[sid] for sid in wanted if sid in self.entries}

    return SkillBundle(
        assets_id=self.assets_id,
        schema_version=self.schema_version,
        built_at=self.built_at,
        entries=kept_entries,
        depends_on_map=_restrict(self.depends_on_map),
        reference_map=_restrict(self.reference_map),
    )
|
||||
def get(self, skill_id: str) -> Skill | None:
    """Return the skill registered under ``skill_id``, or ``None`` when absent."""
    skill = self.skills.get(skill_id)
    return skill
|
||||
|
||||
def get_tool_dependencies(self) -> ToolDependencies:
    """Combine the tool dependencies of every skill in the bundle.

    Starts from an empty ``ToolDependencies`` and folds in each skill's
    ``dependance.tools`` via ``ToolDependencies.merge``, in the iteration
    order of ``self.skills``.

    Returns:
        The merged ``ToolDependencies``; an empty instance when the bundle
        holds no skills.
    """
    # The former per-entry aggregation that followed the return statement was
    # unreachable, and its accumulator dicts were never read; both are removed.
    merged = ToolDependencies()
    for skill in self.skills.values():
        merged = merged.merge(skill.dependance.tools)
    return merged
|
||||
|
||||
|
||||
def _dedupe(values: Iterable[str]) -> list[str]:
|
||||
return list(dict.fromkeys(values))
|
||||
|
||||
|
||||
def _recompute_affected_entries(
    entries: dict[str, SkillBundleEntry],
    depends_on_map: dict[str, list[str]],
    affected_skill_ids: set[str],
) -> dict[str, SkillBundleEntry]:
    """Recompute aggregated tools/files for the affected entries until stable.

    For each affected id that has an entry, the aggregate is rebuilt from the
    entry's ``direct_tools`` / ``direct_files`` plus the current ``tools`` /
    ``files`` of each skill it depends on. Passes over the affected ids repeat
    until one full pass changes nothing.

    Args:
        entries: All known entries, keyed by skill id (not mutated here).
        depends_on_map: skill id -> ids it depends on.
        affected_skill_ids: Ids whose aggregates must be refreshed.

    Returns:
        Mapping of affected ids (those present in ``entries``) to their
        possibly updated entries.
    """
    refreshed = {sid: entries[sid] for sid in affected_skill_ids if sid in entries}

    while True:
        pass_changed = False

        for sid in affected_skill_ids:
            current = refreshed.get(sid)
            if current is None:
                continue

            # Seed with the skill's own declarations (last duplicate wins).
            tools_by_id: dict[str, ToolDependency] = {
                dep.tool_id(): dep for dep in current.direct_tools.dependencies
            }
            refs_by_uuid: dict[str, ToolReference] = {
                ref.uuid: ref for ref in current.direct_tools.references
            }
            files_by_asset = {file_ref.asset_id: file_ref for file_ref in current.direct_files.references}

            for upstream_id in depends_on_map.get(sid, []):
                # Prefer the value refreshed in this run over the stored one.
                upstream = refreshed.get(upstream_id) or entries.get(upstream_id)
                if upstream is None:
                    continue

                # Values already present take precedence over upstream ones.
                for dep in upstream.tools.dependencies:
                    tools_by_id.setdefault(dep.tool_id(), dep)
                for ref in upstream.tools.references:
                    refs_by_uuid.setdefault(ref.uuid, ref)
                for file_ref in upstream.files.references:
                    files_by_asset.setdefault(file_ref.asset_id, file_ref)

            new_tools = ToolDependencies(
                dependencies=[tools_by_id[key] for key in sorted(tools_by_id)],
                references=[refs_by_uuid[key] for key in sorted(refs_by_uuid)],
            )
            new_files = AssetReferences(references=[files_by_asset[key] for key in sorted(files_by_asset)])

            if new_tools != current.tools or new_files != current.files:
                refreshed[sid] = current.model_copy(update={"tools": new_tools, "files": new_files})
                pass_changed = True

        if not pass_changed:
            return refreshed
|
||||
def put(self, skill: Skill) -> None:
    """Register ``skill`` under its ``skill_id``, replacing any previous value."""
    key = skill.skill_id
    self.skills[key] = skill
|
||||
|
||||
Reference in New Issue
Block a user