mirror of
https://github.com/langgenius/dify.git
synced 2026-05-03 17:08:03 +08:00
feat(skill): skill parser & packager
This commit is contained in:
10
api/core/app_assets/parser/__init__.py
Normal file
10
api/core/app_assets/parser/__init__.py
Normal file
@ -0,0 +1,10 @@
|
||||
# Public surface of the asset parser package: the tree-level driver
# (AssetParser), the per-item parser interface with its pass-through default
# (AssetItemParser, FileAssetParser) and the skill document parser.
from .asset_parser import AssetParser
from .base import AssetItemParser, FileAssetParser
from .skill_parser import SkillAssetParser

__all__ = ["AssetItemParser", "AssetParser", "FileAssetParser", "SkillAssetParser"]
|
||||
51
api/core/app_assets/parser/asset_parser.py
Normal file
51
api/core/app_assets/parser/asset_parser.py
Normal file
@ -0,0 +1,51 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from core.app.entities.app_asset_entities import AppAssetFileTree
|
||||
from core.app_assets.assets import AssetItem
|
||||
from core.app_assets.paths import AssetPaths
|
||||
|
||||
from .base import AssetItemParser, FileAssetParser
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from extensions.ext_storage import Storage
|
||||
|
||||
|
||||
class AssetParser:
    """Turn every file node of an app asset tree into an ``AssetItem``.

    The parser for a node is looked up by the node's extension. Nodes whose
    extension has no registered parser are handled by a ``FileAssetParser``.
    """

    _tree: AppAssetFileTree
    _tenant_id: str
    _app_id: str
    _storage: "Storage"
    _parsers: dict[str, AssetItemParser]
    _default_parser: AssetItemParser

    def __init__(
        self,
        tree: AppAssetFileTree,
        tenant_id: str,
        app_id: str,
        storage: "Storage",
    ) -> None:
        """Bind the parser to one asset tree and the storage that holds its files.

        Args:
            tree: File tree whose file nodes will be parsed.
            tenant_id: Tenant identifier used when building storage keys.
            app_id: App identifier used when building storage keys.
            storage: Backend the raw file bytes are loaded from.
        """
        self._tree, self._storage = tree, storage
        self._tenant_id, self._app_id = tenant_id, app_id
        # No extension-specific parsers until register() is called.
        self._parsers = {}
        self._default_parser = FileAssetParser()

    def register(self, extension: str, parser: AssetItemParser) -> None:
        """Use ``parser`` for nodes whose extension equals ``extension``.

        A later registration for the same extension replaces the earlier one.
        """
        self._parsers[extension] = parser

    def parse(self) -> list[AssetItem]:
        """Parse all file nodes of the tree, in ``walk_files()`` order.

        For each node the raw bytes are loaded from storage under the key
        produced by ``AssetPaths.draft_file`` and handed to the parser
        registered for the node's extension (or the default parser).

        Returns:
            One ``AssetItem`` per file node.
        """
        parsed: list[AssetItem] = []

        for file_node in self._tree.walk_files():
            # Tree paths are stored relative, without leading slashes.
            relative_path = self._tree.get_path(file_node.id).lstrip("/")
            key = AssetPaths.draft_file(self._tenant_id, self._app_id, file_node.id)
            payload = self._storage.load_once(key)
            # A node without an extension is looked up under the empty string.
            ext = file_node.extension or ""

            item_parser = self._parsers.get(ext, self._default_parser)
            parsed.append(item_parser.parse(file_node.id, relative_path, file_node.name, ext, key, payload))

        return parsed
|
||||
36
api/core/app_assets/parser/base.py
Normal file
36
api/core/app_assets/parser/base.py
Normal file
@ -0,0 +1,36 @@
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from core.app_assets.assets import AssetItem, FileAsset
|
||||
|
||||
|
||||
class AssetItemParser(ABC):
    """Interface for converting one asset file into an ``AssetItem``."""

    @abstractmethod
    def parse(
        self,
        node_id: str,
        path: str,
        file_name: str,
        extension: str,
        storage_key: str,
        raw_bytes: bytes,
    ) -> AssetItem:
        """Build the asset item for a single file.

        Args:
            node_id: Identifier of the file's node in the asset tree.
            path: Path of the file within the asset tree.
            file_name: Name of the file.
            extension: File extension; may be an empty string.
            storage_key: Storage key the raw bytes were loaded from.
            raw_bytes: Raw content of the file.

        Returns:
            The asset item describing the file.
        """
        raise NotImplementedError
|
||||
|
||||
|
||||
class FileAssetParser(AssetItemParser):
    """Parser that wraps a file's identifying fields in a ``FileAsset`` unchanged."""

    def parse(
        self,
        node_id: str,
        path: str,
        file_name: str,
        extension: str,
        storage_key: str,
        raw_bytes: bytes,
    ) -> FileAsset:
        """Return a ``FileAsset`` carrying the given node fields.

        ``raw_bytes`` is accepted to satisfy the parser interface but is not
        inspected; the asset keeps pointing at ``storage_key``.
        """
        asset = FileAsset(
            node_id=node_id,
            path=path,
            file_name=file_name,
            extension=extension,
            storage_key=storage_key,
        )
        return asset
|
||||
120
api/core/app_assets/parser/skill_parser.py
Normal file
120
api/core/app_assets/parser/skill_parser.py
Normal file
@ -0,0 +1,120 @@
|
||||
import json
|
||||
import re
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from core.app_assets.paths import AssetPaths
|
||||
from core.app_assets.skill import (
|
||||
FileReference,
|
||||
SkillAsset,
|
||||
SkillMetadata,
|
||||
ToolReference,
|
||||
)
|
||||
|
||||
from .base import AssetItemParser
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from extensions.ext_storage import Storage
|
||||
|
||||
# Inline tool reference token: §[tool].[<provider>].[<tool_name>].[<uuid>]§
# Each bracketed segment is one or more characters other than "]".
TOOL_REFERENCE_PATTERN = re.compile(r"§\[tool\]\.\[([^\]]+)\]\.\[([^\]]+)\]\.\[([^\]]+)\]§")

# Inline file reference token: §[file].[<source>].[<uuid>]§
FILE_REFERENCE_PATTERN = re.compile(r"§\[file\]\.\[([^\]]+)\]\.\[([^\]]+)\]§")
|
||||
|
||||
|
||||
class SkillAssetParser(AssetItemParser):
    """Parse a skill document and store its reference-resolved content.

    A skill document is a JSON object with an optional ``metadata`` value and
    an optional ``content`` string. ``content`` may embed tool references
    (``§[tool].[provider].[tool_name].[uuid]§``) and file references
    (``§[file].[source].[uuid]§``). Both kinds are collected and replaced by
    plain-text placeholders; the resulting text is saved to storage and
    returned as part of the ``SkillAsset``.
    """

    _tenant_id: str
    _app_id: str
    _publish_id: str
    _storage: "Storage"

    def __init__(
        self,
        tenant_id: str,
        app_id: str,
        publish_id: str,
        storage: "Storage",
    ) -> None:
        """Remember the identifiers used to build storage keys and the storage to write to.

        Args:
            tenant_id: Tenant identifier used in the resolved-file storage key.
            app_id: App identifier used in the resolved-file storage key.
            publish_id: Publish identifier used in the resolved-file storage key.
            storage: Backend the resolved content is saved to.
        """
        self._tenant_id = tenant_id
        self._app_id = app_id
        self._publish_id = publish_id
        self._storage = storage

    def _get_resolved_key(self, node_id: str) -> str:
        """Return the storage key under which the resolved content of ``node_id`` is saved."""
        return AssetPaths.published_resolved_file(self._tenant_id, self._app_id, self._publish_id, node_id)

    @staticmethod
    def _load_document(node_id: str, raw_bytes: bytes) -> dict[str, Any]:
        """Decode ``raw_bytes`` as UTF-8 JSON and require a JSON object at the top level."""
        try:
            # "utf-8-sig" decodes exactly like "utf-8" but also drops a leading
            # byte-order mark, which json.loads would otherwise reject.
            data = json.loads(raw_bytes.decode("utf-8-sig"))
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            raise ValueError(f"Invalid skill document JSON for {node_id}: {e}") from e

        if not isinstance(data, dict):
            raise ValueError(f"Skill document {node_id} must be a JSON object")

        return data

    def parse(
        self,
        node_id: str,
        path: str,
        file_name: str,
        extension: str,
        storage_key: str,
        raw_bytes: bytes,
    ) -> SkillAsset:
        """Parse one skill document, resolve its references and persist the result.

        Args:
            node_id: Identifier of the document's node; used in error messages
                and in the resolved-file storage key.
            path: Path of the document, copied onto the asset.
            file_name: Name of the document, copied onto the asset.
            extension: Extension of the document, copied onto the asset.
            storage_key: Key of the unresolved source document. Not used: the
                returned asset points at the resolved file instead.
            raw_bytes: UTF-8 encoded JSON document (a leading BOM is tolerated).

        Returns:
            A ``SkillAsset`` whose ``content`` is the resolved text and whose
            ``storage_key`` is the key the resolved text was saved under.

        Raises:
            ValueError: If the bytes are not valid UTF-8 JSON, the top level is
                not an object, or ``content`` is not a string. Errors raised by
                ``SkillMetadata.model_validate`` propagate unchanged.
        """
        document = self._load_document(node_id, raw_bytes)
        metadata_raw = document.get("metadata", {})
        content = document.get("content", "")

        if not isinstance(content, str):
            raise ValueError(f"Skill document {node_id} 'content' must be a string")

        metadata = SkillMetadata.model_validate(metadata_raw)

        tool_references: list[ToolReference] = [
            ToolReference(
                provider=match.group(1),
                tool_name=match.group(2),
                uuid=match.group(3),
                raw=match.group(0),
            )
            for match in TOOL_REFERENCE_PATTERN.finditer(content)
        ]
        file_references: list[FileReference] = [
            FileReference(
                source=match.group(1),
                uuid=match.group(2),
                raw=match.group(0),
            )
            for match in FILE_REFERENCE_PATTERN.finditer(content)
        ]

        resolved_content = self._resolve_content(content, tool_references, file_references)
        resolved_key = self._get_resolved_key(node_id)

        # Build the asset before writing, so an error while constructing it
        # cannot leave a stored file behind without a corresponding asset.
        asset = SkillAsset(
            node_id=node_id,
            path=path,
            file_name=file_name,
            extension=extension,
            storage_key=resolved_key,
            metadata=metadata,
            content=resolved_content,
            tool_references=tool_references,
            file_references=file_references,
        )
        self._storage.save(resolved_key, resolved_content.encode("utf-8"))

        return asset

    def _resolve_content(
        self,
        content: str,
        tool_references: list[ToolReference],
        file_references: list[FileReference],
    ) -> str:
        """Replace every reference token in ``content`` with its placeholder.

        Tool references become ``provider/tool_name`` and file references
        become ``[file:uuid]``. Tool references are substituted first, and each
        substitution replaces all occurrences of that reference's raw token.

        Returns:
            The content with all given references substituted.
        """
        for ref in tool_references:
            replacement = f"{ref.provider}/{ref.tool_name}"
            content = content.replace(ref.raw, replacement)

        for ref in file_references:
            replacement = f"[file:{ref.uuid}]"
            content = content.replace(ref.raw, replacement)

        return content
|
||||
Reference in New Issue
Block a user