mirror of
https://github.com/langgenius/dify.git
synced 2026-05-03 17:08:03 +08:00
feat: pull variable add sandbox file support
This commit is contained in:
@ -68,7 +68,10 @@ class SandboxBashTool(Tool):
|
||||
),
|
||||
llm="Execute bash commands in current working directory. "
|
||||
"Use this tool to run shell commands, scripts, or interact with the system. "
|
||||
"The command will be executed in the current working directory.",
|
||||
"The command will be executed in the current working directory. "
|
||||
"IMPORTANT: If you generate any output files (images, documents, etc.) that need to be "
|
||||
"returned or referenced later, you MUST save them to the 'output/' directory "
|
||||
"(e.g., 'mkdir -p output && cp result.png output/'). Only files in output/ will be collected.",
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@ -2,13 +2,18 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import mimetypes
|
||||
import os
|
||||
from io import BytesIO
|
||||
from types import TracebackType
|
||||
|
||||
from core.file import File, FileTransferMethod, FileType
|
||||
from core.sandbox.sandbox import Sandbox
|
||||
from core.session.cli_api import CliApiSession, CliApiSessionManager, CliContext
|
||||
from core.skill.entities import ToolAccessPolicy
|
||||
from core.skill.entities.tool_dependencies import ToolDependencies
|
||||
from core.tools.signature import sign_tool_file
|
||||
from core.tools.tool_file_manager import ToolFileManager
|
||||
from core.virtual_environment.__base.helpers import pipeline
|
||||
|
||||
from ..bash.dify_cli import DifyCliConfig
|
||||
@ -19,6 +24,13 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
SANDBOX_READY_TIMEOUT = 60 * 10
|
||||
|
||||
# Default output directory for sandbox-generated files
|
||||
SANDBOX_OUTPUT_DIR = "output"
|
||||
# Maximum number of files to collect from sandbox output
|
||||
MAX_OUTPUT_FILES = 50
|
||||
# Maximum file size to collect (10MB)
|
||||
MAX_OUTPUT_FILE_SIZE = 10 * 1024 * 1024
|
||||
|
||||
|
||||
class SandboxBashSession:
|
||||
def __init__(self, *, sandbox: Sandbox, node_id: str, tools: ToolDependencies | None) -> None:
|
||||
@ -105,3 +117,115 @@ class SandboxBashSession:
|
||||
if self._bash_tool is None:
|
||||
raise RuntimeError("SandboxSession is not initialized")
|
||||
return self._bash_tool
|
||||
|
||||
def collect_output_files(self, output_dir: str = SANDBOX_OUTPUT_DIR) -> list[File]:
    """
    Collect files from the sandbox output directory and persist them as ToolFiles.

    Every file found under *output_dir* (relative to the sandbox workspace) is
    downloaded, stored through ToolFileManager, and wrapped in a File object
    whose id/related_id is the tool_file_id so downstream nodes can reference
    it via structured output. Oversized files and per-file failures are logged
    and skipped rather than aborting the whole collection.

    Args:
        output_dir: Directory path in sandbox to scan for output files.
            Defaults to "output" (relative to workspace).

    Returns:
        List of File objects for every successfully collected file
        (possibly empty).
    """
    vm = self._sandbox.vm
    files: list[File] = []

    try:
        states = vm.list_files(output_dir, limit=MAX_OUTPUT_FILES)
    except Exception as exc:
        # The directory may simply not exist when nothing was generated.
        logger.debug("Failed to list sandbox output files in %s: %s", output_dir, exc)
        return files

    manager = ToolFileManager()

    for state in states:
        if state.size > MAX_OUTPUT_FILE_SIZE:
            # Oversized artifacts are skipped entirely, not truncated.
            logger.warning(
                "Skipping sandbox output file %s: size %d exceeds limit %d",
                state.path,
                state.size,
                MAX_OUTPUT_FILE_SIZE,
            )
            continue

        try:
            # state.path is already workspace-relative (e.g. "output/file.png").
            raw = vm.download_file(state.path).getvalue()

            name = os.path.basename(state.path)
            # Guess the mime type from the extension; fall back to generic binary.
            mime = mimetypes.guess_type(name)[0] or "application/octet-stream"

            # Persist the bytes as a ToolFile so a stable id exists.
            tool_file = manager.create_file_by_raw(
                user_id=self._user_id,
                tenant_id=self._tenant_id,
                conversation_id=None,
                file_binary=raw,
                mimetype=mime,
                filename=name,
            )

            extension = os.path.splitext(name)[1] if "." in name else ".bin"
            url = sign_tool_file(tool_file.id, extension)

            # tool_file.id doubles as the File id so callers can reference it
            # directly; related_id carries the same value for TOOL_FILE transfer.
            files.append(
                File(
                    id=tool_file.id,
                    tenant_id=self._tenant_id,
                    type=_get_file_type_from_mime(mime),
                    transfer_method=FileTransferMethod.TOOL_FILE,
                    filename=name,
                    extension=extension,
                    mime_type=mime,
                    size=len(raw),
                    related_id=tool_file.id,
                    url=url,
                    storage_key=tool_file.file_key,
                )
            )

            logger.info(
                "Collected sandbox output file: %s -> tool_file_id=%s",
                state.path,
                tool_file.id,
            )

        except Exception as exc:
            # Best-effort: one bad file must not stop the rest.
            logger.warning("Failed to collect sandbox output file %s: %s", state.path, exc)
            continue

    logger.info(
        "Collected %d files from sandbox output directory %s",
        len(files),
        output_dir,
    )
    return files
|
||||
|
||||
def _get_file_type_from_mime(mime_type: str) -> FileType:
    """Map a MIME type string onto the closest FileType category.

    Image/video/audio are matched on their major type; text and PDF mime
    types map to DOCUMENT; everything else falls back to CUSTOM.
    """
    # Major-type prefixes checked first, in order.
    prefix_map = (
        ("image/", FileType.IMAGE),
        ("video/", FileType.VIDEO),
        ("audio/", FileType.AUDIO),
    )
    for prefix, file_type in prefix_map:
        if mime_type.startswith(prefix):
            return file_type
    # Document-ish keywords (e.g. "text/plain", "application/pdf").
    if "text" in mime_type or "pdf" in mime_type:
        return FileType.DOCUMENT
    return FileType.CUSTOM
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
from collections.abc import Sequence
|
||||
from typing import cast
|
||||
from typing import Any, cast
|
||||
|
||||
from sqlalchemy import select, update
|
||||
from sqlalchemy.orm import Session
|
||||
@ -216,6 +216,7 @@ def build_context(
|
||||
prompt_messages: Sequence[PromptMessage],
|
||||
assistant_response: str,
|
||||
generation_data: LLMGenerationData | None = None,
|
||||
files: Sequence[Any] | None = None,
|
||||
) -> list[PromptMessage]:
|
||||
"""
|
||||
Build context from prompt messages and assistant response.
|
||||
@ -229,24 +230,58 @@ def build_context(
|
||||
prompt_messages: Initial prompt messages (user query, etc.)
|
||||
assistant_response: Final assistant response text
|
||||
generation_data: Optional generation data containing trace for tool-enabled runs
|
||||
files: Optional list of File objects generated during execution
|
||||
"""
|
||||
|
||||
context_messages: list[PromptMessage] = [
|
||||
_truncate_multimodal_content(m) for m in prompt_messages if m.role != PromptMessageRole.SYSTEM
|
||||
]
|
||||
|
||||
# Build file description suffix if files were generated
|
||||
file_suffix = ""
|
||||
if files:
|
||||
file_descriptions = _build_file_descriptions(files)
|
||||
if file_descriptions:
|
||||
file_suffix = f"\n\n{file_descriptions}"
|
||||
|
||||
# For tool-enabled runs, reconstruct messages from trace
|
||||
if generation_data and generation_data.trace:
|
||||
context_messages.extend(_build_messages_from_trace(generation_data, assistant_response))
|
||||
context_messages.extend(_build_messages_from_trace(generation_data, assistant_response, file_suffix))
|
||||
else:
|
||||
context_messages.append(AssistantPromptMessage(content=assistant_response))
|
||||
context_messages.append(AssistantPromptMessage(content=assistant_response + file_suffix))
|
||||
|
||||
return context_messages
|
||||
|
||||
|
||||
def _build_file_descriptions(files: Sequence[Any]) -> str:
    """
    Build a text description of generated files for inclusion in context.

    The description includes file_id which can be used by subsequent nodes
    to reference the files via structured output. Returns an empty string
    when there are no files to describe.
    """
    if not files:
        return ""

    descriptions: list[str] = ["[Generated Files]"]
    for file in files:
        # Read attributes defensively (File is a Pydantic model, but any
        # object exposing these attributes works).
        file_id = getattr(file, "id", None) or getattr(file, "related_id", None)
        filename = getattr(file, "filename", "unknown")
        file_type = getattr(file, "type", "unknown")
        if hasattr(file_type, "value"):
            # Unwrap enum members to their string value.
            file_type = file_type.value

        # Files without any usable identifier are skipped; subsequent nodes
        # could not reference them anyway.
        if file_id:
            # Fix: include the actual filename (it was computed but the
            # f-string previously emitted a literal placeholder instead).
            descriptions.append(f"- {filename} (id: {file_id}, type: {file_type})")

    return "\n".join(descriptions)
|
||||
|
||||
|
||||
def _build_messages_from_trace(
|
||||
generation_data: LLMGenerationData,
|
||||
assistant_response: str,
|
||||
file_suffix: str = "",
|
||||
) -> list[PromptMessage]:
|
||||
"""
|
||||
Build assistant and tool messages from trace segments.
|
||||
@ -254,7 +289,7 @@ def _build_messages_from_trace(
|
||||
Processes trace in order to reconstruct the conversation flow:
|
||||
- Model segments with tool_calls -> AssistantPromptMessage with tool_calls
|
||||
- Tool segments -> ToolPromptMessage with result
|
||||
- Final response -> AssistantPromptMessage with assistant_response
|
||||
- Final response -> AssistantPromptMessage with assistant_response (with optional file_suffix)
|
||||
"""
|
||||
from core.workflow.nodes.llm.entities import ModelTraceSegment, ToolTraceSegment
|
||||
|
||||
@ -290,8 +325,8 @@ def _build_messages_from_trace(
|
||||
)
|
||||
)
|
||||
|
||||
# Add final assistant response as the authoritative text
|
||||
messages.append(AssistantPromptMessage(content=assistant_response))
|
||||
# Add final assistant response as the authoritative text (with file info if present)
|
||||
messages.append(AssistantPromptMessage(content=assistant_response + file_suffix))
|
||||
|
||||
return messages
|
||||
|
||||
|
||||
@ -383,16 +383,7 @@ class LLMNode(Node[LLMNodeData]):
|
||||
if tool.enabled
|
||||
]
|
||||
|
||||
# Unified outputs building
|
||||
outputs = {
|
||||
"text": clean_text,
|
||||
"reasoning_content": reasoning_content,
|
||||
"usage": jsonable_encoder(usage),
|
||||
"finish_reason": finish_reason,
|
||||
"context": llm_utils.build_context(prompt_messages, clean_text, generation_data),
|
||||
}
|
||||
|
||||
# Build generation field
|
||||
# Build generation field and determine files_to_output first
|
||||
if generation_data:
|
||||
# Use generation_data from tool invocation (supports multi-turn)
|
||||
generation = {
|
||||
@ -420,6 +411,15 @@ class LLMNode(Node[LLMNodeData]):
|
||||
}
|
||||
files_to_output = self._file_outputs
|
||||
|
||||
# Unified outputs building (files passed to context for subsequent node reference)
|
||||
outputs = {
|
||||
"text": clean_text,
|
||||
"reasoning_content": reasoning_content,
|
||||
"usage": jsonable_encoder(usage),
|
||||
"finish_reason": finish_reason,
|
||||
"context": llm_utils.build_context(prompt_messages, clean_text, generation_data, files=files_to_output),
|
||||
}
|
||||
|
||||
outputs["generation"] = generation
|
||||
if files_to_output:
|
||||
outputs["files"] = ArrayFileSegment(value=files_to_output)
|
||||
@ -1921,6 +1921,7 @@ class LLMNode(Node[LLMNodeData]):
|
||||
tool_dependencies: ToolDependencies | None,
|
||||
) -> Generator[NodeEventBase, None, LLMGenerationData]:
|
||||
result: LLMGenerationData | None = None
|
||||
sandbox_output_files: list[File] = []
|
||||
|
||||
# FIXME(Mairuis): Async processing for bash session.
|
||||
with SandboxBashSession(sandbox=sandbox, node_id=self.id, tools=tool_dependencies) as session:
|
||||
@ -1961,9 +1962,26 @@ class LLMNode(Node[LLMNodeData]):
|
||||
|
||||
result = yield from self._process_tool_outputs(outputs)
|
||||
|
||||
# Collect output files from sandbox before session ends
|
||||
# Files are saved as ToolFiles with valid tool_file_id for later reference
|
||||
sandbox_output_files = session.collect_output_files()
|
||||
|
||||
if result is None:
|
||||
raise LLMNodeError("SandboxSession exited unexpectedly")
|
||||
|
||||
# Merge sandbox output files into result
|
||||
if sandbox_output_files:
|
||||
result = LLMGenerationData(
|
||||
text=result.text,
|
||||
reasoning_contents=result.reasoning_contents,
|
||||
tool_calls=result.tool_calls,
|
||||
sequence=result.sequence,
|
||||
usage=result.usage,
|
||||
finish_reason=result.finish_reason,
|
||||
files=result.files + sandbox_output_files,
|
||||
trace=result.trace,
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
def _get_model_features(self, model_instance: ModelInstance) -> list[ModelFeature]:
|
||||
|
||||
Reference in New Issue
Block a user