from __future__ import annotations

import json
import logging
import mimetypes
import os
import shlex
from types import TracebackType

from core.file import File, FileTransferMethod, FileType
from core.sandbox.sandbox import Sandbox
from core.session.cli_api import CliApiSession, CliApiSessionManager, CliContext
from core.skill.entities import ToolAccessPolicy
from core.skill.entities.tool_dependencies import ToolDependencies
from core.tools.signature import sign_tool_file
from core.tools.tool_file_manager import ToolFileManager
from core.virtual_environment.__base.helpers import pipeline

from ..bash.dify_cli import DifyCliConfig
from ..entities import DifyCli
from .bash_tool import SandboxBashTool

logger = logging.getLogger(__name__)

# Seconds to wait for the sandbox to report ready before giving up (10 minutes).
SANDBOX_READY_TIMEOUT = 60 * 10

# Default output directory for sandbox-generated files
SANDBOX_OUTPUT_DIR = "output"
# Maximum number of files to collect from sandbox output
MAX_OUTPUT_FILES = 50
# Maximum file size to collect (10MB)
MAX_OUTPUT_FILE_SIZE = 10 * 1024 * 1024

# Extension used for collected files whose name carries no usable suffix.
_FALLBACK_EXTENSION = ".bin"


class SandboxBashSession:
    """Context manager that prepares a bash tool for one node inside a sandbox.

    Entering the context waits for the sandbox to be ready, creates a CLI API
    session, optionally sets up a node-specific tools directory, and builds the
    ``SandboxBashTool`` exposed through :attr:`bash_tool`. Leaving the context
    deletes the CLI API session on a best-effort basis.
    """

    def __init__(self, *, sandbox: Sandbox, node_id: str, tools: ToolDependencies | None) -> None:
        """Store the sandbox, node id and tool dependencies; no I/O happens here.

        Args:
            sandbox: Sandbox the session operates on; tenant, user, app and
                assets ids are copied from it.
            node_id: Identifier of the node this session belongs to.
            tools: Tool dependencies for the node, or ``None`` when the node
                declares none.
        """
        self._sandbox = sandbox
        self._node_id = node_id
        self._tools = tools
        self._bash_tool: SandboxBashTool | None = None
        self._cli_api_session: CliApiSession | None = None
        self._tenant_id = sandbox.tenant_id
        self._user_id = sandbox.user_id
        self._app_id = sandbox.app_id
        self._assets_id = sandbox.assets_id

    def __enter__(self) -> SandboxBashSession:
        """Wait for the sandbox, create the CLI API session and build the bash tool.

        Returns:
            This session, with :attr:`bash_tool` ready for use.

        Raises:
            Whatever the sandbox wait, session creation or tools-directory setup
            raises. If a failure happens after the CLI API session was created,
            the session is released before the exception propagates.
        """
        # Ensure sandbox initialization completes before any bash commands run.
        self._sandbox.wait_ready(timeout=SANDBOX_READY_TIMEOUT)

        cli = DifyCli(self._sandbox.id)
        session = CliApiSessionManager().create(
            tenant_id=self._tenant_id,
            user_id=self._user_id,
            context=CliContext(tool_access=ToolAccessPolicy.from_dependencies(self._tools)),
        )
        self._cli_api_session = session

        try:
            if self._tools is not None and not self._tools.is_empty():
                tools_path = self._setup_node_tools_directory(cli, self._node_id, self._tools, session)
            else:
                tools_path = cli.global_tools_path

            self._bash_tool = SandboxBashTool(
                sandbox=self._sandbox.vm,
                tenant_id=self._tenant_id,
                tools_path=tools_path,
            )
        except BaseException:
            # __exit__ is not invoked when __enter__ raises, so the session that
            # was just created must be released here or it would leak.
            self._release_cli_api_session()
            raise

        return self

    def _setup_node_tools_directory(
        self,
        cli: DifyCli,
        node_id: str,
        tools: ToolDependencies,
        cli_api_session: CliApiSession,
    ) -> str:
        """Create the node tools directory, write the CLI config and run ``init``.

        Args:
            cli: Path helper for the Dify CLI inside the sandbox.
            node_id: Node whose tools directory is being prepared.
            tools: Tool dependencies serialized into the CLI config.
            cli_api_session: Session embedded into the CLI config.

        Returns:
            The node tools path inside the sandbox.

        Raises:
            Whatever the pipelines raise when a step fails
            (``raise_on_error=True``).
        """
        node_tools_path = cli.node_tools_path(node_id)
        # Compact json.dumps output (no indent) is a single line, so the heredoc
        # terminator below can never appear on a line of its own in the payload.
        config_json = json.dumps(
            DifyCliConfig.create(session=cli_api_session, tenant_id=self._tenant_id, tool_deps=tools).model_dump(
                mode="json"
            ),
            ensure_ascii=False,
        )
        config_path = shlex.quote(cli.node_config_path(node_id))

        vm = self._sandbox.vm
        # Merge mkdir + config write into a single pipeline to reduce round-trips.
        (
            pipeline(vm)
            .add(["mkdir", "-p", cli.global_tools_path], error_message="Failed to create global tools dir")
            .add(["mkdir", "-p", node_tools_path], error_message="Failed to create node tools dir")
            # Use a quoted heredoc (<<'EOF') so the shell performs no expansion on the
            # content — safe regardless of $, `, \, or quotes inside the JSON.
            .add(
                ["sh", "-c", f"cat > {config_path} << '__DIFY_CFG__'\n{config_json}\n__DIFY_CFG__"],
                error_message="Failed to write CLI config",
            )
            .execute(raise_on_error=True)
        )

        # The CLI is initialized from inside the node tools directory.
        pipeline(vm, cwd=node_tools_path).add(
            [cli.bin_path, "init"], error_message="Failed to initialize Dify CLI"
        ).execute(raise_on_error=True)

        logger.info(
            "Node %s tools initialized, path=%s, tool_count=%d", node_id, node_tools_path, len(tools.references)
        )
        return node_tools_path

    def _release_cli_api_session(self) -> None:
        """Delete the CLI API session if one exists; never raises ``Exception``.

        The stored reference is cleared only after a successful delete; a
        failed delete is logged and otherwise ignored.
        """
        session = self._cli_api_session
        if session is None:
            return
        try:
            CliApiSessionManager().delete(session.id)
        except Exception:
            logger.exception("Failed to cleanup SandboxSession")
            return
        logger.debug("Cleaned up SandboxSession session_id=%s", session.id)
        self._cli_api_session = None

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ) -> bool:
        """Release the CLI API session (best effort).

        Returns:
            Always ``False``, so an exception raised inside the ``with`` body
            is never suppressed.
        """
        self._release_cli_api_session()
        return False

    @property
    def bash_tool(self) -> SandboxBashTool:
        """The bash tool built by ``__enter__``.

        Raises:
            RuntimeError: If the session has not been entered yet.
        """
        if self._bash_tool is None:
            raise RuntimeError("SandboxSession is not initialized")
        return self._bash_tool

    def collect_output_files(self, output_dir: str = SANDBOX_OUTPUT_DIR) -> list[File]:
        """
        Collect files from sandbox output directory and save them as ToolFiles.

        Scans the specified output directory in sandbox, downloads each file,
        saves it as a ToolFile, and returns a list of File objects. The File
        objects will have valid tool_file_id that can be referenced by
        subsequent nodes via structured output.

        Files larger than ``MAX_OUTPUT_FILE_SIZE`` are skipped with a warning,
        and a failure on one file is logged without aborting the others. If the
        directory cannot be listed, an empty list is returned.

        Args:
            output_dir: Directory path in sandbox to scan for output files.
                Defaults to "output" (relative to workspace).

        Returns:
            List of File objects representing the collected files.
        """
        vm = self._sandbox.vm
        collected_files: list[File] = []

        try:
            file_states = vm.list_files(output_dir, limit=MAX_OUTPUT_FILES)
        except Exception as exc:
            # Output directory may not exist if no files were generated
            logger.debug("Failed to list sandbox output files in %s: %s", output_dir, exc)
            return collected_files

        tool_file_manager = ToolFileManager()

        for file_state in file_states:
            # Skip files that are too large
            if file_state.size > MAX_OUTPUT_FILE_SIZE:
                logger.warning(
                    "Skipping sandbox output file %s: size %d exceeds limit %d",
                    file_state.path,
                    file_state.size,
                    MAX_OUTPUT_FILE_SIZE,
                )
                continue

            try:
                # file_state.path is already relative to working_path (e.g., "output/file.png")
                file_content = vm.download_file(file_state.path)
                file_binary = file_content.getvalue()

                # Determine mime type from extension
                filename = os.path.basename(file_state.path)
                mime_type, _ = mimetypes.guess_type(filename)
                if not mime_type:
                    mime_type = "application/octet-stream"

                # Save as ToolFile
                tool_file = tool_file_manager.create_file_by_raw(
                    user_id=self._user_id,
                    tenant_id=self._tenant_id,
                    conversation_id=None,
                    file_binary=file_binary,
                    mimetype=mime_type,
                    filename=filename,
                )

                # Determine file type from mime type
                file_type = _get_file_type_from_mime(mime_type)
                # Dotfiles and names ending in "." have no usable suffix; fall back
                # so the signed URL and the File always carry a real extension.
                extension = _output_file_extension(filename)
                url = sign_tool_file(tool_file.id, extension)

                # Create File object with tool_file_id as related_id
                file_obj = File(
                    id=tool_file.id,  # Use tool_file_id as the File id for easy reference
                    tenant_id=self._tenant_id,
                    type=file_type,
                    transfer_method=FileTransferMethod.TOOL_FILE,
                    filename=filename,
                    extension=extension,
                    mime_type=mime_type,
                    size=len(file_binary),
                    related_id=tool_file.id,
                    url=url,
                    storage_key=tool_file.file_key,
                )
                collected_files.append(file_obj)

                logger.info(
                    "Collected sandbox output file: %s -> tool_file_id=%s",
                    file_state.path,
                    tool_file.id,
                )

            except Exception as exc:
                # Best effort: one unreadable or unsavable file must not discard the rest.
                logger.warning("Failed to collect sandbox output file %s: %s", file_state.path, exc)
                continue

        logger.info(
            "Collected %d files from sandbox output directory %s",
            len(collected_files),
            output_dir,
        )
        return collected_files


def _output_file_extension(filename: str) -> str:
    """Return the suffix of *filename* (with leading dot), or ``.bin`` if it has none.

    ``os.path.splitext`` yields ``""`` for names such as ``Makefile`` and
    ``.bashrc`` and ``"."`` for names such as ``report.``; all of these fall
    back to ``.bin``.
    """
    extension = os.path.splitext(filename)[1]
    return extension if len(extension) > 1 else _FALLBACK_EXTENSION


def _get_file_type_from_mime(mime_type: str) -> FileType:
    """Determine FileType from mime type.

    ``image/``, ``video/`` and ``audio/`` prefixes map to their matching
    types; any mime type containing ``text`` or ``pdf`` maps to DOCUMENT;
    everything else is CUSTOM.
    """
    if mime_type.startswith("image/"):
        return FileType.IMAGE
    if mime_type.startswith("video/"):
        return FileType.VIDEO
    if mime_type.startswith("audio/"):
        return FileType.AUDIO
    if "text" in mime_type or "pdf" in mime_type:
        return FileType.DOCUMENT
    return FileType.CUSTOM