feat(sandbox): restructure file handling by introducing a new inspector module with runtime and archive sources

This commit is contained in:
Harry
2026-01-27 18:57:46 +08:00
parent 951af125af
commit 506163ab2d
8 changed files with 561 additions and 464 deletions

View File

@ -1,462 +0,0 @@
from __future__ import annotations
import abc
import json
import logging
import os
import tempfile
from pathlib import Path, PurePosixPath
from uuid import UUID, uuid4
from core.sandbox.entities.files import SandboxFileDownloadTicket, SandboxFileNode
from core.sandbox.manager import SandboxManager
from core.sandbox.security.archive_signer import SandboxArchivePath
from core.sandbox.security.sandbox_file_signer import SandboxFileDownloadPath
from core.sandbox.storage import sandbox_file_storage
from core.virtual_environment.__base.exec import CommandExecutionError
from core.virtual_environment.__base.helpers import execute
from core.virtual_environment.__base.virtual_environment import VirtualEnvironment
from extensions.ext_storage import storage
logger = logging.getLogger(__name__)
class SandboxFileSource(abc.ABC):
    """Abstract backend for browsing and exporting files of a sandbox workspace."""

    # Timeout (seconds) for quick metadata commands (listing, stat checks).
    _LIST_TIMEOUT_SECONDS = 30
    # Timeout (seconds) for archive/upload commands.
    _UPLOAD_TIMEOUT_SECONDS = 60
    # Lifetime of pre-signed export URLs: 5 minutes.
    _EXPORT_EXPIRES_IN_SECONDS = 60 * 5

    def __init__(self, *, tenant_id: str, sandbox_id: str):
        # Both ids are string-form UUIDs; subclasses convert them with UUID().
        self._tenant_id = tenant_id
        self._sandbox_id = sandbox_id

    @abc.abstractmethod
    def list_files(self, *, path: str, recursive: bool) -> list[SandboxFileNode]:
        """Return metadata for entries under *path* (workspace-relative, "." = root)."""
        raise NotImplementedError

    @abc.abstractmethod
    def download_file(self, *, path: str) -> SandboxFileDownloadTicket:
        """Export *path* and return a pre-signed download ticket."""
        raise NotImplementedError
class SandboxFileRuntimeSource(SandboxFileSource):
    """File source backed by a live sandbox runtime.

    Listing and path-kind detection run small Python scripts inside the
    sandbox; downloads are pushed from the sandbox to object storage via
    `curl` against a pre-signed upload URL.
    """

    def __init__(self, *, tenant_id: str, sandbox_id: str, runtime: VirtualEnvironment):
        super().__init__(tenant_id=tenant_id, sandbox_id=sandbox_id)
        # Live execution handle for the running sandbox.
        self._runtime = runtime

    def list_files(self, *, path: str, recursive: bool) -> list[SandboxFileNode]:
        """List entries under *path* inside the running sandbox.

        Raises:
            RuntimeError: if the in-sandbox command fails or its JSON output
                cannot be parsed.
        """
        # NOTE(review): this raw string is parsed by the *sandbox* interpreter.
        # "\\\\" below reaches the sandbox as two backslashes, so norm()
        # replaces a double backslash rather than a single one; and
        # lstrip("./") strips a character set, which would rename hidden
        # files (".env" -> "env"). Both look unintended — confirm.
        script = r"""
import json
import os
import sys
path = sys.argv[1]
recursive = sys.argv[2] == "1"
def norm(rel: str) -> str:
    rel = rel.replace("\\\\", "/")
    rel = rel.lstrip("./")
    return rel or "."
def stat_entry(full_path: str, rel_path: str) -> dict:
    st = os.stat(full_path)
    is_dir = os.path.isdir(full_path)
    return {
        "path": norm(rel_path),
        "is_dir": is_dir,
        "size": None if is_dir else int(st.st_size),
        "mtime": int(st.st_mtime),
    }
entries = []
if recursive:
    for root, dirs, files in os.walk(path):
        for d in dirs:
            fp = os.path.join(root, d)
            rp = os.path.relpath(fp, ".")
            entries.append(stat_entry(fp, rp))
        for f in files:
            fp = os.path.join(root, f)
            rp = os.path.relpath(fp, ".")
            entries.append(stat_entry(fp, rp))
else:
    if os.path.isfile(path):
        rel_path = os.path.relpath(path, ".")
        entries.append(stat_entry(path, rel_path))
    else:
        for item in os.scandir(path):
            rel_path = os.path.relpath(item.path, ".")
            entries.append(stat_entry(item.path, rel_path))
print(json.dumps(entries))
"""
        try:
            # Prefer python3, fall back to python; the script text is passed
            # to the interpreter as $0 and the real arguments follow.
            result = execute(
                self._runtime,
                [
                    "sh",
                    "-c",
                    'if command -v python3 >/dev/null 2>&1; then py=python3; else py=python; fi; "$py" -c "$0" "$@"',
                    script,
                    path,
                    "1" if recursive else "0",
                ],
                timeout=self._LIST_TIMEOUT_SECONDS,
                error_message="Failed to list sandbox files",
            )
        except CommandExecutionError as exc:
            raise RuntimeError(str(exc)) from exc
        try:
            raw = json.loads(result.stdout.decode("utf-8"))
        except Exception as exc:
            raise RuntimeError("Malformed sandbox file list output") from exc
        entries: list[SandboxFileNode] = []
        for item in raw:
            item_path = str(item.get("path"))
            item_is_dir = bool(item.get("is_dir"))
            extension = None
            if not item_is_dir:
                # Directories carry no extension; an empty suffix becomes None.
                ext = os.path.splitext(item_path)[1]
                extension = ext or None
            entries.append(
                SandboxFileNode(
                    path=item_path,
                    is_dir=item_is_dir,
                    size=item.get("size"),
                    mtime=item.get("mtime"),
                    extension=extension,
                )
            )
        return entries

    def download_file(self, *, path: str) -> SandboxFileDownloadTicket:
        """Export *path* from the sandbox and return a download ticket.

        Directories are tar.gz-archived inside the sandbox before upload;
        single files are uploaded as-is.

        Raises:
            ValueError: if *path* does not exist in the sandbox.
            RuntimeError: if archiving or the upload fails.
        """
        kind = self._detect_path_kind(path)
        export_name = os.path.basename(path.rstrip("/")) or "workspace"
        filename = f"{export_name}.tar.gz" if kind == "dir" else (os.path.basename(path) or "file")
        export_id = uuid4().hex
        export_path = SandboxFileDownloadPath(
            tenant_id=UUID(self._tenant_id),
            sandbox_id=UUID(self._sandbox_id),
            export_id=export_id,
            filename=filename,
        )
        # Pre-signed URL the sandbox itself PUTs the content to.
        upload_url = sandbox_file_storage.get_upload_url(export_path, expires_in=self._EXPORT_EXPIRES_IN_SECONDS)
        if kind == "dir":
            archive_path = f"/tmp/{export_id}.tar.gz"
            try:
                execute(
                    self._runtime,
                    ["tar", "-czf", archive_path, "-C", ".", path],
                    timeout=self._UPLOAD_TIMEOUT_SECONDS,
                    error_message="Failed to archive directory in sandbox",
                )
                execute(
                    self._runtime,
                    ["curl", "-s", "-f", "-X", "PUT", "-T", archive_path, upload_url],
                    timeout=self._UPLOAD_TIMEOUT_SECONDS,
                    error_message="Failed to upload directory archive from sandbox",
                )
            except CommandExecutionError as exc:
                raise RuntimeError(str(exc)) from exc
            finally:
                try:
                    execute(
                        self._runtime,
                        ["rm", "-f", archive_path],
                        timeout=self._LIST_TIMEOUT_SECONDS,
                        error_message="Failed to cleanup temp archive",
                    )
                except Exception as exc:
                    # Best-effort cleanup; do not fail the download on cleanup issues.
                    logger.debug("Failed to cleanup temp archive %s: %s", archive_path, exc)
        else:
            try:
                execute(
                    self._runtime,
                    ["curl", "-s", "-f", "-X", "PUT", "-T", path, upload_url],
                    timeout=self._UPLOAD_TIMEOUT_SECONDS,
                    error_message="Failed to upload file from sandbox",
                )
            except CommandExecutionError as exc:
                raise RuntimeError(str(exc)) from exc
        download_url = sandbox_file_storage.get_download_url(export_path, expires_in=self._EXPORT_EXPIRES_IN_SECONDS)
        return SandboxFileDownloadTicket(
            download_url=download_url,
            expires_in=self._EXPORT_EXPIRES_IN_SECONDS,
            export_id=export_id,
        )

    def _detect_path_kind(self, path: str) -> str:
        """Return "dir" or "file" for *path* in the sandbox.

        Raises:
            ValueError: if the path does not exist or the check fails.
        """
        script = r"""
import os
import sys
p = sys.argv[1]
if os.path.isdir(p):
    print("dir")
    raise SystemExit(0)
if os.path.isfile(p):
    print("file")
    raise SystemExit(0)
print("none")
raise SystemExit(2)
"""
        try:
            result = execute(
                self._runtime,
                [
                    "sh",
                    "-c",
                    'if command -v python3 >/dev/null 2>&1; then py=python3; else py=python; fi; "$py" -c "$0" "$@"',
                    script,
                    path,
                ],
                timeout=self._LIST_TIMEOUT_SECONDS,
                error_message="Failed to check path in sandbox",
            )
        except CommandExecutionError as exc:
            raise ValueError(str(exc)) from exc
        kind = result.stdout.decode("utf-8", errors="replace").strip()
        if kind not in ("dir", "file"):
            raise ValueError("File not found in sandbox")
        return kind
class SandboxFileArchiveSource(SandboxFileSource):
    """File source for stopped sandboxes, reading the persisted workspace archive.

    The tar.gz archive is downloaded to a temporary directory and inspected
    with `tarfile`; nothing is executed in a sandbox.
    """

    def list_files(self, *, path: str, recursive: bool) -> list[SandboxFileNode]:
        """List entries under *path* by scanning the archive's member table.

        Raises:
            ValueError: if no archive exists for this sandbox.
        """
        import tarfile
        archive_path = SandboxArchivePath(tenant_id=UUID(self._tenant_id), sandbox_id=UUID(self._sandbox_id))
        storage_key = archive_path.get_storage_key()
        if not storage.exists(storage_key):
            raise ValueError("Sandbox archive not found")
        with tempfile.TemporaryDirectory(prefix="dify-sandbox-archive-") as tmpdir:
            local_archive = os.path.join(tmpdir, "workspace.tar.gz")
            storage.download(storage_key, local_archive)
            entries_by_path: dict[str, SandboxFileNode] = {}

            def add_dir(dir_path: str) -> None:
                # Synthesize a directory node for dirs that appear only
                # implicitly (as a path prefix of some member).
                if dir_path in ("", "."):
                    return
                if dir_path not in entries_by_path:
                    entries_by_path[dir_path] = SandboxFileNode(
                        path=dir_path, is_dir=True, size=None, mtime=None, extension=None
                    )

            def clean(member_name: str) -> str:
                # NOTE(review): lstrip("./") strips a character set, so a
                # top-level hidden member like ".env" becomes "env" — confirm.
                name = member_name.lstrip("./")
                return name.rstrip("/")

            target_prefix = "" if path in (".", "") else f"{path}/"
            with tarfile.open(local_archive, mode="r:gz") as tf:
                for m in tf.getmembers():
                    mp = clean(m.name)
                    if mp in ("", "."):
                        continue
                    if not recursive:
                        # Non-recursive: keep only direct children of *path*;
                        # deeper members contribute an implied child directory.
                        if path in (".", ""):
                            if "/" in mp:
                                add_dir(mp.split("/", 1)[0])
                                continue
                        else:
                            if not mp.startswith(target_prefix):
                                continue
                            rest = mp[len(target_prefix) :]
                            if rest == "":
                                continue
                            if "/" in rest:
                                add_dir(f"{path}/{rest.split('/', 1)[0]}")
                                continue
                    else:
                        # Recursive: keep members at or under *path* and
                        # materialize every ancestor directory up to *path*.
                        if path not in (".", "") and not (mp == path or mp.startswith(target_prefix)):
                            continue
                        parent = os.path.dirname(mp)
                        while parent not in ("", "."):
                            if path not in (".", "") and parent == path:
                                break
                            add_dir(parent)
                            parent = os.path.dirname(parent)
                    is_dir = m.isdir()
                    extension = None
                    if not is_dir:
                        ext = os.path.splitext(mp)[1]
                        extension = ext or None
                    entries_by_path[mp] = SandboxFileNode(
                        path=mp,
                        is_dir=is_dir,
                        size=None if is_dir else int(m.size),
                        mtime=int(m.mtime) if m.mtime else None,
                        extension=extension,
                    )
        return sorted(entries_by_path.values(), key=lambda e: e.path)

    def download_file(self, *, path: str) -> SandboxFileDownloadTicket:
        """Export *path* from the archive and return a download ticket.

        Files are extracted and stored directly; directories are re-packed
        into a fresh tar.gz containing only the requested subtree.

        Raises:
            ValueError: if the archive or the requested path is missing.
        """
        import tarfile
        archive_path = SandboxArchivePath(tenant_id=UUID(self._tenant_id), sandbox_id=UUID(self._sandbox_id))
        storage_key = archive_path.get_storage_key()
        if not storage.exists(storage_key):
            raise ValueError("Sandbox archive not found")
        export_name = os.path.basename(path.rstrip("/")) or "workspace"
        export_id = uuid4().hex
        # Decide file vs directory inside archive.
        is_dir_request = path in (".", "")
        with tempfile.TemporaryDirectory(prefix="dify-sandbox-archive-") as tmpdir:
            local_archive = os.path.join(tmpdir, "workspace.tar.gz")
            storage.download(storage_key, local_archive)
            with tarfile.open(local_archive, mode="r:gz") as tf:
                member_name = path.lstrip("./").rstrip("/")
                if not is_dir_request:
                    # If it is an explicit file in archive, treat as file download.
                    member = None
                    try:
                        member = tf.getmember(member_name)
                    except KeyError:
                        try:
                            # Members may be stored with a "./" prefix.
                            member = tf.getmember(f"./{member_name}")
                        except KeyError:
                            member = None
                    if member is not None and not member.isdir():
                        export_path = SandboxFileDownloadPath(
                            tenant_id=UUID(self._tenant_id),
                            sandbox_id=UUID(self._sandbox_id),
                            export_id=export_id,
                            filename=os.path.basename(member_name) or "file",
                        )
                        extracted = tf.extractfile(member)
                        if extracted is None:
                            raise ValueError("File not found in sandbox archive")
                        sandbox_file_storage.save(export_path, extracted.read())
                        download_url = sandbox_file_storage.get_download_url(
                            export_path, expires_in=self._EXPORT_EXPIRES_IN_SECONDS
                        )
                        return SandboxFileDownloadTicket(
                            download_url=download_url,
                            expires_in=self._EXPORT_EXPIRES_IN_SECONDS,
                            export_id=export_id,
                        )
                    # Otherwise treat as directory (implied dir is common in tar).
                    is_dir_request = True
                if is_dir_request:
                    export_path = SandboxFileDownloadPath(
                        tenant_id=UUID(self._tenant_id),
                        sandbox_id=UUID(self._sandbox_id),
                        export_id=export_id,
                        filename=f"{export_name}.tar.gz",
                    )
                    export_local = os.path.join(tmpdir, "export.tar.gz")
                    prefix = "" if member_name in (".", "") else f"{member_name}/"
                    # First pass: verify at least one member lives under the
                    # requested directory before creating the export archive.
                    found_any = False
                    for m in tf.getmembers():
                        src_name = m.name.lstrip("./").rstrip("/")
                        if member_name not in (".", ""):
                            if src_name != member_name and not src_name.startswith(prefix):
                                continue
                        found_any = True
                        break
                    if not found_any:
                        raise ValueError("File not found in sandbox archive")
                    with tarfile.open(export_local, mode="w:gz") as out:
                        if member_name not in (".", ""):
                            # Ensure the root directory entry exists even when
                            # the source archive only implied it.
                            dir_info = tarfile.TarInfo(name=member_name)
                            dir_info.type = tarfile.DIRTYPE
                            dir_info.size = 0
                            out.addfile(dir_info)
                        for m in tf.getmembers():
                            src_name = m.name.lstrip("./")
                            if member_name not in (".", ""):
                                if src_name != member_name and not src_name.startswith(prefix):
                                    continue
                            # Copy member metadata into a fresh TarInfo with a
                            # normalized name (no "./" prefix, no trailing "/").
                            ti = tarfile.TarInfo(name=src_name.rstrip("/"))
                            ti.mode = m.mode
                            ti.mtime = m.mtime
                            ti.uid = m.uid
                            ti.gid = m.gid
                            ti.uname = m.uname
                            ti.gname = m.gname
                            if m.isdir():
                                ti.type = tarfile.DIRTYPE
                                ti.size = 0
                                out.addfile(ti)
                                continue
                            extracted = tf.extractfile(m)
                            if extracted is None:
                                # Non-regular members (links, devices) are skipped.
                                continue
                            ti.size = int(m.size)
                            out.addfile(ti, fileobj=extracted)
                    sandbox_file_storage.save(export_path, Path(export_local).read_bytes())
                    download_url = sandbox_file_storage.get_download_url(
                        export_path, expires_in=self._EXPORT_EXPIRES_IN_SECONDS
                    )
                    return SandboxFileDownloadTicket(
                        download_url=download_url,
                        expires_in=self._EXPORT_EXPIRES_IN_SECONDS,
                        export_id=export_id,
                    )
        raise ValueError("File not found in sandbox archive")
class SandboxFileBrowser:
    """Facade for browsing sandbox files; selects a runtime or archive backend."""

    def __init__(self, *, tenant_id: str, sandbox_id: str):
        self._tenant_id = tenant_id
        self._sandbox_id = sandbox_id

    @staticmethod
    def _normalize_workspace_path(path: str | None) -> str:
        """Normalize *path* into a safe workspace-relative POSIX path.

        Returns "." for the workspace root.

        Raises:
            ValueError: if the path is absolute or contains a ".." segment.
        """
        raw = (path or ".").strip()
        if raw == "":
            raw = "."
        p = PurePosixPath(raw)
        if p.is_absolute():
            raise ValueError("path must be relative")
        if any(part == ".." for part in p.parts):
            raise ValueError("path must not contain '..'")
        normalized = str(p)
        return "." if normalized in (".", "") else normalized

    def _backend(self) -> SandboxFileSource:
        # Prefer the live runtime; fall back to the persisted archive when the
        # sandbox is not currently running.
        runtime = SandboxManager.get(self._sandbox_id)
        if runtime is not None:
            return SandboxFileRuntimeSource(tenant_id=self._tenant_id, sandbox_id=self._sandbox_id, runtime=runtime)
        return SandboxFileArchiveSource(tenant_id=self._tenant_id, sandbox_id=self._sandbox_id)

    def list_files(self, *, path: str | None = None, recursive: bool = False) -> list[SandboxFileNode]:
        """List entries under *path* (defaults to the workspace root)."""
        workspace_path = self._normalize_workspace_path(path)
        return self._backend().list_files(path=workspace_path, recursive=recursive)

    def download_file(self, *, path: str) -> SandboxFileDownloadTicket:
        """Create a download ticket for *path*."""
        workspace_path = self._normalize_workspace_path(path)
        return self._backend().download_file(path=workspace_path)

View File

@ -0,0 +1,11 @@
"""Sandbox file inspector package.

Re-exports the browser facade and its runtime/archive backends.
"""

from core.sandbox.inspector.archive_source import SandboxFileArchiveSource
from core.sandbox.inspector.base import SandboxFileSource
from core.sandbox.inspector.browser import SandboxFileBrowser
from core.sandbox.inspector.runtime_source import SandboxFileRuntimeSource

__all__ = [
    "SandboxFileArchiveSource",
    "SandboxFileBrowser",
    "SandboxFileRuntimeSource",
    "SandboxFileSource",
]

View File

@ -0,0 +1,218 @@
from __future__ import annotations
import json
import os
from typing import TYPE_CHECKING
from uuid import UUID, uuid4
from core.sandbox.entities.files import SandboxFileDownloadTicket, SandboxFileNode
from core.sandbox.inspector.base import SandboxFileSource
from core.sandbox.security.archive_signer import SandboxArchivePath, SandboxArchiveSigner
from core.sandbox.security.sandbox_file_signer import SandboxFileDownloadPath
from core.sandbox.storage import sandbox_file_storage
from core.virtual_environment.__base.exec import CommandExecutionError
from core.virtual_environment.__base.helpers import execute
from extensions.ext_storage import storage
if TYPE_CHECKING:
from core.zip_sandbox import ZipSandbox
class SandboxFileArchiveSource(SandboxFileSource):
    """File source for stopped sandboxes, backed by the persisted workspace archive.

    The archive is not unpacked locally; a helper ZipSandbox VM downloads and
    extracts it, and listing/export commands then run inside that VM.
    """

    # Shell snippet: prefer python3, fall back to python, then run the script
    # passed as $0 with the remaining positional arguments.
    _PYTHON_EXEC_CMD = 'if command -v python3 >/dev/null 2>&1; then py=python3; else py=python; fi; "$py" -c "$0" "$@"'
    # In-sandbox listing script; prints a JSON array of entry dicts.
    # NOTE(review): norm() uses lstrip("./"), which strips a character set and
    # can rename hidden files (".env" -> "env") — confirm this is acceptable.
    _LIST_SCRIPT = r"""
import json
import os
import sys
path = sys.argv[1]
recursive = sys.argv[2] == "1"
def norm(rel: str) -> str:
    rel = rel.replace("\\", "/")
    rel = rel.lstrip("./")
    return rel or "."
def stat_entry(full_path: str, rel_path: str) -> dict:
    st = os.stat(full_path)
    is_dir = os.path.isdir(full_path)
    return {
        "path": norm(rel_path),
        "is_dir": is_dir,
        "size": None if is_dir else int(st.st_size),
        "mtime": int(st.st_mtime),
    }
entries = []
if recursive:
    for root, dirs, files in os.walk(path):
        for d in dirs:
            fp = os.path.join(root, d)
            rp = os.path.relpath(fp, ".")
            entries.append(stat_entry(fp, rp))
        for f in files:
            fp = os.path.join(root, f)
            rp = os.path.relpath(fp, ".")
            entries.append(stat_entry(fp, rp))
else:
    if os.path.isfile(path):
        rel_path = os.path.relpath(path, ".")
        entries.append(stat_entry(path, rel_path))
    else:
        for item in os.scandir(path):
            rel_path = os.path.relpath(item.path, ".")
            entries.append(stat_entry(item.path, rel_path))
print(json.dumps(entries))
"""

    def _get_archive_download_url(self) -> str:
        """Get a pre-signed download URL for the sandbox archive.

        Raises:
            ValueError: if no archive exists for this sandbox.
        """
        archive_path = SandboxArchivePath(tenant_id=UUID(self._tenant_id), sandbox_id=UUID(self._sandbox_id))
        storage_key = archive_path.get_storage_key()
        if not storage.exists(storage_key):
            raise ValueError("Sandbox archive not found")
        return SandboxArchiveSigner.build_signed_url(
            archive_path=archive_path,
            expires_in=self._EXPORT_EXPIRES_IN_SECONDS,
            action=SandboxArchiveSigner.OPERATION_DOWNLOAD,
        )

    def _create_zip_sandbox(self) -> ZipSandbox:
        """Create a ZipSandbox instance for archive operations."""
        # Runtime import: at module level ZipSandbox is imported only under
        # TYPE_CHECKING.
        from core.zip_sandbox import ZipSandbox
        return ZipSandbox(tenant_id=self._tenant_id, user_id="system", app_id="sandbox-archive-browser")

    def list_files(self, *, path: str, recursive: bool) -> list[SandboxFileNode]:
        """List entries under *path* by extracting the archive into a helper VM.

        Raises:
            ValueError: if no archive exists for this sandbox.
            RuntimeError: if the in-sandbox listing fails or prints malformed JSON.
        """
        archive_url = self._get_archive_download_url()
        with self._create_zip_sandbox() as zs:
            # Download and extract the archive into "workspace/".
            archive_path = zs.download_archive(archive_url, path="workspace.tar.gz")
            zs.untar(archive_path=archive_path, dest_dir="workspace")
            # List files using the Python script run inside the sandbox.
            try:
                result = execute(
                    zs.vm,
                    [
                        "sh",
                        "-c",
                        self._PYTHON_EXEC_CMD,
                        self._LIST_SCRIPT,
                        f"workspace/{path}" if path not in (".", "") else "workspace",
                        "1" if recursive else "0",
                    ],
                    timeout=self._LIST_TIMEOUT_SECONDS,
                    error_message="Failed to list sandbox files",
                )
            except CommandExecutionError as exc:
                raise RuntimeError(str(exc)) from exc
            try:
                raw = json.loads(result.stdout.decode("utf-8"))
            except Exception as exc:
                raise RuntimeError("Malformed sandbox file list output") from exc
            entries: list[SandboxFileNode] = []
            for item in raw:
                item_path = str(item.get("path"))
                # Strip the "workspace/" prefix from paths
                if item_path.startswith("workspace/"):
                    item_path = item_path[len("workspace/") :]
                elif item_path == "workspace":
                    continue  # Skip the workspace directory itself
                item_is_dir = bool(item.get("is_dir"))
                extension = None
                if not item_is_dir:
                    ext = os.path.splitext(item_path)[1]
                    extension = ext or None
                entries.append(
                    SandboxFileNode(
                        path=item_path,
                        is_dir=item_is_dir,
                        size=item.get("size"),
                        mtime=item.get("mtime"),
                        extension=extension,
                    )
                )
            return sorted(entries, key=lambda e: e.path)

    def download_file(self, *, path: str) -> SandboxFileDownloadTicket:
        """Export *path* from the archive and return a download ticket.

        Raises:
            ValueError: if the archive or the requested path is missing.
        """
        archive_url = self._get_archive_download_url()
        export_name = os.path.basename(path.rstrip("/")) or "workspace"
        export_id = uuid4().hex
        with self._create_zip_sandbox() as zs:
            # Download and extract the archive into "workspace/".
            archive_path = zs.download_archive(archive_url, path="workspace.tar.gz")
            zs.untar(archive_path=archive_path, dest_dir="workspace")
            # Determine the target path inside extracted workspace
            target_path = f"workspace/{path}" if path not in (".", "") else "workspace"
            # Detect if target is file or directory
            detect_script = r"""
import os
import sys
p = sys.argv[1]
if os.path.isdir(p):
    print("dir")
    raise SystemExit(0)
if os.path.isfile(p):
    print("file")
    raise SystemExit(0)
print("none")
raise SystemExit(2)
"""
            try:
                result = execute(
                    zs.vm,
                    [
                        "sh",
                        "-c",
                        self._PYTHON_EXEC_CMD,
                        detect_script,
                        target_path,
                    ],
                    timeout=self._LIST_TIMEOUT_SECONDS,
                    error_message="Failed to check path in sandbox",
                )
            except CommandExecutionError as exc:
                raise ValueError(str(exc)) from exc
            kind = result.stdout.decode("utf-8", errors="replace").strip()
            if kind not in ("dir", "file"):
                raise ValueError("File not found in sandbox archive")
            if kind == "file":
                # Download file content from sandbox
                file_data = zs.read_file(target_path)
                export_path = SandboxFileDownloadPath(
                    tenant_id=UUID(self._tenant_id),
                    sandbox_id=UUID(self._sandbox_id),
                    export_id=export_id,
                    filename=os.path.basename(path) or "file",
                )
                sandbox_file_storage.save(export_path, file_data)
            else:
                # Create tar.gz archive of the directory.
                # NOTE(review): include_base=True prefixes entries with the
                # base directory name — confirm parity with the runtime
                # source's export layout is intended.
                tar_file = zs.tar(target_path, include_base=True, compress=True)
                tar_data = zs.read_file(tar_file.path)
                export_path = SandboxFileDownloadPath(
                    tenant_id=UUID(self._tenant_id),
                    sandbox_id=UUID(self._sandbox_id),
                    export_id=export_id,
                    filename=f"{export_name}.tar.gz",
                )
                sandbox_file_storage.save(export_path, tar_data)
            download_url = sandbox_file_storage.get_download_url(export_path, expires_in=self._EXPORT_EXPIRES_IN_SECONDS)
            return SandboxFileDownloadTicket(
                download_url=download_url,
                expires_in=self._EXPORT_EXPIRES_IN_SECONDS,
                export_id=export_id,
            )

View File

@ -0,0 +1,23 @@
from __future__ import annotations
import abc
from core.sandbox.entities.files import SandboxFileDownloadTicket, SandboxFileNode
class SandboxFileSource(abc.ABC):
    """Abstract backend for browsing and exporting files of a sandbox workspace."""

    # Timeout (seconds) for quick metadata commands (listing, stat checks).
    _LIST_TIMEOUT_SECONDS = 30
    # Timeout (seconds) for archive/upload commands: 10 minutes.
    _UPLOAD_TIMEOUT_SECONDS = 60 * 10
    # Lifetime of pre-signed export URLs: 10 minutes.
    _EXPORT_EXPIRES_IN_SECONDS = 60 * 10

    def __init__(self, *, tenant_id: str, sandbox_id: str):
        # Both ids are string-form UUIDs; subclasses convert them with UUID().
        self._tenant_id = tenant_id
        self._sandbox_id = sandbox_id

    @abc.abstractmethod
    def list_files(self, *, path: str, recursive: bool) -> list[SandboxFileNode]:
        """Return metadata for entries under *path* (workspace-relative, "." = root)."""
        raise NotImplementedError

    @abc.abstractmethod
    def download_file(self, *, path: str) -> SandboxFileDownloadTicket:
        """Export *path* and return a pre-signed download ticket."""
        raise NotImplementedError

View File

@ -0,0 +1,44 @@
from __future__ import annotations
from pathlib import PurePosixPath
from core.sandbox.entities.files import SandboxFileDownloadTicket, SandboxFileNode
from core.sandbox.inspector.archive_source import SandboxFileArchiveSource
from core.sandbox.inspector.base import SandboxFileSource
from core.sandbox.inspector.runtime_source import SandboxFileRuntimeSource
from core.sandbox.manager import SandboxManager
class SandboxFileBrowser:
    """Facade that serves file operations from a live-runtime or archive backend."""

    def __init__(self, *, tenant_id: str, sandbox_id: str):
        self._tenant_id = tenant_id
        self._sandbox_id = sandbox_id

    @staticmethod
    def _normalize_workspace_path(path: str | None) -> str:
        """Normalize *path* to a safe workspace-relative POSIX path ("." = root).

        Raises:
            ValueError: if the path is absolute or contains a ".." segment.
        """
        candidate = (path or ".").strip() or "."
        posix = PurePosixPath(candidate)
        if posix.is_absolute():
            raise ValueError("path must be relative")
        if ".." in posix.parts:
            raise ValueError("path must not contain '..'")
        result = str(posix)
        if result in (".", ""):
            return "."
        return result

    def _backend(self) -> SandboxFileSource:
        """Pick the live runtime when the sandbox is running, else the archive."""
        runtime = SandboxManager.get(self._sandbox_id)
        if runtime is None:
            return SandboxFileArchiveSource(tenant_id=self._tenant_id, sandbox_id=self._sandbox_id)
        return SandboxFileRuntimeSource(tenant_id=self._tenant_id, sandbox_id=self._sandbox_id, runtime=runtime)

    def list_files(self, *, path: str | None = None, recursive: bool = False) -> list[SandboxFileNode]:
        """List entries under *path* (workspace root by default)."""
        return self._backend().list_files(path=self._normalize_workspace_path(path), recursive=recursive)

    def download_file(self, *, path: str) -> SandboxFileDownloadTicket:
        """Create a pre-signed download ticket for *path*."""
        return self._backend().download_file(path=self._normalize_workspace_path(path))

View File

@ -0,0 +1,208 @@
from __future__ import annotations
import json
import logging
import os
from uuid import UUID, uuid4
from core.sandbox.entities.files import SandboxFileDownloadTicket, SandboxFileNode
from core.sandbox.inspector.base import SandboxFileSource
from core.sandbox.security.sandbox_file_signer import SandboxFileDownloadPath
from core.sandbox.storage import sandbox_file_storage
from core.virtual_environment.__base.exec import CommandExecutionError
from core.virtual_environment.__base.helpers import execute
from core.virtual_environment.__base.virtual_environment import VirtualEnvironment
logger = logging.getLogger(__name__)
class SandboxFileRuntimeSource(SandboxFileSource):
    """File source backed by a live sandbox runtime.

    Listing and path-kind detection run small Python scripts inside the
    sandbox; downloads are pushed from the sandbox to object storage via
    `curl` against a pre-signed upload URL.
    """

    def __init__(self, *, tenant_id: str, sandbox_id: str, runtime: VirtualEnvironment):
        super().__init__(tenant_id=tenant_id, sandbox_id=sandbox_id)
        # Live execution handle for the running sandbox.
        self._runtime = runtime

    def list_files(self, *, path: str, recursive: bool) -> list[SandboxFileNode]:
        """List entries under *path* inside the running sandbox.

        Raises:
            RuntimeError: if the in-sandbox command fails or its JSON output
                cannot be parsed.
        """
        # The raw string below is parsed by the *sandbox* interpreter, so
        # escapes are written as that interpreter should see them: "\\" here
        # reaches the sandbox as a single backslash (this matches the
        # archive-source listing script; the previous "\\\\" replaced a
        # double backslash and never fired on normal paths).
        script = r"""
import json
import os
import sys
path = sys.argv[1]
recursive = sys.argv[2] == "1"
def norm(rel: str) -> str:
    rel = rel.replace("\\", "/")
    # Drop leading "./" segments. str.lstrip("./") is wrong here: it strips
    # a character set and would rename hidden files (".env" -> "env").
    while rel.startswith("./"):
        rel = rel[2:]
    return rel or "."
def stat_entry(full_path: str, rel_path: str) -> dict:
    st = os.stat(full_path)
    is_dir = os.path.isdir(full_path)
    return {
        "path": norm(rel_path),
        "is_dir": is_dir,
        "size": None if is_dir else int(st.st_size),
        "mtime": int(st.st_mtime),
    }
entries = []
if recursive:
    for root, dirs, files in os.walk(path):
        for d in dirs:
            fp = os.path.join(root, d)
            rp = os.path.relpath(fp, ".")
            entries.append(stat_entry(fp, rp))
        for f in files:
            fp = os.path.join(root, f)
            rp = os.path.relpath(fp, ".")
            entries.append(stat_entry(fp, rp))
else:
    if os.path.isfile(path):
        rel_path = os.path.relpath(path, ".")
        entries.append(stat_entry(path, rel_path))
    else:
        for item in os.scandir(path):
            rel_path = os.path.relpath(item.path, ".")
            entries.append(stat_entry(item.path, rel_path))
print(json.dumps(entries))
"""
        try:
            # Prefer python3, fall back to python; the script text is passed
            # to the interpreter as $0 and the real arguments follow.
            result = execute(
                self._runtime,
                [
                    "sh",
                    "-c",
                    'if command -v python3 >/dev/null 2>&1; then py=python3; else py=python; fi; "$py" -c "$0" "$@"',
                    script,
                    path,
                    "1" if recursive else "0",
                ],
                timeout=self._LIST_TIMEOUT_SECONDS,
                error_message="Failed to list sandbox files",
            )
        except CommandExecutionError as exc:
            raise RuntimeError(str(exc)) from exc
        try:
            raw = json.loads(result.stdout.decode("utf-8"))
        except Exception as exc:
            raise RuntimeError("Malformed sandbox file list output") from exc
        entries: list[SandboxFileNode] = []
        for item in raw:
            item_path = str(item.get("path"))
            item_is_dir = bool(item.get("is_dir"))
            extension = None
            if not item_is_dir:
                # Directories carry no extension; an empty suffix becomes None.
                ext = os.path.splitext(item_path)[1]
                extension = ext or None
            entries.append(
                SandboxFileNode(
                    path=item_path,
                    is_dir=item_is_dir,
                    size=item.get("size"),
                    mtime=item.get("mtime"),
                    extension=extension,
                )
            )
        return entries

    def download_file(self, *, path: str) -> SandboxFileDownloadTicket:
        """Export *path* from the sandbox and return a download ticket.

        Directories are tar.gz-archived inside the sandbox before upload;
        single files are uploaded as-is.

        Raises:
            ValueError: if *path* does not exist in the sandbox.
            RuntimeError: if archiving or the upload fails.
        """
        kind = self._detect_path_kind(path)
        export_name = os.path.basename(path.rstrip("/")) or "workspace"
        filename = f"{export_name}.tar.gz" if kind == "dir" else (os.path.basename(path) or "file")
        export_id = uuid4().hex
        export_path = SandboxFileDownloadPath(
            tenant_id=UUID(self._tenant_id),
            sandbox_id=UUID(self._sandbox_id),
            export_id=export_id,
            filename=filename,
        )
        # Pre-signed URL the sandbox itself PUTs the content to.
        upload_url = sandbox_file_storage.get_upload_url(export_path, expires_in=self._EXPORT_EXPIRES_IN_SECONDS)
        if kind == "dir":
            archive_path = f"/tmp/{export_id}.tar.gz"
            try:
                execute(
                    self._runtime,
                    ["tar", "-czf", archive_path, "-C", ".", path],
                    timeout=self._UPLOAD_TIMEOUT_SECONDS,
                    error_message="Failed to archive directory in sandbox",
                )
                execute(
                    self._runtime,
                    ["curl", "-s", "-f", "-X", "PUT", "-T", archive_path, upload_url],
                    timeout=self._UPLOAD_TIMEOUT_SECONDS,
                    error_message="Failed to upload directory archive from sandbox",
                )
            except CommandExecutionError as exc:
                raise RuntimeError(str(exc)) from exc
            finally:
                try:
                    execute(
                        self._runtime,
                        ["rm", "-f", archive_path],
                        timeout=self._LIST_TIMEOUT_SECONDS,
                        error_message="Failed to cleanup temp archive",
                    )
                except Exception as exc:
                    # Best-effort cleanup; do not fail the download on cleanup issues.
                    logger.debug("Failed to cleanup temp archive %s: %s", archive_path, exc)
        else:
            try:
                execute(
                    self._runtime,
                    ["curl", "-s", "-f", "-X", "PUT", "-T", path, upload_url],
                    timeout=self._UPLOAD_TIMEOUT_SECONDS,
                    error_message="Failed to upload file from sandbox",
                )
            except CommandExecutionError as exc:
                raise RuntimeError(str(exc)) from exc
        download_url = sandbox_file_storage.get_download_url(export_path, expires_in=self._EXPORT_EXPIRES_IN_SECONDS)
        return SandboxFileDownloadTicket(
            download_url=download_url,
            expires_in=self._EXPORT_EXPIRES_IN_SECONDS,
            export_id=export_id,
        )

    def _detect_path_kind(self, path: str) -> str:
        """Return "dir" or "file" for *path* in the sandbox.

        Raises:
            ValueError: if the path does not exist or the check fails.
        """
        script = r"""
import os
import sys
p = sys.argv[1]
if os.path.isdir(p):
    print("dir")
    raise SystemExit(0)
if os.path.isfile(p):
    print("file")
    raise SystemExit(0)
print("none")
raise SystemExit(2)
"""
        try:
            result = execute(
                self._runtime,
                [
                    "sh",
                    "-c",
                    'if command -v python3 >/dev/null 2>&1; then py=python3; else py=python; fi; "$py" -c "$0" "$@"',
                    script,
                    path,
                ],
                timeout=self._LIST_TIMEOUT_SECONDS,
                error_message="Failed to check path in sandbox",
            )
        except CommandExecutionError as exc:
            raise ValueError(str(exc)) from exc
        kind = result.stdout.decode("utf-8", errors="replace").strip()
        if kind not in ("dir", "file"):
            raise ValueError("File not found in sandbox")
        return kind

View File

@ -59,7 +59,10 @@ class ArchiveSandboxStorage(SandboxStorage):
(
pipeline(sandbox)
.add(["curl", "-fsSL", download_url, "-o", archive_name], error_message="Failed to download archive")
.add(["tar", "-xzf", archive_name], error_message="Failed to extract archive")
.add(
["sh", "-c", 'tar -xzf "$1" 2>/dev/null; exit $?', "sh", archive_name],
error_message="Failed to extract archive",
)
.add(["rm", archive_name], error_message="Failed to cleanup archive")
.execute(timeout=ARCHIVE_DOWNLOAD_TIMEOUT, raise_on_error=True)
)

View File

@ -349,10 +349,62 @@ class ZipSandbox:
(
pipeline(self.vm)
.add(["mkdir", "-p", dest_dir], error_message="Failed to create destination directory")
.add(["tar", extract_flag, archive_path, "-C", dest_dir], error_message="Failed to extract tar archive")
.add(
["sh", "-c", f'tar {extract_flag} "$1" -C "$2" 2>/dev/null; exit $?', "sh", archive_path, dest_dir],
error_message="Failed to extract tar archive",
)
.execute(timeout=self._DEFAULT_TIMEOUT_SECONDS, raise_on_error=True)
)
except PipelineExecutionError as exc:
raise RuntimeError(str(exc)) from exc
return dest_dir
def tar(self, src: str = ".", *, include_base: bool = True, compress: bool = True) -> SandboxFile:
    """Create a tar archive inside the sandbox and return a handle to it.

    Args:
        src: Source path to archive (file or directory), sandbox-relative.
        include_base: If True, include the base directory name in the archive.
        compress: If True, create a gzipped tar archive (.tar.gz).

    Returns:
        SandboxFile handle to the created archive (written under /tmp in the
        sandbox; the caller is responsible for cleanup).

    Raises:
        RuntimeError: if the in-sandbox tar command fails.
    """
    src = self._normalize_path(src)
    extension = ".tar.gz" if compress else ".tar"
    # Random name under /tmp so concurrent archive operations cannot collide.
    out_path = f"/tmp/{uuid4().hex}{extension}"
    create_flag = "-czf" if compress else "-cf"
    try:
        if src in (".", ""):
            # Archive current directory contents; include_base is moot here.
            execute(
                self.vm,
                ["tar", create_flag, out_path, "-C", ".", "."],
                timeout=self._DEFAULT_TIMEOUT_SECONDS,
                error_message="Failed to create tar archive",
            )
        elif include_base:
            # Archive with base directory name included: run tar from the
            # parent directory so entries are prefixed with basename(src).
            parent_dir = posixpath.dirname(src) or "."
            base_name = posixpath.basename(src)
            execute(
                self.vm,
                ["tar", create_flag, out_path, "-C", parent_dir, base_name],
                timeout=self._DEFAULT_TIMEOUT_SECONDS,
                error_message="Failed to create tar archive",
            )
        else:
            # Archive contents without base directory name.
            execute(
                self.vm,
                ["tar", create_flag, out_path, "-C", src, "."],
                timeout=self._DEFAULT_TIMEOUT_SECONDS,
                error_message="Failed to create tar archive",
            )
    except CommandExecutionError as exc:
        raise RuntimeError(str(exc)) from exc
    return SandboxFile(path=out_path)