Mirror of https://github.com/langgenius/dify.git, synced 2026-05-03 17:08:03 +08:00
feat(sandbox): restructure file handling by introducing a new inspector module with runtime and archive sources
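The new core.sandbox.inspector package splits the previous single-module implementation into a shared SandboxFileSource base, a runtime-backed source, an archive-backed source, and a SandboxFileBrowser facade that chooses between them. As a rough usage sketch based only on the signatures in api/core/sandbox/inspector/browser.py below (the IDs and the file path are placeholders, and it assumes the entity objects expose their fields as attributes):

    from core.sandbox.inspector import SandboxFileBrowser

    # Placeholder IDs; real callers take these from the request context.
    browser = SandboxFileBrowser(tenant_id="<tenant-uuid>", sandbox_id="<sandbox-uuid>")

    # Dispatches to the live runtime if SandboxManager still has the sandbox,
    # otherwise falls back to the stored workspace archive.
    for node in browser.list_files(path=".", recursive=False):
        print(node.path, node.is_dir, node.size)

    # Returns a short-lived pre-signed URL instead of the raw bytes.
    ticket = browser.download_file(path="output/report.csv")
    print(ticket.download_url, ticket.expires_in)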
@@ -1,462 +0,0 @@
from __future__ import annotations

import abc
import json
import logging
import os
import tempfile
from pathlib import Path, PurePosixPath
from uuid import UUID, uuid4

from core.sandbox.entities.files import SandboxFileDownloadTicket, SandboxFileNode
from core.sandbox.manager import SandboxManager
from core.sandbox.security.archive_signer import SandboxArchivePath
from core.sandbox.security.sandbox_file_signer import SandboxFileDownloadPath
from core.sandbox.storage import sandbox_file_storage
from core.virtual_environment.__base.exec import CommandExecutionError
from core.virtual_environment.__base.helpers import execute
from core.virtual_environment.__base.virtual_environment import VirtualEnvironment
from extensions.ext_storage import storage

logger = logging.getLogger(__name__)


class SandboxFileSource(abc.ABC):
    _LIST_TIMEOUT_SECONDS = 30
    _UPLOAD_TIMEOUT_SECONDS = 60
    _EXPORT_EXPIRES_IN_SECONDS = 60 * 5

    def __init__(self, *, tenant_id: str, sandbox_id: str):
        self._tenant_id = tenant_id
        self._sandbox_id = sandbox_id

    @abc.abstractmethod
    def list_files(self, *, path: str, recursive: bool) -> list[SandboxFileNode]:
        raise NotImplementedError

    @abc.abstractmethod
    def download_file(self, *, path: str) -> SandboxFileDownloadTicket:
        raise NotImplementedError


class SandboxFileRuntimeSource(SandboxFileSource):
    def __init__(self, *, tenant_id: str, sandbox_id: str, runtime: VirtualEnvironment):
        super().__init__(tenant_id=tenant_id, sandbox_id=sandbox_id)
        self._runtime = runtime

    def list_files(self, *, path: str, recursive: bool) -> list[SandboxFileNode]:
        script = r"""
import json
import os
import sys

path = sys.argv[1]
recursive = sys.argv[2] == "1"

def norm(rel: str) -> str:
    rel = rel.replace("\\\\", "/")
    rel = rel.lstrip("./")
    return rel or "."

def stat_entry(full_path: str, rel_path: str) -> dict:
    st = os.stat(full_path)
    is_dir = os.path.isdir(full_path)
    return {
        "path": norm(rel_path),
        "is_dir": is_dir,
        "size": None if is_dir else int(st.st_size),
        "mtime": int(st.st_mtime),
    }

entries = []
if recursive:
    for root, dirs, files in os.walk(path):
        for d in dirs:
            fp = os.path.join(root, d)
            rp = os.path.relpath(fp, ".")
            entries.append(stat_entry(fp, rp))
        for f in files:
            fp = os.path.join(root, f)
            rp = os.path.relpath(fp, ".")
            entries.append(stat_entry(fp, rp))
else:
    if os.path.isfile(path):
        rel_path = os.path.relpath(path, ".")
        entries.append(stat_entry(path, rel_path))
    else:
        for item in os.scandir(path):
            rel_path = os.path.relpath(item.path, ".")
            entries.append(stat_entry(item.path, rel_path))

print(json.dumps(entries))
"""

        try:
            result = execute(
                self._runtime,
                [
                    "sh",
                    "-c",
                    'if command -v python3 >/dev/null 2>&1; then py=python3; else py=python; fi; "$py" -c "$0" "$@"',
                    script,
                    path,
                    "1" if recursive else "0",
                ],
                timeout=self._LIST_TIMEOUT_SECONDS,
                error_message="Failed to list sandbox files",
            )
        except CommandExecutionError as exc:
            raise RuntimeError(str(exc)) from exc

        try:
            raw = json.loads(result.stdout.decode("utf-8"))
        except Exception as exc:
            raise RuntimeError("Malformed sandbox file list output") from exc

        entries: list[SandboxFileNode] = []
        for item in raw:
            item_path = str(item.get("path"))
            item_is_dir = bool(item.get("is_dir"))
            extension = None
            if not item_is_dir:
                ext = os.path.splitext(item_path)[1]
                extension = ext or None
            entries.append(
                SandboxFileNode(
                    path=item_path,
                    is_dir=item_is_dir,
                    size=item.get("size"),
                    mtime=item.get("mtime"),
                    extension=extension,
                )
            )
        return entries

    def download_file(self, *, path: str) -> SandboxFileDownloadTicket:
        kind = self._detect_path_kind(path)

        export_name = os.path.basename(path.rstrip("/")) or "workspace"
        filename = f"{export_name}.tar.gz" if kind == "dir" else (os.path.basename(path) or "file")
        export_id = uuid4().hex
        export_path = SandboxFileDownloadPath(
            tenant_id=UUID(self._tenant_id),
            sandbox_id=UUID(self._sandbox_id),
            export_id=export_id,
            filename=filename,
        )

        upload_url = sandbox_file_storage.get_upload_url(export_path, expires_in=self._EXPORT_EXPIRES_IN_SECONDS)

        if kind == "dir":
            archive_path = f"/tmp/{export_id}.tar.gz"
            try:
                execute(
                    self._runtime,
                    ["tar", "-czf", archive_path, "-C", ".", path],
                    timeout=self._UPLOAD_TIMEOUT_SECONDS,
                    error_message="Failed to archive directory in sandbox",
                )
                execute(
                    self._runtime,
                    ["curl", "-s", "-f", "-X", "PUT", "-T", archive_path, upload_url],
                    timeout=self._UPLOAD_TIMEOUT_SECONDS,
                    error_message="Failed to upload directory archive from sandbox",
                )
            except CommandExecutionError as exc:
                raise RuntimeError(str(exc)) from exc
            finally:
                try:
                    execute(
                        self._runtime,
                        ["rm", "-f", archive_path],
                        timeout=self._LIST_TIMEOUT_SECONDS,
                        error_message="Failed to cleanup temp archive",
                    )
                except Exception as exc:
                    # Best-effort cleanup; do not fail the download on cleanup issues.
                    logger.debug("Failed to cleanup temp archive %s: %s", archive_path, exc)
        else:
            try:
                execute(
                    self._runtime,
                    ["curl", "-s", "-f", "-X", "PUT", "-T", path, upload_url],
                    timeout=self._UPLOAD_TIMEOUT_SECONDS,
                    error_message="Failed to upload file from sandbox",
                )
            except CommandExecutionError as exc:
                raise RuntimeError(str(exc)) from exc

        download_url = sandbox_file_storage.get_download_url(export_path, expires_in=self._EXPORT_EXPIRES_IN_SECONDS)
        return SandboxFileDownloadTicket(
            download_url=download_url,
            expires_in=self._EXPORT_EXPIRES_IN_SECONDS,
            export_id=export_id,
        )

    def _detect_path_kind(self, path: str) -> str:
        script = r"""
import os
import sys

p = sys.argv[1]
if os.path.isdir(p):
    print("dir")
    raise SystemExit(0)
if os.path.isfile(p):
    print("file")
    raise SystemExit(0)
print("none")
raise SystemExit(2)
"""

        try:
            result = execute(
                self._runtime,
                [
                    "sh",
                    "-c",
                    'if command -v python3 >/dev/null 2>&1; then py=python3; else py=python; fi; "$py" -c "$0" "$@"',
                    script,
                    path,
                ],
                timeout=self._LIST_TIMEOUT_SECONDS,
                error_message="Failed to check path in sandbox",
            )
        except CommandExecutionError as exc:
            raise ValueError(str(exc)) from exc

        kind = result.stdout.decode("utf-8", errors="replace").strip()
        if kind not in ("dir", "file"):
            raise ValueError("File not found in sandbox")
        return kind


class SandboxFileArchiveSource(SandboxFileSource):
    def list_files(self, *, path: str, recursive: bool) -> list[SandboxFileNode]:
        import tarfile

        archive_path = SandboxArchivePath(tenant_id=UUID(self._tenant_id), sandbox_id=UUID(self._sandbox_id))
        storage_key = archive_path.get_storage_key()
        if not storage.exists(storage_key):
            raise ValueError("Sandbox archive not found")

        with tempfile.TemporaryDirectory(prefix="dify-sandbox-archive-") as tmpdir:
            local_archive = os.path.join(tmpdir, "workspace.tar.gz")
            storage.download(storage_key, local_archive)

            entries_by_path: dict[str, SandboxFileNode] = {}

            def add_dir(dir_path: str) -> None:
                if dir_path in ("", "."):
                    return
                if dir_path not in entries_by_path:
                    entries_by_path[dir_path] = SandboxFileNode(
                        path=dir_path, is_dir=True, size=None, mtime=None, extension=None
                    )

            def clean(member_name: str) -> str:
                name = member_name.lstrip("./")
                return name.rstrip("/")

            target_prefix = "" if path in (".", "") else f"{path}/"

            with tarfile.open(local_archive, mode="r:gz") as tf:
                for m in tf.getmembers():
                    mp = clean(m.name)
                    if mp in ("", "."):
                        continue

                    if not recursive:
                        if path in (".", ""):
                            if "/" in mp:
                                add_dir(mp.split("/", 1)[0])
                                continue
                        else:
                            if not mp.startswith(target_prefix):
                                continue
                            rest = mp[len(target_prefix) :]
                            if rest == "":
                                continue
                            if "/" in rest:
                                add_dir(f"{path}/{rest.split('/', 1)[0]}")
                                continue
                    else:
                        if path not in (".", "") and not (mp == path or mp.startswith(target_prefix)):
                            continue

                    parent = os.path.dirname(mp)
                    while parent not in ("", "."):
                        if path not in (".", "") and parent == path:
                            break
                        add_dir(parent)
                        parent = os.path.dirname(parent)

                    is_dir = m.isdir()
                    extension = None
                    if not is_dir:
                        ext = os.path.splitext(mp)[1]
                        extension = ext or None
                    entries_by_path[mp] = SandboxFileNode(
                        path=mp,
                        is_dir=is_dir,
                        size=None if is_dir else int(m.size),
                        mtime=int(m.mtime) if m.mtime else None,
                        extension=extension,
                    )

            return sorted(entries_by_path.values(), key=lambda e: e.path)

    def download_file(self, *, path: str) -> SandboxFileDownloadTicket:
        import tarfile

        archive_path = SandboxArchivePath(tenant_id=UUID(self._tenant_id), sandbox_id=UUID(self._sandbox_id))
        storage_key = archive_path.get_storage_key()
        if not storage.exists(storage_key):
            raise ValueError("Sandbox archive not found")

        export_name = os.path.basename(path.rstrip("/")) or "workspace"
        export_id = uuid4().hex

        # Decide file vs directory inside archive.
        is_dir_request = path in (".", "")

        with tempfile.TemporaryDirectory(prefix="dify-sandbox-archive-") as tmpdir:
            local_archive = os.path.join(tmpdir, "workspace.tar.gz")
            storage.download(storage_key, local_archive)

            with tarfile.open(local_archive, mode="r:gz") as tf:
                member_name = path.lstrip("./").rstrip("/")
                if not is_dir_request:
                    # If it is an explicit file in archive, treat as file download.
                    member = None
                    try:
                        member = tf.getmember(member_name)
                    except KeyError:
                        try:
                            member = tf.getmember(f"./{member_name}")
                        except KeyError:
                            member = None

                    if member is not None and not member.isdir():
                        export_path = SandboxFileDownloadPath(
                            tenant_id=UUID(self._tenant_id),
                            sandbox_id=UUID(self._sandbox_id),
                            export_id=export_id,
                            filename=os.path.basename(member_name) or "file",
                        )
                        extracted = tf.extractfile(member)
                        if extracted is None:
                            raise ValueError("File not found in sandbox archive")
                        sandbox_file_storage.save(export_path, extracted.read())

                        download_url = sandbox_file_storage.get_download_url(
                            export_path, expires_in=self._EXPORT_EXPIRES_IN_SECONDS
                        )
                        return SandboxFileDownloadTicket(
                            download_url=download_url,
                            expires_in=self._EXPORT_EXPIRES_IN_SECONDS,
                            export_id=export_id,
                        )

                    # Otherwise treat as directory (implied dir is common in tar).
                    is_dir_request = True

                if is_dir_request:
                    export_path = SandboxFileDownloadPath(
                        tenant_id=UUID(self._tenant_id),
                        sandbox_id=UUID(self._sandbox_id),
                        export_id=export_id,
                        filename=f"{export_name}.tar.gz",
                    )
                    export_local = os.path.join(tmpdir, "export.tar.gz")

                    prefix = "" if member_name in (".", "") else f"{member_name}/"
                    found_any = False
                    for m in tf.getmembers():
                        src_name = m.name.lstrip("./").rstrip("/")
                        if member_name not in (".", ""):
                            if src_name != member_name and not src_name.startswith(prefix):
                                continue
                        found_any = True
                        break

                    if not found_any:
                        raise ValueError("File not found in sandbox archive")

                    with tarfile.open(export_local, mode="w:gz") as out:
                        if member_name not in (".", ""):
                            dir_info = tarfile.TarInfo(name=member_name)
                            dir_info.type = tarfile.DIRTYPE
                            dir_info.size = 0
                            out.addfile(dir_info)

                        for m in tf.getmembers():
                            src_name = m.name.lstrip("./")
                            if member_name not in (".", ""):
                                if src_name != member_name and not src_name.startswith(prefix):
                                    continue
                            ti = tarfile.TarInfo(name=src_name.rstrip("/"))
                            ti.mode = m.mode
                            ti.mtime = m.mtime
                            ti.uid = m.uid
                            ti.gid = m.gid
                            ti.uname = m.uname
                            ti.gname = m.gname
                            if m.isdir():
                                ti.type = tarfile.DIRTYPE
                                ti.size = 0
                                out.addfile(ti)
                                continue
                            extracted = tf.extractfile(m)
                            if extracted is None:
                                continue
                            ti.size = int(m.size)
                            out.addfile(ti, fileobj=extracted)

                    sandbox_file_storage.save(export_path, Path(export_local).read_bytes())

                    download_url = sandbox_file_storage.get_download_url(
                        export_path, expires_in=self._EXPORT_EXPIRES_IN_SECONDS
                    )
                    return SandboxFileDownloadTicket(
                        download_url=download_url,
                        expires_in=self._EXPORT_EXPIRES_IN_SECONDS,
                        export_id=export_id,
                    )

        raise ValueError("File not found in sandbox archive")


class SandboxFileBrowser:
    def __init__(self, *, tenant_id: str, sandbox_id: str):
        self._tenant_id = tenant_id
        self._sandbox_id = sandbox_id

    @staticmethod
    def _normalize_workspace_path(path: str | None) -> str:
        raw = (path or ".").strip()
        if raw == "":
            raw = "."

        p = PurePosixPath(raw)
        if p.is_absolute():
            raise ValueError("path must be relative")
        if any(part == ".." for part in p.parts):
            raise ValueError("path must not contain '..'")

        normalized = str(p)
        return "." if normalized in (".", "") else normalized

    def _backend(self) -> SandboxFileSource:
        runtime = SandboxManager.get(self._sandbox_id)
        if runtime is not None:
            return SandboxFileRuntimeSource(tenant_id=self._tenant_id, sandbox_id=self._sandbox_id, runtime=runtime)
        return SandboxFileArchiveSource(tenant_id=self._tenant_id, sandbox_id=self._sandbox_id)

    def list_files(self, *, path: str | None = None, recursive: bool = False) -> list[SandboxFileNode]:
        workspace_path = self._normalize_workspace_path(path)
        return self._backend().list_files(path=workspace_path, recursive=recursive)

    def download_file(self, *, path: str) -> SandboxFileDownloadTicket:
        workspace_path = self._normalize_workspace_path(path)
        return self._backend().download_file(path=workspace_path)
api/core/sandbox/inspector/__init__.py (new file, 11 lines added)
@@ -0,0 +1,11 @@
from core.sandbox.inspector.archive_source import SandboxFileArchiveSource
from core.sandbox.inspector.base import SandboxFileSource
from core.sandbox.inspector.browser import SandboxFileBrowser
from core.sandbox.inspector.runtime_source import SandboxFileRuntimeSource

__all__ = [
    "SandboxFileArchiveSource",
    "SandboxFileBrowser",
    "SandboxFileRuntimeSource",
    "SandboxFileSource",
]
api/core/sandbox/inspector/archive_source.py (new file, 218 lines added)
@@ -0,0 +1,218 @@
from __future__ import annotations

import json
import os
from typing import TYPE_CHECKING
from uuid import UUID, uuid4

from core.sandbox.entities.files import SandboxFileDownloadTicket, SandboxFileNode
from core.sandbox.inspector.base import SandboxFileSource
from core.sandbox.security.archive_signer import SandboxArchivePath, SandboxArchiveSigner
from core.sandbox.security.sandbox_file_signer import SandboxFileDownloadPath
from core.sandbox.storage import sandbox_file_storage
from core.virtual_environment.__base.exec import CommandExecutionError
from core.virtual_environment.__base.helpers import execute
from extensions.ext_storage import storage

if TYPE_CHECKING:
    from core.zip_sandbox import ZipSandbox


class SandboxFileArchiveSource(SandboxFileSource):
    _PYTHON_EXEC_CMD = 'if command -v python3 >/dev/null 2>&1; then py=python3; else py=python; fi; "$py" -c "$0" "$@"'
    _LIST_SCRIPT = r"""
import json
import os
import sys

path = sys.argv[1]
recursive = sys.argv[2] == "1"

def norm(rel: str) -> str:
    rel = rel.replace("\\", "/")
    rel = rel.lstrip("./")
    return rel or "."

def stat_entry(full_path: str, rel_path: str) -> dict:
    st = os.stat(full_path)
    is_dir = os.path.isdir(full_path)
    return {
        "path": norm(rel_path),
        "is_dir": is_dir,
        "size": None if is_dir else int(st.st_size),
        "mtime": int(st.st_mtime),
    }

entries = []
if recursive:
    for root, dirs, files in os.walk(path):
        for d in dirs:
            fp = os.path.join(root, d)
            rp = os.path.relpath(fp, ".")
            entries.append(stat_entry(fp, rp))
        for f in files:
            fp = os.path.join(root, f)
            rp = os.path.relpath(fp, ".")
            entries.append(stat_entry(fp, rp))
else:
    if os.path.isfile(path):
        rel_path = os.path.relpath(path, ".")
        entries.append(stat_entry(path, rel_path))
    else:
        for item in os.scandir(path):
            rel_path = os.path.relpath(item.path, ".")
            entries.append(stat_entry(item.path, rel_path))

print(json.dumps(entries))
"""

    def _get_archive_download_url(self) -> str:
        """Get a pre-signed download URL for the sandbox archive."""
        archive_path = SandboxArchivePath(tenant_id=UUID(self._tenant_id), sandbox_id=UUID(self._sandbox_id))
        storage_key = archive_path.get_storage_key()
        if not storage.exists(storage_key):
            raise ValueError("Sandbox archive not found")
        return SandboxArchiveSigner.build_signed_url(
            archive_path=archive_path,
            expires_in=self._EXPORT_EXPIRES_IN_SECONDS,
            action=SandboxArchiveSigner.OPERATION_DOWNLOAD,
        )

    def _create_zip_sandbox(self) -> ZipSandbox:
        """Create a ZipSandbox instance for archive operations."""
        from core.zip_sandbox import ZipSandbox

        return ZipSandbox(tenant_id=self._tenant_id, user_id="system", app_id="sandbox-archive-browser")

    def list_files(self, *, path: str, recursive: bool) -> list[SandboxFileNode]:
        archive_url = self._get_archive_download_url()

        with self._create_zip_sandbox() as zs:
            # Download and extract the archive
            archive_path = zs.download_archive(archive_url, path="workspace.tar.gz")
            zs.untar(archive_path=archive_path, dest_dir="workspace")

            # List files using Python script in sandbox
            try:
                result = execute(
                    zs.vm,
                    [
                        "sh",
                        "-c",
                        self._PYTHON_EXEC_CMD,
                        self._LIST_SCRIPT,
                        f"workspace/{path}" if path not in (".", "") else "workspace",
                        "1" if recursive else "0",
                    ],
                    timeout=self._LIST_TIMEOUT_SECONDS,
                    error_message="Failed to list sandbox files",
                )
            except CommandExecutionError as exc:
                raise RuntimeError(str(exc)) from exc

            try:
                raw = json.loads(result.stdout.decode("utf-8"))
            except Exception as exc:
                raise RuntimeError("Malformed sandbox file list output") from exc

            entries: list[SandboxFileNode] = []
            for item in raw:
                item_path = str(item.get("path"))
                # Strip the "workspace/" prefix from paths
                if item_path.startswith("workspace/"):
                    item_path = item_path[len("workspace/") :]
                elif item_path == "workspace":
                    continue  # Skip the workspace directory itself

                item_is_dir = bool(item.get("is_dir"))
                extension = None
                if not item_is_dir:
                    ext = os.path.splitext(item_path)[1]
                    extension = ext or None
                entries.append(
                    SandboxFileNode(
                        path=item_path,
                        is_dir=item_is_dir,
                        size=item.get("size"),
                        mtime=item.get("mtime"),
                        extension=extension,
                    )
                )
            return sorted(entries, key=lambda e: e.path)

    def download_file(self, *, path: str) -> SandboxFileDownloadTicket:
        archive_url = self._get_archive_download_url()
        export_name = os.path.basename(path.rstrip("/")) or "workspace"
        export_id = uuid4().hex

        with self._create_zip_sandbox() as zs:
            # Download and extract the archive
            archive_path = zs.download_archive(archive_url, path="workspace.tar.gz")
            zs.untar(archive_path=archive_path, dest_dir="workspace")

            # Determine the target path inside extracted workspace
            target_path = f"workspace/{path}" if path not in (".", "") else "workspace"

            # Detect if target is file or directory
            detect_script = r"""
import os
import sys

p = sys.argv[1]
if os.path.isdir(p):
    print("dir")
    raise SystemExit(0)
if os.path.isfile(p):
    print("file")
    raise SystemExit(0)
print("none")
raise SystemExit(2)
"""
            try:
                result = execute(
                    zs.vm,
                    [
                        "sh",
                        "-c",
                        self._PYTHON_EXEC_CMD,
                        detect_script,
                        target_path,
                    ],
                    timeout=self._LIST_TIMEOUT_SECONDS,
                    error_message="Failed to check path in sandbox",
                )
            except CommandExecutionError as exc:
                raise ValueError(str(exc)) from exc

            kind = result.stdout.decode("utf-8", errors="replace").strip()
            if kind not in ("dir", "file"):
                raise ValueError("File not found in sandbox archive")

            if kind == "file":
                # Download file content from sandbox
                file_data = zs.read_file(target_path)
                export_path = SandboxFileDownloadPath(
                    tenant_id=UUID(self._tenant_id),
                    sandbox_id=UUID(self._sandbox_id),
                    export_id=export_id,
                    filename=os.path.basename(path) or "file",
                )
                sandbox_file_storage.save(export_path, file_data)
            else:
                # Create tar.gz archive of the directory
                tar_file = zs.tar(target_path, include_base=True, compress=True)
                tar_data = zs.read_file(tar_file.path)
                export_path = SandboxFileDownloadPath(
                    tenant_id=UUID(self._tenant_id),
                    sandbox_id=UUID(self._sandbox_id),
                    export_id=export_id,
                    filename=f"{export_name}.tar.gz",
                )
                sandbox_file_storage.save(export_path, tar_data)

        download_url = sandbox_file_storage.get_download_url(export_path, expires_in=self._EXPORT_EXPIRES_IN_SECONDS)
        return SandboxFileDownloadTicket(
            download_url=download_url,
            expires_in=self._EXPORT_EXPIRES_IN_SECONDS,
            export_id=export_id,
        )
api/core/sandbox/inspector/base.py (new file, 23 lines added)
@@ -0,0 +1,23 @@
from __future__ import annotations

import abc

from core.sandbox.entities.files import SandboxFileDownloadTicket, SandboxFileNode


class SandboxFileSource(abc.ABC):
    _LIST_TIMEOUT_SECONDS = 30
    _UPLOAD_TIMEOUT_SECONDS = 60 * 10
    _EXPORT_EXPIRES_IN_SECONDS = 60 * 10

    def __init__(self, *, tenant_id: str, sandbox_id: str):
        self._tenant_id = tenant_id
        self._sandbox_id = sandbox_id

    @abc.abstractmethod
    def list_files(self, *, path: str, recursive: bool) -> list[SandboxFileNode]:
        raise NotImplementedError

    @abc.abstractmethod
    def download_file(self, *, path: str) -> SandboxFileDownloadTicket:
        raise NotImplementedError
api/core/sandbox/inspector/browser.py (new file, 44 lines added)
@@ -0,0 +1,44 @@
from __future__ import annotations

from pathlib import PurePosixPath

from core.sandbox.entities.files import SandboxFileDownloadTicket, SandboxFileNode
from core.sandbox.inspector.archive_source import SandboxFileArchiveSource
from core.sandbox.inspector.base import SandboxFileSource
from core.sandbox.inspector.runtime_source import SandboxFileRuntimeSource
from core.sandbox.manager import SandboxManager


class SandboxFileBrowser:
    def __init__(self, *, tenant_id: str, sandbox_id: str):
        self._tenant_id = tenant_id
        self._sandbox_id = sandbox_id

    @staticmethod
    def _normalize_workspace_path(path: str | None) -> str:
        raw = (path or ".").strip()
        if raw == "":
            raw = "."

        p = PurePosixPath(raw)
        if p.is_absolute():
            raise ValueError("path must be relative")
        if any(part == ".." for part in p.parts):
            raise ValueError("path must not contain '..'")

        normalized = str(p)
        return "." if normalized in (".", "") else normalized

    def _backend(self) -> SandboxFileSource:
        runtime = SandboxManager.get(self._sandbox_id)
        if runtime is not None:
            return SandboxFileRuntimeSource(tenant_id=self._tenant_id, sandbox_id=self._sandbox_id, runtime=runtime)
        return SandboxFileArchiveSource(tenant_id=self._tenant_id, sandbox_id=self._sandbox_id)

    def list_files(self, *, path: str | None = None, recursive: bool = False) -> list[SandboxFileNode]:
        workspace_path = self._normalize_workspace_path(path)
        return self._backend().list_files(path=workspace_path, recursive=recursive)

    def download_file(self, *, path: str) -> SandboxFileDownloadTicket:
        workspace_path = self._normalize_workspace_path(path)
        return self._backend().download_file(path=workspace_path)
api/core/sandbox/inspector/runtime_source.py (new file, 208 lines added)
@@ -0,0 +1,208 @@
from __future__ import annotations

import json
import logging
import os
from uuid import UUID, uuid4

from core.sandbox.entities.files import SandboxFileDownloadTicket, SandboxFileNode
from core.sandbox.inspector.base import SandboxFileSource
from core.sandbox.security.sandbox_file_signer import SandboxFileDownloadPath
from core.sandbox.storage import sandbox_file_storage
from core.virtual_environment.__base.exec import CommandExecutionError
from core.virtual_environment.__base.helpers import execute
from core.virtual_environment.__base.virtual_environment import VirtualEnvironment

logger = logging.getLogger(__name__)


class SandboxFileRuntimeSource(SandboxFileSource):
    def __init__(self, *, tenant_id: str, sandbox_id: str, runtime: VirtualEnvironment):
        super().__init__(tenant_id=tenant_id, sandbox_id=sandbox_id)
        self._runtime = runtime

    def list_files(self, *, path: str, recursive: bool) -> list[SandboxFileNode]:
        script = r"""
import json
import os
import sys

path = sys.argv[1]
recursive = sys.argv[2] == "1"

def norm(rel: str) -> str:
    rel = rel.replace("\\\\", "/")
    rel = rel.lstrip("./")
    return rel or "."

def stat_entry(full_path: str, rel_path: str) -> dict:
    st = os.stat(full_path)
    is_dir = os.path.isdir(full_path)
    return {
        "path": norm(rel_path),
        "is_dir": is_dir,
        "size": None if is_dir else int(st.st_size),
        "mtime": int(st.st_mtime),
    }

entries = []
if recursive:
    for root, dirs, files in os.walk(path):
        for d in dirs:
            fp = os.path.join(root, d)
            rp = os.path.relpath(fp, ".")
            entries.append(stat_entry(fp, rp))
        for f in files:
            fp = os.path.join(root, f)
            rp = os.path.relpath(fp, ".")
            entries.append(stat_entry(fp, rp))
else:
    if os.path.isfile(path):
        rel_path = os.path.relpath(path, ".")
        entries.append(stat_entry(path, rel_path))
    else:
        for item in os.scandir(path):
            rel_path = os.path.relpath(item.path, ".")
            entries.append(stat_entry(item.path, rel_path))

print(json.dumps(entries))
"""

        try:
            result = execute(
                self._runtime,
                [
                    "sh",
                    "-c",
                    'if command -v python3 >/dev/null 2>&1; then py=python3; else py=python; fi; "$py" -c "$0" "$@"',
                    script,
                    path,
                    "1" if recursive else "0",
                ],
                timeout=self._LIST_TIMEOUT_SECONDS,
                error_message="Failed to list sandbox files",
            )
        except CommandExecutionError as exc:
            raise RuntimeError(str(exc)) from exc

        try:
            raw = json.loads(result.stdout.decode("utf-8"))
        except Exception as exc:
            raise RuntimeError("Malformed sandbox file list output") from exc

        entries: list[SandboxFileNode] = []
        for item in raw:
            item_path = str(item.get("path"))
            item_is_dir = bool(item.get("is_dir"))
            extension = None
            if not item_is_dir:
                ext = os.path.splitext(item_path)[1]
                extension = ext or None
            entries.append(
                SandboxFileNode(
                    path=item_path,
                    is_dir=item_is_dir,
                    size=item.get("size"),
                    mtime=item.get("mtime"),
                    extension=extension,
                )
            )
        return entries

    def download_file(self, *, path: str) -> SandboxFileDownloadTicket:
        kind = self._detect_path_kind(path)

        export_name = os.path.basename(path.rstrip("/")) or "workspace"
        filename = f"{export_name}.tar.gz" if kind == "dir" else (os.path.basename(path) or "file")
        export_id = uuid4().hex
        export_path = SandboxFileDownloadPath(
            tenant_id=UUID(self._tenant_id),
            sandbox_id=UUID(self._sandbox_id),
            export_id=export_id,
            filename=filename,
        )

        upload_url = sandbox_file_storage.get_upload_url(export_path, expires_in=self._EXPORT_EXPIRES_IN_SECONDS)

        if kind == "dir":
            archive_path = f"/tmp/{export_id}.tar.gz"
            try:
                execute(
                    self._runtime,
                    ["tar", "-czf", archive_path, "-C", ".", path],
                    timeout=self._UPLOAD_TIMEOUT_SECONDS,
                    error_message="Failed to archive directory in sandbox",
                )
                execute(
                    self._runtime,
                    ["curl", "-s", "-f", "-X", "PUT", "-T", archive_path, upload_url],
                    timeout=self._UPLOAD_TIMEOUT_SECONDS,
                    error_message="Failed to upload directory archive from sandbox",
                )
            except CommandExecutionError as exc:
                raise RuntimeError(str(exc)) from exc
            finally:
                try:
                    execute(
                        self._runtime,
                        ["rm", "-f", archive_path],
                        timeout=self._LIST_TIMEOUT_SECONDS,
                        error_message="Failed to cleanup temp archive",
                    )
                except Exception as exc:
                    # Best-effort cleanup; do not fail the download on cleanup issues.
                    logger.debug("Failed to cleanup temp archive %s: %s", archive_path, exc)
        else:
            try:
                execute(
                    self._runtime,
                    ["curl", "-s", "-f", "-X", "PUT", "-T", path, upload_url],
                    timeout=self._UPLOAD_TIMEOUT_SECONDS,
                    error_message="Failed to upload file from sandbox",
                )
            except CommandExecutionError as exc:
                raise RuntimeError(str(exc)) from exc

        download_url = sandbox_file_storage.get_download_url(export_path, expires_in=self._EXPORT_EXPIRES_IN_SECONDS)
        return SandboxFileDownloadTicket(
            download_url=download_url,
            expires_in=self._EXPORT_EXPIRES_IN_SECONDS,
            export_id=export_id,
        )

    def _detect_path_kind(self, path: str) -> str:
        script = r"""
import os
import sys

p = sys.argv[1]
if os.path.isdir(p):
    print("dir")
    raise SystemExit(0)
if os.path.isfile(p):
    print("file")
    raise SystemExit(0)
print("none")
raise SystemExit(2)
"""

        try:
            result = execute(
                self._runtime,
                [
                    "sh",
                    "-c",
                    'if command -v python3 >/dev/null 2>&1; then py=python3; else py=python; fi; "$py" -c "$0" "$@"',
                    script,
                    path,
                ],
                timeout=self._LIST_TIMEOUT_SECONDS,
                error_message="Failed to check path in sandbox",
            )
        except CommandExecutionError as exc:
            raise ValueError(str(exc)) from exc

        kind = result.stdout.decode("utf-8", errors="replace").strip()
        if kind not in ("dir", "file"):
            raise ValueError("File not found in sandbox")
        return kind
@@ -59,7 +59,10 @@ class ArchiveSandboxStorage(SandboxStorage):
        (
            pipeline(sandbox)
            .add(["curl", "-fsSL", download_url, "-o", archive_name], error_message="Failed to download archive")
            .add(["tar", "-xzf", archive_name], error_message="Failed to extract archive")
            .add(
                ["sh", "-c", 'tar -xzf "$1" 2>/dev/null; exit $?', "sh", archive_name],
                error_message="Failed to extract archive",
            )
            .add(["rm", archive_name], error_message="Failed to cleanup archive")
            .execute(timeout=ARCHIVE_DOWNLOAD_TIMEOUT, raise_on_error=True)
        )

@@ -349,10 +349,62 @@ class ZipSandbox:
            (
                pipeline(self.vm)
                .add(["mkdir", "-p", dest_dir], error_message="Failed to create destination directory")
                .add(["tar", extract_flag, archive_path, "-C", dest_dir], error_message="Failed to extract tar archive")
                .add(
                    ["sh", "-c", f'tar {extract_flag} "$1" -C "$2" 2>/dev/null; exit $?', "sh", archive_path, dest_dir],
                    error_message="Failed to extract tar archive",
                )
                .execute(timeout=self._DEFAULT_TIMEOUT_SECONDS, raise_on_error=True)
            )
        except PipelineExecutionError as exc:
            raise RuntimeError(str(exc)) from exc

        return dest_dir

    def tar(self, src: str = ".", *, include_base: bool = True, compress: bool = True) -> SandboxFile:
        """Create a tar archive and return a handle to it.

        Args:
            src: Source path to archive (file or directory)
            include_base: If True, include the base directory name in the archive
            compress: If True, create a gzipped tar archive (.tar.gz)

        Returns:
            SandboxFile handle to the created archive
        """
        src = self._normalize_path(src)
        extension = ".tar.gz" if compress else ".tar"
        out_path = f"/tmp/{uuid4().hex}{extension}"

        create_flag = "-czf" if compress else "-cf"

        try:
            if src in (".", ""):
                # Archive current directory contents
                execute(
                    self.vm,
                    ["tar", create_flag, out_path, "-C", ".", "."],
                    timeout=self._DEFAULT_TIMEOUT_SECONDS,
                    error_message="Failed to create tar archive",
                )
            elif include_base:
                # Archive with base directory name included
                parent_dir = posixpath.dirname(src) or "."
                base_name = posixpath.basename(src)
                execute(
                    self.vm,
                    ["tar", create_flag, out_path, "-C", parent_dir, base_name],
                    timeout=self._DEFAULT_TIMEOUT_SECONDS,
                    error_message="Failed to create tar archive",
                )
            else:
                # Archive contents without base directory name
                execute(
                    self.vm,
                    ["tar", create_flag, out_path, "-C", src, "."],
                    timeout=self._DEFAULT_TIMEOUT_SECONDS,
                    error_message="Failed to create tar archive",
                )
        except CommandExecutionError as exc:
            raise RuntimeError(str(exc)) from exc

        return SandboxFile(path=out_path)
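For illustration only, a rough sketch of how the new ZipSandbox.tar helper combines with the other ZipSandbox calls that SandboxFileArchiveSource relies on above (download_archive, untar, read_file). The archive URL, IDs, and directory name are placeholders, and error handling is omitted:

    from core.zip_sandbox import ZipSandbox

    # Placeholder values; a real caller would pass a signed archive URL and tenant context.
    archive_url = "https://storage.example/presigned-archive-url"

    with ZipSandbox(tenant_id="<tenant-uuid>", user_id="system", app_id="sandbox-archive-browser") as zs:
        # Pull the stored workspace archive into the sandbox and unpack it.
        local_archive = zs.download_archive(archive_url, path="workspace.tar.gz")
        zs.untar(archive_path=local_archive, dest_dir="workspace")

        # Re-pack a single directory, keeping its base name, and read the bytes back out.
        tar_file = zs.tar("workspace/output", include_base=True, compress=True)
        tar_bytes = zs.read_file(tar_file.path)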