Files
dify/api/dify_graph/file/file_manager.py
Novice 94b01f6821 Merge commit '92bde350' into sandboxed-agent-rebase
Made-with: Cursor

# Conflicts:
#	api/controllers/console/app/workflow_draft_variable.py
#	api/core/agent/cot_agent_runner.py
#	api/core/agent/cot_chat_agent_runner.py
#	api/core/agent/cot_completion_agent_runner.py
#	api/core/agent/fc_agent_runner.py
#	api/core/app/apps/advanced_chat/app_generator.py
#	api/core/app/apps/advanced_chat/app_runner.py
#	api/core/app/apps/agent_chat/app_runner.py
#	api/core/app/apps/workflow/app_generator.py
#	api/core/app/apps/workflow/app_runner.py
#	api/core/app/entities/app_invoke_entities.py
#	api/core/app/entities/queue_entities.py
#	api/core/llm_generator/output_parser/structured_output.py
#	api/core/workflow/workflow_entry.py
#	api/dify_graph/context/__init__.py
#	api/dify_graph/entities/tool_entities.py
#	api/dify_graph/file/file_manager.py
#	api/dify_graph/graph_engine/response_coordinator/coordinator.py
#	api/dify_graph/graph_events/node.py
#	api/dify_graph/node_events/node.py
#	api/dify_graph/nodes/agent/agent_node.py
#	api/dify_graph/nodes/llm/entities.py
#	api/dify_graph/nodes/llm/llm_utils.py
#	api/dify_graph/nodes/llm/node.py
#	api/dify_graph/nodes/question_classifier/question_classifier_node.py
#	api/dify_graph/runtime/graph_runtime_state.py
#	api/dify_graph/variables/segments.py
#	api/factories/variable_factory.py
#	api/services/variable_truncator.py
#	api/tests/unit_tests/utils/structured_output_parser/test_structured_output_parser.py
#	api/uv.lock
#	web/app/components/app-sidebar/app-info.tsx
#	web/app/components/app-sidebar/app-sidebar-dropdown.tsx
#	web/app/components/app/create-app-modal/index.spec.tsx
#	web/app/components/apps/__tests__/list.spec.tsx
#	web/app/components/apps/app-card.tsx
#	web/app/components/apps/list.tsx
#	web/app/components/header/account-dropdown/compliance.tsx
#	web/app/components/header/account-dropdown/index.tsx
#	web/app/components/header/account-dropdown/support.tsx
#	web/app/components/workflow-app/components/workflow-onboarding-modal/index.tsx
#	web/app/components/workflow/panel/debug-and-preview/hooks.ts
#	web/contract/console/apps.ts
#	web/contract/router.ts
#	web/eslint-suppressions.json
#	web/next.config.ts
#	web/pnpm-lock.yaml
2026-03-23 09:39:49 +08:00

289 lines
10 KiB
Python

from __future__ import annotations
import base64
import logging
from collections.abc import Mapping
from configs import dify_config
from dify_graph.model_runtime.entities import (
AudioPromptMessageContent,
DocumentPromptMessageContent,
ImagePromptMessageContent,
TextPromptMessageContent,
VideoPromptMessageContent,
)
from dify_graph.model_runtime.entities.message_entities import (
MultiModalPromptMessageContent,
PromptMessageContentUnionTypes,
)
from . import helpers
from .enums import FileAttribute
from .models import File, FileTransferMethod, FileType
from .runtime import get_workflow_file_runtime
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
def get_attr(*, file: File, attr: FileAttribute):
    """Look up a single attribute of ``file`` selected by ``attr``.

    Args:
        file: The file whose attribute is read.
        attr: Which attribute to read.

    Returns:
        The requested value. Enum-backed attributes (type, transfer method)
        are returned as their ``.value``; the URL attribute is computed via
        ``_to_url``. ``None`` is returned when ``attr`` matches none of the
        handled attributes.
    """
    # Each resolver is a thunk so that only the selected attribute is
    # evaluated (notably, ``_to_url`` can raise for incomplete files).
    resolvers = (
        (FileAttribute.TYPE, lambda: file.type.value),
        (FileAttribute.SIZE, lambda: file.size),
        (FileAttribute.NAME, lambda: file.filename),
        (FileAttribute.MIME_TYPE, lambda: file.mime_type),
        (FileAttribute.TRANSFER_METHOD, lambda: file.transfer_method.value),
        (FileAttribute.URL, lambda: _to_url(file)),
        (FileAttribute.EXTENSION, lambda: file.extension),
        (FileAttribute.RELATED_ID, lambda: file.related_id),
    )
    for candidate, resolve in resolvers:
        if attr == candidate:
            return resolve()
    return None
def to_prompt_message_content(
    f: File,
    /,
    *,
    image_detail_config: ImagePromptMessageContent.DETAIL | None = None,
) -> PromptMessageContentUnionTypes:
    """Turn a file into the prompt message content object for its file type.

    Args:
        f: The file to convert. Must have both ``extension`` and ``mime_type``.
        image_detail_config: Detail level used for image content; when not
            given, ``ImagePromptMessageContent.DETAIL.LOW`` is used.

    Returns:
        Image/audio/video/document prompt content for the matching file types,
        or a ``TextPromptMessageContent`` placeholder for any other type.

    Raises:
        ValueError: If the file has no extension or no mime type.
    """
    if f.extension is None:
        raise ValueError("Missing file extension")
    if f.mime_type is None:
        raise ValueError("Missing file mime_type")

    content_classes: Mapping[FileType, type[PromptMessageContentUnionTypes]] = {
        FileType.IMAGE: ImagePromptMessageContent,
        FileType.AUDIO: AudioPromptMessageContent,
        FileType.VIDEO: VideoPromptMessageContent,
        FileType.DOCUMENT: DocumentPromptMessageContent,
    }
    content_cls = content_classes.get(f.type)
    if content_cls is None:
        return TextPromptMessageContent(data=f"[Unsupported file type: {f.filename} ({f.type.value})]")

    # Only the representation selected by the runtime's send format is
    # materialized; the other field is left as an empty string.
    transport = get_workflow_file_runtime().multimodal_send_format
    encoded_payload = _get_encoded_string(f) if transport == "base64" else ""
    link = _to_url(f) if transport == "url" else ""

    payload = {
        "base64_data": encoded_payload,
        "url": link,
        "format": f.extension.removeprefix("."),
        "mime_type": f.mime_type,
        "filename": f.filename or "",
        # Encoded file reference for context restoration: "transfer_method:related_id" or "remote:url"
        "file_ref": _encode_file_ref(f),
    }
    if f.type == FileType.IMAGE:
        payload["detail"] = image_detail_config or ImagePromptMessageContent.DETAIL.LOW

    return content_cls.model_validate(payload)
def _encode_file_ref(f: File) -> str | None:
"""Encode file reference as 'transfer_method:id_or_url' string."""
if f.transfer_method == FileTransferMethod.REMOTE_URL:
return f"remote:{f.remote_url}" if f.remote_url else None
elif f.transfer_method == FileTransferMethod.LOCAL_FILE:
return f"local:{f.related_id}" if f.related_id else None
elif f.transfer_method == FileTransferMethod.TOOL_FILE:
return f"tool:{f.related_id}" if f.related_id else None
return None
def download(f: File, /) -> bytes:
if f.transfer_method in (
FileTransferMethod.TOOL_FILE,
FileTransferMethod.LOCAL_FILE,
FileTransferMethod.DATASOURCE_FILE,
):
return _download_file_content(f.storage_key)
elif f.transfer_method == FileTransferMethod.REMOTE_URL:
if f.remote_url is None:
raise ValueError("Missing file remote_url")
response = get_workflow_file_runtime().http_get(f.remote_url, follow_redirects=True)
response.raise_for_status()
return response.content
raise ValueError(f"unsupported transfer method: {f.transfer_method}")
def _download_file_content(path: str, /) -> bytes:
    """Load the object stored at ``path`` and return it as bytes.

    Raises:
        ValueError: If the runtime's non-streaming load does not return a
            ``bytes`` object.
    """
    runtime = get_workflow_file_runtime()
    payload = runtime.storage_load(path, stream=False)
    if isinstance(payload, bytes):
        return payload
    raise ValueError(f"file {path} is not a bytes object")
def _get_encoded_string(f: File, /) -> str:
    """Return the file's content as a base64-encoded string.

    The bytes are obtained through ``download``, which reads tool, local and
    datasource files from storage and fetches remote files over HTTP.

    Args:
        f: The file whose content should be encoded.

    Returns:
        The base64 encoding of the file content, decoded as UTF-8 text.

    Raises:
        ValueError: If a remote file has no ``remote_url`` or the transfer
            method is not supported by ``download``.
    """
    # Delegating to download() keeps the per-transfer-method logic in one
    # place and guarantees an explicit ValueError for unhandled transfer
    # methods, instead of falling through with no data bound.
    return base64.b64encode(download(f)).decode("utf-8")
def _to_url(f: File, /):
if f.transfer_method == FileTransferMethod.REMOTE_URL:
if f.remote_url is None:
raise ValueError("Missing file remote_url")
return f.remote_url
elif f.transfer_method == FileTransferMethod.LOCAL_FILE:
if f.related_id is None:
raise ValueError("Missing file related_id")
return f.remote_url or helpers.get_signed_file_url(upload_file_id=f.related_id)
elif f.transfer_method == FileTransferMethod.TOOL_FILE:
if f.related_id is None or f.extension is None:
raise ValueError("Missing file related_id or extension")
return helpers.get_signed_tool_file_url(tool_file_id=f.related_id, extension=f.extension)
else:
raise ValueError(f"Unsupported transfer method: {f.transfer_method}")
def restore_multimodal_content(
    content: MultiModalPromptMessageContent,
) -> MultiModalPromptMessageContent:
    """
    Restore base64_data or url for multimodal content from file_ref.

    file_ref format: "transfer_method:id_or_url" (e.g., "local:abc123", "remote:https://...")

    Restoration is best-effort: any failure is logged as a warning and the
    original content is returned unchanged.

    Args:
        content: MultiModalPromptMessageContent with file_ref field

    Returns:
        A copy of ``content`` with base64_data or url filled in, or ``content``
        itself when there is nothing to restore (no file_ref, data already
        present, file not found) or restoration failed.
    """
    # Skip if no file reference or content already has data
    if not content.file_ref:
        return content
    if content.base64_data or content.url:
        return content

    try:
        file = _build_file_from_ref(
            file_ref=content.file_ref,
            file_format=content.format,
            mime_type=content.mime_type,
            filename=content.filename,
        )
        if file is None:
            return content

        # Read the send format from the workflow file runtime, the same
        # source to_prompt_message_content uses when it first encodes a file,
        # so restored content cannot disagree with freshly built content.
        send_format = get_workflow_file_runtime().multimodal_send_format
        if send_format == "base64":
            restored_base64 = _get_encoded_string(file)
            return content.model_copy(update={"base64_data": restored_base64})

        restored_url = _to_url(file)
        return content.model_copy(update={"url": restored_url})
    except Exception as e:
        # Deliberately broad: restoring is optional, so storage, HTTP, database
        # or validation failures must not break the caller.
        logger.warning("Failed to restore multimodal content: %s", e)
        return content
def _file_type_from_mime(mime_type: str | None) -> FileType:
    """Choose a FileType from the top-level media type of ``mime_type``.

    ``image/*``, ``audio/*`` and ``video/*`` map to the matching FileType and
    any other non-empty MIME type maps to ``FileType.DOCUMENT``. When no MIME
    type is available, ``FileType.IMAGE`` is returned, which is the default
    this module has always used for restored files.
    """
    if not mime_type:
        return FileType.IMAGE
    media_type = mime_type.partition("/")[0].strip().lower()
    if media_type == "image":
        return FileType.IMAGE
    if media_type == "audio":
        return FileType.AUDIO
    if media_type == "video":
        return FileType.VIDEO
    return FileType.DOCUMENT


def _build_file_from_ref(
    file_ref: str,
    file_format: str | None,
    mime_type: str | None,
    filename: str | None,
) -> File | None:
    """
    Build a File object from encoded file_ref string.

    Remote references are turned into a File directly. Local and tool
    references are resolved by looking up the UploadFile / ToolFile row with
    the referenced id to obtain the tenant id and storage key.

    Args:
        file_ref: Encoded reference "transfer_method:id_or_url"
        file_format: The file format/extension (without dot)
        mime_type: The mime type
        filename: The filename

    Returns:
        File object with storage_key loaded, or None if the reference is
        malformed, uses an unknown method, or no matching row exists
    """
    # Parse file_ref: "method:value". Only the first colon separates the two
    # parts, so URLs (which contain colons themselves) stay intact.
    method, separator, value = file_ref.partition(":")
    if not separator or not value:
        # A missing separator or empty id/URL can never resolve to a file;
        # bail out before building a File or touching the database.
        logger.warning("Invalid file_ref format: %s", file_ref)
        return None

    extension = f".{file_format}" if file_format else None

    if method == "remote":
        return File(
            tenant_id="",
            type=_file_type_from_mime(mime_type),
            transfer_method=FileTransferMethod.REMOTE_URL,
            remote_url=value,
            extension=extension,
            mime_type=mime_type,
            filename=filename,
            storage_key="",
        )

    # Database-layer imports are kept function-local (as in the original
    # code) and placed after the remote branch, which does not need them.
    from sqlalchemy import select
    from sqlalchemy.orm import Session

    from extensions.ext_database import db
    from models.model import UploadFile
    from models.tools import ToolFile

    # Query database for storage_key
    # NOTE(review): rows are looked up by id only; there is no tenant filter
    # in this function. Confirm callers only pass trusted file_ref values.
    with Session(db.engine) as session:
        if method == "local":
            upload_file = session.scalar(select(UploadFile).where(UploadFile.id == value))
            if upload_file is not None:
                resolved_mime = mime_type or upload_file.mime_type
                stored_extension = f".{upload_file.extension}" if upload_file.extension else None
                return File(
                    tenant_id=upload_file.tenant_id,
                    # Derived from the MIME type: a stored extension such as
                    # "png" says nothing about FileType member names and may
                    # be missing altogether.
                    type=_file_type_from_mime(resolved_mime),
                    transfer_method=FileTransferMethod.LOCAL_FILE,
                    related_id=value,
                    extension=extension or stored_extension,
                    mime_type=resolved_mime,
                    filename=filename or upload_file.name,
                    storage_key=upload_file.key,
                )
        elif method == "tool":
            tool_file = session.scalar(select(ToolFile).where(ToolFile.id == value))
            if tool_file is not None:
                resolved_mime = mime_type or tool_file.mimetype
                return File(
                    tenant_id=tool_file.tenant_id,
                    type=_file_type_from_mime(resolved_mime),
                    transfer_method=FileTransferMethod.TOOL_FILE,
                    related_id=value,
                    extension=extension,
                    mime_type=resolved_mime,
                    filename=filename or tool_file.name,
                    storage_key=tool_file.file_key,
                )

    logger.warning("File not found for file_ref: %s", file_ref)
    return None
class FileManager:
    """Object-style facade over this module's file helpers (FileManagerProtocol adapter)."""

    def download(self, f: File, /) -> bytes:
        """Return the bytes of ``f`` by delegating to the module-level ``download``."""
        payload = download(f)
        return payload
# Module-level FileManager instance, created when this module is imported.
file_manager = FileManager()