refactor: move workflow package to dify_graph (#32844)

This commit is contained in:
-LAN-
2026-03-02 18:42:30 +08:00
committed by GitHub
parent 9c33923985
commit c917838f9c
613 changed files with 2008 additions and 2012 deletions

View File

@ -0,0 +1,22 @@
from .config import build_http_request_config, resolve_http_request_config
from .entities import (
HTTP_REQUEST_CONFIG_FILTER_KEY,
BodyData,
HttpRequestNodeAuthorization,
HttpRequestNodeBody,
HttpRequestNodeConfig,
HttpRequestNodeData,
)
from .node import HttpRequestNode
# Names re-exported from the sibling ``config``, ``entities`` and ``node`` modules
# imported above.
__all__ = [
    "HTTP_REQUEST_CONFIG_FILTER_KEY",
    "BodyData",
    "HttpRequestNode",
    "HttpRequestNodeAuthorization",
    "HttpRequestNodeBody",
    "HttpRequestNodeConfig",
    "HttpRequestNodeData",
    "build_http_request_config",
    "resolve_http_request_config",
]

View File

@ -0,0 +1,33 @@
from collections.abc import Mapping
from .entities import HTTP_REQUEST_CONFIG_FILTER_KEY, HttpRequestNodeConfig
def build_http_request_config(
    *,
    max_connect_timeout: int = 10,
    max_read_timeout: int = 600,
    max_write_timeout: int = 600,
    max_binary_size: int = 10 * 1024 * 1024,
    max_text_size: int = 1 * 1024 * 1024,
    ssl_verify: bool = True,
    ssrf_default_max_retries: int = 3,
) -> HttpRequestNodeConfig:
    """Create an ``HttpRequestNodeConfig`` from keyword-only settings.

    Every argument is optional and is forwarded unchanged to the field of the
    same name on ``HttpRequestNodeConfig``.

    Returns:
        The populated ``HttpRequestNodeConfig`` instance.
    """
    settings = {
        "max_connect_timeout": max_connect_timeout,
        "max_read_timeout": max_read_timeout,
        "max_write_timeout": max_write_timeout,
        "max_binary_size": max_binary_size,
        "max_text_size": max_text_size,
        "ssl_verify": ssl_verify,
        "ssrf_default_max_retries": ssrf_default_max_retries,
    }
    return HttpRequestNodeConfig(**settings)
def resolve_http_request_config(filters: Mapping[str, object] | None) -> HttpRequestNodeConfig:
    """Fetch the ``HttpRequestNodeConfig`` stored in ``filters``.

    Args:
        filters: Mapping expected to hold the config under
            ``HTTP_REQUEST_CONFIG_FILTER_KEY``.

    Returns:
        The stored ``HttpRequestNodeConfig`` instance.

    Raises:
        ValueError: If ``filters`` is ``None``/empty, or the stored value is
            not an ``HttpRequestNodeConfig``.
    """
    if not filters:
        raise ValueError("http_request_config is required to build HTTP request default config")

    candidate = filters.get(HTTP_REQUEST_CONFIG_FILTER_KEY)
    if isinstance(candidate, HttpRequestNodeConfig):
        return candidate
    raise ValueError("http_request_config must be an HttpRequestNodeConfig instance")

View File

@ -0,0 +1,239 @@
import mimetypes
from collections.abc import Sequence
from dataclasses import dataclass
from email.message import Message
from typing import Any, Literal
import charset_normalizer
import httpx
from pydantic import BaseModel, Field, ValidationInfo, field_validator
from dify_graph.nodes.base import BaseNodeData
# Key under which an ``HttpRequestNodeConfig`` is looked up in a ``filters`` mapping.
HTTP_REQUEST_CONFIG_FILTER_KEY = "http_request_config"
class HttpRequestNodeAuthorizationConfig(BaseModel):
    """Credential settings attached to an ``api-key`` authorization."""

    # Scheme that decides how the header value is built from ``api_key``.
    type: Literal["basic", "bearer", "custom"]
    # Credential text.
    api_key: str
    # Header name carrying the credential; defaults to an empty string.
    header: str = ""
class HttpRequestNodeAuthorization(BaseModel):
    """Authorization settings of an HTTP request node."""

    type: Literal["no-auth", "api-key"]
    config: HttpRequestNodeAuthorizationConfig | None = None

    @field_validator("config", mode="before")
    @classmethod
    def check_config(cls, v: Any, values: ValidationInfo):
        """Normalise the raw ``config`` value according to ``type``.

        When ``type`` is ``no-auth`` the config is discarded (``None``);
        otherwise the raw value must be a non-empty dict.

        Raises:
            ValueError: If a config is required but ``v`` is empty or not a dict.
        """
        # ``values.data`` only holds fields that already validated successfully.
        # If ``type`` itself was rejected it is absent, so use ``.get`` to avoid
        # a bare ``KeyError`` escaping the validator.
        if values.data.get("type") == "no-auth":
            return None
        if not v or not isinstance(v, dict):
            raise ValueError("config should be a dict")
        return v
class BodyData(BaseModel):
    """One entry of a request body (a text field or a file reference)."""

    # Field name; empty by default.
    key: str = ""
    # Whether the entry carries text (``value``) or a file selector (``file``).
    type: Literal["file", "text"]
    # Text payload; empty by default.
    value: str = ""
    # Variable selector pointing at a file; an empty list by default.
    file: Sequence[str] = Field(default_factory=list)
class HttpRequestNodeBody(BaseModel):
    """Request body description: a body type plus its data entries."""

    type: Literal["none", "form-data", "x-www-form-urlencoded", "raw-text", "json", "binary"]
    data: Sequence[BodyData] = Field(default_factory=list)

    @field_validator("data", mode="before")
    @classmethod
    def check_data(cls, v: Any):
        """Accept legacy shapes of ``data``.

        A falsy value becomes an empty list and a plain non-empty string is
        wrapped into a single text ``BodyData``; anything else passes through.
        """
        if isinstance(v, str) and v:
            return [BodyData(key="", type="text", value=v)]
        return v if v else []
class HttpRequestNodeTimeout(BaseModel):
    """Optional connect/read/write timeout values; each defaults to ``None``."""

    connect: int | None = None
    read: int | None = None
    write: int | None = None
@dataclass(frozen=True, slots=True)
class HttpRequestNodeConfig:
max_connect_timeout: int
max_read_timeout: int
max_write_timeout: int
max_binary_size: int
max_text_size: int
ssl_verify: bool
ssrf_default_max_retries: int
def default_timeout(self) -> "HttpRequestNodeTimeout":
return HttpRequestNodeTimeout(
connect=self.max_connect_timeout,
read=self.max_read_timeout,
write=self.max_write_timeout,
)
class HttpRequestNodeData(BaseNodeData):
    """
    HTTP Request Node Data.

    ``headers`` and ``params`` are plain strings; ``body``, ``timeout`` and
    ``ssl_verify`` are optional and default to ``None``.
    """

    # Both lower-case and upper-case spellings of each verb are accepted.
    method: Literal[
        "get",
        "post",
        "put",
        "patch",
        "delete",
        "head",
        "options",
        "GET",
        "POST",
        "PUT",
        "PATCH",
        "DELETE",
        "HEAD",
        "OPTIONS",
    ]
    url: str
    authorization: HttpRequestNodeAuthorization
    headers: str
    params: str
    body: HttpRequestNodeBody | None = None
    timeout: HttpRequestNodeTimeout | None = None
    ssl_verify: bool | None = None
class Response:
    """Wrapper around an ``httpx.Response`` adding file detection and text decoding."""

    headers: dict[str, str]
    response: httpx.Response
    _cached_text: str | None

    def __init__(self, response: httpx.Response):
        self.response = response
        self.headers = dict(response.headers)
        self._cached_text = None

    @property
    def is_file(self):
        """
        Determine if the response contains a file by checking:
        1. Content-Disposition header (RFC 6266)
        2. Content characteristics
        3. MIME type analysis
        """
        import codecs  # local import: only needed for the sampling check below

        content_type = self.content_type.split(";")[0].strip().lower()
        parsed_content_disposition = self.parsed_content_disposition

        # An explicit attachment, or any disposition carrying a filename, is a file.
        if parsed_content_disposition:
            disp_type = parsed_content_disposition.get_content_disposition()
            filename = parsed_content_disposition.get_filename()
            if disp_type == "attachment" or filename is not None:
                return True

        # Non-CSV text types are never files.
        # NOTE(review): "text/csv" falls through to the MIME analysis below, which only
        # accepts application/image/audio/video main types -- confirm that is intended.
        if content_type.startswith("text/") and "csv" not in content_type:
            return False

        if content_type.startswith("application/"):
            # Common text-based application types.
            if any(
                text_type in content_type
                for text_type in ("json", "xml", "javascript", "x-www-form-urlencoded", "yaml", "graphql")
            ):
                return False

            # Sample the first 1024 bytes to see whether the payload looks like text.
            content = self.response.content
            content_sample = content[:1024]
            try:
                # The cut may fall inside a multi-byte UTF-8 character. An incremental
                # decoder with ``final=False`` tolerates such a truncated tail, so valid
                # text is not mistaken for binary. When the sample is the whole payload
                # a dangling tail is a real error, hence ``final=True`` in that case.
                decoder = codecs.getincrementaldecoder("utf-8")()
                decoder.decode(content_sample, final=len(content) <= len(content_sample))
            except UnicodeDecodeError:
                # Not valid UTF-8: treat as a binary file.
                return True
            # Valid UTF-8 containing common text markers: not a file.
            text_markers = (b"{", b"[", b"<", b"function", b"var ", b"const ", b"let ")
            if any(marker in content_sample for marker in text_markers):
                return False

        # For other types, use MIME type analysis.
        main_type, _ = mimetypes.guess_type("dummy" + (mimetypes.guess_extension(content_type) or ""))
        if main_type:
            return main_type.split("/")[0] in ("application", "image", "audio", "video")

        # For unknown types, check if it's a media type.
        return any(media_type in content_type for media_type in ("image/", "audio/", "video/"))

    @property
    def content_type(self) -> str:
        """Value of the ``content-type`` header, or an empty string when absent."""
        return self.headers.get("content-type", "")

    @property
    def text(self) -> str:
        """
        Get response text with robust encoding detection.

        charset_normalizer is tried first; if it yields no usable encoding or
        decoding fails, httpx's own ``text`` is used. The result is cached.
        """
        if getattr(self, "_cached_text", None) is not None:
            return self._cached_text

        detected_encoding = charset_normalizer.from_bytes(self.response.content).best()
        if detected_encoding and detected_encoding.encoding:
            try:
                text = self.response.content.decode(detected_encoding.encoding)
                self._cached_text = text
                return text
            except (UnicodeDecodeError, TypeError, LookupError):
                # Fall through to httpx's encoding detection.
                pass

        text = self.response.text
        self._cached_text = text
        return text

    @property
    def content(self) -> bytes:
        """Raw response body."""
        return self.response.content

    @property
    def status_code(self) -> int:
        """HTTP status code of the wrapped response."""
        return self.response.status_code

    @property
    def size(self) -> int:
        """Length of the raw body in bytes."""
        return len(self.content)

    @property
    def readable_size(self) -> str:
        """Body size formatted as bytes, KB or MB (two decimals for KB/MB)."""
        if self.size < 1024:
            return f"{self.size} bytes"
        elif self.size < 1024 * 1024:
            return f"{(self.size / 1024):.2f} KB"
        else:
            return f"{(self.size / 1024 / 1024):.2f} MB"

    @property
    def parsed_content_disposition(self) -> Message | None:
        """``Message`` holding the content-disposition header, or ``None`` if absent."""
        content_disposition = self.headers.get("content-disposition", "")
        if content_disposition:
            msg = Message()
            msg["content-disposition"] = content_disposition
            return msg
        return None

View File

@ -0,0 +1,26 @@
class HttpRequestNodeError(ValueError):
    """Base class for all errors raised by the HTTP request node."""


class AuthorizationConfigError(HttpRequestNodeError):
    """The authorization config is missing or invalid."""


class FileFetchError(HttpRequestNodeError):
    """A file could not be fetched."""


class InvalidHttpMethodError(HttpRequestNodeError):
    """An invalid HTTP method was used."""


class ResponseSizeError(HttpRequestNodeError):
    """The response size exceeds the allowed threshold."""


class RequestBodyError(HttpRequestNodeError):
    """The request body is invalid."""


class InvalidURLError(HttpRequestNodeError):
    """The URL is invalid."""

View File

@ -0,0 +1,488 @@
import base64
import json
import secrets
import string
from collections.abc import Callable, Mapping
from copy import deepcopy
from typing import Any, Literal
from urllib.parse import urlencode, urlparse
import httpx
from json_repair import repair_json
from dify_graph.file.enums import FileTransferMethod
from dify_graph.runtime import VariablePool
from dify_graph.variables.segments import ArrayFileSegment, FileSegment
from ..protocols import FileManagerProtocol, HttpClientProtocol
from .entities import (
HttpRequestNodeAuthorization,
HttpRequestNodeConfig,
HttpRequestNodeData,
HttpRequestNodeTimeout,
Response,
)
from .exc import (
AuthorizationConfigError,
FileFetchError,
HttpRequestNodeError,
InvalidHttpMethodError,
InvalidURLError,
RequestBodyError,
ResponseSizeError,
)
# Default Content-Type header value for each body type that has one.
# The "none" and "binary" body types have no entry here.
BODY_TYPE_TO_CONTENT_TYPE = {
    "json": "application/json",
    "x-www-form-urlencoded": "application/x-www-form-urlencoded",
    "form-data": "multipart/form-data",
    "raw-text": "text/plain",
}
class Executor:
    """Prepares and performs one HTTP request described by an ``HttpRequestNodeData``."""

    method: Literal[
        "get",
        "head",
        "post",
        "put",
        "delete",
        "patch",
        "options",
        "GET",
        "POST",
        "PUT",
        "PATCH",
        "DELETE",
        "HEAD",
        "OPTIONS",
    ]
    url: str
    params: list[tuple[str, str]] | None
    content: str | bytes | None
    data: Mapping[str, Any] | None
    files: list[tuple[str, tuple[str | None, bytes, str]]] | None
    json: Any
    headers: dict[str, str]
    auth: HttpRequestNodeAuthorization
    timeout: HttpRequestNodeTimeout
    max_retries: int
    # Declared here but not assigned by ``__init__``.
    boundary: str

    def __init__(
        self,
        *,
        node_data: HttpRequestNodeData,
        timeout: HttpRequestNodeTimeout,
        variable_pool: VariablePool,
        http_request_config: HttpRequestNodeConfig,
        max_retries: int | None = None,
        ssl_verify: bool | None = None,
        http_client: HttpClientProtocol,
        file_manager: FileManagerProtocol,
    ):
        """Resolve templates and prepare every request component.

        Args:
            node_data: Node configuration; it is read but never modified.
            timeout: Timeout values used for the request.
            variable_pool: Used to resolve templates in the node data.
            http_request_config: Source of the default ``ssl_verify`` and retry count.
            max_retries: Overrides ``http_request_config.ssrf_default_max_retries``
                when not ``None``.
            ssl_verify: Overrides ``node_data.ssl_verify`` when not ``None``.
            http_client: Client used to send the request.
            file_manager: Used to download file contents for the body.

        Raises:
            AuthorizationConfigError: If api-key authorization has no config or
                its key is blank after template resolution.
            ValueError: If the effective ``ssl_verify`` is not a bool.
        """
        self._http_request_config = http_request_config

        # Resolve the API-key template on a private copy so the caller's
        # ``node_data`` keeps its original template text; resolving in place would
        # overwrite it with the resolved value.
        authorization = deepcopy(node_data.authorization)
        if authorization.type == "api-key":
            if authorization.config is None:
                raise AuthorizationConfigError("authorization config is required")
            authorization.config.api_key = variable_pool.convert_template(authorization.config.api_key).text

            # Validate that API key is not empty after template conversion
            if not authorization.config.api_key or not authorization.config.api_key.strip():
                raise AuthorizationConfigError(
                    "API key is required for authorization but was empty. Please provide a valid API key."
                )

        self.url = node_data.url
        self.method = node_data.method
        self.auth = authorization
        self.timeout = timeout

        # Precedence: explicit argument, then node data, then global config.
        self.ssl_verify = ssl_verify if ssl_verify is not None else node_data.ssl_verify
        if self.ssl_verify is None:
            self.ssl_verify = self._http_request_config.ssl_verify
        if not isinstance(self.ssl_verify, bool):
            raise ValueError("ssl_verify must be a boolean")

        self.params = None
        self.headers = {}
        self.content = None
        self.files = None
        self.data = None
        self.json = None
        self.max_retries = (
            max_retries if max_retries is not None else self._http_request_config.ssrf_default_max_retries
        )
        self._http_client = http_client
        self._file_manager = file_manager

        # init template
        self.variable_pool = variable_pool
        self.node_data = node_data
        self._initialize()
def _initialize(self):
    """Run the four preparation steps in order: URL, params, headers, body."""
    steps = (self._init_url, self._init_params, self._init_headers, self._init_body)
    for step in steps:
        step()
def _init_url(self):
    """Resolve the URL template and validate the result.

    Raises:
        InvalidURLError: If the resolved URL is empty or does not start with
            ``http://`` or ``https://``.
    """
    resolved = self.variable_pool.convert_template(self.node_data.url).text
    self.url = resolved

    if not resolved:
        raise InvalidURLError("url is required")
    has_scheme = resolved.startswith("http://") or resolved.startswith("https://")
    if not has_scheme:
        raise InvalidURLError("url should start with http:// or https://")
def _init_params(self):
    """Parse the multi-line ``params`` string into a list of (key, value) pairs.

    Each non-blank line is split at its first colon; lines whose key is empty
    are skipped, and a missing value becomes an empty string. Key and value
    are resolved as templates separately. A list (not a dict) is produced so
    repeated keys are kept. ``self.params`` is only assigned when at least one
    pair was found.
    """
    convert = self.variable_pool.convert_template
    pairs = []
    for raw_line in self.node_data.params.splitlines():
        name, _, raw_value = raw_line.strip().partition(":")
        name = name.strip()
        if not name:
            continue
        pairs.append((convert(name).text, convert(raw_value.strip()).text))

    if pairs:
        self.params = pairs
def _init_headers(self):
    """Convert the multi-line header string into ``self.headers``.

    The whole string is resolved as one template, then each non-blank line is
    split at its first colon into a stripped key and value. A line without a
    colon yields an empty value. Lines whose key is empty (for example one
    starting with a colon) are skipped, matching how ``_init_params`` treats
    empty keys, so no header with an empty name is produced. A later line with
    the same key overwrites an earlier one.
    """
    rendered = self.variable_pool.convert_template(self.node_data.headers).text
    parsed = {}
    for line in rendered.splitlines():
        name, _, value = line.partition(":")
        name = name.strip()
        if not name:
            # Blank line, or a line with nothing before the colon.
            continue
        parsed[name] = value.strip()
    self.headers = parsed
def _init_body(self):
body = self.node_data.body
if body is not None:
data = body.data
match body.type:
case "none":
self.content = ""
case "raw-text":
if len(data) != 1:
raise RequestBodyError("raw-text body type should have exactly one item")
self.content = self.variable_pool.convert_template(data[0].value).text
case "json":
if len(data) != 1:
raise RequestBodyError("json body type should have exactly one item")
json_string = self.variable_pool.convert_template(data[0].value).text
try:
repaired = repair_json(json_string)
json_object = json.loads(repaired, strict=False)
except json.JSONDecodeError as e:
raise RequestBodyError(f"Failed to parse JSON: {json_string}") from e
self.json = json_object
# self.json = self._parse_object_contains_variables(json_object)
case "binary":
if len(data) != 1:
raise RequestBodyError("binary body type should have exactly one item")
file_selector = data[0].file
file_variable = self.variable_pool.get_file(file_selector)
if file_variable is None:
raise FileFetchError(f"cannot fetch file with selector {file_selector}")
file = file_variable.value
self.content = self._file_manager.download(file)
case "x-www-form-urlencoded":
form_data = {
self.variable_pool.convert_template(item.key).text: self.variable_pool.convert_template(
item.value
).text
for item in data
}
self.data = form_data
case "form-data":
form_data = {
self.variable_pool.convert_template(item.key).text: self.variable_pool.convert_template(
item.value
).text
for item in filter(lambda item: item.type == "text", data)
}
file_selectors = {
self.variable_pool.convert_template(item.key).text: item.file
for item in filter(lambda item: item.type == "file", data)
}
# get files from file_selectors, add support for array file variables
files_list = []
for key, selector in file_selectors.items():
segment = self.variable_pool.get(selector)
if isinstance(segment, FileSegment):
files_list.append((key, [segment.value]))
elif isinstance(segment, ArrayFileSegment):
files_list.append((key, list(segment.value)))
# get files from file_manager
files: dict[str, list[tuple[str | None, bytes, str]]] = {}
for key, files_in_segment in files_list:
for file in files_in_segment:
if file.related_id is not None or (
file.transfer_method == FileTransferMethod.REMOTE_URL and file.remote_url is not None
):
file_tuple = (
file.filename,
self._file_manager.download(file),
file.mime_type or "application/octet-stream",
)
if key not in files:
files[key] = []
files[key].append(file_tuple)
# convert files to list for httpx request
# If there are no actual files, we still need to force httpx to use `multipart/form-data`.
# This is achieved by inserting a harmless placeholder file that will be ignored by the server.
if not files:
self.files = [("__multipart_placeholder__", ("", b"", "application/octet-stream"))]
if files:
self.files = []
for key, file_tuples in files.items():
for file_tuple in file_tuples:
self.files.append((key, file_tuple))
self.data = form_data
def _assembling_headers(self) -> dict[str, Any]:
    """Build the final request headers without modifying ``self.headers``.

    Adds the authorization header for api-key auth, then settles the
    Content-Type according to the body type.

    Raises:
        AuthorizationConfigError: If api-key auth is selected without a config.
    """
    headers = deepcopy(self.headers) or {}

    if self.auth.type == "api-key":
        config = self.auth.config
        if config is None:
            raise AuthorizationConfigError("self.authorization config is required")

        # An empty header name falls back to "Authorization".
        header_name = config.header or "Authorization"
        api_key = config.api_key
        if config.type == "bearer":
            if api_key:
                headers[header_name] = f"Bearer {api_key}"
        elif config.type == "basic":
            if api_key:
                # A value containing ":" is base64-encoded; any other value is sent as given.
                if ":" in api_key:
                    encoded = base64.b64encode(api_key.encode("utf-8")).decode("utf-8")
                else:
                    encoded = api_key
                headers[header_name] = f"Basic {encoded}"
        elif config.type == "custom":
            if api_key:
                headers[header_name] = api_key

    body = self.node_data.body
    has_content_type = any(name.lower() == "content-type" for name in headers)
    if body and body.type == "form-data":
        if self.files:
            # Fix for issue #23829: with files present (placeholder included), drop any
            # manually set Content-Type so the HTTP client can set it with the boundary.
            headers = {k: v for k, v in headers.items() if k.lower() != "content-type"}
        elif not has_content_type:
            # No files at all, set Content-Type manually
            headers["Content-Type"] = "multipart/form-data"
    elif body and body.type in BODY_TYPE_TO_CONTENT_TYPE and not has_content_type:
        # Set Content-Type for other body types
        headers["Content-Type"] = BODY_TYPE_TO_CONTENT_TYPE[body.type]

    return headers
def _validate_and_parse_response(self, response: httpx.Response) -> Response:
    """Wrap ``response`` and enforce the configured size limit.

    File responses are checked against ``max_binary_size``, all others
    against ``max_text_size``.

    Raises:
        ResponseSizeError: If the body is larger than the applicable limit.
    """
    wrapped = Response(response)
    is_file = wrapped.is_file
    limits = self._http_request_config
    limit = limits.max_binary_size if is_file else limits.max_text_size

    if wrapped.size <= limit:
        return wrapped

    kind = "File" if is_file else "Text"
    raise ResponseSizeError(
        f"{kind} size is too large,"
        f" max size is {limit / 1024 / 1024:.2f} MB,"
        f" but current size is {wrapped.readable_size}."
    )
def _do_http_request(self, headers: dict[str, Any]) -> httpx.Response:
    """Send the prepared request through the injected HTTP client.

    Raises:
        InvalidHttpMethodError: If the method has no matching client function.
        HttpRequestNodeError: If the client reports exhausted retries or a
            request error.
    """
    client = self._http_client
    # NOTE(review): the ``method`` type also allows "options"/"OPTIONS", but there is
    # no entry for it here, so such a request raises InvalidHttpMethodError.
    senders: dict[str, Callable[..., httpx.Response]] = {
        "get": client.get,
        "head": client.head,
        "post": client.post,
        "put": client.put,
        "delete": client.delete,
        "patch": client.patch,
    }

    send = senders.get(self.method.lower())
    if send is None:
        raise InvalidHttpMethodError(f"Invalid http method {self.method}")

    timeout = (self.timeout.connect, self.timeout.read, self.timeout.write)
    try:
        # Arguments whose value is None are passed through unchanged.
        return send(
            url=self.url,
            data=self.data,
            files=self.files,
            json=self.json,
            content=self.content,
            headers=headers,
            params=self.params,
            timeout=timeout,
            ssl_verify=self.ssl_verify,
            follow_redirects=True,
            max_retries=self.max_retries,
        )
    except self._http_client.max_retries_exceeded_error as e:
        raise HttpRequestNodeError(f"Reached maximum retries for URL {self.url}") from e
    except self._http_client.request_error as e:
        raise HttpRequestNodeError(str(e)) from e
def invoke(self) -> Response:
    """Assemble the headers, send the request and return the validated response."""
    request_headers = self._assembling_headers()
    raw_response = self._do_http_request(request_headers)
    return self._validate_and_parse_response(raw_response)
def to_log(self):
    """Render the prepared request as HTTP/1.1-style text for logging.

    The value of the authorization header is replaced by asterisks, binary
    payloads are shown as size placeholders, and a fresh random multipart
    boundary is generated on every call.

    Returns:
        The request line, headers and body joined with CRLF line endings.

    Raises:
        RequestBodyError: If an empty raw-text body does not hold exactly one item.
    """
    url_parts = urlparse(self.url)
    path = url_parts.path or "/"

    # Explicit params take precedence over a query string embedded in the URL.
    if self.params:
        path += f"?{urlencode(self.params)}"
    elif url_parts.query:
        path += f"?{url_parts.query}"

    lines = [f"{self.method.upper()} {path} HTTP/1.1", f"Host: {url_parts.netloc}"]

    headers = self._assembling_headers()
    body = self.node_data.body
    boundary = f"----WebKitFormBoundary{_generate_random_string(16)}"
    if body:
        if "content-type" not in (k.lower() for k in self.headers) and body.type in BODY_TYPE_TO_CONTENT_TYPE:
            headers["Content-Type"] = BODY_TYPE_TO_CONTENT_TYPE[body.type]
        if body.type == "form-data":
            headers["Content-Type"] = f"multipart/form-data; boundary={boundary}"

    # Name of the header to mask; it does not change per header, so work it out once.
    masked_header = None
    if self.auth.type == "api-key":
        masked_header = "Authorization"
        if self.auth.config and self.auth.config.header:
            masked_header = self.auth.config.header
        masked_header = masked_header.lower()

    for k, v in headers.items():
        if masked_header is not None and k.lower() == masked_header:
            lines.append(f"{k}: {'*' * len(v)}")
        else:
            lines.append(f"{k}: {v}")

    body_string = ""
    # Only log actual files if present.
    # '__multipart_placeholder__' is inserted to force multipart encoding but is not a real file.
    if self.files and not all(f[0] == "__multipart_placeholder__" for f in self.files):
        for file_entry in self.files:
            # file_entry should be (key, (filename, content, mime_type)); skip malformed entries.
            if len(file_entry) != 2 or len(file_entry[1]) < 2:
                continue
            key = file_entry[0]
            file_info = file_entry[1]
            filename = file_info[0] or "unknown"
            mime_type = file_info[2] if len(file_info) > 2 else "unknown"
            body_string += f"--{boundary}\r\n"
            body_string += f'Content-Disposition: form-data; name="{key}"\r\n\r\n'
            # Never dump binary content; log file metadata instead.
            body_string += (
                f"<file_content_binary: '{filename}', type='{mime_type}', size={len(file_info[1])} bytes>\r\n"
            )
        body_string += f"--{boundary}--\r\n"
    elif body:
        if self.content:
            # Bytes are shown as a size placeholder rather than decoded.
            if isinstance(self.content, bytes):
                body_string = f"<binary_content: size={len(self.content)} bytes>"
            else:
                body_string = self.content
        elif self.data and body.type == "x-www-form-urlencoded":
            body_string = urlencode(self.data)
        elif self.data and body.type == "form-data":
            for key, value in self.data.items():
                body_string += f"--{boundary}\r\n"
                body_string += f'Content-Disposition: form-data; name="{key}"\r\n\r\n'
                body_string += f"{value}\r\n"
            body_string += f"--{boundary}--\r\n"
        elif self.json:
            body_string = json.dumps(self.json)
        elif body.type == "raw-text":
            if len(body.data) != 1:
                raise RequestBodyError("raw-text body type should have exactly one item")
            body_string = body.data[0].value

    raw = "\r\n".join(lines) + "\r\n"
    if body_string:
        # Content-Length is a byte count, so measure the UTF-8 encoding rather than
        # the number of characters (they differ for non-ASCII text).
        raw += f"Content-Length: {len(body_string.encode('utf-8'))}\r\n"
    raw += "\r\n"  # Empty line between headers and body
    raw += body_string
    return raw
def _generate_random_string(n: int) -> str:
    """
    Generate a random string of lowercase ASCII letters.

    Args:
        n (int): The length of the random string to generate.

    Returns:
        str: A string of ``n`` characters, each drawn from ``string.ascii_lowercase``
        via ``secrets.choice``.
    """
    alphabet = string.ascii_lowercase
    letters = [secrets.choice(alphabet) for _ in range(n)]
    return "".join(letters)

View File

@ -0,0 +1,260 @@
import logging
import mimetypes
from collections.abc import Callable, Mapping, Sequence
from typing import TYPE_CHECKING, Any
from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus
from dify_graph.file import File, FileTransferMethod
from dify_graph.node_events import NodeRunResult
from dify_graph.nodes.base import variable_template_parser
from dify_graph.nodes.base.entities import VariableSelector
from dify_graph.nodes.base.node import Node
from dify_graph.nodes.http_request.executor import Executor
from dify_graph.nodes.protocols import FileManagerProtocol, HttpClientProtocol, ToolFileManagerProtocol
from dify_graph.variables.segments import ArrayFileSegment
from factories import file_factory
from .config import build_http_request_config, resolve_http_request_config
from .entities import (
HTTP_REQUEST_CONFIG_FILTER_KEY,
HttpRequestNodeConfig,
HttpRequestNodeData,
HttpRequestNodeTimeout,
Response,
)
from .exc import HttpRequestNodeError, RequestBodyError
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from dify_graph.entities import GraphInitParams
from dify_graph.runtime import GraphRuntimeState
class HttpRequestNode(Node[HttpRequestNodeData]):
    """Workflow node that performs an HTTP request."""

    node_type = NodeType.HTTP_REQUEST

    def __init__(
        self,
        id: str,
        config: Mapping[str, Any],
        graph_init_params: "GraphInitParams",
        graph_runtime_state: "GraphRuntimeState",
        *,
        http_request_config: HttpRequestNodeConfig,
        http_client: HttpClientProtocol,
        tool_file_manager_factory: Callable[[], ToolFileManagerProtocol],
        file_manager: FileManagerProtocol,
    ) -> None:
        """Initialise the base node and store the injected collaborators.

        The positional arguments are forwarded unchanged to the base ``Node``;
        the keyword-only arguments are kept on the instance.
        """
        super().__init__(
            id=id,
            config=config,
            graph_init_params=graph_init_params,
            graph_runtime_state=graph_runtime_state,
        )

        # Injected collaborators.
        self._http_client = http_client
        self._file_manager = file_manager
        self._tool_file_manager_factory = tool_file_manager_factory
        self._http_request_config = http_request_config
@classmethod
def get_default_config(cls, filters: Mapping[str, object] | None = None) -> Mapping[str, object]:
    """Return the default configuration mapping for an HTTP request node.

    The config stored in ``filters`` under ``HTTP_REQUEST_CONFIG_FILTER_KEY``
    is used when that key is present; otherwise one is built from the
    defaults of ``build_http_request_config``.
    """
    has_custom_config = bool(filters) and HTTP_REQUEST_CONFIG_FILTER_KEY in filters
    if has_custom_config:
        http_request_config = resolve_http_request_config(filters)
    else:
        http_request_config = build_http_request_config()

    # Start from the default timeout fields, then add the configured maxima.
    timeout_section = dict(http_request_config.default_timeout().model_dump())
    timeout_section["max_connect_timeout"] = http_request_config.max_connect_timeout
    timeout_section["max_read_timeout"] = http_request_config.max_read_timeout
    timeout_section["max_write_timeout"] = http_request_config.max_write_timeout

    node_config = {
        "method": "get",
        "authorization": {
            "type": "no-auth",
        },
        "body": {"type": "none"},
        "timeout": timeout_section,
        "ssl_verify": http_request_config.ssl_verify,
    }
    retry_config = {
        "max_retries": http_request_config.ssrf_default_max_retries,
        "retry_interval": 0.5 * (2**2),  # evaluates to 2.0
        "retry_enabled": True,
    }
    return {
        "type": "http-request",
        "config": node_config,
        "retry_config": retry_config,
    }
@classmethod
def version(cls) -> str:
    """Return the version string of this node implementation."""
    node_version = "1"
    return node_version
def _run(self) -> NodeRunResult:
    """Execute the HTTP request and convert the outcome into a ``NodeRunResult``.

    A non-success response is reported as FAILED only when an error strategy
    or retry is configured; otherwise it is returned as SUCCEEDED with its
    status code in the outputs. An ``HttpRequestNodeError`` is logged and
    turned into a FAILED result.
    """
    process_data = {}
    try:
        http_executor = Executor(
            node_data=self.node_data,
            timeout=self._get_request_timeout(self.node_data),
            variable_pool=self.graph_runtime_state.variable_pool,
            http_request_config=self._http_request_config,
            max_retries=0,
            ssl_verify=self.node_data.ssl_verify,
            http_client=self._http_client,
            file_manager=self._file_manager,
        )
        # Recorded before sending so it is available if the request fails.
        process_data["request"] = http_executor.to_log()

        response = http_executor.invoke()
        files = self.extract_files(url=http_executor.url, response=response)

        # When files were extracted, the textual body is left empty.
        outputs = {
            "status_code": response.status_code,
            "body": "" if files.value else response.text,
            "headers": response.headers,
            "files": files,
        }

        if response.response.is_success or not (self.error_strategy or self.retry):
            return NodeRunResult(
                status=WorkflowNodeExecutionStatus.SUCCEEDED,
                outputs=outputs,
                process_data={
                    "request": http_executor.to_log(),
                },
            )

        return NodeRunResult(
            status=WorkflowNodeExecutionStatus.FAILED,
            outputs=outputs,
            process_data={
                "request": http_executor.to_log(),
            },
            error=f"Request failed with status code {response.status_code}",
            error_type="HTTPResponseCodeError",
        )
    except HttpRequestNodeError as e:
        logger.warning("http request node %s failed to run: %s", self._node_id, e)
        return NodeRunResult(
            status=WorkflowNodeExecutionStatus.FAILED,
            error=str(e),
            process_data=process_data,
            error_type=type(e).__name__,
        )
def _get_request_timeout(self, node_data: HttpRequestNodeData) -> HttpRequestNodeTimeout:
    """Merge the node's timeout with the configured defaults.

    When the node defines no timeout the defaults are returned as is.
    Otherwise each of connect/read/write falls back to its default whenever
    the node's value is falsy (``None`` or ``0``).
    """
    fallback = self._http_request_config.default_timeout()
    override = node_data.timeout
    if override is None:
        return fallback

    merged = {
        field: getattr(override, field) or getattr(fallback, field) for field in ("connect", "read", "write")
    }
    return HttpRequestNodeTimeout(**merged)
@classmethod
def _extract_variable_selector_to_variable_mapping(
    cls,
    *,
    graph_config: Mapping[str, Any],
    node_id: str,
    node_data: Mapping[str, Any],
) -> Mapping[str, Sequence[str]]:
    """Collect every variable selector referenced by the node's data.

    Selectors are gathered from the URL, headers, params and body, and
    returned keyed by ``"<node_id>.<variable>"``.

    Raises:
        RequestBodyError: If a binary, json or raw-text body does not hold
            exactly one item.
    """
    # Create typed NodeData from dict
    typed_node_data = HttpRequestNodeData.model_validate(node_data)
    extract = variable_template_parser.extract_selectors_from_template

    def file_selector(path):
        # File entries reference a variable directly rather than through template text.
        return VariableSelector(variable="#" + ".".join(path) + "#", value_selector=path)

    selectors: list[VariableSelector] = []
    for template in (typed_node_data.url, typed_node_data.headers, typed_node_data.params):
        selectors += extract(template)

    body = typed_node_data.body
    if body:
        body_type = body.type
        items = body.data
        if body_type == "binary":
            if len(items) != 1:
                raise RequestBodyError("invalid body data, should have only one item")
            selectors.append(file_selector(items[0].file))
        elif body_type in ("json", "raw-text"):
            if len(items) != 1:
                raise RequestBodyError("invalid body data, should have only one item")
            selectors += extract(items[0].key)
            selectors += extract(items[0].value)
        elif body_type == "x-www-form-urlencoded":
            for item in items:
                selectors += extract(item.key)
                selectors += extract(item.value)
        elif body_type == "form-data":
            for item in items:
                selectors += extract(item.key)
                if item.type == "text":
                    selectors += extract(item.value)
                elif item.type == "file":
                    selectors.append(file_selector(item.file))
        # Body type "none" contributes no selectors.

    return {node_id + "." + selector.variable: selector.value_selector for selector in selectors}
def extract_files(self, url: str, response: Response) -> ArrayFileSegment:
    """
    Store a file response as a tool file and return it wrapped in an ``ArrayFileSegment``.

    Returns an empty segment when the response is not a file. The MIME type
    is taken, in order of preference, from the Content-Disposition filename,
    the Content-Type header, the last path segment of the URL, and finally
    ``application/octet-stream``.
    """
    if not response.is_file:
        return ArrayFileSegment(value=[])

    # Prefer the type implied by the filename in Content-Disposition, if there is one.
    disposition_mime = None
    disposition = response.parsed_content_disposition
    if disposition:
        disposition_filename = disposition.get_filename()
        if disposition_filename:
            disposition_mime = mimetypes.guess_type(disposition_filename)[0]

    # Last path segment of the URL, with the query string removed.
    url_filename = url.split("?")[0].split("/")[-1] or ""
    mime_type = (
        disposition_mime or response.content_type or mimetypes.guess_type(url_filename)[0] or "application/octet-stream"
    )

    tool_file = self._tool_file_manager_factory().create_file_by_raw(
        user_id=self.user_id,
        tenant_id=self.tenant_id,
        conversation_id=None,
        file_binary=response.content,
        mimetype=mime_type,
    )

    file = file_factory.build_from_mapping(
        mapping={
            "tool_file_id": tool_file.id,
            "transfer_method": FileTransferMethod.TOOL_FILE,
        },
        tenant_id=self.tenant_id,
    )
    return ArrayFileSegment(value=[file])
@property
def retry(self) -> bool:
    """Whether retry is enabled in the node's retry configuration."""
    retry_config = self.node_data.retry_config
    return retry_config.retry_enabled