refactor: move workflow package to dify_graph (#32844)
22  api/dify_graph/nodes/http_request/__init__.py  Normal file
@@ -0,0 +1,22 @@
from .config import build_http_request_config, resolve_http_request_config
from .entities import (
    HTTP_REQUEST_CONFIG_FILTER_KEY,
    BodyData,
    HttpRequestNodeAuthorization,
    HttpRequestNodeBody,
    HttpRequestNodeConfig,
    HttpRequestNodeData,
)
from .node import HttpRequestNode

__all__ = [
    "HTTP_REQUEST_CONFIG_FILTER_KEY",
    "BodyData",
    "HttpRequestNode",
    "HttpRequestNodeAuthorization",
    "HttpRequestNodeBody",
    "HttpRequestNodeConfig",
    "HttpRequestNodeData",
    "build_http_request_config",
    "resolve_http_request_config",
]
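The package root re-exports the public surface, so callers can import from dify_graph.nodes.http_request directly rather than from its submodules. A minimal usage sketch:

    from dify_graph.nodes.http_request import (
        HttpRequestNode,
        build_http_request_config,
    )

    config = build_http_request_config(max_read_timeout=300)  # other limits keep their defaults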
33  api/dify_graph/nodes/http_request/config.py  Normal file
@@ -0,0 +1,33 @@
from collections.abc import Mapping

from .entities import HTTP_REQUEST_CONFIG_FILTER_KEY, HttpRequestNodeConfig


def build_http_request_config(
    *,
    max_connect_timeout: int = 10,
    max_read_timeout: int = 600,
    max_write_timeout: int = 600,
    max_binary_size: int = 10 * 1024 * 1024,
    max_text_size: int = 1 * 1024 * 1024,
    ssl_verify: bool = True,
    ssrf_default_max_retries: int = 3,
) -> HttpRequestNodeConfig:
    return HttpRequestNodeConfig(
        max_connect_timeout=max_connect_timeout,
        max_read_timeout=max_read_timeout,
        max_write_timeout=max_write_timeout,
        max_binary_size=max_binary_size,
        max_text_size=max_text_size,
        ssl_verify=ssl_verify,
        ssrf_default_max_retries=ssrf_default_max_retries,
    )


def resolve_http_request_config(filters: Mapping[str, object] | None) -> HttpRequestNodeConfig:
    if not filters:
        raise ValueError("http_request_config is required to build HTTP request default config")
    config = filters.get(HTTP_REQUEST_CONFIG_FILTER_KEY)
    if not isinstance(config, HttpRequestNodeConfig):
        raise ValueError("http_request_config must be an HttpRequestNodeConfig instance")
    return config
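resolve_http_request_config deliberately does not fall back to defaults: the config object must be threaded through the filters mapping under HTTP_REQUEST_CONFIG_FILTER_KEY, otherwise it raises ValueError. A minimal sketch of the round trip:

    from dify_graph.nodes.http_request import (
        HTTP_REQUEST_CONFIG_FILTER_KEY,
        build_http_request_config,
        resolve_http_request_config,
    )

    cfg = build_http_request_config(ssl_verify=False)
    filters = {HTTP_REQUEST_CONFIG_FILTER_KEY: cfg}
    assert resolve_http_request_config(filters) is cfg
    # resolve_http_request_config(None) raises ValueError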
239  api/dify_graph/nodes/http_request/entities.py  Normal file
@@ -0,0 +1,239 @@
import mimetypes
from collections.abc import Sequence
from dataclasses import dataclass
from email.message import Message
from typing import Any, Literal

import charset_normalizer
import httpx
from pydantic import BaseModel, Field, ValidationInfo, field_validator

from dify_graph.nodes.base import BaseNodeData

HTTP_REQUEST_CONFIG_FILTER_KEY = "http_request_config"


class HttpRequestNodeAuthorizationConfig(BaseModel):
    type: Literal["basic", "bearer", "custom"]
    api_key: str
    header: str = ""


class HttpRequestNodeAuthorization(BaseModel):
    type: Literal["no-auth", "api-key"]
    config: HttpRequestNodeAuthorizationConfig | None = None

    @field_validator("config", mode="before")
    @classmethod
    def check_config(cls, v: HttpRequestNodeAuthorizationConfig, values: ValidationInfo):
        """
        Check config: if type is no-auth, config should be None; otherwise it should be a dict.
        """
        if values.data["type"] == "no-auth":
            return None
        else:
            if not v or not isinstance(v, dict):
                raise ValueError("config should be a dict")

            return v


class BodyData(BaseModel):
    key: str = ""
    type: Literal["file", "text"]
    value: str = ""
    file: Sequence[str] = Field(default_factory=list)


class HttpRequestNodeBody(BaseModel):
    type: Literal["none", "form-data", "x-www-form-urlencoded", "raw-text", "json", "binary"]
    data: Sequence[BodyData] = Field(default_factory=list)

    @field_validator("data", mode="before")
    @classmethod
    def check_data(cls, v: Any):
        """For compatibility, if body is not set, return empty list."""
        if not v:
            return []
        if isinstance(v, str):
            return [BodyData(key="", type="text", value=v)]
        return v


class HttpRequestNodeTimeout(BaseModel):
    connect: int | None = None
    read: int | None = None
    write: int | None = None


@dataclass(frozen=True, slots=True)
class HttpRequestNodeConfig:
    max_connect_timeout: int
    max_read_timeout: int
    max_write_timeout: int
    max_binary_size: int
    max_text_size: int
    ssl_verify: bool
    ssrf_default_max_retries: int

    def default_timeout(self) -> "HttpRequestNodeTimeout":
        return HttpRequestNodeTimeout(
            connect=self.max_connect_timeout,
            read=self.max_read_timeout,
            write=self.max_write_timeout,
        )


class HttpRequestNodeData(BaseNodeData):
    """
    HTTP Request Node Data.
    """

    method: Literal[
        "get",
        "post",
        "put",
        "patch",
        "delete",
        "head",
        "options",
        "GET",
        "POST",
        "PUT",
        "PATCH",
        "DELETE",
        "HEAD",
        "OPTIONS",
    ]
    url: str
    authorization: HttpRequestNodeAuthorization
    headers: str
    params: str
    body: HttpRequestNodeBody | None = None
    timeout: HttpRequestNodeTimeout | None = None
    ssl_verify: bool | None = None


class Response:
    headers: dict[str, str]
    response: httpx.Response
    _cached_text: str | None

    def __init__(self, response: httpx.Response):
        self.response = response
        self.headers = dict(response.headers)
        self._cached_text = None

    @property
    def is_file(self):
        """
        Determine if the response contains a file by checking:
        1. Content-Disposition header (RFC 6266)
        2. Content characteristics
        3. MIME type analysis
        """
        content_type = self.content_type.split(";")[0].strip().lower()
        parsed_content_disposition = self.parsed_content_disposition

        # Check if it's explicitly marked as an attachment
        if parsed_content_disposition:
            disp_type = parsed_content_disposition.get_content_disposition()  # Returns 'attachment', 'inline', or None
            filename = parsed_content_disposition.get_filename()  # Returns filename if present, None otherwise
            if disp_type == "attachment" or filename is not None:
                return True

        # For 'text/' types, only 'csv' should be downloaded as a file
        if content_type.startswith("text/") and "csv" not in content_type:
            return False

        # For application types, try to detect if it's a text-based format
        if content_type.startswith("application/"):
            # Common text-based application types
            if any(
                text_type in content_type
                for text_type in ("json", "xml", "javascript", "x-www-form-urlencoded", "yaml", "graphql")
            ):
                return False

            # Try to detect if content is text-based by sampling first few bytes
            try:
                # Sample first 1024 bytes for text detection
                content_sample = self.response.content[:1024]
                content_sample.decode("utf-8")
                # If we can decode as UTF-8 and find common text patterns, likely not a file
                text_markers = (b"{", b"[", b"<", b"function", b"var ", b"const ", b"let ")
                if any(marker in content_sample for marker in text_markers):
                    return False
            except UnicodeDecodeError:
                # If we can't decode as UTF-8, likely a binary file
                return True

        # For other types, use MIME type analysis
        main_type, _ = mimetypes.guess_type("dummy" + (mimetypes.guess_extension(content_type) or ""))
        if main_type:
            return main_type.split("/")[0] in ("application", "image", "audio", "video")

        # For unknown types, check if it's a media type
        return any(media_type in content_type for media_type in ("image/", "audio/", "video/"))

    @property
    def content_type(self) -> str:
        return self.headers.get("content-type", "")

    @property
    def text(self) -> str:
        """
        Get response text with robust encoding detection.

        Uses charset_normalizer for better encoding detection than httpx's default,
        which helps handle Chinese and other non-ASCII characters properly.
        """
        # Check cache first
        if hasattr(self, "_cached_text") and self._cached_text is not None:
            return self._cached_text

        # Try charset_normalizer for robust encoding detection first
        detected_encoding = charset_normalizer.from_bytes(self.response.content).best()
        if detected_encoding and detected_encoding.encoding:
            try:
                text = self.response.content.decode(detected_encoding.encoding)
                self._cached_text = text
                return text
            except (UnicodeDecodeError, TypeError, LookupError):
                # Fall back to httpx's encoding detection if charset_normalizer fails
                pass

        # Fallback to httpx's built-in encoding detection
        text = self.response.text
        self._cached_text = text
        return text

    @property
    def content(self) -> bytes:
        return self.response.content

    @property
    def status_code(self) -> int:
        return self.response.status_code

    @property
    def size(self) -> int:
        return len(self.content)

    @property
    def readable_size(self) -> str:
        if self.size < 1024:
            return f"{self.size} bytes"
        elif self.size < 1024 * 1024:
            return f"{(self.size / 1024):.2f} KB"
        else:
            return f"{(self.size / 1024 / 1024):.2f} MB"

    @property
    def parsed_content_disposition(self) -> Message | None:
        content_disposition = self.headers.get("content-disposition", "")
        if content_disposition:
            msg = Message()
            msg["content-disposition"] = content_disposition
            return msg
        return None
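A brief sketch of how the Response wrapper behaves; the URL and httpx call are illustrative:

    import httpx

    r = Response(httpx.get("https://example.com/logo.png"))
    r.status_code      # e.g. 200
    r.is_file          # True for image/png via the MIME-type analysis above
    r.readable_size    # e.g. '24.31 KB'
    r.text             # decoded with charset_normalizer, falling back to httpx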
26  api/dify_graph/nodes/http_request/exc.py  Normal file
@@ -0,0 +1,26 @@
class HttpRequestNodeError(ValueError):
    """Custom error for HTTP request node."""


class AuthorizationConfigError(HttpRequestNodeError):
    """Raised when authorization config is missing or invalid."""


class FileFetchError(HttpRequestNodeError):
    """Raised when a file cannot be fetched."""


class InvalidHttpMethodError(HttpRequestNodeError):
    """Raised when an invalid HTTP method is used."""


class ResponseSizeError(HttpRequestNodeError):
    """Raised when the response size exceeds the allowed threshold."""


class RequestBodyError(HttpRequestNodeError):
    """Raised when the request body is invalid."""


class InvalidURLError(HttpRequestNodeError):
    """Raised when the URL is invalid."""
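Every specific error above subclasses HttpRequestNodeError, which is itself a ValueError, so a single handler can catch the whole family. A minimal sketch:

    try:
        raise InvalidURLError("url should start with http:// or https://")
    except HttpRequestNodeError as e:
        print(type(e).__name__, e)  # InvalidURLError url should start with http:// or https://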
488  api/dify_graph/nodes/http_request/executor.py  Normal file
@@ -0,0 +1,488 @@
import base64
import json
import secrets
import string
from collections.abc import Callable, Mapping
from copy import deepcopy
from typing import Any, Literal
from urllib.parse import urlencode, urlparse

import httpx
from json_repair import repair_json

from dify_graph.file.enums import FileTransferMethod
from dify_graph.runtime import VariablePool
from dify_graph.variables.segments import ArrayFileSegment, FileSegment

from ..protocols import FileManagerProtocol, HttpClientProtocol
from .entities import (
    HttpRequestNodeAuthorization,
    HttpRequestNodeConfig,
    HttpRequestNodeData,
    HttpRequestNodeTimeout,
    Response,
)
from .exc import (
    AuthorizationConfigError,
    FileFetchError,
    HttpRequestNodeError,
    InvalidHttpMethodError,
    InvalidURLError,
    RequestBodyError,
    ResponseSizeError,
)

BODY_TYPE_TO_CONTENT_TYPE = {
    "json": "application/json",
    "x-www-form-urlencoded": "application/x-www-form-urlencoded",
    "form-data": "multipart/form-data",
    "raw-text": "text/plain",
}


class Executor:
    method: Literal[
        "get",
        "head",
        "post",
        "put",
        "delete",
        "patch",
        "options",
        "GET",
        "POST",
        "PUT",
        "PATCH",
        "DELETE",
        "HEAD",
        "OPTIONS",
    ]
    url: str
    params: list[tuple[str, str]] | None
    content: str | bytes | None
    data: Mapping[str, Any] | None
    files: list[tuple[str, tuple[str | None, bytes, str]]] | None
    json: Any
    headers: dict[str, str]
    auth: HttpRequestNodeAuthorization
    timeout: HttpRequestNodeTimeout
    max_retries: int

    boundary: str

    def __init__(
        self,
        *,
        node_data: HttpRequestNodeData,
        timeout: HttpRequestNodeTimeout,
        variable_pool: VariablePool,
        http_request_config: HttpRequestNodeConfig,
        max_retries: int | None = None,
        ssl_verify: bool | None = None,
        http_client: HttpClientProtocol,
        file_manager: FileManagerProtocol,
    ):
        self._http_request_config = http_request_config
        # If authorization API key is present, convert the API key using the variable pool
        if node_data.authorization.type == "api-key":
            if node_data.authorization.config is None:
                raise AuthorizationConfigError("authorization config is required")
            node_data.authorization.config.api_key = variable_pool.convert_template(
                node_data.authorization.config.api_key
            ).text
            # Validate that API key is not empty after template conversion
            if not node_data.authorization.config.api_key or not node_data.authorization.config.api_key.strip():
                raise AuthorizationConfigError(
                    "API key is required for authorization but was empty. Please provide a valid API key."
                )

        self.url = node_data.url
        self.method = node_data.method
        self.auth = node_data.authorization
        self.timeout = timeout
        self.ssl_verify = ssl_verify if ssl_verify is not None else node_data.ssl_verify
        if self.ssl_verify is None:
            self.ssl_verify = self._http_request_config.ssl_verify
        if not isinstance(self.ssl_verify, bool):
            raise ValueError("ssl_verify must be a boolean")
        self.params = None
        self.headers = {}
        self.content = None
        self.files = None
        self.data = None
        self.json = None
        self.max_retries = (
            max_retries if max_retries is not None else self._http_request_config.ssrf_default_max_retries
        )
        self._http_client = http_client
        self._file_manager = file_manager

        # init template
        self.variable_pool = variable_pool
        self.node_data = node_data
        self._initialize()

    def _initialize(self):
        self._init_url()
        self._init_params()
        self._init_headers()
        self._init_body()

    def _init_url(self):
        self.url = self.variable_pool.convert_template(self.node_data.url).text

        # check if url is a valid URL
        if not self.url:
            raise InvalidURLError("url is required")
        if not self.url.startswith(("http://", "https://")):
            raise InvalidURLError("url should start with http:// or https://")

    def _init_params(self):
        """
        Almost the same as _init_headers(), with two differences:
        1. returns a list of tuples to support repeated keys, like 'aa=1&aa=2'
        2. a param value may contain '\n', so we splitlines before extracting the variable value.
        """
        result = []
        for line in self.node_data.params.splitlines():
            if not (line := line.strip()):
                continue

            key, *value = line.split(":", 1)
            if not (key := key.strip()):
                continue

            value_str = value[0].strip() if value else ""
            result.append(
                (self.variable_pool.convert_template(key).text, self.variable_pool.convert_template(value_str).text)
            )

        if result:
            self.params = result

    def _init_headers(self):
        """
        Convert the header string from the frontend to a dictionary.

        Each line in the header string represents a key-value pair.
        Keys and values are separated by ':'.
        Empty values are allowed.

        Examples:
            'aa:bb\n cc:dd' -> {'aa': 'bb', 'cc': 'dd'}
            'aa:\n cc:dd\n' -> {'aa': '', 'cc': 'dd'}
            'aa\n cc : dd' -> {'aa': '', 'cc': 'dd'}

        """
        headers = self.variable_pool.convert_template(self.node_data.headers).text
        self.headers = {
            key.strip(): (value[0].strip() if value else "")
            for line in headers.splitlines()
            if line.strip()
            for key, *value in [line.split(":", 1)]
        }

    def _init_body(self):
        body = self.node_data.body
        if body is not None:
            data = body.data
            match body.type:
                case "none":
                    self.content = ""
                case "raw-text":
                    if len(data) != 1:
                        raise RequestBodyError("raw-text body type should have exactly one item")
                    self.content = self.variable_pool.convert_template(data[0].value).text
                case "json":
                    if len(data) != 1:
                        raise RequestBodyError("json body type should have exactly one item")
                    json_string = self.variable_pool.convert_template(data[0].value).text
                    try:
                        repaired = repair_json(json_string)
                        json_object = json.loads(repaired, strict=False)
                    except json.JSONDecodeError as e:
                        raise RequestBodyError(f"Failed to parse JSON: {json_string}") from e
                    self.json = json_object
                    # self.json = self._parse_object_contains_variables(json_object)
                case "binary":
                    if len(data) != 1:
                        raise RequestBodyError("binary body type should have exactly one item")
                    file_selector = data[0].file
                    file_variable = self.variable_pool.get_file(file_selector)
                    if file_variable is None:
                        raise FileFetchError(f"cannot fetch file with selector {file_selector}")
                    file = file_variable.value
                    self.content = self._file_manager.download(file)
                case "x-www-form-urlencoded":
                    form_data = {
                        self.variable_pool.convert_template(item.key).text: self.variable_pool.convert_template(
                            item.value
                        ).text
                        for item in data
                    }
                    self.data = form_data
                case "form-data":
                    form_data = {
                        self.variable_pool.convert_template(item.key).text: self.variable_pool.convert_template(
                            item.value
                        ).text
                        for item in filter(lambda item: item.type == "text", data)
                    }
                    file_selectors = {
                        self.variable_pool.convert_template(item.key).text: item.file
                        for item in filter(lambda item: item.type == "file", data)
                    }

                    # get files from file_selectors, with support for array file variables
                    files_list = []
                    for key, selector in file_selectors.items():
                        segment = self.variable_pool.get(selector)
                        if isinstance(segment, FileSegment):
                            files_list.append((key, [segment.value]))
                        elif isinstance(segment, ArrayFileSegment):
                            files_list.append((key, list(segment.value)))

                    # get files from file_manager
                    files: dict[str, list[tuple[str | None, bytes, str]]] = {}
                    for key, files_in_segment in files_list:
                        for file in files_in_segment:
                            if file.related_id is not None or (
                                file.transfer_method == FileTransferMethod.REMOTE_URL and file.remote_url is not None
                            ):
                                file_tuple = (
                                    file.filename,
                                    self._file_manager.download(file),
                                    file.mime_type or "application/octet-stream",
                                )
                                if key not in files:
                                    files[key] = []
                                files[key].append(file_tuple)

                    # convert files to a list for the httpx request
                    # If there are no actual files, we still need to force httpx to use `multipart/form-data`.
                    # This is achieved by inserting a harmless placeholder file that will be ignored by the server.
                    if not files:
                        self.files = [("__multipart_placeholder__", ("", b"", "application/octet-stream"))]
                    if files:
                        self.files = []
                        for key, file_tuples in files.items():
                            for file_tuple in file_tuples:
                                self.files.append((key, file_tuple))

                    self.data = form_data

    def _assembling_headers(self) -> dict[str, Any]:
        authorization = deepcopy(self.auth)
        headers = deepcopy(self.headers) or {}
        if self.auth.type == "api-key":
            if self.auth.config is None:
                raise AuthorizationConfigError("self.authorization config is required")
            if authorization.config is None:
                raise AuthorizationConfigError("authorization config is required")

            if not authorization.config.header:
                authorization.config.header = "Authorization"

            if self.auth.config.type == "bearer" and authorization.config.api_key:
                headers[authorization.config.header] = f"Bearer {authorization.config.api_key}"
            elif self.auth.config.type == "basic" and authorization.config.api_key:
                credentials = authorization.config.api_key
                if ":" in credentials:
                    encoded_credentials = base64.b64encode(credentials.encode("utf-8")).decode("utf-8")
                else:
                    encoded_credentials = credentials
                headers[authorization.config.header] = f"Basic {encoded_credentials}"
            elif self.auth.config.type == "custom":
                if authorization.config.header and authorization.config.api_key:
                    headers[authorization.config.header] = authorization.config.api_key

        # Handle Content-Type for multipart/form-data requests
        # Fix for issue #23829: Missing boundary when using multipart/form-data
        body = self.node_data.body
        if body and body.type == "form-data":
            # For multipart/form-data, if any files are present (including placeholder files),
            # we must remove any manually set Content-Type header. This is because httpx needs to
            # automatically set the Content-Type and boundary for multipart encoding whenever files
            # are included, even if they are placeholders, to avoid boundary issues and ensure correct
            # file upload behaviour. Manually setting Content-Type can cause httpx to fail to set the
            # boundary, resulting in invalid requests.
            if self.files:
                # Remove Content-Type if it was manually set to avoid boundary issues
                headers = {k: v for k, v in headers.items() if k.lower() != "content-type"}
            else:
                # No files at all, set Content-Type manually
                if "content-type" not in (k.lower() for k in headers):
                    headers["Content-Type"] = "multipart/form-data"
        elif body and body.type in BODY_TYPE_TO_CONTENT_TYPE:
            # Set Content-Type for other body types
            if "content-type" not in (k.lower() for k in headers):
                headers["Content-Type"] = BODY_TYPE_TO_CONTENT_TYPE[body.type]

        return headers

    def _validate_and_parse_response(self, response: httpx.Response) -> Response:
        executor_response = Response(response)

        threshold_size = (
            self._http_request_config.max_binary_size
            if executor_response.is_file
            else self._http_request_config.max_text_size
        )
        if executor_response.size > threshold_size:
            raise ResponseSizeError(
                f"{'File' if executor_response.is_file else 'Text'} size is too large,"
                f" max size is {threshold_size / 1024 / 1024:.2f} MB,"
                f" but current size is {executor_response.readable_size}."
            )

        return executor_response

    def _do_http_request(self, headers: dict[str, Any]) -> httpx.Response:
        """
        Dispatch the HTTP request through the injected HTTP client.
        """
        _METHOD_MAP: dict[str, Callable[..., httpx.Response]] = {
            "get": self._http_client.get,
            "head": self._http_client.head,
            "post": self._http_client.post,
            "put": self._http_client.put,
            "delete": self._http_client.delete,
            "patch": self._http_client.patch,
        }
        method_lc = self.method.lower()
        if method_lc not in _METHOD_MAP:
            raise InvalidHttpMethodError(f"Invalid http method {self.method}")

        request_args: dict[str, Any] = {
            "data": self.data,
            "files": self.files,
            "json": self.json,
            "content": self.content,
            "headers": headers,
            "params": self.params,
            "timeout": (self.timeout.connect, self.timeout.read, self.timeout.write),
            "ssl_verify": self.ssl_verify,
            "follow_redirects": True,
        }
        # request_args = {k: v for k, v in request_args.items() if v is not None}
        try:
            response = _METHOD_MAP[method_lc](
                url=self.url,
                **request_args,
                max_retries=self.max_retries,
            )
        except self._http_client.max_retries_exceeded_error as e:
            raise HttpRequestNodeError(f"Reached maximum retries for URL {self.url}") from e
        except self._http_client.request_error as e:
            raise HttpRequestNodeError(str(e)) from e
        return response

    def invoke(self) -> Response:
        # assemble headers
        headers = self._assembling_headers()
        # do http request
        response = self._do_http_request(headers)
        # validate response
        return self._validate_and_parse_response(response)

    def to_log(self):
        url_parts = urlparse(self.url)
        path = url_parts.path or "/"

        # Add query parameters
        if self.params:
            query_string = urlencode(self.params)
            path += f"?{query_string}"
        elif url_parts.query:
            path += f"?{url_parts.query}"

        raw = f"{self.method.upper()} {path} HTTP/1.1\r\n"
        raw += f"Host: {url_parts.netloc}\r\n"

        headers = self._assembling_headers()
        body = self.node_data.body
        boundary = f"----WebKitFormBoundary{_generate_random_string(16)}"
        if body:
            if "content-type" not in (k.lower() for k in self.headers) and body.type in BODY_TYPE_TO_CONTENT_TYPE:
                headers["Content-Type"] = BODY_TYPE_TO_CONTENT_TYPE[body.type]
            if body.type == "form-data":
                headers["Content-Type"] = f"multipart/form-data; boundary={boundary}"
        for k, v in headers.items():
            if self.auth.type == "api-key":
                authorization_header = "Authorization"
                if self.auth.config and self.auth.config.header:
                    authorization_header = self.auth.config.header
                if k.lower() == authorization_header.lower():
                    raw += f"{k}: {'*' * len(v)}\r\n"
                    continue
            raw += f"{k}: {v}\r\n"

        body_string = ""
        # Only log actual files if present.
        # '__multipart_placeholder__' is inserted to force multipart encoding but is not a real file.
        # This prevents logging meaningless placeholder entries.
        if self.files and not all(f[0] == "__multipart_placeholder__" for f in self.files):
            for file_entry in self.files:
                # file_entry should be (key, (filename, content, mime_type)), but handle edge cases
                if len(file_entry) != 2 or len(file_entry[1]) < 2:
                    continue  # skip malformed entries
                key = file_entry[0]
                content = file_entry[1][1]
                body_string += f"--{boundary}\r\n"
                body_string += f'Content-Disposition: form-data; name="{key}"\r\n\r\n'
                # Do not decode binary content; use a placeholder with file metadata instead.
                # Includes filename, size, and MIME type for better logging context.
                body_string += (
                    f"<file_content_binary: '{file_entry[1][0] or 'unknown'}', "
                    f"type='{file_entry[1][2] if len(file_entry[1]) > 2 else 'unknown'}', "
                    f"size={len(content)} bytes>\r\n"
                )
            body_string += f"--{boundary}--\r\n"
        elif self.node_data.body:
            if self.content:
                # If content is bytes, do not decode it; show a placeholder with size.
                # Provides content size information for binary data without exposing the raw bytes.
                if isinstance(self.content, bytes):
                    body_string = f"<binary_content: size={len(self.content)} bytes>"
                else:
                    body_string = self.content
            elif self.data and self.node_data.body.type == "x-www-form-urlencoded":
                body_string = urlencode(self.data)
            elif self.data and self.node_data.body.type == "form-data":
                for key, value in self.data.items():
                    body_string += f"--{boundary}\r\n"
                    body_string += f'Content-Disposition: form-data; name="{key}"\r\n\r\n'
                    body_string += f"{value}\r\n"
                body_string += f"--{boundary}--\r\n"
            elif self.json:
                body_string = json.dumps(self.json)
            elif self.node_data.body.type == "raw-text":
                if len(self.node_data.body.data) != 1:
                    raise RequestBodyError("raw-text body type should have exactly one item")
                body_string = self.node_data.body.data[0].value
        if body_string:
            raw += f"Content-Length: {len(body_string)}\r\n"
        raw += "\r\n"  # Empty line between headers and body
        raw += body_string

        return raw


def _generate_random_string(n: int) -> str:
    """
    Generate a random string of lowercase ASCII letters.

    Args:
        n (int): The length of the random string to generate.

    Returns:
        str: A random string of lowercase ASCII letters with length n.

    Example:
        >>> _generate_random_string(5)
        'abcde'
    """
    return "".join(secrets.choice(string.ascii_lowercase) for _ in range(n))
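Both _init_headers() and _init_params() consume the same frontend convention: one key-value pair per line, split on the first ':'. A standalone sketch of the header parsing (no variable pool involved, so convert_template is omitted):

    raw = "Content-Type: application/json\nX-Trace:\naa: 1"
    parsed = {
        key.strip(): (value[0].strip() if value else "")
        for line in raw.splitlines()
        if line.strip()
        for key, *value in [line.split(":", 1)]
    }
    # {'Content-Type': 'application/json', 'X-Trace': '', 'aa': '1'}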
260  api/dify_graph/nodes/http_request/node.py  Normal file
@@ -0,0 +1,260 @@
import logging
import mimetypes
from collections.abc import Callable, Mapping, Sequence
from typing import TYPE_CHECKING, Any

from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus
from dify_graph.file import File, FileTransferMethod
from dify_graph.node_events import NodeRunResult
from dify_graph.nodes.base import variable_template_parser
from dify_graph.nodes.base.entities import VariableSelector
from dify_graph.nodes.base.node import Node
from dify_graph.nodes.http_request.executor import Executor
from dify_graph.nodes.protocols import FileManagerProtocol, HttpClientProtocol, ToolFileManagerProtocol
from dify_graph.variables.segments import ArrayFileSegment
from factories import file_factory

from .config import build_http_request_config, resolve_http_request_config
from .entities import (
    HTTP_REQUEST_CONFIG_FILTER_KEY,
    HttpRequestNodeConfig,
    HttpRequestNodeData,
    HttpRequestNodeTimeout,
    Response,
)
from .exc import HttpRequestNodeError, RequestBodyError

logger = logging.getLogger(__name__)

if TYPE_CHECKING:
    from dify_graph.entities import GraphInitParams
    from dify_graph.runtime import GraphRuntimeState


class HttpRequestNode(Node[HttpRequestNodeData]):
    node_type = NodeType.HTTP_REQUEST

    def __init__(
        self,
        id: str,
        config: Mapping[str, Any],
        graph_init_params: "GraphInitParams",
        graph_runtime_state: "GraphRuntimeState",
        *,
        http_request_config: HttpRequestNodeConfig,
        http_client: HttpClientProtocol,
        tool_file_manager_factory: Callable[[], ToolFileManagerProtocol],
        file_manager: FileManagerProtocol,
    ) -> None:
        super().__init__(
            id=id,
            config=config,
            graph_init_params=graph_init_params,
            graph_runtime_state=graph_runtime_state,
        )

        self._http_request_config = http_request_config
        self._http_client = http_client
        self._tool_file_manager_factory = tool_file_manager_factory
        self._file_manager = file_manager

    @classmethod
    def get_default_config(cls, filters: Mapping[str, object] | None = None) -> Mapping[str, object]:
        if not filters or HTTP_REQUEST_CONFIG_FILTER_KEY not in filters:
            http_request_config = build_http_request_config()
        else:
            http_request_config = resolve_http_request_config(filters)
        default_timeout = http_request_config.default_timeout()
        return {
            "type": "http-request",
            "config": {
                "method": "get",
                "authorization": {
                    "type": "no-auth",
                },
                "body": {"type": "none"},
                "timeout": {
                    **default_timeout.model_dump(),
                    "max_connect_timeout": http_request_config.max_connect_timeout,
                    "max_read_timeout": http_request_config.max_read_timeout,
                    "max_write_timeout": http_request_config.max_write_timeout,
                },
                "ssl_verify": http_request_config.ssl_verify,
            },
            "retry_config": {
                "max_retries": http_request_config.ssrf_default_max_retries,
                "retry_interval": 0.5 * (2**2),
                "retry_enabled": True,
            },
        }

    @classmethod
    def version(cls) -> str:
        return "1"

    def _run(self) -> NodeRunResult:
        process_data = {}
        try:
            http_executor = Executor(
                node_data=self.node_data,
                timeout=self._get_request_timeout(self.node_data),
                variable_pool=self.graph_runtime_state.variable_pool,
                http_request_config=self._http_request_config,
                max_retries=0,
                ssl_verify=self.node_data.ssl_verify,
                http_client=self._http_client,
                file_manager=self._file_manager,
            )
            process_data["request"] = http_executor.to_log()

            response = http_executor.invoke()
            files = self.extract_files(url=http_executor.url, response=response)
            if not response.response.is_success and (self.error_strategy or self.retry):
                return NodeRunResult(
                    status=WorkflowNodeExecutionStatus.FAILED,
                    outputs={
                        "status_code": response.status_code,
                        "body": response.text if not files.value else "",
                        "headers": response.headers,
                        "files": files,
                    },
                    process_data={
                        "request": http_executor.to_log(),
                    },
                    error=f"Request failed with status code {response.status_code}",
                    error_type="HTTPResponseCodeError",
                )
            return NodeRunResult(
                status=WorkflowNodeExecutionStatus.SUCCEEDED,
                outputs={
                    "status_code": response.status_code,
                    "body": response.text if not files.value else "",
                    "headers": response.headers,
                    "files": files,
                },
                process_data={
                    "request": http_executor.to_log(),
                },
            )
        except HttpRequestNodeError as e:
            logger.warning("http request node %s failed to run: %s", self._node_id, e)
            return NodeRunResult(
                status=WorkflowNodeExecutionStatus.FAILED,
                error=str(e),
                process_data=process_data,
                error_type=type(e).__name__,
            )

    def _get_request_timeout(self, node_data: HttpRequestNodeData) -> HttpRequestNodeTimeout:
        default_timeout = self._http_request_config.default_timeout()
        timeout = node_data.timeout
        if timeout is None:
            return default_timeout

        return HttpRequestNodeTimeout(
            connect=timeout.connect or default_timeout.connect,
            read=timeout.read or default_timeout.read,
            write=timeout.write or default_timeout.write,
        )

    @classmethod
    def _extract_variable_selector_to_variable_mapping(
        cls,
        *,
        graph_config: Mapping[str, Any],
        node_id: str,
        node_data: Mapping[str, Any],
    ) -> Mapping[str, Sequence[str]]:
        # Create typed NodeData from dict
        typed_node_data = HttpRequestNodeData.model_validate(node_data)

        selectors: list[VariableSelector] = []
        selectors += variable_template_parser.extract_selectors_from_template(typed_node_data.url)
        selectors += variable_template_parser.extract_selectors_from_template(typed_node_data.headers)
        selectors += variable_template_parser.extract_selectors_from_template(typed_node_data.params)
        if typed_node_data.body:
            body_type = typed_node_data.body.type
            data = typed_node_data.body.data
            match body_type:
                case "none":
                    pass
                case "binary":
                    if len(data) != 1:
                        raise RequestBodyError("invalid body data, should have only one item")
                    selector = data[0].file
                    selectors.append(VariableSelector(variable="#" + ".".join(selector) + "#", value_selector=selector))
                case "json" | "raw-text":
                    if len(data) != 1:
                        raise RequestBodyError("invalid body data, should have only one item")
                    selectors += variable_template_parser.extract_selectors_from_template(data[0].key)
                    selectors += variable_template_parser.extract_selectors_from_template(data[0].value)
                case "x-www-form-urlencoded":
                    for item in data:
                        selectors += variable_template_parser.extract_selectors_from_template(item.key)
                        selectors += variable_template_parser.extract_selectors_from_template(item.value)
                case "form-data":
                    for item in data:
                        selectors += variable_template_parser.extract_selectors_from_template(item.key)
                        if item.type == "text":
                            selectors += variable_template_parser.extract_selectors_from_template(item.value)
                        elif item.type == "file":
                            selectors.append(
                                VariableSelector(variable="#" + ".".join(item.file) + "#", value_selector=item.file)
                            )

        mapping = {}
        for selector_iter in selectors:
            mapping[node_id + "." + selector_iter.variable] = selector_iter.value_selector

        return mapping

    def extract_files(self, url: str, response: Response) -> ArrayFileSegment:
        """
        Extract files from response by checking both Content-Type header and URL
        """
        files: list[File] = []
        is_file = response.is_file
        content_type = response.content_type
        content = response.content
        parsed_content_disposition = response.parsed_content_disposition
        content_disposition_type = None

        if not is_file:
            return ArrayFileSegment(value=[])

        if parsed_content_disposition:
            content_disposition_filename = parsed_content_disposition.get_filename()
            if content_disposition_filename:
                # If a filename is available from content-disposition, use it to guess the content type
                content_disposition_type = mimetypes.guess_type(content_disposition_filename)[0]

        # Guess file extension from URL or Content-Type header
        filename = url.split("?")[0].split("/")[-1] or ""
        mime_type = (
            content_disposition_type or content_type or mimetypes.guess_type(filename)[0] or "application/octet-stream"
        )
        tool_file_manager = self._tool_file_manager_factory()

        tool_file = tool_file_manager.create_file_by_raw(
            user_id=self.user_id,
            tenant_id=self.tenant_id,
            conversation_id=None,
            file_binary=content,
            mimetype=mime_type,
        )

        mapping = {
            "tool_file_id": tool_file.id,
            "transfer_method": FileTransferMethod.TOOL_FILE,
        }
        file = file_factory.build_from_mapping(
            mapping=mapping,
            tenant_id=self.tenant_id,
        )
        files.append(file)

        return ArrayFileSegment(value=files)

    @property
    def retry(self) -> bool:
        return self.node_data.retry_config.retry_enabled
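get_default_config() derives the node's default timeouts and retry settings from an HttpRequestNodeConfig, building one with library defaults when no filters are passed. A minimal sketch:

    defaults = HttpRequestNode.get_default_config()
    defaults["config"]["timeout"]["max_read_timeout"]  # 600
    defaults["retry_config"]["max_retries"]            # 3

    # Or thread a custom config through the filters mapping:
    cfg = build_http_request_config(max_read_timeout=120)
    defaults = HttpRequestNode.get_default_config({HTTP_REQUEST_CONFIG_FILTER_KEY: cfg})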