mirror of
https://github.com/langgenius/dify.git
synced 2026-03-05 07:37:07 +08:00
240 lines
7.6 KiB
Python
240 lines
7.6 KiB
Python
import mimetypes
|
|
from collections.abc import Sequence
|
|
from dataclasses import dataclass
|
|
from email.message import Message
|
|
from typing import Any, Literal
|
|
|
|
import charset_normalizer
|
|
import httpx
|
|
from pydantic import BaseModel, Field, ValidationInfo, field_validator
|
|
|
|
from dify_graph.nodes.base import BaseNodeData
|
|
|
|
# String key naming the HTTP request configuration entry.
# NOTE(review): the consumer of this key is not visible in this file -- confirm
# where it is looked up before renaming or changing the value.
HTTP_REQUEST_CONFIG_FILTER_KEY = "http_request_config"
|
|
|
|
|
|
class HttpRequestNodeAuthorizationConfig(BaseModel):
    """Credential details attached to an authorization setting."""

    # Scheme used to present the key; restricted to these three literals.
    type: Literal["basic", "bearer", "custom"]
    # The key value itself (required, no default).
    api_key: str
    # Header name, empty by default.
    # NOTE(review): presumably only meaningful for the "custom" scheme -- confirm.
    header: str = ""
|
|
|
|
|
|
class HttpRequestNodeAuthorization(BaseModel):
    """Authorization settings of an HTTP request node.

    ``type`` selects between no authentication and API-key authentication.
    ``config`` carries the API-key details and is forced to ``None`` whenever
    ``type`` is ``"no-auth"``.
    """

    type: Literal["no-auth", "api-key"]
    config: HttpRequestNodeAuthorizationConfig | None = None

    @field_validator("config", mode="before")
    @classmethod
    def check_config(cls, v: Any, values: ValidationInfo):
        """Normalise the raw ``config`` input before the field itself is validated.

        Args:
            v: The raw, not-yet-validated ``config`` value (this is a
                ``mode="before"`` validator, so it is not a model instance yet).
            values: Validation context; ``values.data`` holds the fields that
                were validated successfully before ``config``.

        Returns:
            ``None`` when ``type`` is ``"no-auth"``; otherwise ``v`` unchanged.

        Raises:
            ValueError: If ``type`` is not ``"no-auth"`` and ``v`` is empty or
                not a dict.
        """
        # ``type`` is missing from ``values.data`` when it failed its own
        # validation. Indexing would raise a KeyError out of the validator
        # instead of letting the validation error for ``type`` be reported,
        # hence ``.get``.
        if values.data.get("type") == "no-auth":
            return None

        if not v or not isinstance(v, dict):
            raise ValueError("config should be a dict")

        return v
|
|
|
|
|
|
class BodyData(BaseModel):
    """A single entry of a request body."""

    # Entry name; empty by default.
    key: str = ""
    # Whether this entry is a file or a text value.
    type: Literal["file", "text"]
    # Text payload; empty by default.
    value: str = ""
    # Sequence of strings describing the file; empty list by default.
    # NOTE(review): presumably a variable selector path -- confirm against callers.
    file: Sequence[str] = Field(default_factory=list)
|
|
|
|
|
|
class HttpRequestNodeBody(BaseModel):
    """Body definition of an HTTP request node: a body kind plus its entries."""

    type: Literal["none", "form-data", "x-www-form-urlencoded", "raw-text", "json", "binary"]
    data: Sequence[BodyData] = Field(default_factory=list)

    @field_validator("data", mode="before")
    @classmethod
    def check_data(cls, v: Any):
        """Coerce legacy ``data`` inputs into a list before validation.

        Any falsy input becomes an empty list, a non-empty string is wrapped
        in a single text ``BodyData`` entry, and everything else is passed
        through untouched.
        """
        # A bare non-empty string is the legacy shape: wrap it as one text entry.
        if isinstance(v, str) and v:
            return [BodyData(key="", type="text", value=v)]
        # Anything falsy (None, "", [], ...) means "no body entries".
        return v if v else []
|
|
|
|
|
|
class HttpRequestNodeTimeout(BaseModel):
    """Optional per-phase timeouts for an HTTP request.

    Every value defaults to ``None``.
    NOTE(review): units are presumably seconds -- confirm against the executor.
    """

    # Limit for establishing the connection.
    connect: int | None = None
    # Limit for reading the response.
    read: int | None = None
    # Limit for sending the request.
    write: int | None = None
|
|
|
|
|
|
@dataclass(frozen=True, slots=True)
class HttpRequestNodeConfig:
    """Immutable set of limits and switches for HTTP request nodes.

    NOTE(review): units of the timeout and size limits are not visible in this
    file (presumably seconds and bytes) -- confirm against where they are set.
    """

    # Upper bounds for the three timeout phases.
    max_connect_timeout: int
    max_read_timeout: int
    max_write_timeout: int
    # Upper bounds for response payload sizes.
    max_binary_size: int
    max_text_size: int
    # Whether TLS certificates are verified.
    ssl_verify: bool
    # Retry count.
    # NOTE(review): presumably consumed by the SSRF proxy layer -- confirm.
    ssrf_default_max_retries: int

    def default_timeout(self) -> "HttpRequestNodeTimeout":
        """Return a timeout object whose phases are set to the configured maxima."""
        phase_limits = {
            "connect": self.max_connect_timeout,
            "read": self.max_read_timeout,
            "write": self.max_write_timeout,
        }
        return HttpRequestNodeTimeout(**phase_limits)
|
|
|
|
|
|
class HttpRequestNodeData(BaseNodeData):
    """
    HTTP Request Node Data.
    """

    # HTTP verb; both the lower-case and upper-case spellings are accepted.
    method: Literal[
        "get", "post", "put", "patch", "delete", "head", "options",
        "GET", "POST", "PUT", "PATCH", "DELETE", "HEAD", "OPTIONS",
    ]  # fmt: skip
    # Target URL (required).
    url: str
    # Authentication settings (required).
    authorization: HttpRequestNodeAuthorization
    # Request headers and query parameters, each carried as a single string.
    # NOTE(review): the string format is not visible in this file -- confirm
    # against the code that parses these fields.
    headers: str
    params: str
    # Optional request body; None when not provided.
    body: HttpRequestNodeBody | None = None
    # Optional per-node timeouts; None when not provided.
    timeout: HttpRequestNodeTimeout | None = None
    # Optional per-node TLS verification switch; None when not provided.
    ssl_verify: bool | None = None
|
|
|
|
|
|
class Response:
|
|
headers: dict[str, str]
|
|
response: httpx.Response
|
|
_cached_text: str | None
|
|
|
|
def __init__(self, response: httpx.Response):
|
|
self.response = response
|
|
self.headers = dict(response.headers)
|
|
self._cached_text = None
|
|
|
|
@property
|
|
def is_file(self):
|
|
"""
|
|
Determine if the response contains a file by checking:
|
|
1. Content-Disposition header (RFC 6266)
|
|
2. Content characteristics
|
|
3. MIME type analysis
|
|
"""
|
|
content_type = self.content_type.split(";")[0].strip().lower()
|
|
parsed_content_disposition = self.parsed_content_disposition
|
|
|
|
# Check if it's explicitly marked as an attachment
|
|
if parsed_content_disposition:
|
|
disp_type = parsed_content_disposition.get_content_disposition() # Returns 'attachment', 'inline', or None
|
|
filename = parsed_content_disposition.get_filename() # Returns filename if present, None otherwise
|
|
if disp_type == "attachment" or filename is not None:
|
|
return True
|
|
|
|
# For 'text/' types, only 'csv' should be downloaded as file
|
|
if content_type.startswith("text/") and "csv" not in content_type:
|
|
return False
|
|
|
|
# For application types, try to detect if it's a text-based format
|
|
if content_type.startswith("application/"):
|
|
# Common text-based application types
|
|
if any(
|
|
text_type in content_type
|
|
for text_type in ("json", "xml", "javascript", "x-www-form-urlencoded", "yaml", "graphql")
|
|
):
|
|
return False
|
|
|
|
# Try to detect if content is text-based by sampling first few bytes
|
|
try:
|
|
# Sample first 1024 bytes for text detection
|
|
content_sample = self.response.content[:1024]
|
|
content_sample.decode("utf-8")
|
|
# If we can decode as UTF-8 and find common text patterns, likely not a file
|
|
text_markers = (b"{", b"[", b"<", b"function", b"var ", b"const ", b"let ")
|
|
if any(marker in content_sample for marker in text_markers):
|
|
return False
|
|
except UnicodeDecodeError:
|
|
# If we can't decode as UTF-8, likely a binary file
|
|
return True
|
|
|
|
# For other types, use MIME type analysis
|
|
main_type, _ = mimetypes.guess_type("dummy" + (mimetypes.guess_extension(content_type) or ""))
|
|
if main_type:
|
|
return main_type.split("/")[0] in ("application", "image", "audio", "video")
|
|
|
|
# For unknown types, check if it's a media type
|
|
return any(media_type in content_type for media_type in ("image/", "audio/", "video/"))
|
|
|
|
@property
|
|
def content_type(self) -> str:
|
|
return self.headers.get("content-type", "")
|
|
|
|
@property
|
|
def text(self) -> str:
|
|
"""
|
|
Get response text with robust encoding detection.
|
|
|
|
Uses charset_normalizer for better encoding detection than httpx's default,
|
|
which helps handle Chinese and other non-ASCII characters properly.
|
|
"""
|
|
# Check cache first
|
|
if hasattr(self, "_cached_text") and self._cached_text is not None:
|
|
return self._cached_text
|
|
|
|
# Try charset_normalizer for robust encoding detection first
|
|
detected_encoding = charset_normalizer.from_bytes(self.response.content).best()
|
|
if detected_encoding and detected_encoding.encoding:
|
|
try:
|
|
text = self.response.content.decode(detected_encoding.encoding)
|
|
self._cached_text = text
|
|
return text
|
|
except (UnicodeDecodeError, TypeError, LookupError):
|
|
# Fallback to httpx's encoding detection if charset_normalizer fails
|
|
pass
|
|
|
|
# Fallback to httpx's built-in encoding detection
|
|
text = self.response.text
|
|
self._cached_text = text
|
|
return text
|
|
|
|
@property
|
|
def content(self) -> bytes:
|
|
return self.response.content
|
|
|
|
@property
|
|
def status_code(self) -> int:
|
|
return self.response.status_code
|
|
|
|
@property
|
|
def size(self) -> int:
|
|
return len(self.content)
|
|
|
|
@property
|
|
def readable_size(self) -> str:
|
|
if self.size < 1024:
|
|
return f"{self.size} bytes"
|
|
elif self.size < 1024 * 1024:
|
|
return f"{(self.size / 1024):.2f} KB"
|
|
else:
|
|
return f"{(self.size / 1024 / 1024):.2f} MB"
|
|
|
|
@property
|
|
def parsed_content_disposition(self) -> Message | None:
|
|
content_disposition = self.headers.get("content-disposition", "")
|
|
if content_disposition:
|
|
msg = Message()
|
|
msg["content-disposition"] = content_disposition
|
|
return msg
|
|
return None
|