import mimetypes from collections.abc import Sequence from dataclasses import dataclass from email.message import Message from typing import Any, Literal import charset_normalizer import httpx from pydantic import BaseModel, Field, ValidationInfo, field_validator from dify_graph.nodes.base import BaseNodeData HTTP_REQUEST_CONFIG_FILTER_KEY = "http_request_config" class HttpRequestNodeAuthorizationConfig(BaseModel): type: Literal["basic", "bearer", "custom"] api_key: str header: str = "" class HttpRequestNodeAuthorization(BaseModel): type: Literal["no-auth", "api-key"] config: HttpRequestNodeAuthorizationConfig | None = None @field_validator("config", mode="before") @classmethod def check_config(cls, v: HttpRequestNodeAuthorizationConfig, values: ValidationInfo): """ Check config, if type is no-auth, config should be None, otherwise it should be a dict. """ if values.data["type"] == "no-auth": return None else: if not v or not isinstance(v, dict): raise ValueError("config should be a dict") return v class BodyData(BaseModel): key: str = "" type: Literal["file", "text"] value: str = "" file: Sequence[str] = Field(default_factory=list) class HttpRequestNodeBody(BaseModel): type: Literal["none", "form-data", "x-www-form-urlencoded", "raw-text", "json", "binary"] data: Sequence[BodyData] = Field(default_factory=list) @field_validator("data", mode="before") @classmethod def check_data(cls, v: Any): """For compatibility, if body is not set, return empty list.""" if not v: return [] if isinstance(v, str): return [BodyData(key="", type="text", value=v)] return v class HttpRequestNodeTimeout(BaseModel): connect: int | None = None read: int | None = None write: int | None = None @dataclass(frozen=True, slots=True) class HttpRequestNodeConfig: max_connect_timeout: int max_read_timeout: int max_write_timeout: int max_binary_size: int max_text_size: int ssl_verify: bool ssrf_default_max_retries: int def default_timeout(self) -> "HttpRequestNodeTimeout": return HttpRequestNodeTimeout( connect=self.max_connect_timeout, read=self.max_read_timeout, write=self.max_write_timeout, ) class HttpRequestNodeData(BaseNodeData): """ Code Node Data. """ method: Literal[ "get", "post", "put", "patch", "delete", "head", "options", "GET", "POST", "PUT", "PATCH", "DELETE", "HEAD", "OPTIONS", ] url: str authorization: HttpRequestNodeAuthorization headers: str params: str body: HttpRequestNodeBody | None = None timeout: HttpRequestNodeTimeout | None = None ssl_verify: bool | None = None class Response: headers: dict[str, str] response: httpx.Response _cached_text: str | None def __init__(self, response: httpx.Response): self.response = response self.headers = dict(response.headers) self._cached_text = None @property def is_file(self): """ Determine if the response contains a file by checking: 1. Content-Disposition header (RFC 6266) 2. Content characteristics 3. MIME type analysis """ content_type = self.content_type.split(";")[0].strip().lower() parsed_content_disposition = self.parsed_content_disposition # Check if it's explicitly marked as an attachment if parsed_content_disposition: disp_type = parsed_content_disposition.get_content_disposition() # Returns 'attachment', 'inline', or None filename = parsed_content_disposition.get_filename() # Returns filename if present, None otherwise if disp_type == "attachment" or filename is not None: return True # For 'text/' types, only 'csv' should be downloaded as file if content_type.startswith("text/") and "csv" not in content_type: return False # For application types, try to detect if it's a text-based format if content_type.startswith("application/"): # Common text-based application types if any( text_type in content_type for text_type in ("json", "xml", "javascript", "x-www-form-urlencoded", "yaml", "graphql") ): return False # Try to detect if content is text-based by sampling first few bytes try: # Sample first 1024 bytes for text detection content_sample = self.response.content[:1024] content_sample.decode("utf-8") # If we can decode as UTF-8 and find common text patterns, likely not a file text_markers = (b"{", b"[", b"<", b"function", b"var ", b"const ", b"let ") if any(marker in content_sample for marker in text_markers): return False except UnicodeDecodeError: # If we can't decode as UTF-8, likely a binary file return True # For other types, use MIME type analysis main_type, _ = mimetypes.guess_type("dummy" + (mimetypes.guess_extension(content_type) or "")) if main_type: return main_type.split("/")[0] in ("application", "image", "audio", "video") # For unknown types, check if it's a media type return any(media_type in content_type for media_type in ("image/", "audio/", "video/")) @property def content_type(self) -> str: return self.headers.get("content-type", "") @property def text(self) -> str: """ Get response text with robust encoding detection. Uses charset_normalizer for better encoding detection than httpx's default, which helps handle Chinese and other non-ASCII characters properly. """ # Check cache first if hasattr(self, "_cached_text") and self._cached_text is not None: return self._cached_text # Try charset_normalizer for robust encoding detection first detected_encoding = charset_normalizer.from_bytes(self.response.content).best() if detected_encoding and detected_encoding.encoding: try: text = self.response.content.decode(detected_encoding.encoding) self._cached_text = text return text except (UnicodeDecodeError, TypeError, LookupError): # Fallback to httpx's encoding detection if charset_normalizer fails pass # Fallback to httpx's built-in encoding detection text = self.response.text self._cached_text = text return text @property def content(self) -> bytes: return self.response.content @property def status_code(self) -> int: return self.response.status_code @property def size(self) -> int: return len(self.content) @property def readable_size(self) -> str: if self.size < 1024: return f"{self.size} bytes" elif self.size < 1024 * 1024: return f"{(self.size / 1024):.2f} KB" else: return f"{(self.size / 1024 / 1024):.2f} MB" @property def parsed_content_disposition(self) -> Message | None: content_disposition = self.headers.get("content-disposition", "") if content_disposition: msg = Message() msg["content-disposition"] = content_disposition return msg return None