change all to httpx (#26119)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
This commit is contained in:
Asuka Minato
2025-10-11 00:41:16 +09:00
committed by GitHub
parent 3922ad876f
commit bb6a331490
23 changed files with 232 additions and 173 deletions

View File

@@ -4,7 +4,7 @@ import math
from typing import Any, cast
from urllib.parse import urlparse
import requests
from elasticsearch import ConnectionError as ElasticsearchConnectionError
from elasticsearch import Elasticsearch
from flask import current_app
from packaging.version import parse as parse_version
@@ -138,7 +138,7 @@ class ElasticSearchVector(BaseVector):
if not client.ping():
raise ConnectionError("Failed to connect to Elasticsearch")
except requests.ConnectionError as e:
except ElasticsearchConnectionError as e:
raise ConnectionError(f"Vector database connection error: {str(e)}")
except Exception as e:
raise ConnectionError(f"Elasticsearch client initialization failed: {str(e)}")

View File

@@ -5,9 +5,10 @@ from collections.abc import Generator, Iterable, Sequence
from itertools import islice
from typing import TYPE_CHECKING, Any, Union
import httpx
import qdrant_client
import requests
from flask import current_app
from httpx import DigestAuth
from pydantic import BaseModel
from qdrant_client.http import models as rest
from qdrant_client.http.models import (
@@ -19,7 +20,6 @@ from qdrant_client.http.models import (
TokenizerType,
)
from qdrant_client.local.qdrant_local import QdrantLocal
from requests.auth import HTTPDigestAuth
from sqlalchemy import select
from configs import dify_config
@@ -504,10 +504,10 @@ class TidbOnQdrantVectorFactory(AbstractVectorFactory):
}
cluster_data = {"displayName": display_name, "region": region_object, "labels": labels}
response = requests.post(
response = httpx.post(
f"{tidb_config.api_url}/clusters",
json=cluster_data,
auth=HTTPDigestAuth(tidb_config.public_key, tidb_config.private_key),
auth=DigestAuth(tidb_config.public_key, tidb_config.private_key),
)
if response.status_code == 200:
@@ -527,10 +527,10 @@ class TidbOnQdrantVectorFactory(AbstractVectorFactory):
body = {"password": new_password}
response = requests.put(
response = httpx.put(
f"{tidb_config.api_url}/clusters/{cluster_id}/password",
json=body,
auth=HTTPDigestAuth(tidb_config.public_key, tidb_config.private_key),
auth=DigestAuth(tidb_config.public_key, tidb_config.private_key),
)
if response.status_code == 200:

View File

@@ -2,8 +2,8 @@ import time
import uuid
from collections.abc import Sequence
import requests
from requests.auth import HTTPDigestAuth
import httpx
from httpx import DigestAuth
from configs import dify_config
from extensions.ext_database import db
@@ -49,7 +49,7 @@ class TidbService:
"rootPassword": password,
}
response = requests.post(f"{api_url}/clusters", json=cluster_data, auth=HTTPDigestAuth(public_key, private_key))
response = httpx.post(f"{api_url}/clusters", json=cluster_data, auth=DigestAuth(public_key, private_key))
if response.status_code == 200:
response_data = response.json()
@@ -83,7 +83,7 @@ class TidbService:
:return: The response from the API.
"""
response = requests.delete(f"{api_url}/clusters/{cluster_id}", auth=HTTPDigestAuth(public_key, private_key))
response = httpx.delete(f"{api_url}/clusters/{cluster_id}", auth=DigestAuth(public_key, private_key))
if response.status_code == 200:
return response.json()
@@ -102,7 +102,7 @@ class TidbService:
:return: The response from the API.
"""
response = requests.get(f"{api_url}/clusters/{cluster_id}", auth=HTTPDigestAuth(public_key, private_key))
response = httpx.get(f"{api_url}/clusters/{cluster_id}", auth=DigestAuth(public_key, private_key))
if response.status_code == 200:
return response.json()
@@ -127,10 +127,10 @@ class TidbService:
body = {"password": new_password, "builtinRole": "role_admin", "customRoles": []}
response = requests.patch(
response = httpx.patch(
f"{api_url}/clusters/{cluster_id}/sqlUsers/{account}",
json=body,
auth=HTTPDigestAuth(public_key, private_key),
auth=DigestAuth(public_key, private_key),
)
if response.status_code == 200:
@@ -161,9 +161,7 @@ class TidbService:
tidb_serverless_list_map = {item.cluster_id: item for item in tidb_serverless_list}
cluster_ids = [item.cluster_id for item in tidb_serverless_list]
params = {"clusterIds": cluster_ids, "view": "BASIC"}
response = requests.get(
f"{api_url}/clusters:batchGet", params=params, auth=HTTPDigestAuth(public_key, private_key)
)
response = httpx.get(f"{api_url}/clusters:batchGet", params=params, auth=DigestAuth(public_key, private_key))
if response.status_code == 200:
response_data = response.json()
@@ -224,8 +222,8 @@ class TidbService:
clusters.append(cluster_data)
request_body = {"requests": clusters}
response = requests.post(
f"{api_url}/clusters:batchCreate", json=request_body, auth=HTTPDigestAuth(public_key, private_key)
response = httpx.post(
f"{api_url}/clusters:batchCreate", json=request_body, auth=DigestAuth(public_key, private_key)
)
if response.status_code == 200:

View File

@@ -2,7 +2,6 @@ import datetime
import json
from typing import Any
import requests
import weaviate # type: ignore
from pydantic import BaseModel, model_validator
@@ -45,8 +44,8 @@ class WeaviateVector(BaseVector):
client = weaviate.Client(
url=config.endpoint, auth_client_secret=auth_config, timeout_config=(5, 60), startup_period=None
)
except requests.ConnectionError:
raise ConnectionError("Vector database connection error")
except Exception as exc:
raise ConnectionError("Vector database connection error") from exc
client.batch.configure(
# `batch_size` takes an `int` value to enable auto-batching

View File

@@ -2,7 +2,7 @@ import json
import time
from typing import Any, cast
import requests
import httpx
from extensions.ext_storage import storage
@@ -104,18 +104,18 @@ class FirecrawlApp:
def _prepare_headers(self) -> dict[str, Any]:
return {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}
def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5) -> requests.Response:
def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5) -> httpx.Response:
for attempt in range(retries):
response = requests.post(url, headers=headers, json=data)
response = httpx.post(url, headers=headers, json=data)
if response.status_code == 502:
time.sleep(backoff_factor * (2**attempt))
else:
return response
return response
def _get_request(self, url, headers, retries=3, backoff_factor=0.5) -> requests.Response:
def _get_request(self, url, headers, retries=3, backoff_factor=0.5) -> httpx.Response:
for attempt in range(retries):
response = requests.get(url, headers=headers)
response = httpx.get(url, headers=headers)
if response.status_code == 502:
time.sleep(backoff_factor * (2**attempt))
else:

View File

@@ -3,7 +3,7 @@ import logging
import operator
from typing import Any, cast
import requests
import httpx
from configs import dify_config
from core.rag.extractor.extractor_base import BaseExtractor
@@ -92,7 +92,7 @@ class NotionExtractor(BaseExtractor):
if next_cursor:
current_query["start_cursor"] = next_cursor
res = requests.post(
res = httpx.post(
DATABASE_URL_TMPL.format(database_id=database_id),
headers={
"Authorization": "Bearer " + self._notion_access_token,
@@ -160,7 +160,7 @@ class NotionExtractor(BaseExtractor):
while True:
query_dict: dict[str, Any] = {} if not start_cursor else {"start_cursor": start_cursor}
try:
res = requests.request(
res = httpx.request(
"GET",
block_url,
headers={
@@ -173,7 +173,7 @@ class NotionExtractor(BaseExtractor):
if res.status_code != 200:
raise ValueError(f"Error fetching Notion block data: {res.text}")
data = res.json()
except requests.RequestException as e:
except httpx.HTTPError as e:
raise ValueError("Error fetching Notion block data") from e
if "results" not in data or not isinstance(data["results"], list):
raise ValueError("Error fetching Notion block data")
@@ -222,7 +222,7 @@ class NotionExtractor(BaseExtractor):
while True:
query_dict: dict[str, Any] = {} if not start_cursor else {"start_cursor": start_cursor}
res = requests.request(
res = httpx.request(
"GET",
block_url,
headers={
@@ -282,7 +282,7 @@ class NotionExtractor(BaseExtractor):
while not done:
query_dict: dict[str, Any] = {} if not start_cursor else {"start_cursor": start_cursor}
res = requests.request(
res = httpx.request(
"GET",
block_url,
headers={
@@ -354,7 +354,7 @@ class NotionExtractor(BaseExtractor):
query_dict: dict[str, Any] = {}
res = requests.request(
res = httpx.request(
"GET",
retrieve_page_url,
headers={

View File

@@ -3,8 +3,8 @@ from collections.abc import Generator
from typing import Union
from urllib.parse import urljoin
import requests
from requests import Response
import httpx
from httpx import Response
from core.rag.extractor.watercrawl.exceptions import (
WaterCrawlAuthenticationError,
@@ -20,28 +20,45 @@ class BaseAPIClient:
self.session = self.init_session()
def init_session(self):
session = requests.Session()
session.headers.update({"X-API-Key": self.api_key})
session.headers.update({"Content-Type": "application/json"})
session.headers.update({"Accept": "application/json"})
session.headers.update({"User-Agent": "WaterCrawl-Plugin"})
session.headers.update({"Accept-Language": "en-US"})
return session
headers = {
"X-API-Key": self.api_key,
"Content-Type": "application/json",
"Accept": "application/json",
"User-Agent": "WaterCrawl-Plugin",
"Accept-Language": "en-US",
}
return httpx.Client(headers=headers, timeout=None)
def _request(
self,
method: str,
endpoint: str,
query_params: dict | None = None,
data: dict | None = None,
**kwargs,
) -> Response:
stream = kwargs.pop("stream", False)
url = urljoin(self.base_url, endpoint)
if stream:
request = self.session.build_request(method, url, params=query_params, json=data)
return self.session.send(request, stream=True, **kwargs)
return self.session.request(method, url, params=query_params, json=data, **kwargs)
def _get(self, endpoint: str, query_params: dict | None = None, **kwargs):
return self.session.get(urljoin(self.base_url, endpoint), params=query_params, **kwargs)
return self._request("GET", endpoint, query_params=query_params, **kwargs)
def _post(self, endpoint: str, query_params: dict | None = None, data: dict | None = None, **kwargs):
return self.session.post(urljoin(self.base_url, endpoint), params=query_params, json=data, **kwargs)
return self._request("POST", endpoint, query_params=query_params, data=data, **kwargs)
def _put(self, endpoint: str, query_params: dict | None = None, data: dict | None = None, **kwargs):
return self.session.put(urljoin(self.base_url, endpoint), params=query_params, json=data, **kwargs)
return self._request("PUT", endpoint, query_params=query_params, data=data, **kwargs)
def _delete(self, endpoint: str, query_params: dict | None = None, **kwargs):
return self.session.delete(urljoin(self.base_url, endpoint), params=query_params, **kwargs)
return self._request("DELETE", endpoint, query_params=query_params, **kwargs)
def _patch(self, endpoint: str, query_params: dict | None = None, data: dict | None = None, **kwargs):
return self.session.patch(urljoin(self.base_url, endpoint), params=query_params, json=data, **kwargs)
return self._request("PATCH", endpoint, query_params=query_params, data=data, **kwargs)
class WaterCrawlAPIClient(BaseAPIClient):
@@ -49,14 +66,17 @@ class WaterCrawlAPIClient(BaseAPIClient):
super().__init__(api_key, base_url)
def process_eventstream(self, response: Response, download: bool = False) -> Generator:
for line in response.iter_lines():
line = line.decode("utf-8")
if line.startswith("data:"):
line = line[5:].strip()
data = json.loads(line)
if data["type"] == "result" and download:
data["data"] = self.download_result(data["data"])
yield data
try:
for raw_line in response.iter_lines():
line = raw_line.decode("utf-8") if isinstance(raw_line, bytes) else raw_line
if line.startswith("data:"):
line = line[5:].strip()
data = json.loads(line)
if data["type"] == "result" and download:
data["data"] = self.download_result(data["data"])
yield data
finally:
response.close()
def process_response(self, response: Response) -> dict | bytes | list | None | Generator:
if response.status_code == 401:
@@ -170,7 +190,10 @@ class WaterCrawlAPIClient(BaseAPIClient):
return event_data["data"]
def download_result(self, result_object: dict):
response = requests.get(result_object["result"])
response.raise_for_status()
result_object["result"] = response.json()
response = httpx.get(result_object["result"], timeout=None)
try:
response.raise_for_status()
result_object["result"] = response.json()
finally:
response.close()
return result_object

View File

@@ -9,7 +9,7 @@ import uuid
from urllib.parse import urlparse
from xml.etree import ElementTree
import requests
import httpx
from docx import Document as DocxDocument
from configs import dify_config
@@ -43,15 +43,19 @@ class WordExtractor(BaseExtractor):
# If the file is a web path, download it to a temporary file, and use that
if not os.path.isfile(self.file_path) and self._is_valid_url(self.file_path):
r = requests.get(self.file_path)
response = httpx.get(self.file_path, timeout=None)
if r.status_code != 200:
raise ValueError(f"Check the url of your file; returned status code {r.status_code}")
if response.status_code != 200:
response.close()
raise ValueError(f"Check the url of your file; returned status code {response.status_code}")
self.web_path = self.file_path
# TODO: use a better way to handle the file
self.temp_file = tempfile.NamedTemporaryFile() # noqa SIM115
self.temp_file.write(r.content)
try:
self.temp_file.write(response.content)
finally:
response.close()
self.file_path = self.temp_file.name
elif not os.path.isfile(self.file_path):
raise ValueError(f"File path {self.file_path} is not a valid file or url")