mirror of
https://github.com/langgenius/dify.git
synced 2026-05-03 17:08:03 +08:00
change all to httpx (#26119)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
This commit is contained in:
@ -4,7 +4,7 @@ import math
|
||||
from typing import Any, cast
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
from elasticsearch import ConnectionError as ElasticsearchConnectionError
|
||||
from elasticsearch import Elasticsearch
|
||||
from flask import current_app
|
||||
from packaging.version import parse as parse_version
|
||||
@ -138,7 +138,7 @@ class ElasticSearchVector(BaseVector):
|
||||
if not client.ping():
|
||||
raise ConnectionError("Failed to connect to Elasticsearch")
|
||||
|
||||
except requests.ConnectionError as e:
|
||||
except ElasticsearchConnectionError as e:
|
||||
raise ConnectionError(f"Vector database connection error: {str(e)}")
|
||||
except Exception as e:
|
||||
raise ConnectionError(f"Elasticsearch client initialization failed: {str(e)}")
|
||||
|
||||
@ -5,9 +5,10 @@ from collections.abc import Generator, Iterable, Sequence
|
||||
from itertools import islice
|
||||
from typing import TYPE_CHECKING, Any, Union
|
||||
|
||||
import httpx
|
||||
import qdrant_client
|
||||
import requests
|
||||
from flask import current_app
|
||||
from httpx import DigestAuth
|
||||
from pydantic import BaseModel
|
||||
from qdrant_client.http import models as rest
|
||||
from qdrant_client.http.models import (
|
||||
@ -19,7 +20,6 @@ from qdrant_client.http.models import (
|
||||
TokenizerType,
|
||||
)
|
||||
from qdrant_client.local.qdrant_local import QdrantLocal
|
||||
from requests.auth import HTTPDigestAuth
|
||||
from sqlalchemy import select
|
||||
|
||||
from configs import dify_config
|
||||
@ -504,10 +504,10 @@ class TidbOnQdrantVectorFactory(AbstractVectorFactory):
|
||||
}
|
||||
cluster_data = {"displayName": display_name, "region": region_object, "labels": labels}
|
||||
|
||||
response = requests.post(
|
||||
response = httpx.post(
|
||||
f"{tidb_config.api_url}/clusters",
|
||||
json=cluster_data,
|
||||
auth=HTTPDigestAuth(tidb_config.public_key, tidb_config.private_key),
|
||||
auth=DigestAuth(tidb_config.public_key, tidb_config.private_key),
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
@ -527,10 +527,10 @@ class TidbOnQdrantVectorFactory(AbstractVectorFactory):
|
||||
|
||||
body = {"password": new_password}
|
||||
|
||||
response = requests.put(
|
||||
response = httpx.put(
|
||||
f"{tidb_config.api_url}/clusters/{cluster_id}/password",
|
||||
json=body,
|
||||
auth=HTTPDigestAuth(tidb_config.public_key, tidb_config.private_key),
|
||||
auth=DigestAuth(tidb_config.public_key, tidb_config.private_key),
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
|
||||
@ -2,8 +2,8 @@ import time
|
||||
import uuid
|
||||
from collections.abc import Sequence
|
||||
|
||||
import requests
|
||||
from requests.auth import HTTPDigestAuth
|
||||
import httpx
|
||||
from httpx import DigestAuth
|
||||
|
||||
from configs import dify_config
|
||||
from extensions.ext_database import db
|
||||
@ -49,7 +49,7 @@ class TidbService:
|
||||
"rootPassword": password,
|
||||
}
|
||||
|
||||
response = requests.post(f"{api_url}/clusters", json=cluster_data, auth=HTTPDigestAuth(public_key, private_key))
|
||||
response = httpx.post(f"{api_url}/clusters", json=cluster_data, auth=DigestAuth(public_key, private_key))
|
||||
|
||||
if response.status_code == 200:
|
||||
response_data = response.json()
|
||||
@ -83,7 +83,7 @@ class TidbService:
|
||||
:return: The response from the API.
|
||||
"""
|
||||
|
||||
response = requests.delete(f"{api_url}/clusters/{cluster_id}", auth=HTTPDigestAuth(public_key, private_key))
|
||||
response = httpx.delete(f"{api_url}/clusters/{cluster_id}", auth=DigestAuth(public_key, private_key))
|
||||
|
||||
if response.status_code == 200:
|
||||
return response.json()
|
||||
@ -102,7 +102,7 @@ class TidbService:
|
||||
:return: The response from the API.
|
||||
"""
|
||||
|
||||
response = requests.get(f"{api_url}/clusters/{cluster_id}", auth=HTTPDigestAuth(public_key, private_key))
|
||||
response = httpx.get(f"{api_url}/clusters/{cluster_id}", auth=DigestAuth(public_key, private_key))
|
||||
|
||||
if response.status_code == 200:
|
||||
return response.json()
|
||||
@ -127,10 +127,10 @@ class TidbService:
|
||||
|
||||
body = {"password": new_password, "builtinRole": "role_admin", "customRoles": []}
|
||||
|
||||
response = requests.patch(
|
||||
response = httpx.patch(
|
||||
f"{api_url}/clusters/{cluster_id}/sqlUsers/{account}",
|
||||
json=body,
|
||||
auth=HTTPDigestAuth(public_key, private_key),
|
||||
auth=DigestAuth(public_key, private_key),
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
@ -161,9 +161,7 @@ class TidbService:
|
||||
tidb_serverless_list_map = {item.cluster_id: item for item in tidb_serverless_list}
|
||||
cluster_ids = [item.cluster_id for item in tidb_serverless_list]
|
||||
params = {"clusterIds": cluster_ids, "view": "BASIC"}
|
||||
response = requests.get(
|
||||
f"{api_url}/clusters:batchGet", params=params, auth=HTTPDigestAuth(public_key, private_key)
|
||||
)
|
||||
response = httpx.get(f"{api_url}/clusters:batchGet", params=params, auth=DigestAuth(public_key, private_key))
|
||||
|
||||
if response.status_code == 200:
|
||||
response_data = response.json()
|
||||
@ -224,8 +222,8 @@ class TidbService:
|
||||
clusters.append(cluster_data)
|
||||
|
||||
request_body = {"requests": clusters}
|
||||
response = requests.post(
|
||||
f"{api_url}/clusters:batchCreate", json=request_body, auth=HTTPDigestAuth(public_key, private_key)
|
||||
response = httpx.post(
|
||||
f"{api_url}/clusters:batchCreate", json=request_body, auth=DigestAuth(public_key, private_key)
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
|
||||
@ -2,7 +2,6 @@ import datetime
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
import weaviate # type: ignore
|
||||
from pydantic import BaseModel, model_validator
|
||||
|
||||
@ -45,8 +44,8 @@ class WeaviateVector(BaseVector):
|
||||
client = weaviate.Client(
|
||||
url=config.endpoint, auth_client_secret=auth_config, timeout_config=(5, 60), startup_period=None
|
||||
)
|
||||
except requests.ConnectionError:
|
||||
raise ConnectionError("Vector database connection error")
|
||||
except Exception as exc:
|
||||
raise ConnectionError("Vector database connection error") from exc
|
||||
|
||||
client.batch.configure(
|
||||
# `batch_size` takes an `int` value to enable auto-batching
|
||||
|
||||
@ -2,7 +2,7 @@ import json
|
||||
import time
|
||||
from typing import Any, cast
|
||||
|
||||
import requests
|
||||
import httpx
|
||||
|
||||
from extensions.ext_storage import storage
|
||||
|
||||
@ -104,18 +104,18 @@ class FirecrawlApp:
|
||||
def _prepare_headers(self) -> dict[str, Any]:
|
||||
return {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}
|
||||
|
||||
def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5) -> requests.Response:
|
||||
def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5) -> httpx.Response:
|
||||
for attempt in range(retries):
|
||||
response = requests.post(url, headers=headers, json=data)
|
||||
response = httpx.post(url, headers=headers, json=data)
|
||||
if response.status_code == 502:
|
||||
time.sleep(backoff_factor * (2**attempt))
|
||||
else:
|
||||
return response
|
||||
return response
|
||||
|
||||
def _get_request(self, url, headers, retries=3, backoff_factor=0.5) -> requests.Response:
|
||||
def _get_request(self, url, headers, retries=3, backoff_factor=0.5) -> httpx.Response:
|
||||
for attempt in range(retries):
|
||||
response = requests.get(url, headers=headers)
|
||||
response = httpx.get(url, headers=headers)
|
||||
if response.status_code == 502:
|
||||
time.sleep(backoff_factor * (2**attempt))
|
||||
else:
|
||||
|
||||
@ -3,7 +3,7 @@ import logging
|
||||
import operator
|
||||
from typing import Any, cast
|
||||
|
||||
import requests
|
||||
import httpx
|
||||
|
||||
from configs import dify_config
|
||||
from core.rag.extractor.extractor_base import BaseExtractor
|
||||
@ -92,7 +92,7 @@ class NotionExtractor(BaseExtractor):
|
||||
if next_cursor:
|
||||
current_query["start_cursor"] = next_cursor
|
||||
|
||||
res = requests.post(
|
||||
res = httpx.post(
|
||||
DATABASE_URL_TMPL.format(database_id=database_id),
|
||||
headers={
|
||||
"Authorization": "Bearer " + self._notion_access_token,
|
||||
@ -160,7 +160,7 @@ class NotionExtractor(BaseExtractor):
|
||||
while True:
|
||||
query_dict: dict[str, Any] = {} if not start_cursor else {"start_cursor": start_cursor}
|
||||
try:
|
||||
res = requests.request(
|
||||
res = httpx.request(
|
||||
"GET",
|
||||
block_url,
|
||||
headers={
|
||||
@ -173,7 +173,7 @@ class NotionExtractor(BaseExtractor):
|
||||
if res.status_code != 200:
|
||||
raise ValueError(f"Error fetching Notion block data: {res.text}")
|
||||
data = res.json()
|
||||
except requests.RequestException as e:
|
||||
except httpx.HTTPError as e:
|
||||
raise ValueError("Error fetching Notion block data") from e
|
||||
if "results" not in data or not isinstance(data["results"], list):
|
||||
raise ValueError("Error fetching Notion block data")
|
||||
@ -222,7 +222,7 @@ class NotionExtractor(BaseExtractor):
|
||||
while True:
|
||||
query_dict: dict[str, Any] = {} if not start_cursor else {"start_cursor": start_cursor}
|
||||
|
||||
res = requests.request(
|
||||
res = httpx.request(
|
||||
"GET",
|
||||
block_url,
|
||||
headers={
|
||||
@ -282,7 +282,7 @@ class NotionExtractor(BaseExtractor):
|
||||
while not done:
|
||||
query_dict: dict[str, Any] = {} if not start_cursor else {"start_cursor": start_cursor}
|
||||
|
||||
res = requests.request(
|
||||
res = httpx.request(
|
||||
"GET",
|
||||
block_url,
|
||||
headers={
|
||||
@ -354,7 +354,7 @@ class NotionExtractor(BaseExtractor):
|
||||
|
||||
query_dict: dict[str, Any] = {}
|
||||
|
||||
res = requests.request(
|
||||
res = httpx.request(
|
||||
"GET",
|
||||
retrieve_page_url,
|
||||
headers={
|
||||
|
||||
@ -3,8 +3,8 @@ from collections.abc import Generator
|
||||
from typing import Union
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import requests
|
||||
from requests import Response
|
||||
import httpx
|
||||
from httpx import Response
|
||||
|
||||
from core.rag.extractor.watercrawl.exceptions import (
|
||||
WaterCrawlAuthenticationError,
|
||||
@ -20,28 +20,45 @@ class BaseAPIClient:
|
||||
self.session = self.init_session()
|
||||
|
||||
def init_session(self):
|
||||
session = requests.Session()
|
||||
session.headers.update({"X-API-Key": self.api_key})
|
||||
session.headers.update({"Content-Type": "application/json"})
|
||||
session.headers.update({"Accept": "application/json"})
|
||||
session.headers.update({"User-Agent": "WaterCrawl-Plugin"})
|
||||
session.headers.update({"Accept-Language": "en-US"})
|
||||
return session
|
||||
headers = {
|
||||
"X-API-Key": self.api_key,
|
||||
"Content-Type": "application/json",
|
||||
"Accept": "application/json",
|
||||
"User-Agent": "WaterCrawl-Plugin",
|
||||
"Accept-Language": "en-US",
|
||||
}
|
||||
return httpx.Client(headers=headers, timeout=None)
|
||||
|
||||
def _request(
|
||||
self,
|
||||
method: str,
|
||||
endpoint: str,
|
||||
query_params: dict | None = None,
|
||||
data: dict | None = None,
|
||||
**kwargs,
|
||||
) -> Response:
|
||||
stream = kwargs.pop("stream", False)
|
||||
url = urljoin(self.base_url, endpoint)
|
||||
if stream:
|
||||
request = self.session.build_request(method, url, params=query_params, json=data)
|
||||
return self.session.send(request, stream=True, **kwargs)
|
||||
|
||||
return self.session.request(method, url, params=query_params, json=data, **kwargs)
|
||||
|
||||
def _get(self, endpoint: str, query_params: dict | None = None, **kwargs):
|
||||
return self.session.get(urljoin(self.base_url, endpoint), params=query_params, **kwargs)
|
||||
return self._request("GET", endpoint, query_params=query_params, **kwargs)
|
||||
|
||||
def _post(self, endpoint: str, query_params: dict | None = None, data: dict | None = None, **kwargs):
|
||||
return self.session.post(urljoin(self.base_url, endpoint), params=query_params, json=data, **kwargs)
|
||||
return self._request("POST", endpoint, query_params=query_params, data=data, **kwargs)
|
||||
|
||||
def _put(self, endpoint: str, query_params: dict | None = None, data: dict | None = None, **kwargs):
|
||||
return self.session.put(urljoin(self.base_url, endpoint), params=query_params, json=data, **kwargs)
|
||||
return self._request("PUT", endpoint, query_params=query_params, data=data, **kwargs)
|
||||
|
||||
def _delete(self, endpoint: str, query_params: dict | None = None, **kwargs):
|
||||
return self.session.delete(urljoin(self.base_url, endpoint), params=query_params, **kwargs)
|
||||
return self._request("DELETE", endpoint, query_params=query_params, **kwargs)
|
||||
|
||||
def _patch(self, endpoint: str, query_params: dict | None = None, data: dict | None = None, **kwargs):
|
||||
return self.session.patch(urljoin(self.base_url, endpoint), params=query_params, json=data, **kwargs)
|
||||
return self._request("PATCH", endpoint, query_params=query_params, data=data, **kwargs)
|
||||
|
||||
|
||||
class WaterCrawlAPIClient(BaseAPIClient):
|
||||
@ -49,14 +66,17 @@ class WaterCrawlAPIClient(BaseAPIClient):
|
||||
super().__init__(api_key, base_url)
|
||||
|
||||
def process_eventstream(self, response: Response, download: bool = False) -> Generator:
|
||||
for line in response.iter_lines():
|
||||
line = line.decode("utf-8")
|
||||
if line.startswith("data:"):
|
||||
line = line[5:].strip()
|
||||
data = json.loads(line)
|
||||
if data["type"] == "result" and download:
|
||||
data["data"] = self.download_result(data["data"])
|
||||
yield data
|
||||
try:
|
||||
for raw_line in response.iter_lines():
|
||||
line = raw_line.decode("utf-8") if isinstance(raw_line, bytes) else raw_line
|
||||
if line.startswith("data:"):
|
||||
line = line[5:].strip()
|
||||
data = json.loads(line)
|
||||
if data["type"] == "result" and download:
|
||||
data["data"] = self.download_result(data["data"])
|
||||
yield data
|
||||
finally:
|
||||
response.close()
|
||||
|
||||
def process_response(self, response: Response) -> dict | bytes | list | None | Generator:
|
||||
if response.status_code == 401:
|
||||
@ -170,7 +190,10 @@ class WaterCrawlAPIClient(BaseAPIClient):
|
||||
return event_data["data"]
|
||||
|
||||
def download_result(self, result_object: dict):
|
||||
response = requests.get(result_object["result"])
|
||||
response.raise_for_status()
|
||||
result_object["result"] = response.json()
|
||||
response = httpx.get(result_object["result"], timeout=None)
|
||||
try:
|
||||
response.raise_for_status()
|
||||
result_object["result"] = response.json()
|
||||
finally:
|
||||
response.close()
|
||||
return result_object
|
||||
|
||||
@ -9,7 +9,7 @@ import uuid
|
||||
from urllib.parse import urlparse
|
||||
from xml.etree import ElementTree
|
||||
|
||||
import requests
|
||||
import httpx
|
||||
from docx import Document as DocxDocument
|
||||
|
||||
from configs import dify_config
|
||||
@ -43,15 +43,19 @@ class WordExtractor(BaseExtractor):
|
||||
|
||||
# If the file is a web path, download it to a temporary file, and use that
|
||||
if not os.path.isfile(self.file_path) and self._is_valid_url(self.file_path):
|
||||
r = requests.get(self.file_path)
|
||||
response = httpx.get(self.file_path, timeout=None)
|
||||
|
||||
if r.status_code != 200:
|
||||
raise ValueError(f"Check the url of your file; returned status code {r.status_code}")
|
||||
if response.status_code != 200:
|
||||
response.close()
|
||||
raise ValueError(f"Check the url of your file; returned status code {response.status_code}")
|
||||
|
||||
self.web_path = self.file_path
|
||||
# TODO: use a better way to handle the file
|
||||
self.temp_file = tempfile.NamedTemporaryFile() # noqa SIM115
|
||||
self.temp_file.write(r.content)
|
||||
try:
|
||||
self.temp_file.write(response.content)
|
||||
finally:
|
||||
response.close()
|
||||
self.file_path = self.temp_file.name
|
||||
elif not os.path.isfile(self.file_path):
|
||||
raise ValueError(f"File path {self.file_path} is not a valid file or url")
|
||||
|
||||
Reference in New Issue
Block a user