Merge remote-tracking branch 'origin/main' into feat/queue-based-graph-engine

Author: -LAN-
Date:   2025-08-29 13:22:13 +08:00
108 changed files with 5144 additions and 2427 deletions

View File

@@ -3,8 +3,8 @@ import uuid
 from contextlib import contextmanager
 from typing import Any
 
-import psycopg2.extras  # type: ignore
-import psycopg2.pool  # type: ignore
+import psycopg2.extras
+import psycopg2.pool
 from pydantic import BaseModel, model_validator
 
 from core.rag.models.document import Document

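The dropped "# type: ignore" markers on the psycopg2 imports (here and in the sibling vector-store clients below) are safe once type stubs such as types-psycopg2 are installed, since mypy can then resolve the extras and pool submodules on its own. A minimal sketch of the now-cleanly-typed pool usage; the DSN and pool sizes are illustrative, not values from this commit:

    # A minimal sketch, assuming the types-psycopg2 stubs are installed; the
    # DSN and pool sizes are illustrative, not values from this commit.
    import psycopg2.extras
    import psycopg2.pool

    pool = psycopg2.pool.SimpleConnectionPool(minconn=1, maxconn=5, dsn="postgresql://localhost/dify")
    conn = pool.getconn()
    try:
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute("SELECT 1 AS ok")
            print(cur.fetchone())  # {'ok': 1}
    finally:
        pool.putconn(conn)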
View File

@@ -3,8 +3,8 @@ import uuid
 from contextlib import contextmanager
 from typing import Any
 
-import psycopg2.extras  # type: ignore
-import psycopg2.pool  # type: ignore
+import psycopg2.extras
+import psycopg2.pool
 from pydantic import BaseModel, model_validator
 
 from configs import dify_config

View File

@@ -48,7 +48,7 @@ class OpenSearchConfig(BaseModel):
         return values
 
     def create_aws_managed_iam_auth(self) -> Urllib3AWSV4SignerAuth:
-        import boto3  # type: ignore
+        import boto3
 
         return Urllib3AWSV4SignerAuth(
             credentials=boto3.Session().get_credentials(),

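For reference, the create_aws_managed_iam_auth path above feeds boto3 session credentials into opensearch-py's URL-signing auth class; dropping the ignore works once boto3 stubs (e.g. boto3-stubs) are available to the checker. A hedged sketch of the signer construction, with region and service names as illustrative assumptions:

    # A hedged sketch; "us-east-1" and "es" are illustrative assumptions,
    # not values taken from this diff.
    import boto3
    from opensearchpy import Urllib3AWSV4SignerAuth

    credentials = boto3.Session().get_credentials()
    auth = Urllib3AWSV4SignerAuth(credentials, "us-east-1", "es")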
View File

@@ -6,8 +6,8 @@ from contextlib import contextmanager
 from typing import Any
 
 import psycopg2.errors
-import psycopg2.extras  # type: ignore
-import psycopg2.pool  # type: ignore
+import psycopg2.extras
+import psycopg2.pool
 from pydantic import BaseModel, model_validator
 
 from configs import dify_config

View File

@@ -3,8 +3,8 @@ import uuid
 from contextlib import contextmanager
 from typing import Any
 
-import psycopg2.extras  # type: ignore
-import psycopg2.pool  # type: ignore
+import psycopg2.extras
+import psycopg2.pool
 from pydantic import BaseModel, model_validator
 
 from configs import dify_config

View File

@@ -3,7 +3,7 @@ import os
 import uuid
 from collections.abc import Generator, Iterable, Sequence
 from itertools import islice
-from typing import TYPE_CHECKING, Any, Optional, Union, cast
+from typing import TYPE_CHECKING, Any, Optional, Union
 
 import qdrant_client
 import requests
@@ -398,7 +398,6 @@ class TidbOnQdrantVector(BaseVector):
 
     def _reload_if_needed(self):
         if isinstance(self._client, QdrantLocal):
-            self._client = cast(QdrantLocal, self._client)
             self._client._load()
 
     @classmethod

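The deleted cast() was a no-op at runtime, and it was redundant for the type checker too: after the isinstance() check, mypy already narrows self._client to QdrantLocal. A minimal sketch of that narrowing, with a free function standing in for the method; this is not the project's code:

    # A minimal sketch of isinstance() narrowing; the free function stands in
    # for the method on TidbOnQdrantVector.
    from qdrant_client.local.qdrant_local import QdrantLocal

    def reload_if_needed(client: object) -> None:
        if isinstance(client, QdrantLocal):
            # client is already narrowed to QdrantLocal here; no cast() needed.
            client._load()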
View File

@@ -4,7 +4,7 @@ import os
 from typing import Optional, cast
 
 import pandas as pd
-from openpyxl import load_workbook  # type: ignore
+from openpyxl import load_workbook
 
 from core.rag.extractor.extractor_base import BaseExtractor
 from core.rag.models.document import Document

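The excel extractor imports both libraries above: openpyxl opens .xlsx workbooks directly, while pandas does the tabular parsing. A hedged sketch of that split; "example.xlsx" is an illustrative file name, not a fixture from the repository:

    # A hedged sketch; "example.xlsx" is an illustrative file name.
    import pandas as pd
    from openpyxl import load_workbook

    workbook = load_workbook("example.xlsx", read_only=True, data_only=True)
    for sheet_name in workbook.sheetnames:
        frame = pd.read_excel("example.xlsx", sheet_name=sheet_name, engine="openpyxl")
        print(sheet_name, frame.shape)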
View File

@@ -73,8 +73,8 @@ class ExtractProcessor:
                 suffix = "." + match.group(1)
             else:
                 suffix = ""
-            # FIXME mypy: Cannot determine type of 'tempfile._get_candidate_names' better not use it here
-            file_path = f"{temp_dir}/{next(tempfile._get_candidate_names())}{suffix}"  # type: ignore
+            # https://stackoverflow.com/questions/26541416/generate-temporary-file-names-without-creating-actual-file-in-python#comment90414256_26541521
+            file_path = f"{temp_dir}/{tempfile.gettempdir()}{suffix}"
             Path(file_path).write_bytes(response.content)
             extract_setting = ExtractSetting(datasource_type="upload_file", document_model="text_model")
             if return_text:

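The removed line leaned on the private tempfile._get_candidate_names() generator, which mypy cannot type. A common stdlib-only alternative for a unique scratch-file name is a uuid4 hex string inside the temporary directory; a sketch under that assumption, not the commit's own approach:

    # A hedged sketch of a collision-safe scratch-file name without private
    # tempfile APIs; the suffix and payload are illustrative.
    import tempfile
    import uuid
    from pathlib import Path

    suffix = ".pdf"
    with tempfile.TemporaryDirectory() as temp_dir:
        file_path = Path(temp_dir) / f"{uuid.uuid4().hex}{suffix}"
        file_path.write_bytes(b"downloaded content")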
View File

@@ -1,6 +1,6 @@
 """Abstract interface for document loader implementations."""
 
-from bs4 import BeautifulSoup  # type: ignore
+from bs4 import BeautifulSoup
 
 from core.rag.extractor.extractor_base import BaseExtractor
 from core.rag.models.document import Document

View File

@@ -3,7 +3,7 @@ import contextlib
 import logging
 from typing import Optional
 
-from bs4 import BeautifulSoup  # type: ignore
+from bs4 import BeautifulSoup
 
 from core.rag.extractor.extractor_base import BaseExtractor
 from core.rag.models.document import Document

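Both HTML-facing extractors follow the same pattern: parse the markup with BeautifulSoup, flatten it to text, and wrap the result in Document objects. A minimal sketch of the parsing step; the markup is illustrative and the Document wrapping follows the extractors' pattern rather than their exact code:

    # A minimal sketch; the markup is illustrative.
    from bs4 import BeautifulSoup

    html = "<html><body><h1>Title</h1><p>Body text.</p></body></html>"
    soup = BeautifulSoup(html, "html.parser")
    text = soup.get_text(separator="\n", strip=True)
    print(text)  # Title / Body text.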
View File

@@ -144,7 +144,7 @@ class TextSplitter(BaseDocumentTransformer, ABC):
     def from_huggingface_tokenizer(cls, tokenizer: Any, **kwargs: Any) -> TextSplitter:
         """Text splitter that uses HuggingFace tokenizer to count length."""
         try:
-            from transformers import PreTrainedTokenizerBase  # type: ignore
+            from transformers import PreTrainedTokenizerBase
 
             if not isinstance(tokenizer, PreTrainedTokenizerBase):
                 raise ValueError("Tokenizer received was not an instance of PreTrainedTokenizerBase")
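For context, from_huggingface_tokenizer() uses the guarded import above to build a length function that measures chunks in tokens rather than characters. A hedged sketch of that length function; "bert-base-uncased" is an illustrative model name, not one the repository pins:

    # A hedged sketch of the token-based length function the classmethod wires in.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

    def huggingface_tokenizer_length(text: str) -> int:
        # Length measured in tokens, mirroring the splitter's counting.
        return len(tokenizer.encode(text))

    print(huggingface_tokenizer_length("hello world"))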