Merge branch 'feat/queue-based-graph-engine' into feat/rag-2

# Conflicts:
#	api/core/memory/token_buffer_memory.py
#	api/core/rag/extractor/notion_extractor.py
#	api/core/repositories/sqlalchemy_workflow_node_execution_repository.py
#	api/core/variables/variables.py
#	api/core/workflow/graph/graph.py
#	api/core/workflow/graph_engine/entities/event.py
#	api/services/dataset_service.py
#	web/app/components/app-sidebar/index.tsx
#	web/app/components/base/tag-management/selector.tsx
#	web/app/components/base/toast/index.tsx
#	web/app/components/datasets/create/website/index.tsx
#	web/app/components/datasets/create/website/jina-reader/base/options-wrap.tsx
#	web/app/components/workflow/header/version-history-button.tsx
#	web/app/components/workflow/hooks/use-inspect-vars-crud-common.ts
#	web/app/components/workflow/hooks/use-workflow-interactions.ts
#	web/app/components/workflow/panel/version-history-panel/index.tsx
#	web/service/base.ts
2026-04-24 04:45:51 +08:00 · 2025-09-03 15:01:06 +08:00
parent c422d732d2 8c97937cae
commit d4aed3df5c
572 changed files with 16030 additions and 7973 deletions
--- a/api/core/rag/extractor/blob/blob.py
+++ b/api/core/rag/extractor/blob/blob.py
@@ -107,7 +107,7 @@ class Blob(BaseModel):
            Blob instance
        """
        if mime_type is None and guess_type:
-            _mimetype = mimetypes.guess_type(path)[0] if guess_type else None
+            _mimetype = mimetypes.guess_type(path)[0]
        else:
            _mimetype = mime_type
        # We do not load the data immediately, instead we treat the blob as a
--- a/api/core/rag/extractor/extract_processor.py
+++ b/api/core/rag/extractor/extract_processor.py
@@ -45,7 +45,7 @@ class ExtractProcessor:
        cls, upload_file: UploadFile, return_text: bool = False, is_automatic: bool = False
    ) -> Union[list[Document], str]:
        extract_setting = ExtractSetting(
-            datasource_type="upload_file", upload_file=upload_file, document_model="text_model"
+            datasource_type=DatasourceType.FILE.value, upload_file=upload_file, document_model="text_model"
        )
        if return_text:
            delimiter = "\n"
@@ -76,7 +76,7 @@ class ExtractProcessor:
            # https://stackoverflow.com/questions/26541416/generate-temporary-file-names-without-creating-actual-file-in-python#comment90414256_26541521
            file_path = f"{temp_dir}/{tempfile.gettempdir()}{suffix}"
            Path(file_path).write_bytes(response.content)
-            extract_setting = ExtractSetting(datasource_type="upload_file", document_model="text_model")
+            extract_setting = ExtractSetting(datasource_type=DatasourceType.FILE.value, document_model="text_model")
            if return_text:
                delimiter = "\n"
                return delimiter.join(
--- a/api/core/rag/extractor/markdown_extractor.py
+++ b/api/core/rag/extractor/markdown_extractor.py
@@ -2,7 +2,7 @@

 import re
 from pathlib import Path
-from typing import Optional, cast
+from typing import Optional

 from core.rag.extractor.extractor_base import BaseExtractor
 from core.rag.extractor.helpers import detect_file_encodings
@@ -76,7 +76,7 @@ class MarkdownExtractor(BaseExtractor):
        markdown_tups.append((current_header, current_text))

        markdown_tups = [
-            (re.sub(r"#", "", cast(str, key)).strip() if key else None, re.sub(r"<.*?>", "", value))
+            (re.sub(r"#", "", key).strip() if key else None, re.sub(r"<.*?>", "", value))
            for key, value in markdown_tups
        ]

--- a/api/core/rag/extractor/pdf_extractor.py
+++ b/api/core/rag/extractor/pdf_extractor.py
@@ -2,7 +2,7 @@

 import contextlib
 from collections.abc import Iterator
-from typing import Optional, cast
+from typing import Optional

 from core.rag.extractor.blob.blob import Blob
 from core.rag.extractor.extractor_base import BaseExtractor
@@ -27,7 +27,7 @@ class PdfExtractor(BaseExtractor):
        plaintext_file_exists = False
        if self._file_cache_key:
            with contextlib.suppress(FileNotFoundError):
-                text = cast(bytes, storage.load(self._file_cache_key)).decode("utf-8")
+                text = storage.load(self._file_cache_key).decode("utf-8")
                plaintext_file_exists = True
                return [Document(page_content=text)]
        documents = list(self.load())
--- a/api/core/rag/extractor/unstructured/unstructured_doc_extractor.py
+++ b/api/core/rag/extractor/unstructured/unstructured_doc_extractor.py
@@ -23,7 +23,7 @@ class UnstructuredWordExtractor(BaseExtractor):
        unstructured_version = tuple(int(x) for x in __unstructured_version__.split("."))
        # check the file extension
        try:
-            import magic  # noqa: F401
+            import magic  # noqa: F401  # pyright: ignore[reportUnusedImport]

            is_doc = detect_filetype(self._file_path) == FileType.DOC
        except ImportError: