Merge branch 'feat/queue-based-graph-engine' into feat/rag-2

# Conflicts:
#	api/core/memory/token_buffer_memory.py
#	api/core/rag/extractor/notion_extractor.py
#	api/core/repositories/sqlalchemy_workflow_node_execution_repository.py
#	api/core/variables/variables.py
#	api/core/workflow/graph/graph.py
#	api/core/workflow/graph_engine/entities/event.py
#	api/services/dataset_service.py
#	web/app/components/app-sidebar/index.tsx
#	web/app/components/base/tag-management/selector.tsx
#	web/app/components/base/toast/index.tsx
#	web/app/components/datasets/create/website/index.tsx
#	web/app/components/datasets/create/website/jina-reader/base/options-wrap.tsx
#	web/app/components/workflow/header/version-history-button.tsx
#	web/app/components/workflow/hooks/use-inspect-vars-crud-common.ts
#	web/app/components/workflow/hooks/use-workflow-interactions.ts
#	web/app/components/workflow/panel/version-history-panel/index.tsx
#	web/service/base.ts
This commit is contained in:
jyong
2025-09-03 15:01:06 +08:00
572 changed files with 16030 additions and 7973 deletions

View File

@ -107,7 +107,7 @@ class Blob(BaseModel):
Blob instance
"""
if mime_type is None and guess_type:
_mimetype = mimetypes.guess_type(path)[0] if guess_type else None
_mimetype = mimetypes.guess_type(path)[0]
else:
_mimetype = mime_type
# We do not load the data immediately, instead we treat the blob as a

View File

@ -45,7 +45,7 @@ class ExtractProcessor:
cls, upload_file: UploadFile, return_text: bool = False, is_automatic: bool = False
) -> Union[list[Document], str]:
extract_setting = ExtractSetting(
datasource_type="upload_file", upload_file=upload_file, document_model="text_model"
datasource_type=DatasourceType.FILE.value, upload_file=upload_file, document_model="text_model"
)
if return_text:
delimiter = "\n"
@ -76,7 +76,7 @@ class ExtractProcessor:
# https://stackoverflow.com/questions/26541416/generate-temporary-file-names-without-creating-actual-file-in-python#comment90414256_26541521
file_path = f"{temp_dir}/{tempfile.gettempdir()}{suffix}"
Path(file_path).write_bytes(response.content)
extract_setting = ExtractSetting(datasource_type="upload_file", document_model="text_model")
extract_setting = ExtractSetting(datasource_type=DatasourceType.FILE.value, document_model="text_model")
if return_text:
delimiter = "\n"
return delimiter.join(

View File

@ -2,7 +2,7 @@
import re
from pathlib import Path
from typing import Optional, cast
from typing import Optional
from core.rag.extractor.extractor_base import BaseExtractor
from core.rag.extractor.helpers import detect_file_encodings
@ -76,7 +76,7 @@ class MarkdownExtractor(BaseExtractor):
markdown_tups.append((current_header, current_text))
markdown_tups = [
(re.sub(r"#", "", cast(str, key)).strip() if key else None, re.sub(r"<.*?>", "", value))
(re.sub(r"#", "", key).strip() if key else None, re.sub(r"<.*?>", "", value))
for key, value in markdown_tups
]

View File

@ -2,7 +2,7 @@
import contextlib
from collections.abc import Iterator
from typing import Optional, cast
from typing import Optional
from core.rag.extractor.blob.blob import Blob
from core.rag.extractor.extractor_base import BaseExtractor
@ -27,7 +27,7 @@ class PdfExtractor(BaseExtractor):
plaintext_file_exists = False
if self._file_cache_key:
with contextlib.suppress(FileNotFoundError):
text = cast(bytes, storage.load(self._file_cache_key)).decode("utf-8")
text = storage.load(self._file_cache_key).decode("utf-8")
plaintext_file_exists = True
return [Document(page_content=text)]
documents = list(self.load())

View File

@ -23,7 +23,7 @@ class UnstructuredWordExtractor(BaseExtractor):
unstructured_version = tuple(int(x) for x in __unstructured_version__.split("."))
# check the file extension
try:
import magic # noqa: F401
import magic # noqa: F401 # pyright: ignore[reportUnusedImport]
is_doc = detect_filetype(self._file_path) == FileType.DOC
except ImportError: