mirror of
https://github.com/langgenius/dify.git
synced 2026-04-24 04:45:51 +08:00
Merge branch 'feat/queue-based-graph-engine' into feat/rag-2
# Conflicts: # api/core/memory/token_buffer_memory.py # api/core/rag/extractor/notion_extractor.py # api/core/repositories/sqlalchemy_workflow_node_execution_repository.py # api/core/variables/variables.py # api/core/workflow/graph/graph.py # api/core/workflow/graph_engine/entities/event.py # api/services/dataset_service.py # web/app/components/app-sidebar/index.tsx # web/app/components/base/tag-management/selector.tsx # web/app/components/base/toast/index.tsx # web/app/components/datasets/create/website/index.tsx # web/app/components/datasets/create/website/jina-reader/base/options-wrap.tsx # web/app/components/workflow/header/version-history-button.tsx # web/app/components/workflow/hooks/use-inspect-vars-crud-common.ts # web/app/components/workflow/hooks/use-workflow-interactions.ts # web/app/components/workflow/panel/version-history-panel/index.tsx # web/service/base.ts
This commit is contained in:
@ -107,7 +107,7 @@ class Blob(BaseModel):
|
||||
Blob instance
|
||||
"""
|
||||
if mime_type is None and guess_type:
|
||||
_mimetype = mimetypes.guess_type(path)[0] if guess_type else None
|
||||
_mimetype = mimetypes.guess_type(path)[0]
|
||||
else:
|
||||
_mimetype = mime_type
|
||||
# We do not load the data immediately, instead we treat the blob as a
|
||||
|
||||
@ -45,7 +45,7 @@ class ExtractProcessor:
|
||||
cls, upload_file: UploadFile, return_text: bool = False, is_automatic: bool = False
|
||||
) -> Union[list[Document], str]:
|
||||
extract_setting = ExtractSetting(
|
||||
datasource_type="upload_file", upload_file=upload_file, document_model="text_model"
|
||||
datasource_type=DatasourceType.FILE.value, upload_file=upload_file, document_model="text_model"
|
||||
)
|
||||
if return_text:
|
||||
delimiter = "\n"
|
||||
@ -76,7 +76,7 @@ class ExtractProcessor:
|
||||
# https://stackoverflow.com/questions/26541416/generate-temporary-file-names-without-creating-actual-file-in-python#comment90414256_26541521
|
||||
file_path = f"{temp_dir}/{tempfile.gettempdir()}{suffix}"
|
||||
Path(file_path).write_bytes(response.content)
|
||||
extract_setting = ExtractSetting(datasource_type="upload_file", document_model="text_model")
|
||||
extract_setting = ExtractSetting(datasource_type=DatasourceType.FILE.value, document_model="text_model")
|
||||
if return_text:
|
||||
delimiter = "\n"
|
||||
return delimiter.join(
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Optional, cast
|
||||
from typing import Optional
|
||||
|
||||
from core.rag.extractor.extractor_base import BaseExtractor
|
||||
from core.rag.extractor.helpers import detect_file_encodings
|
||||
@ -76,7 +76,7 @@ class MarkdownExtractor(BaseExtractor):
|
||||
markdown_tups.append((current_header, current_text))
|
||||
|
||||
markdown_tups = [
|
||||
(re.sub(r"#", "", cast(str, key)).strip() if key else None, re.sub(r"<.*?>", "", value))
|
||||
(re.sub(r"#", "", key).strip() if key else None, re.sub(r"<.*?>", "", value))
|
||||
for key, value in markdown_tups
|
||||
]
|
||||
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
import contextlib
|
||||
from collections.abc import Iterator
|
||||
from typing import Optional, cast
|
||||
from typing import Optional
|
||||
|
||||
from core.rag.extractor.blob.blob import Blob
|
||||
from core.rag.extractor.extractor_base import BaseExtractor
|
||||
@ -27,7 +27,7 @@ class PdfExtractor(BaseExtractor):
|
||||
plaintext_file_exists = False
|
||||
if self._file_cache_key:
|
||||
with contextlib.suppress(FileNotFoundError):
|
||||
text = cast(bytes, storage.load(self._file_cache_key)).decode("utf-8")
|
||||
text = storage.load(self._file_cache_key).decode("utf-8")
|
||||
plaintext_file_exists = True
|
||||
return [Document(page_content=text)]
|
||||
documents = list(self.load())
|
||||
|
||||
@ -23,7 +23,7 @@ class UnstructuredWordExtractor(BaseExtractor):
|
||||
unstructured_version = tuple(int(x) for x in __unstructured_version__.split("."))
|
||||
# check the file extension
|
||||
try:
|
||||
import magic # noqa: F401
|
||||
import magic # noqa: F401 # pyright: ignore[reportUnusedImport]
|
||||
|
||||
is_doc = detect_filetype(self._file_path) == FileType.DOC
|
||||
except ImportError:
|
||||
|
||||
Reference in New Issue
Block a user