mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-05-06 10:17:49 +08:00
refactor: let excel use lazy image loader (#13558)
### What problem does this PR solve?

Let Excel use the lazy image loader.

### Type of change

- [x] Refactoring

---------

Co-authored-by: Yingfeng <yingfeng.zhang@gmail.com>
This commit is contained in:
@ -27,7 +27,7 @@ from rag.nlp import rag_tokenizer
|
||||
from deepdoc.parser import PdfParser, HtmlParser
|
||||
from deepdoc.parser.figure_parser import vision_figure_parser_docx_wrapper
|
||||
from PIL import Image
|
||||
from rag.utils.lazy_image import LazyDocxImage
|
||||
from rag.utils.lazy_image import LazyImage
|
||||
|
||||
|
||||
class Pdf(PdfParser):
|
||||
@ -89,7 +89,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca
|
||||
sections = [
|
||||
(item[0], item[1] if item[1] is not None else "")
|
||||
for item in sections
|
||||
if not isinstance(item[1], (Image.Image, LazyDocxImage))
|
||||
if not isinstance(item[1], (Image.Image, LazyImage))
|
||||
]
|
||||
callback(0.8, "Finish parsing.")
|
||||
|
||||
|
||||
@ -115,7 +115,7 @@ class Excel(ExcelParser):
|
||||
tables.append(
|
||||
(
|
||||
(
|
||||
img["image"], # Image.Image
|
||||
img["image"], # Image.Image or LazyImage
|
||||
[img["image_description"]] # description list (must be list)
|
||||
),
|
||||
[
|
||||
|
||||
Reference in New Issue
Block a user