mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-04-27 05:47:57 +08:00
refactor: let excel use lazy image loader (#13558)
### What problem does this PR solve?

Let the Excel parser use the lazy image loader.

### Type of change

- [x] Refactoring

---------

Co-authored-by: Yingfeng <yingfeng.zhang@gmail.com>
This commit is contained in:
@ -18,9 +18,9 @@ from io import BytesIO
|
||||
|
||||
import pandas as pd
|
||||
from openpyxl import Workbook, load_workbook
|
||||
from PIL import Image
|
||||
|
||||
from rag.nlp import find_codec
|
||||
from rag.utils.lazy_image import LazyImage
|
||||
|
||||
# copied from `/openpyxl/cell/cell.py`
|
||||
ILLEGAL_CHARACTERS_RE = re.compile(r"[\000-\010]|[\013-\014]|[\016-\037]")
|
||||
@ -122,7 +122,7 @@ class RAGFlowExcelParser:
|
||||
for img in images:
|
||||
try:
|
||||
img_bytes = img._data()
|
||||
pil_img = Image.open(BytesIO(img_bytes)).convert("RGB")
|
||||
lazy_img = LazyImage([img_bytes])
|
||||
|
||||
anchor = img.anchor
|
||||
if hasattr(anchor, "_from") and hasattr(anchor, "_to"):
|
||||
@ -139,7 +139,7 @@ class RAGFlowExcelParser:
|
||||
|
||||
item = {
|
||||
"sheet": sheetname or ws.title,
|
||||
"image": pil_img,
|
||||
"image": lazy_img,
|
||||
"image_description": "",
|
||||
"row_from": r1,
|
||||
"col_from": c1,
|
||||
|
||||
Reference in New Issue
Block a user