Fix: PaddleOCR coordinate lower > upper (#13630)

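### What problem does this PR solve?

Fix: PaddleOCR coordinate lower > upper (#13618). Bounding boxes whose left > right or top > bottom are now reordered before position tags are built and before page images are cropped.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Co-authored-by: Yingfeng <yingfeng.zhang@gmail.com>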
2026-04-23 20:26:11 +08:00 · 2026-03-16 20:15:26 +08:00
parent 0545801251
commit 09ff1bc2b0
1 changed file with 35 additions and 1 deletion
--- a/deepdoc/parser/paddleocr_parser.py
+++ b/deepdoc/parser/paddleocr_parser.py
@@ -59,6 +59,18 @@ def _remove_images_from_markdown(markdown: str) -> str:
    return _MARKDOWN_IMAGE_PATTERN.sub("", markdown)


+def _normalize_bbox(bbox: list[Any] | tuple[Any, ...]) -> tuple[float, float, float, float]:
+    if len(bbox) < 4:
+        return 0.0, 0.0, 0.0, 0.0
+
+    left, top, right, bottom = (float(bbox[0]), float(bbox[1]), float(bbox[2]), float(bbox[3]))
+    if left > right:
+        left, right = right, left
+    if top > bottom:
+        top, bottom = bottom, top
+    return left, top, right, bottom
+
+
@dataclass
 class PaddleOCRVLConfig:
    """Configuration for PaddleOCR-VL algorithm."""
@@ -393,8 +405,9 @@ class PaddleOCRParser(RAGFlowPdfParser):

                    label = block.get("block_label", "")
                    block_bbox = block.get("block_bbox", [0, 0, 0, 0])
+                    left, top, right, bottom = _normalize_bbox(block_bbox)

-                    tag = f"@@{page_idx + 1}\t{block_bbox[0] // self._ZOOMIN}\t{block_bbox[2] // self._ZOOMIN}\t{block_bbox[1] // self._ZOOMIN}\t{block_bbox[3] // self._ZOOMIN}##"
+                    tag = f"@@{page_idx + 1}\t{left // self._ZOOMIN}\t{right // self._ZOOMIN}\t{top // self._ZOOMIN}\t{bottom // self._ZOOMIN}##"

                    if parse_method == "manual":
                        sections.append((block_content, label, tag))
@@ -509,6 +522,16 @@ class PaddleOCRParser(RAGFlowPdfParser):

            img0 = self.page_images[pns[0]]
            x0, y0, x1, y1 = int(left), int(top), int(right), int(min(bottom, img0.size[1]))
+            if x0 > x1:
+                x0, x1 = x1, x0
+            if y0 > y1:
+                y0, y1 = y1, y0
+            x0 = max(0, min(x0, img0.size[0]))
+            x1 = max(0, min(x1, img0.size[0]))
+            y0 = max(0, min(y0, img0.size[1]))
+            y1 = max(0, min(y1, img0.size[1]))
+            if x1 <= x0 or y1 <= y0:
+                continue
            crop0 = img0.crop((x0, y0, x1, y1))
            imgs.append(crop0)
            if 0 < ii < len(poss) - 1:
@@ -521,6 +544,17 @@ class PaddleOCRParser(RAGFlowPdfParser):
                    continue
                page = self.page_images[pn]
                x0, y0, x1, y1 = int(left), 0, int(right), int(min(bottom, page.size[1]))
+                if x0 > x1:
+                    x0, x1 = x1, x0
+                if y0 > y1:
+                    y0, y1 = y1, y0
+                x0 = max(0, min(x0, page.size[0]))
+                x1 = max(0, min(x1, page.size[0]))
+                y0 = max(0, min(y0, page.size[1]))
+                y1 = max(0, min(y1, page.size[1]))
+                if x1 <= x0 or y1 <= y0:
+                    bottom -= page.size[1]
+                    continue
                cimgp = page.crop((x0, y0, x1, y1))
                imgs.append(cimgp)
                if 0 < ii < len(poss) - 1: