diff --git a/deepdoc/parser/pdf_parser.py b/deepdoc/parser/pdf_parser.py index ce6b9298b..613787b48 100644 --- a/deepdoc/parser/pdf_parser.py +++ b/deepdoc/parser/pdf_parser.py @@ -476,7 +476,7 @@ class RAGFlowPdfParser: self.boxes = bxs def _naive_vertical_merge(self, zoomin=3): - #bxs = self._assign_column(self.boxes, zoomin) + # bxs = self._assign_column(self.boxes, zoomin) bxs = self.boxes grouped = defaultdict(list) @@ -553,7 +553,8 @@ class RAGFlowPdfParser: merged_boxes.extend(bxs) - #self.boxes = sorted(merged_boxes, key=lambda x: (x["page_number"], x.get("col_id", 0), x["top"])) + # self.boxes = sorted(merged_boxes, key=lambda x: (x["page_number"], x.get("col_id", 0), x["top"])) + self.boxes = merged_boxes def _final_reading_order_merge(self, zoomin=3): if not self.boxes: