mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-04-27 05:47:57 +08:00
Fix: upgrade pypdf to 6.7.5 and migrate from deprecated pypdf2 to fix CVE-2026-28804 and CVE-2023-36464 (#13454)
### What problem does this PR solve?

This PR addresses security vulnerabilities in PDF processing dependencies identified by a Trivy security scan:

1. CVE-2026-28804 (MEDIUM): pypdf 6.7.4 is vulnerable to inefficient decoding of ASCIIHexDecode streams.
2. CVE-2023-36464 (MEDIUM): pypdf2 3.0.1 is susceptible to an infinite loop when parsing malformed comments.

Since pypdf2 is deprecated with no available fixes, this PR migrates all pypdf2 usage to the actively maintained pypdf library (version 6.7.5), which resolves both vulnerabilities.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -81,8 +81,7 @@ dependencies = [
|
||||
"pyobvector==0.2.22",
|
||||
"pyodbc>=5.2.0,<6.0.0",
|
||||
"pypandoc>=1.16",
|
||||
"pypdf>=6.6.2",
|
||||
"pypdf2>=3.0.1,<4.0.0",
|
||||
"pypdf>=6.7.5",
|
||||
"python-calamine>=0.4.0",
|
||||
"python-docx>=1.1.2,<2.0.0",
|
||||
"python-pptx>=1.0.2,<2.0.0",
|
||||
|
||||
@ -20,7 +20,7 @@ import re
|
||||
from collections import defaultdict
|
||||
from io import BytesIO
|
||||
|
||||
from PyPDF2 import PdfReader as pdf2_read
|
||||
from pypdf import PdfReader as pdf2_read
|
||||
|
||||
from deepdoc.parser import PdfParser, PlainParser
|
||||
from deepdoc.parser.ppt_parser import RAGFlowPptParser
|
||||
|
||||
@ -21,7 +21,7 @@ import requests
|
||||
from requests.exceptions import Timeout, RequestException
|
||||
from io import BytesIO
|
||||
from typing import List, Union, Tuple, Optional, Dict
|
||||
import PyPDF2
|
||||
import pypdf as PyPDF2
|
||||
from docx import Document
|
||||
import olefile
|
||||
|
||||
|
||||
19
uv.lock
generated
19
uv.lock
generated
@ -5760,20 +5760,11 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "pypdf"
|
||||
version = "6.7.4"
|
||||
version = "6.7.5"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/09/dc/f52deef12797ad58b88e4663f097a343f53b9361338aef6573f135ac302f/pypdf-6.7.4.tar.gz", hash = "sha256:9edd1cd47938bb35ec87795f61225fd58a07cfaf0c5699018ae1a47d6f8ab0e3", size = 5304821, upload-time = "2026-02-27T10:44:39.395Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f6/52/37cc0aa9e9d1bf7729a737a0d83f8b3f851c8eb137373d9f71eafb0a3405/pypdf-6.7.5.tar.gz", hash = "sha256:40bb2e2e872078655f12b9b89e2f900888bb505e88a82150b64f9f34fa25651d", size = 5304278, upload-time = "2026-03-02T09:05:21.464Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c1/be/cded021305f5c81b47265b8c5292b99388615a4391c21ff00fd538d34a56/pypdf-6.7.4-py3-none-any.whl", hash = "sha256:527d6da23274a6c70a9cb59d1986d93946ba8e36a6bc17f3f7cce86331492dda", size = 331496, upload-time = "2026-02-27T10:44:37.527Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pypdf2"
|
||||
version = "3.0.1"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9f/bb/18dc3062d37db6c491392007dfd1a7f524bb95886eb956569ac38a23a784/PyPDF2-3.0.1.tar.gz", hash = "sha256:a74408f69ba6271f71b9352ef4ed03dc53a31aa404d29b5d31f53bfecfee1440", size = 227419, upload-time = "2022-12-31T10:36:13.13Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/8e/5e/c86a5643653825d3c913719e788e41386bee415c2b87b4f955432f2de6b2/pypdf2-3.0.1-py3-none-any.whl", hash = "sha256:d16e4205cfee272fbdc0568b68d82be796540b1537508cef59388f839c191928", size = 232572, upload-time = "2022-12-31T10:36:10.327Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/05/89/336673efd0a88956562658aba4f0bbef7cb92a6fbcbcaf94926dbc82b408/pypdf-6.7.5-py3-none-any.whl", hash = "sha256:07ba7f1d6e6d9aa2a17f5452e320a84718d4ce863367f7ede2fd72280349ab13", size = 331421, upload-time = "2026-03-02T09:05:19.722Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -6323,7 +6314,6 @@ dependencies = [
|
||||
{ name = "pyodbc" },
|
||||
{ name = "pypandoc" },
|
||||
{ name = "pypdf" },
|
||||
{ name = "pypdf2" },
|
||||
{ name = "python-calamine" },
|
||||
{ name = "python-docx" },
|
||||
{ name = "python-gitlab" },
|
||||
@ -6462,8 +6452,7 @@ requires-dist = [
|
||||
{ name = "pyobvector", specifier = "==0.2.22" },
|
||||
{ name = "pyodbc", specifier = ">=5.2.0,<6.0.0" },
|
||||
{ name = "pypandoc", specifier = ">=1.16" },
|
||||
{ name = "pypdf", specifier = ">=6.6.2" },
|
||||
{ name = "pypdf2", specifier = ">=3.0.1,<4.0.0" },
|
||||
{ name = "pypdf", specifier = ">=6.7.5" },
|
||||
{ name = "python-calamine", specifier = ">=0.4.0" },
|
||||
{ name = "python-docx", specifier = ">=1.1.2,<2.0.0" },
|
||||
{ name = "python-gitlab", specifier = ">=7.0.0" },
|
||||
|
||||
Reference in New Issue
Block a user