mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-03-11 10:17:56 +08:00
### What problem does this PR solve? This PR addresses security vulnerabilities in PDF processing dependencies identified by Trivy security scan: 1. CVE-2026-28804 (MEDIUM): pypdf 6.7.4 vulnerable to inefficient decoding of ASCIIHexDecode streams 2. CVE-2023-36464 (MEDIUM): pypdf2 3.0.1 susceptible to infinite loop when parsing malformed comments Since pypdf2 is deprecated with no available fixes, this PR migrates all pypdf2 usage to the actively maintained pypdf library (version 6.7.5), which resolves both vulnerabilities. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)
288 lines
7.8 KiB
TOML
288 lines
7.8 KiB
TOML
# Core project metadata and runtime dependencies.
[project]
name = "ragflow"
version = "0.24.0"
description = "[RAGFlow](https://ragflow.io/) is an open-source RAG (Retrieval-Augmented Generation) engine based on deep document understanding. It offers a streamlined RAG workflow for businesses of any scale, combining LLM (Large Language Models) to provide truthful question-answering capabilities, backed by well-founded citations from various complex formatted data."
authors = [{ name = "Zhichang Yu", email = "yuzhichang@gmail.com" }]
license-files = ["LICENSE"]
readme = "README.md"
requires-python = ">=3.12,<3.15"
dependencies = [
    "aiosmtplib>=5.0.0",
    "akshare>=1.15.78,<2.0.0",
    "anthropic==0.34.1",
    "arxiv==2.1.3",
    "atlassian-python-api==4.0.7",
    "azure-identity==1.17.1",
    "azure-storage-file-datalake==12.16.0",
    "beartype>=0.20.0,<1.0.0",
    "bio==1.7.1",
    "boxsdk>=10.1.0",
    "captcha>=0.7.1",
    "chardet>=5.2.0,<6.0.0",
    "cn2an==0.5.22",
    "cohere==5.6.2",
    "Crawl4AI>=0.4.0,<1.0.0",
    "dashscope==1.25.11",
    "deepl==1.18.0",
    "demjson3==3.0.6",
    "discord-py==2.3.2",
    "dropbox==12.0.2",
    "duckduckgo-search>=7.2.0,<8.0.0",
    "editdistance==0.8.1",
    "elasticsearch-dsl==8.12.0",
    "exceptiongroup>=1.3.0,<2.0.0",
    "extract-msg>=0.39.0",
    "ffmpeg-python>=0.2.0",
    "flasgger>=0.9.7.1,<0.10.0",
    "flask-cors==6.0.2",
    "flask-login==0.6.3",
    "flask-mail>=0.10.0",
    "flask-session==0.8.0",
    "google-api-python-client>=2.190.0,<3.0.0",
    "google-auth-oauthlib>=1.2.0,<2.0.0",
    "google-cloud-storage>=2.19.0,<3.0.0",
    "google-genai>=1.41.0,<2.0.0",
    "google-search-results==2.4.2",
    # Pinned to an exact commit of the infiniflow fork.
    "graspologic @ git+https://gitee.com/infiniflow/graspologic.git@38e680cab72bc9fb68a7992c3bcc2d53b24e42fd",
    "groq==0.9.0",
    "grpcio-status==1.67.1",
    "html-text==0.6.2",
    "infinity-sdk==0.7.0-dev2",
    "infinity-emb>=0.0.66,<0.0.67",
    "jira==3.10.5",
    "json-repair==0.35.0",
    "langfuse>=2.60.0",
    "mammoth>=1.11.0",
    "markdown==3.6",
    "markdown-to-json==2.1.1",
    "markdownify>=1.2.0",
    "mcp>=1.19.0",
    "mini-racer>=0.12.4,<0.13.0",
    "minio==7.2.4",
    "mistralai==0.4.2",
    "mysql-connector-python>=9.0.0,<10.0.0",
    "moodlepy>=0.23.0",
    "mypy-boto3-s3==1.40.26",
    "Office365-REST-Python-Client==2.6.2",
    "ollama>=0.5.0",
    # The two markers below are complementary: the GPU build is selected on
    # non-macOS x86_64 hosts, the plain build everywhere else.
    "onnxruntime==1.23.2; sys_platform == 'darwin' or platform_machine != 'x86_64'",
    "onnxruntime-gpu==1.23.2; sys_platform != 'darwin' and platform_machine == 'x86_64'",
    "opencv-python==4.10.0.84",
    "opencv-python-headless==4.10.0.84",
    "opendal>=0.45.0,<0.46.0",
    "opensearch-py==2.7.1",
    "ormsgpack==1.5.0",
    "pdfplumber==0.10.4",
    "pluginlib==0.9.4",
    "psycopg2-binary>=2.9.11,<3.0.0",
    "pyclipper>=1.4.0,<2.0.0",
    "pycryptodomex==3.20.0",
    "pyobvector==0.2.22",
    "pyodbc>=5.2.0,<6.0.0",
    "pypandoc>=1.16",
    # Floor raised for a pypdf security fix; do not lower it.
    "pypdf>=6.7.5",
    "python-calamine>=0.4.0",
    "python-docx>=1.1.2,<2.0.0",
    "python-pptx>=1.0.2,<2.0.0",
    # "pywencai>=0.13.1,<1.0.0",  # Temporarily disabled: conflicts with agentrun-sdk (pydash>=8), needed for agent/tools/wencai.py
    "qianfan==0.4.6",
    "quart-auth==0.11.0",
    "quart-cors==0.8.0",
    "ranx==0.3.20",
    "readability-lxml>=0.8.4,<1.0.0",
    "replicate==0.31.0",
    "reportlab>=4.4.1",
    "roman-numbers==1.0.2",
    "ruamel-base==1.0.0",
    "ruamel-yaml>=0.18.6,<0.19.0",
    "scholarly==1.7.11",
    "selenium-wire==5.1.0",
    "slack-sdk==3.37.0",
    "socksio==1.0.0",
    "agentrun-sdk>=0.0.16,<1.0.0",
    "nest-asyncio>=1.6.0,<2.0.0",  # Needed for agent/component/message.py
    "sqlglotrs==0.9.0",
    "strenum==0.4.15",
    "tavily-python==0.5.1",
    "tencentcloud-sdk-python==3.0.1478",
    "tika==2.6.0",
    "valkey==6.0.2",
    "volcengine==1.0.194",
    "voyageai==0.2.3",
    "webdav4>=0.10.0,<0.11.0",
    "webdriver-manager==4.0.1",
    "wikipedia==1.4.0",
    "word2number==1.1",
    "xgboost==1.6.0",
    "xpinyin==0.7.6",
    "yfinance==0.2.65",
    "zhipuai==2.0.1",
    "peewee>=3.17.1,<4.0.0",
    # The following modules aren't necessary here; the entries are kept
    # commented out for reference.
    # "nltk==3.9.1",
    # "numpy>=1.26.0,<2.0.0",
    # "openai>=1.45.0",
    # "openpyxl>=3.1.0,<4.0.0",
    # "pandas>=2.2.0,<3.0.0",
    # "pillow>=10.4.0,<13.0.0",
    # "protobuf==5.27.2",
    # "pymysql>=1.1.1,<2.0.0",
    # "python-dotenv==1.0.1",
    # "python-dateutil==2.8.2",
    # "Quart==0.20.0",
    # "requests>=2.32.3,<3.0.0",
    # "scikit-learn==1.5.0",
    # "selenium==4.22.0",
    # "setuptools>=78.1.1,<81.0.0",
    # "shapely==2.0.5",
    # "six==1.16.0",
    # "tabulate==0.9.0",
    # "tiktoken==0.7.0",
    # "umap_learn==0.5.6",
    # "werkzeug==3.0.6",
    # "xxhash>=3.5.0,<4.0.0",
    # "trio>=0.17.0,<0.29.0",
    # "debugpy>=1.8.13",
    # "click>=8.1.8",
    # "litellm>=1.74.15.post1",
    # "lark>=1.2.2",
    # "pip>=25.2",
    # "imageio-ffmpeg>=0.6.0",
    # "cryptography==46.0.3",
    # "jinja2>=3.1.0",
    "pyairtable>=3.3.0",
    "pygithub>=2.8.1",
    "asana>=5.2.2",
    "python-gitlab>=7.0.0",
    "alibabacloud-dingtalk>=2.0.0",
    "quart-schema==0.23.0",
]
|
|
|
|
# Dependency groups (not part of the published runtime requirements).
[dependency-groups]
# Packages required only to run the test suite.
test = [
    "hypothesis>=6.132.0",
    "openpyxl>=3.1.5",
    "pillow>=10.4.0,<13.0.0",
    "pytest>=8.3.5",
    "pytest-asyncio>=1.3.0",
    "pytest-xdist>=3.8.0",
    "pytest-cov>=7.0.0",
    "python-docx>=1.1.2",
    "python-pptx>=1.0.2",
    "reportlab>=4.4.1",
    "requests>=2.32.2",
    "requests-toolbelt>=1.0.0",
    "pycryptodomex==3.20.0",
    "pytest-playwright>=0.7.2",
    "codecov>=2.1.13",
]
|
|
|
|
# Package index entry for uv (Tsinghua University PyPI mirror).
[[tool.uv.index]]
url = "https://pypi.tuna.tsinghua.edu.cn/simple"
|
|
|
|
# Explicit list of import packages that setuptools should include.
[tool.setuptools]
packages = [
    "agent",
    "api",
    "deepdoc",
    "graphrag",
    # NOTE(review): the "intergrations" spelling presumably mirrors the
    # on-disk directory name; confirm against the tree before changing it.
    "intergrations.chatgpt-on-wechat.plugins",
    "mcp.server",
    "rag",
    "sdk.python.ragflow_sdk",
]
|
|
|
|
# Ruff: global settings.
[tool.ruff]
line-length = 200
# Paths Ruff skips entirely.
exclude = [".venv", "rag/svr/discord_svr.py"]
|
|
|
|
# Ruff: lint rule selection.
[tool.ruff.lint]
# "ASYNC" is a prefix selector and already covers every ASYNC1xx rule, so a
# separate "ASYNC1" entry is not needed.
extend-select = ["ASYNC"]
# E402: module-level import not at top of file.
ignore = ["E402"]
|
|
|
|
# Pytest configuration.
[tool.pytest.ini_options]
# Put the repository root on sys.path during test runs.
pythonpath = ["."]

# Test discovery rules.
testpaths = ["test"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]

# Every marker used by the suite must be declared here because addopts
# passes --strict-markers.
markers = [
    "p0: critical priority test cases",
    "p1: high priority test cases",
    "p2: medium priority test cases",
    "p3: low priority test cases",
    "smoke: smoke test cases",
    "auth: authentication UI tests",
]

# Warning filters: later entries take precedence over earlier ones.
filterwarnings = [
    "error",                       # Treat warnings as errors
    "ignore::DeprecationWarning",  # ...except DeprecationWarning, which is ignored
]

# Command line options
addopts = [
    "-v",                  # Verbose output
    "--strict-markers",    # Enforce marker definitions
    "--tb=short",          # Simplified traceback
    "--disable-warnings",  # Disable warnings
    "--color=yes",         # Colored output
]
|
|
|
|
|
|
# Coverage configuration
|
|
# coverage.py: measurement settings.
[tool.coverage.run]
# Source paths - adjust according to your project structure
source = [
    # "../../api/db/services",
    # Add more directories if needed:
    "../../common",
    # "../../utils",
]

# Files/directories to exclude
omit = [
    "*/tests/*",
    "*/test_*",
    "*/__pycache__/*",
    "*/.pytest_cache/*",
    "*/venv/*",
    "*/.venv/*",
    "*/env/*",
    "*/site-packages/*",
    "*/dist/*",
    "*/build/*",
    "*/migrations/*",
    "setup.py",
]
|
|
|
|
# coverage.py: report settings.
[tool.coverage.report]
precision = 2
show_missing = true
skip_covered = false
fail_under = 0  # Minimum coverage requirement (0-100)

# Lines to exclude (optional). Each entry is a regular expression searched
# for within source lines, so patterns must be anchored: a bare "pass" would
# also exclude any line mentioning "password", "bypass", "passed", etc.
# Setting exclude_lines replaces coverage.py's built-in defaults.
exclude_lines = [
    # "pragma: no cover",
    # "def __repr__",
    # "raise AssertionError",
    # "raise NotImplementedError",
    # "if __name__ == .__main__.:",
    # "if TYPE_CHECKING:",
    # A bare `pass` statement, optionally followed by a comment.
    '^[ \t]*pass[ \t]*(#.*)?$',
]
|
|
|
|
# coverage.py: HTML report settings.
[tool.coverage.html]
# Output directory for the generated report.
directory = "htmlcov"
title = "Test Coverage Report"
# extra_css = "custom.css"  # Optional custom CSS
|