mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-04-23 20:26:11 +08:00
Fix: LFI vulnerability in document parsing API (#13196)
### What problem does this PR solve? Fixes a local file inclusion (LFI) vulnerability in the document parsing API. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -17,7 +17,7 @@ import json
|
||||
import os.path
|
||||
import pathlib
|
||||
import re
|
||||
from pathlib import Path
|
||||
from pathlib import Path, PurePosixPath, PureWindowsPath
|
||||
from quart import request, make_response
|
||||
from api.apps import current_user, login_required
|
||||
from api.common.check_team_permission import check_kb_team_permission
|
||||
@ -50,6 +50,18 @@ from rag.nlp import search, rag_tokenizer
|
||||
from common import settings
|
||||
|
||||
|
||||
def _is_safe_download_filename(name: str) -> bool:
|
||||
if not name or name in {".", ".."}:
|
||||
return False
|
||||
if "\x00" in name or len(name) > 255:
|
||||
return False
|
||||
if name != PurePosixPath(name).name:
|
||||
return False
|
||||
if name != PureWindowsPath(name).name:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
@manager.route("/upload", methods=["POST"]) # noqa: F821
|
||||
@login_required
|
||||
@validate_request("kb_id")
|
||||
@ -874,7 +886,11 @@ async def parse():
|
||||
r = re.search(r"filename=\"([^\"]+)\"", str(res_headers))
|
||||
if not r or not r.group(1):
|
||||
return get_json_result(data=False, message="Can't not identify downloaded file", code=RetCode.ARGUMENT_ERROR)
|
||||
f = File(r.group(1), os.path.join(download_path, r.group(1)))
|
||||
filename = r.group(1).strip()
|
||||
if not _is_safe_download_filename(filename):
|
||||
return get_json_result(data=False, message="Invalid downloaded filename", code=RetCode.ARGUMENT_ERROR)
|
||||
filepath = os.path.join(download_path, filename)
|
||||
f = File(filename, filepath)
|
||||
txt = FileService.parse_docs([f], current_user.id)
|
||||
return get_json_result(data=txt)
|
||||
|
||||
|
||||
@ -19,7 +19,7 @@ from rag.nlp import find_codec
|
||||
|
||||
def get_text(fnm: str, binary=None) -> str:
|
||||
txt = ""
|
||||
if binary:
|
||||
if binary is not None:
|
||||
encoding = find_codec(binary)
|
||||
txt = binary.decode(encoding, errors="ignore")
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user