fix: csv injection in annotations export (#29462)

Co-authored-by: hj24 <huangjian@dify.ai>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
zyssyz123
2025-12-15 17:14:05 +08:00
committed by GitHub
parent 2bf44057e9
commit bd7b1fc6fb
5 changed files with 271 additions and 7 deletions

View File

@ -0,0 +1,89 @@
"""CSV sanitization utilities to prevent formula injection attacks."""
from typing import Any
class CSVSanitizer:
"""
Sanitizer for CSV export to prevent formula injection attacks.
This class provides methods to sanitize data before CSV export by escaping
characters that could be interpreted as formulas by spreadsheet applications
(Excel, LibreOffice, Google Sheets).
Formula injection occurs when user-controlled data starting with special
characters (=, +, -, @, tab, carriage return) is exported to CSV and opened
in a spreadsheet application, potentially executing malicious commands.
"""
# Characters that can start a formula in Excel/LibreOffice/Google Sheets
FORMULA_CHARS = frozenset({"=", "+", "-", "@", "\t", "\r"})
@classmethod
def sanitize_value(cls, value: Any) -> str:
"""
Sanitize a value for safe CSV export.
Prefixes formula-initiating characters with a single quote to prevent
Excel/LibreOffice/Google Sheets from treating them as formulas.
Args:
value: The value to sanitize (will be converted to string)
Returns:
Sanitized string safe for CSV export
Examples:
>>> CSVSanitizer.sanitize_value("=1+1")
"'=1+1"
>>> CSVSanitizer.sanitize_value("Hello World")
"Hello World"
>>> CSVSanitizer.sanitize_value(None)
""
"""
if value is None:
return ""
# Convert to string
str_value = str(value)
# If empty, return as is
if not str_value:
return ""
# Check if first character is a formula initiator
if str_value[0] in cls.FORMULA_CHARS:
# Prefix with single quote to escape
return f"'{str_value}"
return str_value
@classmethod
def sanitize_dict(cls, data: dict[str, Any], fields_to_sanitize: list[str] | None = None) -> dict[str, Any]:
"""
Sanitize specified fields in a dictionary.
Args:
data: Dictionary containing data to sanitize
fields_to_sanitize: List of field names to sanitize.
If None, sanitizes all string fields.
Returns:
Dictionary with sanitized values (creates a shallow copy)
Examples:
>>> data = {"question": "=1+1", "answer": "+calc", "id": "123"}
>>> CSVSanitizer.sanitize_dict(data, ["question", "answer"])
{"question": "'=1+1", "answer": "'+calc", "id": "123"}
"""
sanitized = data.copy()
if fields_to_sanitize is None:
# Sanitize all string fields
fields_to_sanitize = [k for k, v in data.items() if isinstance(v, str)]
for field in fields_to_sanitize:
if field in sanitized:
sanitized[field] = cls.sanitize_value(sanitized[field])
return sanitized