mirror of
https://github.com/langgenius/dify.git
synced 2026-03-16 20:37:42 +08:00
feat: apply markdown rendering to HITL email, sanitize email subject and body (#32305)
This PR: 1. Fixes the bug that email body of `HumanInput` node are sent as-is, without markdown rendering or sanitization 2. Applies HTML sanitization to email subject and body 3. Removes `\r` and `\n` from email subject to prevent SMTP header injection Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
@ -8,6 +8,8 @@ from collections.abc import Mapping, Sequence
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Annotated, Any, ClassVar, Literal, Self
|
||||
|
||||
import bleach
|
||||
import markdown
|
||||
from pydantic import BaseModel, Field, field_validator, model_validator
|
||||
|
||||
from dify_graph.entities.base_node_data import BaseNodeData
|
||||
@ -58,6 +60,39 @@ class EmailDeliveryConfig(BaseModel):
|
||||
"""Configuration for email delivery method."""
|
||||
|
||||
URL_PLACEHOLDER: ClassVar[str] = "{{#url#}}"
|
||||
_SUBJECT_NEWLINE_PATTERN: ClassVar[re.Pattern[str]] = re.compile(r"[\r\n]+")
|
||||
_ALLOWED_HTML_TAGS: ClassVar[list[str]] = [
|
||||
"a",
|
||||
"blockquote",
|
||||
"br",
|
||||
"code",
|
||||
"em",
|
||||
"h1",
|
||||
"h2",
|
||||
"h3",
|
||||
"h4",
|
||||
"h5",
|
||||
"h6",
|
||||
"hr",
|
||||
"li",
|
||||
"ol",
|
||||
"p",
|
||||
"pre",
|
||||
"strong",
|
||||
"table",
|
||||
"tbody",
|
||||
"td",
|
||||
"th",
|
||||
"thead",
|
||||
"tr",
|
||||
"ul",
|
||||
]
|
||||
_ALLOWED_HTML_ATTRIBUTES: ClassVar[dict[str, list[str]]] = {
|
||||
"a": ["href", "title"],
|
||||
"td": ["align"],
|
||||
"th": ["align"],
|
||||
}
|
||||
_ALLOWED_PROTOCOLS: ClassVar[list[str]] = ["http", "https", "mailto"]
|
||||
|
||||
recipients: EmailRecipients
|
||||
|
||||
@ -98,6 +133,43 @@ class EmailDeliveryConfig(BaseModel):
|
||||
return templated_body
|
||||
return variable_pool.convert_template(templated_body).text
|
||||
|
||||
@classmethod
|
||||
def render_markdown_body(cls, body: str) -> str:
|
||||
"""Render markdown to safe HTML for email delivery."""
|
||||
sanitized_markdown = bleach.clean(
|
||||
body,
|
||||
tags=[],
|
||||
attributes={},
|
||||
strip=True,
|
||||
strip_comments=True,
|
||||
)
|
||||
rendered_html = markdown.markdown(
|
||||
sanitized_markdown,
|
||||
extensions=["nl2br", "tables"],
|
||||
extension_configs={"tables": {"use_align_attribute": True}},
|
||||
)
|
||||
return bleach.clean(
|
||||
rendered_html,
|
||||
tags=cls._ALLOWED_HTML_TAGS,
|
||||
attributes=cls._ALLOWED_HTML_ATTRIBUTES,
|
||||
protocols=cls._ALLOWED_PROTOCOLS,
|
||||
strip=True,
|
||||
strip_comments=True,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def sanitize_subject(cls, subject: str) -> str:
|
||||
"""Sanitize email subject to plain text and prevent CRLF injection."""
|
||||
sanitized_subject = bleach.clean(
|
||||
subject,
|
||||
tags=[],
|
||||
attributes={},
|
||||
strip=True,
|
||||
strip_comments=True,
|
||||
)
|
||||
sanitized_subject = cls._SUBJECT_NEWLINE_PATTERN.sub(" ", sanitized_subject)
|
||||
return " ".join(sanitized_subject.split())
|
||||
|
||||
|
||||
class _DeliveryMethodBase(BaseModel):
|
||||
"""Base delivery method configuration."""
|
||||
|
||||
@ -40,7 +40,7 @@ dependencies = [
|
||||
"numpy~=1.26.4",
|
||||
"openpyxl~=3.1.5",
|
||||
"opik~=1.10.37",
|
||||
"litellm==1.82.2", # Pinned to avoid madoka dependency issue
|
||||
"litellm==1.82.2", # Pinned to avoid madoka dependency issue
|
||||
"opentelemetry-api==1.28.0",
|
||||
"opentelemetry-distro==0.49b0",
|
||||
"opentelemetry-exporter-otlp==1.28.0",
|
||||
@ -91,6 +91,7 @@ dependencies = [
|
||||
"apscheduler>=3.11.0",
|
||||
"weave>=0.52.16",
|
||||
"fastopenapi[flask]>=0.7.0",
|
||||
"bleach~=6.2.0",
|
||||
]
|
||||
# Before adding new dependency, consider place it in
|
||||
# alphabet order (a-z) and suitable group.
|
||||
@ -251,10 +252,7 @@ ignore_errors = true
|
||||
|
||||
[tool.pyrefly]
|
||||
project-includes = ["."]
|
||||
project-excludes = [
|
||||
".venv",
|
||||
"migrations/",
|
||||
]
|
||||
project-excludes = [".venv", "migrations/"]
|
||||
python-platform = "linux"
|
||||
python-version = "3.11.0"
|
||||
infer-with-first-use = false
|
||||
|
||||
@ -155,13 +155,15 @@ class EmailDeliveryTestHandler:
|
||||
context=context,
|
||||
recipient_email=recipient_email,
|
||||
)
|
||||
subject = render_email_template(method.config.subject, substitutions)
|
||||
subject_template = render_email_template(method.config.subject, substitutions)
|
||||
subject = EmailDeliveryConfig.sanitize_subject(subject_template)
|
||||
templated_body = EmailDeliveryConfig.render_body_template(
|
||||
body=method.config.body,
|
||||
url=substitutions.get("form_link"),
|
||||
variable_pool=context.variable_pool,
|
||||
)
|
||||
body = render_email_template(templated_body, substitutions)
|
||||
body = EmailDeliveryConfig.render_markdown_body(body)
|
||||
|
||||
mail.send(
|
||||
to=recipient_email,
|
||||
|
||||
@ -111,7 +111,7 @@ def _render_body(
|
||||
url=form_link,
|
||||
variable_pool=variable_pool,
|
||||
)
|
||||
return body
|
||||
return EmailDeliveryConfig.render_markdown_body(body)
|
||||
|
||||
|
||||
def _load_variable_pool(workflow_run_id: str | None) -> VariablePool | None:
|
||||
@ -173,10 +173,11 @@ def dispatch_human_input_email_task(form_id: str, node_title: str | None = None,
|
||||
for recipient in job.recipients:
|
||||
form_link = _build_form_link(recipient.token)
|
||||
body = _render_body(job.body, form_link, variable_pool=variable_pool)
|
||||
subject = EmailDeliveryConfig.sanitize_subject(job.subject)
|
||||
|
||||
mail.send(
|
||||
to=recipient.email,
|
||||
subject=job.subject,
|
||||
subject=subject,
|
||||
html=body,
|
||||
)
|
||||
|
||||
|
||||
@ -14,3 +14,64 @@ def test_render_body_template_replaces_variable_values():
|
||||
result = config.render_body_template(body=config.body, url="https://example.com", variable_pool=variable_pool)
|
||||
|
||||
assert result == "Hello World https://example.com"
|
||||
|
||||
|
||||
def test_render_markdown_body_renders_markdown_to_html():
|
||||
rendered = EmailDeliveryConfig.render_markdown_body("**Bold** and [link](https://example.com)")
|
||||
|
||||
assert "<strong>Bold</strong>" in rendered
|
||||
assert '<a href="https://example.com">link</a>' in rendered
|
||||
|
||||
|
||||
def test_render_markdown_body_sanitizes_unsafe_html():
|
||||
rendered = EmailDeliveryConfig.render_markdown_body(
|
||||
'<script>alert("xss")</script><a href="javascript:alert(1)" onclick="alert(2)">Click</a>'
|
||||
)
|
||||
|
||||
assert "<script" not in rendered
|
||||
assert "<a" not in rendered
|
||||
assert "onclick" not in rendered
|
||||
assert "javascript:" not in rendered
|
||||
assert "Click" in rendered
|
||||
|
||||
|
||||
def test_render_markdown_body_sanitizes_markdown_link_with_javascript_href():
|
||||
rendered = EmailDeliveryConfig.render_markdown_body("[bad](javascript:alert(1)) and [ok](https://example.com)")
|
||||
|
||||
assert "javascript:" not in rendered
|
||||
assert "<a>bad</a>" in rendered
|
||||
assert '<a href="https://example.com">ok</a>' in rendered
|
||||
|
||||
|
||||
def test_render_markdown_body_does_not_allow_raw_html_tags():
|
||||
rendered = EmailDeliveryConfig.render_markdown_body("<b>raw html</b> and **markdown**")
|
||||
|
||||
assert "<b>" not in rendered
|
||||
assert "raw html" in rendered
|
||||
assert "<strong>markdown</strong>" in rendered
|
||||
|
||||
|
||||
def test_render_markdown_body_supports_table_syntax():
|
||||
rendered = EmailDeliveryConfig.render_markdown_body("| h1 | h2 |\n| --- | ---: |\n| v1 | v2 |")
|
||||
|
||||
assert "<table>" in rendered
|
||||
assert "<thead>" in rendered
|
||||
assert "<tbody>" in rendered
|
||||
assert 'align="right"' in rendered
|
||||
assert "style=" not in rendered
|
||||
|
||||
|
||||
def test_sanitize_subject_removes_crlf():
|
||||
sanitized = EmailDeliveryConfig.sanitize_subject("Notice\r\nBCC:attacker@example.com")
|
||||
|
||||
assert "\r" not in sanitized
|
||||
assert "\n" not in sanitized
|
||||
assert sanitized == "Notice BCC:attacker@example.com"
|
||||
|
||||
|
||||
def test_sanitize_subject_removes_html_tags():
|
||||
sanitized = EmailDeliveryConfig.sanitize_subject("<b>Alert</b><img src=x onerror=1>")
|
||||
|
||||
assert "<" not in sanitized
|
||||
assert ">" not in sanitized
|
||||
assert sanitized == "Alert"
|
||||
|
||||
@ -207,6 +207,45 @@ class TestEmailDeliveryTestHandler:
|
||||
assert kwargs["to"] == "test@example.com"
|
||||
assert "RENDERED_Subj" in kwargs["subject"]
|
||||
|
||||
def test_send_test_sanitizes_subject(self, monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
service_module.FeatureService,
|
||||
"get_features",
|
||||
lambda _id: SimpleNamespace(human_input_email_delivery_enabled=True),
|
||||
)
|
||||
monkeypatch.setattr(service_module.mail, "is_inited", lambda: True)
|
||||
mock_mail_send = MagicMock()
|
||||
monkeypatch.setattr(service_module.mail, "send", mock_mail_send)
|
||||
monkeypatch.setattr(
|
||||
service_module,
|
||||
"render_email_template",
|
||||
lambda template, substitutions: template.replace("{{ recipient_email }}", substitutions["recipient_email"]),
|
||||
)
|
||||
|
||||
handler = EmailDeliveryTestHandler(session_factory=MagicMock())
|
||||
handler._resolve_recipients = MagicMock(return_value=["test@example.com"])
|
||||
|
||||
context = DeliveryTestContext(
|
||||
tenant_id="t1",
|
||||
app_id="a1",
|
||||
node_id="n1",
|
||||
node_title="title",
|
||||
rendered_content="content",
|
||||
recipients=[DeliveryTestEmailRecipient(email="test@example.com", form_token="token123")],
|
||||
)
|
||||
method = EmailDeliveryMethod(
|
||||
config=EmailDeliveryConfig(
|
||||
recipients=EmailRecipients(whole_workspace=False, items=[]),
|
||||
subject="<b>Notice</b>\r\nBCC:{{ recipient_email }}",
|
||||
body="Body",
|
||||
)
|
||||
)
|
||||
|
||||
handler.send_test(context=context, method=method)
|
||||
|
||||
_, kwargs = mock_mail_send.call_args
|
||||
assert kwargs["subject"] == "Notice BCC:test@example.com"
|
||||
|
||||
def test_resolve_recipients(self):
|
||||
handler = EmailDeliveryTestHandler(session_factory=MagicMock())
|
||||
|
||||
|
||||
@ -120,4 +120,37 @@ def test_dispatch_human_input_email_task_replaces_body_variables(monkeypatch: py
|
||||
session_factory=lambda: _DummySession(form),
|
||||
)
|
||||
|
||||
assert mail.sent[0]["html"] == "Body OK"
|
||||
assert mail.sent[0]["html"] == "<p>Body OK</p>"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("line_break", ["\r\n", "\r", "\n"])
|
||||
def test_dispatch_human_input_email_task_sanitizes_subject(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
line_break: str,
|
||||
):
|
||||
mail = _DummyMail()
|
||||
form = SimpleNamespace(id="form-1", tenant_id="tenant-1", workflow_run_id=None)
|
||||
job = task_module._EmailDeliveryJob(
|
||||
form_id="form-1",
|
||||
subject=f"Notice{line_break}BCC:attacker@example.com <b>Alert</b>",
|
||||
body="Body",
|
||||
form_content="content",
|
||||
recipients=[task_module._EmailRecipient(email="user@example.com", token="token-1")],
|
||||
)
|
||||
|
||||
monkeypatch.setattr(task_module, "mail", mail)
|
||||
monkeypatch.setattr(
|
||||
task_module.FeatureService,
|
||||
"get_features",
|
||||
lambda _tenant_id: SimpleNamespace(human_input_email_delivery_enabled=True),
|
||||
)
|
||||
monkeypatch.setattr(task_module, "_load_email_jobs", lambda _session, _form: [job])
|
||||
monkeypatch.setattr(task_module, "_load_variable_pool", lambda _workflow_run_id: None)
|
||||
|
||||
task_module.dispatch_human_input_email_task(
|
||||
form_id="form-1",
|
||||
node_title="Approve",
|
||||
session_factory=lambda: _DummySession(form),
|
||||
)
|
||||
|
||||
assert mail.sent[0]["subject"] == "Notice BCC:attacker@example.com Alert"
|
||||
|
||||
14
api/uv.lock
generated
14
api/uv.lock
generated
@ -658,6 +658,18 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/b3/cc/38b6f87170908bd8aaf9e412b021d17e85f690abe00edf50192f1a4566b9/billiard-4.2.3-py3-none-any.whl", hash = "sha256:989e9b688e3abf153f307b68a1328dfacfb954e30a4f920005654e276c69236b", size = 87042, upload-time = "2025-11-16T17:47:29.005Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bleach"
|
||||
version = "6.2.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "webencodings" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/76/9a/0e33f5054c54d349ea62c277191c020c2d6ef1d65ab2cb1993f91ec846d1/bleach-6.2.0.tar.gz", hash = "sha256:123e894118b8a599fd80d3ec1a6d4cc7ce4e5882b1317a7e1ba69b56e95f991f", size = 203083, upload-time = "2024-10-29T18:30:40.477Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/fc/55/96142937f66150805c25c4d0f31ee4132fd33497753400734f9dfdcbdc66/bleach-6.2.0-py3-none-any.whl", hash = "sha256:117d9c6097a7c3d22fd578fcd8d35ff1e125df6736f554da4e432fdd63f31e5e", size = 163406, upload-time = "2024-10-29T18:30:38.186Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "blinker"
|
||||
version = "1.9.0"
|
||||
@ -1529,6 +1541,7 @@ dependencies = [
|
||||
{ name = "arize-phoenix-otel" },
|
||||
{ name = "azure-identity" },
|
||||
{ name = "beautifulsoup4" },
|
||||
{ name = "bleach" },
|
||||
{ name = "boto3" },
|
||||
{ name = "bs4" },
|
||||
{ name = "cachetools" },
|
||||
@ -1730,6 +1743,7 @@ requires-dist = [
|
||||
{ name = "arize-phoenix-otel", specifier = "~=0.15.0" },
|
||||
{ name = "azure-identity", specifier = "==1.25.3" },
|
||||
{ name = "beautifulsoup4", specifier = "==4.14.3" },
|
||||
{ name = "bleach", specifier = "~=6.2.0" },
|
||||
{ name = "boto3", specifier = "==1.42.68" },
|
||||
{ name = "bs4", specifier = "~=0.0.1" },
|
||||
{ name = "cachetools", specifier = "~=5.3.0" },
|
||||
|
||||
Reference in New Issue
Block a user