feat: apply markdown rendering to HITL email, sanitize email subject and body (#32305)

This PR:

1. Fixes the bug that email body of `HumanInput` node are sent as-is, without markdown rendering or sanitization
2. Applies HTML sanitization to email subject and body
3. Removes `\r` and `\n` from email subject to prevent SMTP header injection

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
Blackoutta
2026-03-16 16:52:46 +08:00
committed by GitHub
parent 4822d550b6
commit 57d476d4e2
8 changed files with 229 additions and 9 deletions

View File

@ -8,6 +8,8 @@ from collections.abc import Mapping, Sequence
from datetime import datetime, timedelta
from typing import Annotated, Any, ClassVar, Literal, Self
import bleach
import markdown
from pydantic import BaseModel, Field, field_validator, model_validator
from dify_graph.entities.base_node_data import BaseNodeData
@ -58,6 +60,39 @@ class EmailDeliveryConfig(BaseModel):
"""Configuration for email delivery method."""
URL_PLACEHOLDER: ClassVar[str] = "{{#url#}}"
_SUBJECT_NEWLINE_PATTERN: ClassVar[re.Pattern[str]] = re.compile(r"[\r\n]+")
_ALLOWED_HTML_TAGS: ClassVar[list[str]] = [
"a",
"blockquote",
"br",
"code",
"em",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"hr",
"li",
"ol",
"p",
"pre",
"strong",
"table",
"tbody",
"td",
"th",
"thead",
"tr",
"ul",
]
_ALLOWED_HTML_ATTRIBUTES: ClassVar[dict[str, list[str]]] = {
"a": ["href", "title"],
"td": ["align"],
"th": ["align"],
}
_ALLOWED_PROTOCOLS: ClassVar[list[str]] = ["http", "https", "mailto"]
recipients: EmailRecipients
@ -98,6 +133,43 @@ class EmailDeliveryConfig(BaseModel):
return templated_body
return variable_pool.convert_template(templated_body).text
@classmethod
def render_markdown_body(cls, body: str) -> str:
"""Render markdown to safe HTML for email delivery."""
sanitized_markdown = bleach.clean(
body,
tags=[],
attributes={},
strip=True,
strip_comments=True,
)
rendered_html = markdown.markdown(
sanitized_markdown,
extensions=["nl2br", "tables"],
extension_configs={"tables": {"use_align_attribute": True}},
)
return bleach.clean(
rendered_html,
tags=cls._ALLOWED_HTML_TAGS,
attributes=cls._ALLOWED_HTML_ATTRIBUTES,
protocols=cls._ALLOWED_PROTOCOLS,
strip=True,
strip_comments=True,
)
@classmethod
def sanitize_subject(cls, subject: str) -> str:
"""Sanitize email subject to plain text and prevent CRLF injection."""
sanitized_subject = bleach.clean(
subject,
tags=[],
attributes={},
strip=True,
strip_comments=True,
)
sanitized_subject = cls._SUBJECT_NEWLINE_PATTERN.sub(" ", sanitized_subject)
return " ".join(sanitized_subject.split())
class _DeliveryMethodBase(BaseModel):
"""Base delivery method configuration."""

View File

@ -40,7 +40,7 @@ dependencies = [
"numpy~=1.26.4",
"openpyxl~=3.1.5",
"opik~=1.10.37",
"litellm==1.82.2", # Pinned to avoid madoka dependency issue
"litellm==1.82.2", # Pinned to avoid madoka dependency issue
"opentelemetry-api==1.28.0",
"opentelemetry-distro==0.49b0",
"opentelemetry-exporter-otlp==1.28.0",
@ -91,6 +91,7 @@ dependencies = [
"apscheduler>=3.11.0",
"weave>=0.52.16",
"fastopenapi[flask]>=0.7.0",
"bleach~=6.2.0",
]
# Before adding new dependency, consider place it in
# alphabet order (a-z) and suitable group.
@ -251,10 +252,7 @@ ignore_errors = true
[tool.pyrefly]
project-includes = ["."]
project-excludes = [
".venv",
"migrations/",
]
project-excludes = [".venv", "migrations/"]
python-platform = "linux"
python-version = "3.11.0"
infer-with-first-use = false

View File

@ -155,13 +155,15 @@ class EmailDeliveryTestHandler:
context=context,
recipient_email=recipient_email,
)
subject = render_email_template(method.config.subject, substitutions)
subject_template = render_email_template(method.config.subject, substitutions)
subject = EmailDeliveryConfig.sanitize_subject(subject_template)
templated_body = EmailDeliveryConfig.render_body_template(
body=method.config.body,
url=substitutions.get("form_link"),
variable_pool=context.variable_pool,
)
body = render_email_template(templated_body, substitutions)
body = EmailDeliveryConfig.render_markdown_body(body)
mail.send(
to=recipient_email,

View File

@ -111,7 +111,7 @@ def _render_body(
url=form_link,
variable_pool=variable_pool,
)
return body
return EmailDeliveryConfig.render_markdown_body(body)
def _load_variable_pool(workflow_run_id: str | None) -> VariablePool | None:
@ -173,10 +173,11 @@ def dispatch_human_input_email_task(form_id: str, node_title: str | None = None,
for recipient in job.recipients:
form_link = _build_form_link(recipient.token)
body = _render_body(job.body, form_link, variable_pool=variable_pool)
subject = EmailDeliveryConfig.sanitize_subject(job.subject)
mail.send(
to=recipient.email,
subject=job.subject,
subject=subject,
html=body,
)

View File

@ -14,3 +14,64 @@ def test_render_body_template_replaces_variable_values():
result = config.render_body_template(body=config.body, url="https://example.com", variable_pool=variable_pool)
assert result == "Hello World https://example.com"
def test_render_markdown_body_renders_markdown_to_html():
rendered = EmailDeliveryConfig.render_markdown_body("**Bold** and [link](https://example.com)")
assert "<strong>Bold</strong>" in rendered
assert '<a href="https://example.com">link</a>' in rendered
def test_render_markdown_body_sanitizes_unsafe_html():
rendered = EmailDeliveryConfig.render_markdown_body(
'<script>alert("xss")</script><a href="javascript:alert(1)" onclick="alert(2)">Click</a>'
)
assert "<script" not in rendered
assert "<a" not in rendered
assert "onclick" not in rendered
assert "javascript:" not in rendered
assert "Click" in rendered
def test_render_markdown_body_sanitizes_markdown_link_with_javascript_href():
rendered = EmailDeliveryConfig.render_markdown_body("[bad](javascript:alert(1)) and [ok](https://example.com)")
assert "javascript:" not in rendered
assert "<a>bad</a>" in rendered
assert '<a href="https://example.com">ok</a>' in rendered
def test_render_markdown_body_does_not_allow_raw_html_tags():
rendered = EmailDeliveryConfig.render_markdown_body("<b>raw html</b> and **markdown**")
assert "<b>" not in rendered
assert "raw html" in rendered
assert "<strong>markdown</strong>" in rendered
def test_render_markdown_body_supports_table_syntax():
rendered = EmailDeliveryConfig.render_markdown_body("| h1 | h2 |\n| --- | ---: |\n| v1 | v2 |")
assert "<table>" in rendered
assert "<thead>" in rendered
assert "<tbody>" in rendered
assert 'align="right"' in rendered
assert "style=" not in rendered
def test_sanitize_subject_removes_crlf():
sanitized = EmailDeliveryConfig.sanitize_subject("Notice\r\nBCC:attacker@example.com")
assert "\r" not in sanitized
assert "\n" not in sanitized
assert sanitized == "Notice BCC:attacker@example.com"
def test_sanitize_subject_removes_html_tags():
sanitized = EmailDeliveryConfig.sanitize_subject("<b>Alert</b><img src=x onerror=1>")
assert "<" not in sanitized
assert ">" not in sanitized
assert sanitized == "Alert"

View File

@ -207,6 +207,45 @@ class TestEmailDeliveryTestHandler:
assert kwargs["to"] == "test@example.com"
assert "RENDERED_Subj" in kwargs["subject"]
def test_send_test_sanitizes_subject(self, monkeypatch):
monkeypatch.setattr(
service_module.FeatureService,
"get_features",
lambda _id: SimpleNamespace(human_input_email_delivery_enabled=True),
)
monkeypatch.setattr(service_module.mail, "is_inited", lambda: True)
mock_mail_send = MagicMock()
monkeypatch.setattr(service_module.mail, "send", mock_mail_send)
monkeypatch.setattr(
service_module,
"render_email_template",
lambda template, substitutions: template.replace("{{ recipient_email }}", substitutions["recipient_email"]),
)
handler = EmailDeliveryTestHandler(session_factory=MagicMock())
handler._resolve_recipients = MagicMock(return_value=["test@example.com"])
context = DeliveryTestContext(
tenant_id="t1",
app_id="a1",
node_id="n1",
node_title="title",
rendered_content="content",
recipients=[DeliveryTestEmailRecipient(email="test@example.com", form_token="token123")],
)
method = EmailDeliveryMethod(
config=EmailDeliveryConfig(
recipients=EmailRecipients(whole_workspace=False, items=[]),
subject="<b>Notice</b>\r\nBCC:{{ recipient_email }}",
body="Body",
)
)
handler.send_test(context=context, method=method)
_, kwargs = mock_mail_send.call_args
assert kwargs["subject"] == "Notice BCC:test@example.com"
def test_resolve_recipients(self):
handler = EmailDeliveryTestHandler(session_factory=MagicMock())

View File

@ -120,4 +120,37 @@ def test_dispatch_human_input_email_task_replaces_body_variables(monkeypatch: py
session_factory=lambda: _DummySession(form),
)
assert mail.sent[0]["html"] == "Body OK"
assert mail.sent[0]["html"] == "<p>Body OK</p>"
@pytest.mark.parametrize("line_break", ["\r\n", "\r", "\n"])
def test_dispatch_human_input_email_task_sanitizes_subject(
monkeypatch: pytest.MonkeyPatch,
line_break: str,
):
mail = _DummyMail()
form = SimpleNamespace(id="form-1", tenant_id="tenant-1", workflow_run_id=None)
job = task_module._EmailDeliveryJob(
form_id="form-1",
subject=f"Notice{line_break}BCC:attacker@example.com <b>Alert</b>",
body="Body",
form_content="content",
recipients=[task_module._EmailRecipient(email="user@example.com", token="token-1")],
)
monkeypatch.setattr(task_module, "mail", mail)
monkeypatch.setattr(
task_module.FeatureService,
"get_features",
lambda _tenant_id: SimpleNamespace(human_input_email_delivery_enabled=True),
)
monkeypatch.setattr(task_module, "_load_email_jobs", lambda _session, _form: [job])
monkeypatch.setattr(task_module, "_load_variable_pool", lambda _workflow_run_id: None)
task_module.dispatch_human_input_email_task(
form_id="form-1",
node_title="Approve",
session_factory=lambda: _DummySession(form),
)
assert mail.sent[0]["subject"] == "Notice BCC:attacker@example.com Alert"

14
api/uv.lock generated
View File

@ -658,6 +658,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/b3/cc/38b6f87170908bd8aaf9e412b021d17e85f690abe00edf50192f1a4566b9/billiard-4.2.3-py3-none-any.whl", hash = "sha256:989e9b688e3abf153f307b68a1328dfacfb954e30a4f920005654e276c69236b", size = 87042, upload-time = "2025-11-16T17:47:29.005Z" },
]
[[package]]
name = "bleach"
version = "6.2.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "webencodings" },
]
sdist = { url = "https://files.pythonhosted.org/packages/76/9a/0e33f5054c54d349ea62c277191c020c2d6ef1d65ab2cb1993f91ec846d1/bleach-6.2.0.tar.gz", hash = "sha256:123e894118b8a599fd80d3ec1a6d4cc7ce4e5882b1317a7e1ba69b56e95f991f", size = 203083, upload-time = "2024-10-29T18:30:40.477Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/fc/55/96142937f66150805c25c4d0f31ee4132fd33497753400734f9dfdcbdc66/bleach-6.2.0-py3-none-any.whl", hash = "sha256:117d9c6097a7c3d22fd578fcd8d35ff1e125df6736f554da4e432fdd63f31e5e", size = 163406, upload-time = "2024-10-29T18:30:38.186Z" },
]
[[package]]
name = "blinker"
version = "1.9.0"
@ -1529,6 +1541,7 @@ dependencies = [
{ name = "arize-phoenix-otel" },
{ name = "azure-identity" },
{ name = "beautifulsoup4" },
{ name = "bleach" },
{ name = "boto3" },
{ name = "bs4" },
{ name = "cachetools" },
@ -1730,6 +1743,7 @@ requires-dist = [
{ name = "arize-phoenix-otel", specifier = "~=0.15.0" },
{ name = "azure-identity", specifier = "==1.25.3" },
{ name = "beautifulsoup4", specifier = "==4.14.3" },
{ name = "bleach", specifier = "~=6.2.0" },
{ name = "boto3", specifier = "==1.42.68" },
{ name = "bs4", specifier = "~=0.0.1" },
{ name = "cachetools", specifier = "~=5.3.0" },