From 57d476d4e200ca37e3b9afdfd3885a076a2cdefe Mon Sep 17 00:00:00 2001 From: Blackoutta <37723456+Blackoutta@users.noreply.github.com> Date: Mon, 16 Mar 2026 16:52:46 +0800 Subject: [PATCH] feat: apply markdown rendering to HITL email, sanitize email subject and body (#32305) This PR: 1. Fixes the bug that email body of `HumanInput` node are sent as-is, without markdown rendering or sanitization 2. Applies HTML sanitization to email subject and body 3. Removes `\r` and `\n` from email subject to prevent SMTP header injection Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: QuantumGhost Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- api/dify_graph/nodes/human_input/entities.py | 72 +++++++++++++++++++ api/pyproject.toml | 8 +-- .../human_input_delivery_test_service.py | 4 +- api/tasks/mail_human_input_delivery_task.py | 5 +- .../human_input/test_email_delivery_config.py | 61 ++++++++++++++++ .../test_human_input_delivery_test_service.py | 39 ++++++++++ .../test_mail_human_input_delivery_task.py | 35 ++++++++- api/uv.lock | 14 ++++ 8 files changed, 229 insertions(+), 9 deletions(-) diff --git a/api/dify_graph/nodes/human_input/entities.py b/api/dify_graph/nodes/human_input/entities.py index 7936e47213..2a33b4a0a8 100644 --- a/api/dify_graph/nodes/human_input/entities.py +++ b/api/dify_graph/nodes/human_input/entities.py @@ -8,6 +8,8 @@ from collections.abc import Mapping, Sequence from datetime import datetime, timedelta from typing import Annotated, Any, ClassVar, Literal, Self +import bleach +import markdown from pydantic import BaseModel, Field, field_validator, model_validator from dify_graph.entities.base_node_data import BaseNodeData @@ -58,6 +60,39 @@ class EmailDeliveryConfig(BaseModel): """Configuration for email delivery method.""" URL_PLACEHOLDER: ClassVar[str] = "{{#url#}}" + _SUBJECT_NEWLINE_PATTERN: ClassVar[re.Pattern[str]] = re.compile(r"[\r\n]+") + _ALLOWED_HTML_TAGS: ClassVar[list[str]] = [ + "a", + "blockquote", + "br", + "code", + "em", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "hr", + "li", + "ol", + "p", + "pre", + "strong", + "table", + "tbody", + "td", + "th", + "thead", + "tr", + "ul", + ] + _ALLOWED_HTML_ATTRIBUTES: ClassVar[dict[str, list[str]]] = { + "a": ["href", "title"], + "td": ["align"], + "th": ["align"], + } + _ALLOWED_PROTOCOLS: ClassVar[list[str]] = ["http", "https", "mailto"] recipients: EmailRecipients @@ -98,6 +133,43 @@ class EmailDeliveryConfig(BaseModel): return templated_body return variable_pool.convert_template(templated_body).text + @classmethod + def render_markdown_body(cls, body: str) -> str: + """Render markdown to safe HTML for email delivery.""" + sanitized_markdown = bleach.clean( + body, + tags=[], + attributes={}, + strip=True, + strip_comments=True, + ) + rendered_html = markdown.markdown( + sanitized_markdown, + extensions=["nl2br", "tables"], + extension_configs={"tables": {"use_align_attribute": True}}, + ) + return bleach.clean( + rendered_html, + tags=cls._ALLOWED_HTML_TAGS, + attributes=cls._ALLOWED_HTML_ATTRIBUTES, + protocols=cls._ALLOWED_PROTOCOLS, + strip=True, + strip_comments=True, + ) + + @classmethod + def sanitize_subject(cls, subject: str) -> str: + """Sanitize email subject to plain text and prevent CRLF injection.""" + sanitized_subject = bleach.clean( + subject, + tags=[], + attributes={}, + strip=True, + strip_comments=True, + ) + sanitized_subject = cls._SUBJECT_NEWLINE_PATTERN.sub(" ", sanitized_subject) + return " ".join(sanitized_subject.split()) + class _DeliveryMethodBase(BaseModel): """Base delivery method configuration.""" diff --git a/api/pyproject.toml b/api/pyproject.toml index 57d58ce5b8..841a877328 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -40,7 +40,7 @@ dependencies = [ "numpy~=1.26.4", "openpyxl~=3.1.5", "opik~=1.10.37", - "litellm==1.82.2", # Pinned to avoid madoka dependency issue + "litellm==1.82.2", # Pinned to avoid madoka dependency issue "opentelemetry-api==1.28.0", "opentelemetry-distro==0.49b0", "opentelemetry-exporter-otlp==1.28.0", @@ -91,6 +91,7 @@ dependencies = [ "apscheduler>=3.11.0", "weave>=0.52.16", "fastopenapi[flask]>=0.7.0", + "bleach~=6.2.0", ] # Before adding new dependency, consider place it in # alphabet order (a-z) and suitable group. @@ -251,10 +252,7 @@ ignore_errors = true [tool.pyrefly] project-includes = ["."] -project-excludes = [ - ".venv", - "migrations/", -] +project-excludes = [".venv", "migrations/"] python-platform = "linux" python-version = "3.11.0" infer-with-first-use = false diff --git a/api/services/human_input_delivery_test_service.py b/api/services/human_input_delivery_test_service.py index 80deb37a56..229e6608da 100644 --- a/api/services/human_input_delivery_test_service.py +++ b/api/services/human_input_delivery_test_service.py @@ -155,13 +155,15 @@ class EmailDeliveryTestHandler: context=context, recipient_email=recipient_email, ) - subject = render_email_template(method.config.subject, substitutions) + subject_template = render_email_template(method.config.subject, substitutions) + subject = EmailDeliveryConfig.sanitize_subject(subject_template) templated_body = EmailDeliveryConfig.render_body_template( body=method.config.body, url=substitutions.get("form_link"), variable_pool=context.variable_pool, ) body = render_email_template(templated_body, substitutions) + body = EmailDeliveryConfig.render_markdown_body(body) mail.send( to=recipient_email, diff --git a/api/tasks/mail_human_input_delivery_task.py b/api/tasks/mail_human_input_delivery_task.py index bded4cea2b..d241783359 100644 --- a/api/tasks/mail_human_input_delivery_task.py +++ b/api/tasks/mail_human_input_delivery_task.py @@ -111,7 +111,7 @@ def _render_body( url=form_link, variable_pool=variable_pool, ) - return body + return EmailDeliveryConfig.render_markdown_body(body) def _load_variable_pool(workflow_run_id: str | None) -> VariablePool | None: @@ -173,10 +173,11 @@ def dispatch_human_input_email_task(form_id: str, node_title: str | None = None, for recipient in job.recipients: form_link = _build_form_link(recipient.token) body = _render_body(job.body, form_link, variable_pool=variable_pool) + subject = EmailDeliveryConfig.sanitize_subject(job.subject) mail.send( to=recipient.email, - subject=job.subject, + subject=subject, html=body, ) diff --git a/api/tests/unit_tests/core/workflow/nodes/human_input/test_email_delivery_config.py b/api/tests/unit_tests/core/workflow/nodes/human_input/test_email_delivery_config.py index d4939b1071..d52dfa2a65 100644 --- a/api/tests/unit_tests/core/workflow/nodes/human_input/test_email_delivery_config.py +++ b/api/tests/unit_tests/core/workflow/nodes/human_input/test_email_delivery_config.py @@ -14,3 +14,64 @@ def test_render_body_template_replaces_variable_values(): result = config.render_body_template(body=config.body, url="https://example.com", variable_pool=variable_pool) assert result == "Hello World https://example.com" + + +def test_render_markdown_body_renders_markdown_to_html(): + rendered = EmailDeliveryConfig.render_markdown_body("**Bold** and [link](https://example.com)") + + assert "Bold" in rendered + assert 'link' in rendered + + +def test_render_markdown_body_sanitizes_unsafe_html(): + rendered = EmailDeliveryConfig.render_markdown_body( + 'Click' + ) + + assert "bad" in rendered + assert 'ok' in rendered + + +def test_render_markdown_body_does_not_allow_raw_html_tags(): + rendered = EmailDeliveryConfig.render_markdown_body("raw html and **markdown**") + + assert "" not in rendered + assert "raw html" in rendered + assert "markdown" in rendered + + +def test_render_markdown_body_supports_table_syntax(): + rendered = EmailDeliveryConfig.render_markdown_body("| h1 | h2 |\n| --- | ---: |\n| v1 | v2 |") + + assert "" in rendered + assert "" in rendered + assert "" in rendered + assert 'align="right"' in rendered + assert "style=" not in rendered + + +def test_sanitize_subject_removes_crlf(): + sanitized = EmailDeliveryConfig.sanitize_subject("Notice\r\nBCC:attacker@example.com") + + assert "\r" not in sanitized + assert "\n" not in sanitized + assert sanitized == "Notice BCC:attacker@example.com" + + +def test_sanitize_subject_removes_html_tags(): + sanitized = EmailDeliveryConfig.sanitize_subject("Alert") + + assert "<" not in sanitized + assert ">" not in sanitized + assert sanitized == "Alert" diff --git a/api/tests/unit_tests/services/test_human_input_delivery_test_service.py b/api/tests/unit_tests/services/test_human_input_delivery_test_service.py index 74139fd12d..a23c44b26e 100644 --- a/api/tests/unit_tests/services/test_human_input_delivery_test_service.py +++ b/api/tests/unit_tests/services/test_human_input_delivery_test_service.py @@ -207,6 +207,45 @@ class TestEmailDeliveryTestHandler: assert kwargs["to"] == "test@example.com" assert "RENDERED_Subj" in kwargs["subject"] + def test_send_test_sanitizes_subject(self, monkeypatch): + monkeypatch.setattr( + service_module.FeatureService, + "get_features", + lambda _id: SimpleNamespace(human_input_email_delivery_enabled=True), + ) + monkeypatch.setattr(service_module.mail, "is_inited", lambda: True) + mock_mail_send = MagicMock() + monkeypatch.setattr(service_module.mail, "send", mock_mail_send) + monkeypatch.setattr( + service_module, + "render_email_template", + lambda template, substitutions: template.replace("{{ recipient_email }}", substitutions["recipient_email"]), + ) + + handler = EmailDeliveryTestHandler(session_factory=MagicMock()) + handler._resolve_recipients = MagicMock(return_value=["test@example.com"]) + + context = DeliveryTestContext( + tenant_id="t1", + app_id="a1", + node_id="n1", + node_title="title", + rendered_content="content", + recipients=[DeliveryTestEmailRecipient(email="test@example.com", form_token="token123")], + ) + method = EmailDeliveryMethod( + config=EmailDeliveryConfig( + recipients=EmailRecipients(whole_workspace=False, items=[]), + subject="Notice\r\nBCC:{{ recipient_email }}", + body="Body", + ) + ) + + handler.send_test(context=context, method=method) + + _, kwargs = mock_mail_send.call_args + assert kwargs["subject"] == "Notice BCC:test@example.com" + def test_resolve_recipients(self): handler = EmailDeliveryTestHandler(session_factory=MagicMock()) diff --git a/api/tests/unit_tests/tasks/test_mail_human_input_delivery_task.py b/api/tests/unit_tests/tasks/test_mail_human_input_delivery_task.py index 20cb7a211e..37b7a85451 100644 --- a/api/tests/unit_tests/tasks/test_mail_human_input_delivery_task.py +++ b/api/tests/unit_tests/tasks/test_mail_human_input_delivery_task.py @@ -120,4 +120,37 @@ def test_dispatch_human_input_email_task_replaces_body_variables(monkeypatch: py session_factory=lambda: _DummySession(form), ) - assert mail.sent[0]["html"] == "Body OK" + assert mail.sent[0]["html"] == "

Body OK

" + + +@pytest.mark.parametrize("line_break", ["\r\n", "\r", "\n"]) +def test_dispatch_human_input_email_task_sanitizes_subject( + monkeypatch: pytest.MonkeyPatch, + line_break: str, +): + mail = _DummyMail() + form = SimpleNamespace(id="form-1", tenant_id="tenant-1", workflow_run_id=None) + job = task_module._EmailDeliveryJob( + form_id="form-1", + subject=f"Notice{line_break}BCC:attacker@example.com Alert", + body="Body", + form_content="content", + recipients=[task_module._EmailRecipient(email="user@example.com", token="token-1")], + ) + + monkeypatch.setattr(task_module, "mail", mail) + monkeypatch.setattr( + task_module.FeatureService, + "get_features", + lambda _tenant_id: SimpleNamespace(human_input_email_delivery_enabled=True), + ) + monkeypatch.setattr(task_module, "_load_email_jobs", lambda _session, _form: [job]) + monkeypatch.setattr(task_module, "_load_variable_pool", lambda _workflow_run_id: None) + + task_module.dispatch_human_input_email_task( + form_id="form-1", + node_title="Approve", + session_factory=lambda: _DummySession(form), + ) + + assert mail.sent[0]["subject"] == "Notice BCC:attacker@example.com Alert" diff --git a/api/uv.lock b/api/uv.lock index 547b2fabc7..c707c574e3 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -658,6 +658,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/cc/38b6f87170908bd8aaf9e412b021d17e85f690abe00edf50192f1a4566b9/billiard-4.2.3-py3-none-any.whl", hash = "sha256:989e9b688e3abf153f307b68a1328dfacfb954e30a4f920005654e276c69236b", size = 87042, upload-time = "2025-11-16T17:47:29.005Z" }, ] +[[package]] +name = "bleach" +version = "6.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "webencodings" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/76/9a/0e33f5054c54d349ea62c277191c020c2d6ef1d65ab2cb1993f91ec846d1/bleach-6.2.0.tar.gz", hash = "sha256:123e894118b8a599fd80d3ec1a6d4cc7ce4e5882b1317a7e1ba69b56e95f991f", size = 203083, upload-time = "2024-10-29T18:30:40.477Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/55/96142937f66150805c25c4d0f31ee4132fd33497753400734f9dfdcbdc66/bleach-6.2.0-py3-none-any.whl", hash = "sha256:117d9c6097a7c3d22fd578fcd8d35ff1e125df6736f554da4e432fdd63f31e5e", size = 163406, upload-time = "2024-10-29T18:30:38.186Z" }, +] + [[package]] name = "blinker" version = "1.9.0" @@ -1529,6 +1541,7 @@ dependencies = [ { name = "arize-phoenix-otel" }, { name = "azure-identity" }, { name = "beautifulsoup4" }, + { name = "bleach" }, { name = "boto3" }, { name = "bs4" }, { name = "cachetools" }, @@ -1730,6 +1743,7 @@ requires-dist = [ { name = "arize-phoenix-otel", specifier = "~=0.15.0" }, { name = "azure-identity", specifier = "==1.25.3" }, { name = "beautifulsoup4", specifier = "==4.14.3" }, + { name = "bleach", specifier = "~=6.2.0" }, { name = "boto3", specifier = "==1.42.68" }, { name = "bs4", specifier = "~=0.0.1" }, { name = "cachetools", specifier = "~=5.3.0" },