feat: apply markdown rendering to HITL email, sanitize email subject and body (#32305)

This PR: 1. Fixes the bug that email body of `HumanInput` node are sent as-is, without markdown rendering or sanitization 2. Applies HTML sanitization to email subject and body 3. Removes `\r` and `\n` from email subject to prevent SMTP header injection Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2026-05-30 13:47:52 +08:00 · 2026-03-16 16:52:46 +08:00
parent 4822d550b6
commit 57d476d4e2
8 changed files with 229 additions and 9 deletions
--- a/api/dify_graph/nodes/human_input/entities.py
+++ b/api/dify_graph/nodes/human_input/entities.py
@ -8,6 +8,8 @@ from collections.abc import Mapping, Sequence
 from datetime import datetime, timedelta
 from typing import Annotated, Any, ClassVar, Literal, Self

+import bleach
+import markdown
 from pydantic import BaseModel, Field, field_validator, model_validator

 from dify_graph.entities.base_node_data import BaseNodeData
@ -58,6 +60,39 @@ class EmailDeliveryConfig(BaseModel):
    """Configuration for email delivery method."""

    URL_PLACEHOLDER: ClassVar[str] = "{{#url#}}"
+    _SUBJECT_NEWLINE_PATTERN: ClassVar[re.Pattern[str]] = re.compile(r"[\r\n]+")
+    _ALLOWED_HTML_TAGS: ClassVar[list[str]] = [
+        "a",
+        "blockquote",
+        "br",
+        "code",
+        "em",
+        "h1",
+        "h2",
+        "h3",
+        "h4",
+        "h5",
+        "h6",
+        "hr",
+        "li",
+        "ol",
+        "p",
+        "pre",
+        "strong",
+        "table",
+        "tbody",
+        "td",
+        "th",
+        "thead",
+        "tr",
+        "ul",
+    ]
+    _ALLOWED_HTML_ATTRIBUTES: ClassVar[dict[str, list[str]]] = {
+        "a": ["href", "title"],
+        "td": ["align"],
+        "th": ["align"],
+    }
+    _ALLOWED_PROTOCOLS: ClassVar[list[str]] = ["http", "https", "mailto"]

    recipients: EmailRecipients

@ -98,6 +133,43 @@ class EmailDeliveryConfig(BaseModel):
            return templated_body
        return variable_pool.convert_template(templated_body).text

+    @classmethod
+    def render_markdown_body(cls, body: str) -> str:
+        """Render markdown to safe HTML for email delivery."""
+        sanitized_markdown = bleach.clean(
+            body,
+            tags=[],
+            attributes={},
+            strip=True,
+            strip_comments=True,
+        )
+        rendered_html = markdown.markdown(
+            sanitized_markdown,
+            extensions=["nl2br", "tables"],
+            extension_configs={"tables": {"use_align_attribute": True}},
+        )
+        return bleach.clean(
+            rendered_html,
+            tags=cls._ALLOWED_HTML_TAGS,
+            attributes=cls._ALLOWED_HTML_ATTRIBUTES,
+            protocols=cls._ALLOWED_PROTOCOLS,
+            strip=True,
+            strip_comments=True,
+        )
+
+    @classmethod
+    def sanitize_subject(cls, subject: str) -> str:
+        """Sanitize email subject to plain text and prevent CRLF injection."""
+        sanitized_subject = bleach.clean(
+            subject,
+            tags=[],
+            attributes={},
+            strip=True,
+            strip_comments=True,
+        )
+        sanitized_subject = cls._SUBJECT_NEWLINE_PATTERN.sub(" ", sanitized_subject)
+        return " ".join(sanitized_subject.split())
+

 class _DeliveryMethodBase(BaseModel):
    """Base delivery method configuration."""