Save Image advanced node.

2026-05-13 05:17:18 +08:00 · 2026-05-12 01:33:04 -04:00
4 changed files with 418 additions and 38 deletions
--- a/.spectral.yaml
+++ b/.spectral.yaml
@ -89,12 +89,3 @@ rules:
    then:
      field: description
      function: truthy
-
-overrides:
-  # /ws uses HTTP 101 (Switching Protocols) — a legitimate response for a
-  # WebSocket upgrade, but not a 2xx, so operation-success-response fires
-  # as a false positive. OpenAPI 3.x has no native WebSocket support.
-  - files:
-      - "openapi.yaml#/paths/~1ws"
-    rules:
-      operation-success-response: off
--- a/comfy_api_nodes/nodes_quiver.py
+++ b/comfy_api_nodes/nodes_quiver.py
@ -143,7 +143,7 @@ class QuiverTextToSVGNode(IO.ComfyNode):
        if reference_images:
            references = []
            for key in reference_images:
-                url = await upload_image_to_comfyapi(cls, reference_images[key], mime_type="image/png")
+                url = await upload_image_to_comfyapi(cls, reference_images[key])
                references.append(QuiverImageObject(url=url))
            if len(references) > 4:
                raise ValueError("Maximum 4 reference images are allowed.")
@ -252,7 +252,7 @@ class QuiverImageToSVGNode(IO.ComfyNode):
        model: dict,
        seed: int,
    ) -> IO.NodeOutput:
-        image_url = await upload_image_to_comfyapi(cls, image, mime_type="image/png")
+        image_url = await upload_image_to_comfyapi(cls, image)

        response = await sync_op(
            cls,
--- a/comfy_extras/nodes_images.py
+++ b/comfy_extras/nodes_images.py
@ -3,15 +3,23 @@ from __future__ import annotations
 import nodes
 import folder_paths

+import av
 import json
+
 import os
 import re
 import math
+import numpy as np
+import struct
 import torch
+
+import zlib
 import comfy.utils
+from fractions import Fraction

 from server import PromptServer
 from comfy_api.latest import ComfyExtension, IO, UI
+from comfy.cli_args import args
 from typing_extensions import override

 SVG = IO.SVG.Type  # TODO: temporary solution for backward compatibility, will be removed later.
@ -830,6 +838,405 @@ class ImageMergeTileList(IO.ComfyNode):
        return IO.NodeOutput(merged_image)


+# ---------------------------------------------------------------------------
+# Format specifications
+# ---------------------------------------------------------------------------
+
+# Maps (file_format, bit_depth, has_alpha) -> (numpy dtype scale, av pixel format,
+# stream pix_fmt). Keeps the encode path declarative instead of branchy.
+_FORMAT_SPECS = {
+    ("png", "8-bit", False):  {"scale": 255.0,   "dtype": np.uint8,   "frame_fmt": "rgb24",     "stream_fmt": "rgb24"},
+    ("png", "8-bit", True):   {"scale": 255.0,   "dtype": np.uint8,   "frame_fmt": "rgba",      "stream_fmt": "rgba"},
+    ("png", "16-bit", False): {"scale": 65535.0, "dtype": np.uint16,  "frame_fmt": "rgb48le",   "stream_fmt": "rgb48be"},
+    ("png", "16-bit", True):  {"scale": 65535.0, "dtype": np.uint16,  "frame_fmt": "rgba64le",  "stream_fmt": "rgba64be"},
+    ("exr", "32-bit float", False): {"scale": 1.0, "dtype": np.float32, "frame_fmt": "gbrpf32le",  "stream_fmt": "gbrpf32le"},
+    ("exr", "32-bit float", True):  {"scale": 1.0, "dtype": np.float32, "frame_fmt": "gbrapf32le", "stream_fmt": "gbrapf32le"},
+}
+
+
+# ---------------------------------------------------------------------------
+# Color transforms
+# ---------------------------------------------------------------------------
+
+def srgb_to_linear(t: torch.Tensor) -> torch.Tensor:
+    """Inverse sRGB EOTF (IEC 61966-2-1). Operates on RGB channels only;
+    alpha (if present as the 4th channel) is passed through unchanged."""
+    if t.shape[-1] == 4:
+        rgb, alpha = t[..., :3], t[..., 3:]
+        return torch.cat([srgb_to_linear(rgb), alpha], dim=-1)
+
+    # Piecewise: linear toe below 0.04045, gamma curve above.
+    low = t / 12.92
+    high = ((t.clamp(min=0.0) + 0.055) / 1.055) ** 2.4
+    return torch.where(t <= 0.04045, low, high)
+
+
+# HLG OETF constants from BT.2100 Table 5.
+_HLG_A = 0.17883277
+_HLG_B = 0.28466892
+_HLG_C = 0.55991072928   # = 0.5 - a*ln(4*a)
+
+
+def hlg_to_linear(t: torch.Tensor) -> torch.Tensor:
+    """Inverse HLG OETF (BT.2100). Maps a non-linear HLG signal in [0, 1] to
+    *scene*-linear light in [0, 1]. Per BT.2100 Note 5a, this is the correct
+    transform when converting HLG to a linear scene-light representation
+    (rather than display-light, which would also involve the HLG OOTF).
+
+    Operates on RGB channels only; alpha is passed through unchanged."""
+    if t.shape[-1] == 4:
+        rgb, alpha = t[..., :3], t[..., 3:]
+        return torch.cat([hlg_to_linear(rgb), alpha], dim=-1)
+
+    # Piecewise: sqrt branch below 0.5, log branch above.
+    # Clamp inside the log branch so negative / out-of-range values don't blow up;
+    # values above 1.0 are allowed and extrapolate naturally.
+    low = (t ** 2) / 3.0
+    high = (torch.exp((t.clamp(min=_HLG_C) - _HLG_C) / _HLG_A) + _HLG_B) / 12.0
+    return torch.where(t <= 0.5, low, high)
+
+
+# ---------------------------------------------------------------------------
+# Metadata injection
+# ---------------------------------------------------------------------------
+
+_PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n"
+
+
+def _png_chunk(chunk_type: bytes, data: bytes) -> bytes:
+    """Build a single PNG chunk: length | type | data | CRC32(type+data)."""
+    crc = zlib.crc32(chunk_type + data) & 0xFFFFFFFF
+    return struct.pack(">I", len(data)) + chunk_type + data + struct.pack(">I", crc)
+
+
+def _png_text_chunk(keyword: str, text: str) -> bytes:
+    """tEXt chunk: latin-1 keyword + NUL + latin-1 text."""
+    payload = keyword.encode("latin-1") + b"\x00" + text.encode("latin-1", errors="replace")
+    return _png_chunk(b"tEXt", payload)
+
+
+def inject_png_metadata(png_bytes: bytes, prompt: dict | None, extra_pnginfo: dict | None) -> bytes:
+    """Insert ComfyUI prompt/workflow as tEXt chunks right after IHDR."""
+    if not png_bytes.startswith(_PNG_SIGNATURE):
+        return png_bytes
+
+    chunks: list[bytes] = []
+    if prompt is not None:
+        chunks.append(_png_text_chunk("prompt", json.dumps(prompt)))
+    if extra_pnginfo:
+        for key, value in extra_pnginfo.items():
+            chunks.append(_png_text_chunk(key, json.dumps(value)))
+    if not chunks:
+        return png_bytes
+
+    # IHDR is always the first chunk; insert ours immediately after it.
+    ihdr_length = struct.unpack(">I", png_bytes[8:12])[0]
+    ihdr_end = 8 + 8 + ihdr_length + 4  # signature + (len+type) + data + crc
+    return png_bytes[:ihdr_end] + b"".join(chunks) + png_bytes[ihdr_end:]
+
+
+# Standard chromaticities (CIE 1931 xy) for the colorspaces this node writes.
+# Each tuple is (Rx, Ry, Gx, Gy, Bx, By, Wx, Wy). All share D65 white point.
+_CHROMATICITIES = {
+    # ITU-R BT.709 / sRGB primaries
+    "Rec.709":  (0.6400, 0.3300, 0.3000, 0.6000, 0.1500, 0.0600, 0.3127, 0.3290),
+    # ITU-R BT.2020 (UHDTV / wide-gamut HDR) primaries
+    "Rec.2020": (0.7080, 0.2920, 0.1700, 0.7970, 0.1310, 0.0460, 0.3127, 0.3290),
+}
+
+
+def _pack_chromaticities(primaries: tuple) -> bytes:
+    """Serialize 8 chromaticity floats into the EXR `chromaticities` payload."""
+    return struct.pack("<8f", *primaries)
+
+
+def _exr_attribute(name: str, attr_type: str, value: bytes) -> bytes:
+    """Serialize one EXR header attribute: name\\0 type\\0 size:int32 value."""
+    return (
+        name.encode("utf-8") + b"\x00"
+        + attr_type.encode("utf-8") + b"\x00"
+        + struct.pack("<i", len(value))
+        + value
+    )
+
+
+def inject_exr_metadata(
+    exr_bytes: bytes,
+    prompt: dict | None,
+    extra_pnginfo: dict | None,
+    colorspace: str | None = None,
+) -> bytes:
+    """Insert ComfyUI metadata and color-space info into an EXR header.
+
+    Color: EXR pixels are linear by convention. The standard way to describe
+    their RGB→XYZ relationship is the `chromaticities` attribute. We pick the
+    primaries that match what the user told us their input was:
+
+      colorspace="sRGB" → Rec. 709 / sRGB primaries (D65)
+      colorspace="HDR"  → Rec. 2020 / BT.2100 primaries (D65)
+
+    Pixels are always converted to linear scene light upstream (sRGB EOTF
+    inverse for sRGB; HLG OETF inverse for HDR), so the file content is
+    scene-linear in the indicated gamut. OpenEXR has no standard transfer-
+    function attribute (the OpenEXR TSC has discussed adding one but it
+    doesn't exist), so we don't invent one — `chromaticities` plus the EXR
+    linear-by-convention rule fully specifies the color.
+
+    Prompt/workflow: written as plain `string` attributes using the same keys
+    (`prompt`, `workflow`, ...) that Comfy uses for PNG tEXt chunks, so the
+    same readers can pull them out symmetrically.
+
+    Implementation note: the chunk-offset table that follows the header stores
+    *absolute* byte offsets into the file. Inserting N bytes into the header
+    means every offset must be incremented by N or the file becomes unreadable.
+    """
+    if len(exr_bytes) < 8 or exr_bytes[:4] != b"\x76\x2f\x31\x01":
+        return exr_bytes
+
+    new_blob = b""
+    if prompt is not None:
+        new_blob += _exr_attribute("prompt", "string", json.dumps(prompt).encode("utf-8"))
+    if extra_pnginfo:
+        for key, value in extra_pnginfo.items():
+            new_blob += _exr_attribute(key, "string", json.dumps(value).encode("utf-8"))
+    if colorspace is not None:
+        # Map each colorspace option to the RGB primaries the linear pixels
+        # are now in. "sRGB" and "linear" both produce Rec. 709 linear; "HDR"
+        # (HLG-encoded Rec. 2020 input) produces Rec. 2020 linear.
+        primaries_name = {
+            "sRGB":   "Rec.709",
+            "linear": "Rec.709",
+            "HDR":    "Rec.2020",
+        }.get(colorspace, "Rec.709")
+        new_blob += _exr_attribute(
+            "chromaticities",
+            "chromaticities",
+            _pack_chromaticities(_CHROMATICITIES[primaries_name]),
+        )
+    if not new_blob:
+        return exr_bytes
+
+    # Walk header attributes to find the terminating null byte, and pick up
+    # dataWindow + compression so we know how many chunks the offset table has.
+    pos = 8  # past magic (4) + version (4)
+    data_window = None
+    compression = 0
+    while pos < len(exr_bytes) and exr_bytes[pos] != 0:
+        name_end = exr_bytes.index(b"\x00", pos)
+        attr_name = exr_bytes[pos:name_end].decode("latin-1", errors="replace")
+        type_end = exr_bytes.index(b"\x00", name_end + 1)
+        attr_type = exr_bytes[name_end + 1:type_end].decode("latin-1", errors="replace")
+        size = struct.unpack("<i", exr_bytes[type_end + 1:type_end + 5])[0]
+        value_start = type_end + 5
+        value = exr_bytes[value_start:value_start + size]
+
+        if attr_name == "dataWindow" and attr_type == "box2i":
+            data_window = struct.unpack("<iiii", value)  # xMin, yMin, xMax, yMax
+        elif attr_name == "compression" and attr_type == "compression":
+            compression = value[0]
+
+        pos = value_start + size
+
+    if data_window is None:
+        return exr_bytes  # required attribute missing — don't risk corrupting
+
+    # Scanlines per chunk by compression, from the OpenEXR spec.
+    scanlines_per_block = {
+        0: 1,   # NO_COMPRESSION
+        1: 1,   # RLE
+        2: 1,   # ZIPS
+        3: 16,  # ZIP
+        4: 32,  # PIZ
+        5: 16,  # PXR24
+        6: 32,  # B44
+        7: 32,  # B44A
+        8: 256, # DWAA
+        9: 256, # DWAB
+    }.get(compression, 1)
+
+    _, y_min, _, y_max = data_window
+    height = y_max - y_min + 1
+    num_chunks = (height + scanlines_per_block - 1) // scanlines_per_block
+
+    header_end = pos  # position of the terminating null byte
+    table_start = header_end + 1
+    pixel_start = table_start + num_chunks * 8
+    delta = len(new_blob)
+
+    old_offsets = struct.unpack(f"<{num_chunks}Q", exr_bytes[table_start:pixel_start])
+    new_table = struct.pack(f"<{num_chunks}Q", *(o + delta for o in old_offsets))
+
+    return (
+        exr_bytes[:header_end]                # header attributes
+        + new_blob                            # our new attributes
+        + exr_bytes[header_end:table_start]   # terminating null byte
+        + new_table                           # shifted offset table
+        + exr_bytes[pixel_start:]             # pixel data, untouched
+    )
+
+
+# ---------------------------------------------------------------------------
+# Encoding
+# ---------------------------------------------------------------------------
+
+def _encode_image(
+    img_tensor: torch.Tensor,
+    file_format: str,
+    bit_depth: str,
+    colorspace: str,
+) -> bytes:
+    """Encode a single HxWxC tensor to PNG or EXR bytes in memory.
+
+    For EXR the input is interpreted according to `colorspace` and converted
+    to scene-linear (EXR's convention) before writing:
+
+      "sRGB"   → input is sRGB-encoded Rec. 709; apply inverse sRGB EOTF.
+      "HDR"    → input is HLG-encoded Rec. 2020 (BT.2100); apply inverse HLG
+                 OETF to get scene-linear, per BT.2100 Note 5a.
+      "linear" → input is already scene-linear (Rec. 709 primaries); write
+                 through unchanged. Use this for renderer/compositor output.
+
+    For PNG, colorspace selection does not modify pixels — PNG is delivered
+    sRGB-encoded and there is no PNG path for wide-gamut HDR in this node.
+    """
+    height, width, num_channels = img_tensor.shape
+    has_alpha = num_channels == 4
+
+    spec = _FORMAT_SPECS[(file_format, bit_depth, has_alpha)]
+
+    if spec["dtype"] == np.float32:
+        # EXR path: preserve full range, no clamp.
+        if colorspace == "sRGB":
+            img_tensor = srgb_to_linear(img_tensor)
+        elif colorspace == "HDR":
+            img_tensor = hlg_to_linear(img_tensor)
+        img_np = img_tensor.cpu().numpy().astype(np.float32)
+    else:
+        # PNG path: quantize to integer range.
+        scaled = (img_tensor * spec["scale"]).clamp(0, spec["scale"])
+        img_np = scaled.to(torch.int32).cpu().numpy().astype(spec["dtype"])
+
+    # Encode directly via CodecContext. PyAV's `image2` muxer does NOT write to
+    # BytesIO (it expects a real file path), so we bypass the container entirely.
+    # For single-frame PNG/EXR the raw codec output IS the file.
+    codec = av.CodecContext.create(file_format, "w")
+    codec.width = width
+    codec.height = height
+    codec.pix_fmt = spec["stream_fmt"]
+    codec.time_base = Fraction(1, 1)
+
+    frame = av.VideoFrame.from_ndarray(img_np, format=spec["frame_fmt"])
+    if spec["frame_fmt"] != spec["stream_fmt"]:
+        frame = frame.reformat(format=spec["stream_fmt"])
+    frame.pts = 0
+    frame.time_base = codec.time_base
+
+    packets = list(codec.encode(frame)) + list(codec.encode(None))  # flush with None
+    return b"".join(bytes(p) for p in packets)
+
+
+# ---------------------------------------------------------------------------
+# Node
+# ---------------------------------------------------------------------------
+
+class SaveImageAdvanced(IO.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="SaveImageAdvanced",
+            search_aliases=["save", "save image", "export image", "output image", "write image"],
+            display_name="Save Image (Advanced)",
+            description="Saves the input images to your ComfyUI output directory.",
+            category="image",
+            essentials_category="Basics",
+            inputs=[
+                IO.Image.Input("images", tooltip="The images to save."),
+                IO.String.Input(
+                    "filename_prefix",
+                    default="ComfyUI",
+                    tooltip=(
+                        "The prefix for the file to save. May include formatting tokens "
+                        "such as %date:yyyy-MM-dd% or %Empty Latent Image.width%."
+                    ),
+                ),
+                IO.DynamicCombo.Input(
+                    "image_format",
+                    options=[
+                        IO.DynamicCombo.Option("png", [
+                            IO.Combo.Input("bit_depth", options=["8-bit", "16-bit"],
+                                           default="8-bit", advanced=True),
+                            IO.Combo.Input("colorspace", options=["sRGB"],
+                                           default="sRGB", advanced=True),
+                        ]),
+                        IO.DynamicCombo.Option("exr", [
+                            IO.Combo.Input("bit_depth", options=["32-bit float"],
+                                           default="32-bit float", advanced=True),
+                            IO.Combo.Input(
+                                "colorspace",
+                                options=["sRGB", "HDR", "linear"],
+                                default="sRGB",
+                                advanced=True,
+                                tooltip=(
+                                    "Colorspace of the input tensor. The EXR is "
+                                    "always written as scene-linear in the matching "
+                                    "gamut.\n"
+                                    "  'sRGB'   — input is sRGB-encoded Rec.709; "
+                                    "the inverse sRGB EOTF is applied.\n"
+                                    "  'HDR'    — input is HLG-encoded Rec.2020 "
+                                    "(BT.2100); the inverse HLG OETF is applied "
+                                    "to get scene-linear light.\n"
+                                    "  'linear' — input is already scene-linear "
+                                    "(Rec.709 primaries); written through unchanged. "
+                                    "Use this for renderer/compositor output."
+                                ),
+                            ),
+                        ]),
+                    ],
+                    tooltip="The file format in which to save the image.",
+                ),
+            ],
+            hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo],
+            is_output_node=True,
+        )
+
+    @classmethod
+    def execute(cls, images, filename_prefix: str, image_format: dict) -> IO.NodeOutput:
+        file_format = image_format["image_format"]
+        bit_depth = image_format["bit_depth"]
+        colorspace = image_format.get("colorspace", "sRGB")
+
+        output_dir = folder_paths.get_output_directory()
+        full_output_folder, filename, counter, subfolder, filename_prefix = (
+            folder_paths.get_save_image_path(
+                filename_prefix, output_dir, images[0].shape[1], images[0].shape[0]
+            )
+        )
+
+        prompt = cls.hidden.prompt
+        extra_pnginfo = cls.hidden.extra_pnginfo
+        write_metadata = not args.disable_metadata
+
+        results = []
+        for batch_number, image in enumerate(images):
+            encoded = _encode_image(image, file_format, bit_depth, colorspace)
+
+            if write_metadata:
+                if file_format == "png":
+                    encoded = inject_png_metadata(encoded, prompt, extra_pnginfo)
+                elif file_format == "exr":
+                    encoded = inject_exr_metadata(encoded, prompt, extra_pnginfo, colorspace)
+
+            name = filename.replace("%batch_num%", str(batch_number))
+            file = f"{name}_{counter:05}.{file_format}"
+            with open(os.path.join(full_output_folder, file), "wb") as f:
+                f.write(encoded)
+
+            results.append({"filename": file, "subfolder": subfolder, "type": "output"})
+            counter += 1
+
+        return IO.NodeOutput(ui={"images": results})
+
+
 class ImagesExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@ -842,6 +1249,7 @@ class ImagesExtension(ComfyExtension):
            ImageAddNoise,
            SaveAnimatedWEBP,
            SaveAnimatedPNG,
+            SaveImageAdvanced,
            SaveSVGNode,
            ImageStitch,
            ResizeAndPadImage,
--- a/openapi.yaml
+++ b/openapi.yaml
@ -2071,6 +2071,7 @@ paths:
                    type: integer
                    description: Number of assets marked as missing

+
  # ===========================================================================
  # Cloud-runtime FE-facing operations
  #
@ -2121,11 +2122,7 @@ paths:
      operationId: getCloudJobStatus
      tags: [queue]
      summary: Get status of a cloud job
-      deprecated: true
-      description: |
-        **Deprecated.** This endpoint is superseded by `GET /api/jobs/{job_id}`.
-        Clients should migrate; the endpoint is retained for backward
-        compatibility but will be removed in a future release.
+      description: "[cloud-only] Returns the current execution status of a cloud job."
      x-runtime: [cloud]
      parameters:
        - name: job_id
@ -2195,11 +2192,7 @@ paths:
      operationId: getHistoryV2
      tags: [history]
      summary: Get paginated execution history (v2)
-      deprecated: true
-      description: |
-        **Deprecated.** This endpoint is superseded by `GET /api/jobs`.
-        Clients should migrate; the endpoint is retained for backward
-        compatibility but will be removed in a future release.
+      description: "[cloud-only] Returns a paginated list of execution history entries in the v2 format, with richer metadata than the legacy history endpoint."
      x-runtime: [cloud]
      parameters:
        - name: limit
@ -2238,11 +2231,7 @@ paths:
      operationId: getHistoryV2ByPromptId
      tags: [history]
      summary: Get v2 history for a specific prompt
-      deprecated: true
-      description: |
-        **Deprecated.** This endpoint is superseded by `GET /api/jobs/{prompt_id}`.
-        Clients should migrate; the endpoint is retained for backward
-        compatibility but will be removed in a future release.
+      description: "[cloud-only] Returns the v2 history entry for a specific prompt execution."
      x-runtime: [cloud]
      parameters:
        - name: prompt_id
@ -2277,12 +2266,7 @@ paths:
      operationId: getCloudLogs
      tags: [system]
      summary: Get cloud execution logs
-      deprecated: true
-      description: |
-        **Deprecated.** This endpoint returns a static placeholder response and
-        provides no real log data. It is retained only to avoid breaking clients
-        that still call it. Clients should remove their dependency; the endpoint
-        will be removed in a future release.
+      description: "[cloud-only] Returns execution logs for the authenticated user's cloud jobs."
      x-runtime: [cloud]
      parameters:
        - name: job_id
@ -5386,12 +5370,7 @@ paths:
      operationId: viewVideo
      tags: [view]
      summary: View or download a video file
-      deprecated: true
-      description: |
-        **Deprecated.** This endpoint is an alias of `GET /api/view` added for
-        legacy history-queue video playback. Callers should use `/api/view`
-        directly; the endpoint is retained for backward compatibility but will
-        be removed in a future release.
+      description: "[cloud-only] Serves a video file from the output directory. Used by the frontend video player."
      x-runtime: [cloud]
      parameters:
        - name: filename
@ -5544,6 +5523,7 @@ paths:
              schema:
                $ref: "#/components/schemas/CloudError"

+
 components:
  parameters:
    ComfyUserHeader:
@ -6895,6 +6875,7 @@ components:
        error:
          type: string

+
    # -------------------------------------------------------------------
    # Cloud-runtime schemas
    #