Compare commits

..

1 Commits

Author SHA1 Message Date
0ac6480c7e Save Image advanced node. 2026-05-12 01:33:04 -04:00
4 changed files with 418 additions and 38 deletions

View File

@ -89,12 +89,3 @@ rules:
then:
field: description
function: truthy
overrides:
# /ws uses HTTP 101 (Switching Protocols) — a legitimate response for a
# WebSocket upgrade, but not a 2xx, so operation-success-response fires
# as a false positive. OpenAPI 3.x has no native WebSocket support.
- files:
- "openapi.yaml#/paths/~1ws"
rules:
operation-success-response: off

View File

@ -143,7 +143,7 @@ class QuiverTextToSVGNode(IO.ComfyNode):
if reference_images:
references = []
for key in reference_images:
url = await upload_image_to_comfyapi(cls, reference_images[key], mime_type="image/png")
url = await upload_image_to_comfyapi(cls, reference_images[key])
references.append(QuiverImageObject(url=url))
if len(references) > 4:
raise ValueError("Maximum 4 reference images are allowed.")
@ -252,7 +252,7 @@ class QuiverImageToSVGNode(IO.ComfyNode):
model: dict,
seed: int,
) -> IO.NodeOutput:
image_url = await upload_image_to_comfyapi(cls, image, mime_type="image/png")
image_url = await upload_image_to_comfyapi(cls, image)
response = await sync_op(
cls,

View File

@ -3,15 +3,23 @@ from __future__ import annotations
import nodes
import folder_paths
import av
import json
import os
import re
import math
import numpy as np
import struct
import torch
import zlib
import comfy.utils
from fractions import Fraction
from server import PromptServer
from comfy_api.latest import ComfyExtension, IO, UI
from comfy.cli_args import args
from typing_extensions import override
SVG = IO.SVG.Type # TODO: temporary solution for backward compatibility, will be removed later.
@ -830,6 +838,405 @@ class ImageMergeTileList(IO.ComfyNode):
return IO.NodeOutput(merged_image)
# ---------------------------------------------------------------------------
# Format specifications
# ---------------------------------------------------------------------------
# Maps (file_format, bit_depth, has_alpha) -> (numpy dtype scale, av pixel format,
# stream pix_fmt). Keeps the encode path declarative instead of branchy.
_FORMAT_SPECS = {
("png", "8-bit", False): {"scale": 255.0, "dtype": np.uint8, "frame_fmt": "rgb24", "stream_fmt": "rgb24"},
("png", "8-bit", True): {"scale": 255.0, "dtype": np.uint8, "frame_fmt": "rgba", "stream_fmt": "rgba"},
("png", "16-bit", False): {"scale": 65535.0, "dtype": np.uint16, "frame_fmt": "rgb48le", "stream_fmt": "rgb48be"},
("png", "16-bit", True): {"scale": 65535.0, "dtype": np.uint16, "frame_fmt": "rgba64le", "stream_fmt": "rgba64be"},
("exr", "32-bit float", False): {"scale": 1.0, "dtype": np.float32, "frame_fmt": "gbrpf32le", "stream_fmt": "gbrpf32le"},
("exr", "32-bit float", True): {"scale": 1.0, "dtype": np.float32, "frame_fmt": "gbrapf32le", "stream_fmt": "gbrapf32le"},
}
# ---------------------------------------------------------------------------
# Color transforms
# ---------------------------------------------------------------------------
def srgb_to_linear(t: torch.Tensor) -> torch.Tensor:
"""Inverse sRGB EOTF (IEC 61966-2-1). Operates on RGB channels only;
alpha (if present as the 4th channel) is passed through unchanged."""
if t.shape[-1] == 4:
rgb, alpha = t[..., :3], t[..., 3:]
return torch.cat([srgb_to_linear(rgb), alpha], dim=-1)
# Piecewise: linear toe below 0.04045, gamma curve above.
low = t / 12.92
high = ((t.clamp(min=0.0) + 0.055) / 1.055) ** 2.4
return torch.where(t <= 0.04045, low, high)
# HLG OETF constants from BT.2100 Table 5.
_HLG_A = 0.17883277
_HLG_B = 0.28466892
_HLG_C = 0.55991072928 # = 0.5 - a*ln(4*a)
def hlg_to_linear(t: torch.Tensor) -> torch.Tensor:
"""Inverse HLG OETF (BT.2100). Maps a non-linear HLG signal in [0, 1] to
*scene*-linear light in [0, 1]. Per BT.2100 Note 5a, this is the correct
transform when converting HLG to a linear scene-light representation
(rather than display-light, which would also involve the HLG OOTF).
Operates on RGB channels only; alpha is passed through unchanged."""
if t.shape[-1] == 4:
rgb, alpha = t[..., :3], t[..., 3:]
return torch.cat([hlg_to_linear(rgb), alpha], dim=-1)
# Piecewise: sqrt branch below 0.5, log branch above.
# Clamp inside the log branch so negative / out-of-range values don't blow up;
# values above 1.0 are allowed and extrapolate naturally.
low = (t ** 2) / 3.0
high = (torch.exp((t.clamp(min=_HLG_C) - _HLG_C) / _HLG_A) + _HLG_B) / 12.0
return torch.where(t <= 0.5, low, high)
# ---------------------------------------------------------------------------
# Metadata injection
# ---------------------------------------------------------------------------
_PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n"
def _png_chunk(chunk_type: bytes, data: bytes) -> bytes:
"""Build a single PNG chunk: length | type | data | CRC32(type+data)."""
crc = zlib.crc32(chunk_type + data) & 0xFFFFFFFF
return struct.pack(">I", len(data)) + chunk_type + data + struct.pack(">I", crc)
def _png_text_chunk(keyword: str, text: str) -> bytes:
"""tEXt chunk: latin-1 keyword + NUL + latin-1 text."""
payload = keyword.encode("latin-1") + b"\x00" + text.encode("latin-1", errors="replace")
return _png_chunk(b"tEXt", payload)
def inject_png_metadata(png_bytes: bytes, prompt: dict | None, extra_pnginfo: dict | None) -> bytes:
"""Insert ComfyUI prompt/workflow as tEXt chunks right after IHDR."""
if not png_bytes.startswith(_PNG_SIGNATURE):
return png_bytes
chunks: list[bytes] = []
if prompt is not None:
chunks.append(_png_text_chunk("prompt", json.dumps(prompt)))
if extra_pnginfo:
for key, value in extra_pnginfo.items():
chunks.append(_png_text_chunk(key, json.dumps(value)))
if not chunks:
return png_bytes
# IHDR is always the first chunk; insert ours immediately after it.
ihdr_length = struct.unpack(">I", png_bytes[8:12])[0]
ihdr_end = 8 + 8 + ihdr_length + 4 # signature + (len+type) + data + crc
return png_bytes[:ihdr_end] + b"".join(chunks) + png_bytes[ihdr_end:]
# Standard chromaticities (CIE 1931 xy) for the colorspaces this node writes.
# Each tuple is (Rx, Ry, Gx, Gy, Bx, By, Wx, Wy). All share D65 white point.
_CHROMATICITIES = {
# ITU-R BT.709 / sRGB primaries
"Rec.709": (0.6400, 0.3300, 0.3000, 0.6000, 0.1500, 0.0600, 0.3127, 0.3290),
# ITU-R BT.2020 (UHDTV / wide-gamut HDR) primaries
"Rec.2020": (0.7080, 0.2920, 0.1700, 0.7970, 0.1310, 0.0460, 0.3127, 0.3290),
}
def _pack_chromaticities(primaries: tuple) -> bytes:
"""Serialize 8 chromaticity floats into the EXR `chromaticities` payload."""
return struct.pack("<8f", *primaries)
def _exr_attribute(name: str, attr_type: str, value: bytes) -> bytes:
"""Serialize one EXR header attribute: name\\0 type\\0 size:int32 value."""
return (
name.encode("utf-8") + b"\x00"
+ attr_type.encode("utf-8") + b"\x00"
+ struct.pack("<i", len(value))
+ value
)
def inject_exr_metadata(
exr_bytes: bytes,
prompt: dict | None,
extra_pnginfo: dict | None,
colorspace: str | None = None,
) -> bytes:
"""Insert ComfyUI metadata and color-space info into an EXR header.
Color: EXR pixels are linear by convention. The standard way to describe
their RGB→XYZ relationship is the `chromaticities` attribute. We pick the
primaries that match what the user told us their input was:
colorspace="sRGB" → Rec. 709 / sRGB primaries (D65)
colorspace="HDR" → Rec. 2020 / BT.2100 primaries (D65)
Pixels are always converted to linear scene light upstream (sRGB EOTF
inverse for sRGB; HLG OETF inverse for HDR), so the file content is
scene-linear in the indicated gamut. OpenEXR has no standard transfer-
function attribute (the OpenEXR TSC has discussed adding one but it
doesn't exist), so we don't invent one — `chromaticities` plus the EXR
linear-by-convention rule fully specifies the color.
Prompt/workflow: written as plain `string` attributes using the same keys
(`prompt`, `workflow`, ...) that Comfy uses for PNG tEXt chunks, so the
same readers can pull them out symmetrically.
Implementation note: the chunk-offset table that follows the header stores
*absolute* byte offsets into the file. Inserting N bytes into the header
means every offset must be incremented by N or the file becomes unreadable.
"""
if len(exr_bytes) < 8 or exr_bytes[:4] != b"\x76\x2f\x31\x01":
return exr_bytes
new_blob = b""
if prompt is not None:
new_blob += _exr_attribute("prompt", "string", json.dumps(prompt).encode("utf-8"))
if extra_pnginfo:
for key, value in extra_pnginfo.items():
new_blob += _exr_attribute(key, "string", json.dumps(value).encode("utf-8"))
if colorspace is not None:
# Map each colorspace option to the RGB primaries the linear pixels
# are now in. "sRGB" and "linear" both produce Rec. 709 linear; "HDR"
# (HLG-encoded Rec. 2020 input) produces Rec. 2020 linear.
primaries_name = {
"sRGB": "Rec.709",
"linear": "Rec.709",
"HDR": "Rec.2020",
}.get(colorspace, "Rec.709")
new_blob += _exr_attribute(
"chromaticities",
"chromaticities",
_pack_chromaticities(_CHROMATICITIES[primaries_name]),
)
if not new_blob:
return exr_bytes
# Walk header attributes to find the terminating null byte, and pick up
# dataWindow + compression so we know how many chunks the offset table has.
pos = 8 # past magic (4) + version (4)
data_window = None
compression = 0
while pos < len(exr_bytes) and exr_bytes[pos] != 0:
name_end = exr_bytes.index(b"\x00", pos)
attr_name = exr_bytes[pos:name_end].decode("latin-1", errors="replace")
type_end = exr_bytes.index(b"\x00", name_end + 1)
attr_type = exr_bytes[name_end + 1:type_end].decode("latin-1", errors="replace")
size = struct.unpack("<i", exr_bytes[type_end + 1:type_end + 5])[0]
value_start = type_end + 5
value = exr_bytes[value_start:value_start + size]
if attr_name == "dataWindow" and attr_type == "box2i":
data_window = struct.unpack("<iiii", value) # xMin, yMin, xMax, yMax
elif attr_name == "compression" and attr_type == "compression":
compression = value[0]
pos = value_start + size
if data_window is None:
return exr_bytes # required attribute missing — don't risk corrupting
# Scanlines per chunk by compression, from the OpenEXR spec.
scanlines_per_block = {
0: 1, # NO_COMPRESSION
1: 1, # RLE
2: 1, # ZIPS
3: 16, # ZIP
4: 32, # PIZ
5: 16, # PXR24
6: 32, # B44
7: 32, # B44A
8: 256, # DWAA
9: 256, # DWAB
}.get(compression, 1)
_, y_min, _, y_max = data_window
height = y_max - y_min + 1
num_chunks = (height + scanlines_per_block - 1) // scanlines_per_block
header_end = pos # position of the terminating null byte
table_start = header_end + 1
pixel_start = table_start + num_chunks * 8
delta = len(new_blob)
old_offsets = struct.unpack(f"<{num_chunks}Q", exr_bytes[table_start:pixel_start])
new_table = struct.pack(f"<{num_chunks}Q", *(o + delta for o in old_offsets))
return (
exr_bytes[:header_end] # header attributes
+ new_blob # our new attributes
+ exr_bytes[header_end:table_start] # terminating null byte
+ new_table # shifted offset table
+ exr_bytes[pixel_start:] # pixel data, untouched
)
# ---------------------------------------------------------------------------
# Encoding
# ---------------------------------------------------------------------------
def _encode_image(
img_tensor: torch.Tensor,
file_format: str,
bit_depth: str,
colorspace: str,
) -> bytes:
"""Encode a single HxWxC tensor to PNG or EXR bytes in memory.
For EXR the input is interpreted according to `colorspace` and converted
to scene-linear (EXR's convention) before writing:
"sRGB" → input is sRGB-encoded Rec. 709; apply inverse sRGB EOTF.
"HDR" → input is HLG-encoded Rec. 2020 (BT.2100); apply inverse HLG
OETF to get scene-linear, per BT.2100 Note 5a.
"linear" → input is already scene-linear (Rec. 709 primaries); write
through unchanged. Use this for renderer/compositor output.
For PNG, colorspace selection does not modify pixels — PNG is delivered
sRGB-encoded and there is no PNG path for wide-gamut HDR in this node.
"""
height, width, num_channels = img_tensor.shape
has_alpha = num_channels == 4
spec = _FORMAT_SPECS[(file_format, bit_depth, has_alpha)]
if spec["dtype"] == np.float32:
# EXR path: preserve full range, no clamp.
if colorspace == "sRGB":
img_tensor = srgb_to_linear(img_tensor)
elif colorspace == "HDR":
img_tensor = hlg_to_linear(img_tensor)
img_np = img_tensor.cpu().numpy().astype(np.float32)
else:
# PNG path: quantize to integer range.
scaled = (img_tensor * spec["scale"]).clamp(0, spec["scale"])
img_np = scaled.to(torch.int32).cpu().numpy().astype(spec["dtype"])
# Encode directly via CodecContext. PyAV's `image2` muxer does NOT write to
# BytesIO (it expects a real file path), so we bypass the container entirely.
# For single-frame PNG/EXR the raw codec output IS the file.
codec = av.CodecContext.create(file_format, "w")
codec.width = width
codec.height = height
codec.pix_fmt = spec["stream_fmt"]
codec.time_base = Fraction(1, 1)
frame = av.VideoFrame.from_ndarray(img_np, format=spec["frame_fmt"])
if spec["frame_fmt"] != spec["stream_fmt"]:
frame = frame.reformat(format=spec["stream_fmt"])
frame.pts = 0
frame.time_base = codec.time_base
packets = list(codec.encode(frame)) + list(codec.encode(None)) # flush with None
return b"".join(bytes(p) for p in packets)
# ---------------------------------------------------------------------------
# Node
# ---------------------------------------------------------------------------
class SaveImageAdvanced(IO.ComfyNode):
@classmethod
def define_schema(cls):
return IO.Schema(
node_id="SaveImageAdvanced",
search_aliases=["save", "save image", "export image", "output image", "write image"],
display_name="Save Image (Advanced)",
description="Saves the input images to your ComfyUI output directory.",
category="image",
essentials_category="Basics",
inputs=[
IO.Image.Input("images", tooltip="The images to save."),
IO.String.Input(
"filename_prefix",
default="ComfyUI",
tooltip=(
"The prefix for the file to save. May include formatting tokens "
"such as %date:yyyy-MM-dd% or %Empty Latent Image.width%."
),
),
IO.DynamicCombo.Input(
"image_format",
options=[
IO.DynamicCombo.Option("png", [
IO.Combo.Input("bit_depth", options=["8-bit", "16-bit"],
default="8-bit", advanced=True),
IO.Combo.Input("colorspace", options=["sRGB"],
default="sRGB", advanced=True),
]),
IO.DynamicCombo.Option("exr", [
IO.Combo.Input("bit_depth", options=["32-bit float"],
default="32-bit float", advanced=True),
IO.Combo.Input(
"colorspace",
options=["sRGB", "HDR", "linear"],
default="sRGB",
advanced=True,
tooltip=(
"Colorspace of the input tensor. The EXR is "
"always written as scene-linear in the matching "
"gamut.\n"
" 'sRGB' — input is sRGB-encoded Rec.709; "
"the inverse sRGB EOTF is applied.\n"
" 'HDR' — input is HLG-encoded Rec.2020 "
"(BT.2100); the inverse HLG OETF is applied "
"to get scene-linear light.\n"
" 'linear' — input is already scene-linear "
"(Rec.709 primaries); written through unchanged. "
"Use this for renderer/compositor output."
),
),
]),
],
tooltip="The file format in which to save the image.",
),
],
hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo],
is_output_node=True,
)
@classmethod
def execute(cls, images, filename_prefix: str, image_format: dict) -> IO.NodeOutput:
file_format = image_format["image_format"]
bit_depth = image_format["bit_depth"]
colorspace = image_format.get("colorspace", "sRGB")
output_dir = folder_paths.get_output_directory()
full_output_folder, filename, counter, subfolder, filename_prefix = (
folder_paths.get_save_image_path(
filename_prefix, output_dir, images[0].shape[1], images[0].shape[0]
)
)
prompt = cls.hidden.prompt
extra_pnginfo = cls.hidden.extra_pnginfo
write_metadata = not args.disable_metadata
results = []
for batch_number, image in enumerate(images):
encoded = _encode_image(image, file_format, bit_depth, colorspace)
if write_metadata:
if file_format == "png":
encoded = inject_png_metadata(encoded, prompt, extra_pnginfo)
elif file_format == "exr":
encoded = inject_exr_metadata(encoded, prompt, extra_pnginfo, colorspace)
name = filename.replace("%batch_num%", str(batch_number))
file = f"{name}_{counter:05}.{file_format}"
with open(os.path.join(full_output_folder, file), "wb") as f:
f.write(encoded)
results.append({"filename": file, "subfolder": subfolder, "type": "output"})
counter += 1
return IO.NodeOutput(ui={"images": results})
class ImagesExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@ -842,6 +1249,7 @@ class ImagesExtension(ComfyExtension):
ImageAddNoise,
SaveAnimatedWEBP,
SaveAnimatedPNG,
SaveImageAdvanced,
SaveSVGNode,
ImageStitch,
ResizeAndPadImage,

View File

@ -2071,6 +2071,7 @@ paths:
type: integer
description: Number of assets marked as missing
# ===========================================================================
# Cloud-runtime FE-facing operations
#
@ -2121,11 +2122,7 @@ paths:
operationId: getCloudJobStatus
tags: [queue]
summary: Get status of a cloud job
deprecated: true
description: |
**Deprecated.** This endpoint is superseded by `GET /api/jobs/{job_id}`.
Clients should migrate; the endpoint is retained for backward
compatibility but will be removed in a future release.
description: "[cloud-only] Returns the current execution status of a cloud job."
x-runtime: [cloud]
parameters:
- name: job_id
@ -2195,11 +2192,7 @@ paths:
operationId: getHistoryV2
tags: [history]
summary: Get paginated execution history (v2)
deprecated: true
description: |
**Deprecated.** This endpoint is superseded by `GET /api/jobs`.
Clients should migrate; the endpoint is retained for backward
compatibility but will be removed in a future release.
description: "[cloud-only] Returns a paginated list of execution history entries in the v2 format, with richer metadata than the legacy history endpoint."
x-runtime: [cloud]
parameters:
- name: limit
@ -2238,11 +2231,7 @@ paths:
operationId: getHistoryV2ByPromptId
tags: [history]
summary: Get v2 history for a specific prompt
deprecated: true
description: |
**Deprecated.** This endpoint is superseded by `GET /api/jobs/{prompt_id}`.
Clients should migrate; the endpoint is retained for backward
compatibility but will be removed in a future release.
description: "[cloud-only] Returns the v2 history entry for a specific prompt execution."
x-runtime: [cloud]
parameters:
- name: prompt_id
@ -2277,12 +2266,7 @@ paths:
operationId: getCloudLogs
tags: [system]
summary: Get cloud execution logs
deprecated: true
description: |
**Deprecated.** This endpoint returns a static placeholder response and
provides no real log data. It is retained only to avoid breaking clients
that still call it. Clients should remove their dependency; the endpoint
will be removed in a future release.
description: "[cloud-only] Returns execution logs for the authenticated user's cloud jobs."
x-runtime: [cloud]
parameters:
- name: job_id
@ -5386,12 +5370,7 @@ paths:
operationId: viewVideo
tags: [view]
summary: View or download a video file
deprecated: true
description: |
**Deprecated.** This endpoint is an alias of `GET /api/view` added for
legacy history-queue video playback. Callers should use `/api/view`
directly; the endpoint is retained for backward compatibility but will
be removed in a future release.
description: "[cloud-only] Serves a video file from the output directory. Used by the frontend video player."
x-runtime: [cloud]
parameters:
- name: filename
@ -5544,6 +5523,7 @@ paths:
schema:
$ref: "#/components/schemas/CloudError"
components:
parameters:
ComfyUserHeader:
@ -6895,6 +6875,7 @@ components:
error:
type: string
# -------------------------------------------------------------------
# Cloud-runtime schemas
#