Compare commits

..

1 Commits

Author SHA1 Message Date
0ac6480c7e Save Image advanced node. 2026-05-12 01:33:04 -04:00
11 changed files with 441 additions and 385 deletions

View File

@ -89,12 +89,3 @@ rules:
then:
field: description
function: truthy
overrides:
# /ws uses HTTP 101 (Switching Protocols) — a legitimate response for a
# WebSocket upgrade, but not a 2xx, so operation-success-response fires
# as a false positive. OpenAPI 3.x has no native WebSocket support.
- files:
- "openapi.yaml#/paths/~1ws"
rules:
operation-success-response: off

View File

@ -1443,7 +1443,7 @@ class HiDreamO1(supported_models_base.BASE):
}
latent_format = latent_formats.HiDreamO1Pixel
memory_usage_factor = 0.033
memory_usage_factor = 0.6
# fp16 not supported: LM MLP down_proj activations fp16 overflow, causing NaNs
supported_inference_dtypes = [torch.bfloat16, torch.float32]

View File

@ -1,70 +0,0 @@
from enum import Enum
from typing import Literal
from pydantic import BaseModel, Field
class AnthropicRole(str, Enum):
user = "user"
assistant = "assistant"
class AnthropicTextContent(BaseModel):
type: Literal["text"] = "text"
text: str = Field(...)
class AnthropicImageSourceBase64(BaseModel):
type: Literal["base64"] = "base64"
media_type: str = Field(..., description="MIME type of the image, e.g. image/png, image/jpeg")
data: str = Field(..., description="Base64-encoded image data")
class AnthropicImageContent(BaseModel):
type: Literal["image"] = "image"
source: AnthropicImageSourceBase64 = Field(...)
class AnthropicMessage(BaseModel):
role: AnthropicRole = Field(...)
content: list[AnthropicTextContent | AnthropicImageContent] = Field(...)
class AnthropicMessagesRequest(BaseModel):
model: str = Field(...)
messages: list[AnthropicMessage] = Field(...)
max_tokens: int = Field(..., ge=1)
system: str | None = Field(None, description="Top-level system prompt")
temperature: float | None = Field(None, ge=0.0, le=1.0)
top_p: float | None = Field(None, ge=0.0, le=1.0)
top_k: int | None = Field(None, ge=0)
stop_sequences: list[str] | None = Field(None)
class AnthropicResponseTextBlock(BaseModel):
type: Literal["text"] = "text"
text: str = Field(...)
class AnthropicCacheCreationUsage(BaseModel):
ephemeral_5m_input_tokens: int | None = Field(None)
ephemeral_1h_input_tokens: int | None = Field(None)
class AnthropicMessagesUsage(BaseModel):
input_tokens: int | None = Field(None)
output_tokens: int | None = Field(None)
cache_creation_input_tokens: int | None = Field(None)
cache_read_input_tokens: int | None = Field(None)
cache_creation: AnthropicCacheCreationUsage | None = Field(None)
class AnthropicMessagesResponse(BaseModel):
id: str | None = Field(None)
type: str | None = Field(None)
role: str | None = Field(None)
model: str | None = Field(None)
content: list[AnthropicResponseTextBlock] | None = Field(None)
stop_reason: str | None = Field(None)
stop_sequence: str | None = Field(None)
usage: AnthropicMessagesUsage | None = Field(None)

View File

@ -1,250 +0,0 @@
"""API Nodes for Anthropic Claude (Messages API). See: https://docs.anthropic.com/en/api/messages"""
from typing_extensions import override
from comfy_api.latest import IO, ComfyExtension, Input
from comfy_api_nodes.apis.anthropic import (
AnthropicImageContent,
AnthropicImageSourceBase64,
AnthropicMessage,
AnthropicMessagesRequest,
AnthropicMessagesResponse,
AnthropicRole,
AnthropicTextContent,
)
from comfy_api_nodes.util import (
ApiEndpoint,
downscale_image_tensor,
get_number_of_images,
sync_op,
tensor_to_base64_string,
validate_string,
)
ANTHROPIC_MESSAGES_ENDPOINT = "/proxy/anthropic/v1/messages"
ANTHROPIC_IMAGE_MAX_PIXELS = 1568 * 1568 # Anthropic recommends max ~1568px on the longest edge
CLAUDE_MAX_IMAGES = 20 # Anthropic supports up to 20 images per request
CLAUDE_MODELS: dict[str, str] = {
"Opus 4.7": "claude-opus-4-7",
"Opus 4.6": "claude-opus-4-6",
"Sonnet 4.6": "claude-sonnet-4-6",
"Sonnet 4.5": "claude-sonnet-4-5-20250929",
"Haiku 4.5": "claude-haiku-4-5-20251001",
}
def _claude_model_inputs():
return [
IO.Int.Input(
"max_tokens",
default=16000,
min=32,
max=32000,
tooltip="Maximum number of tokens to generate before stopping.",
advanced=True,
),
IO.Float.Input(
"temperature",
default=1.0,
min=0.0,
max=1.0,
step=0.01,
tooltip="Controls randomness. 0.0 is deterministic, 1.0 is most random.",
advanced=True,
),
]
def _model_price_per_million(model: str) -> tuple[float, float] | None:
"""Return (input_per_1M, output_per_1M) USD for a Claude model, or None if unknown."""
if "opus-4" in model:
return 15.0, 75.0
if "sonnet-4" in model:
return 3.0, 15.0
if "haiku-4-5" in model:
return 1.0, 5.0
return None
def calculate_tokens_price(response: AnthropicMessagesResponse) -> float | None:
"""Compute approximate USD price from response usage. Server-side billing is authoritative."""
if not response.usage or not response.model:
return None
rates = _model_price_per_million(response.model)
if rates is None:
return None
input_rate, output_rate = rates
input_tokens = response.usage.input_tokens or 0
output_tokens = response.usage.output_tokens or 0
cache_read = response.usage.cache_read_input_tokens or 0
cache_5m = 0
cache_1h = 0
if response.usage.cache_creation:
cache_5m = response.usage.cache_creation.ephemeral_5m_input_tokens or 0
cache_1h = response.usage.cache_creation.ephemeral_1h_input_tokens or 0
total = (
input_tokens * input_rate
+ output_tokens * output_rate
+ cache_read * input_rate * 0.1
+ cache_5m * input_rate * 1.25
+ cache_1h * input_rate * 2.0
)
return total / 1_000_000.0
def _get_text_from_response(response: AnthropicMessagesResponse) -> str:
if not response.content:
return ""
return "\n".join(block.text for block in response.content if block.text)
def _build_image_content_blocks(image_tensors: list[Input.Image]) -> list[AnthropicImageContent]:
"""Convert image tensors (possibly batched) into Anthropic content blocks (base64 PNG)."""
blocks: list[AnthropicImageContent] = []
for tensor in image_tensors:
batch = tensor if len(tensor.shape) == 4 else tensor.unsqueeze(0)
for i in range(batch.shape[0]):
scaled = downscale_image_tensor(batch[i : i + 1], total_pixels=ANTHROPIC_IMAGE_MAX_PIXELS)
blocks.append(
AnthropicImageContent(
source=AnthropicImageSourceBase64(
media_type="image/png",
data=tensor_to_base64_string(scaled),
),
)
)
return blocks
class ClaudeNode(IO.ComfyNode):
"""Generate text responses from an Anthropic Claude model."""
@classmethod
def define_schema(cls):
return IO.Schema(
node_id="ClaudeNode",
display_name="Anthropic Claude",
category="api node/text/Anthropic",
essentials_category="Text Generation",
description="Generate text responses with Anthropic's Claude models. "
"Provide a text prompt and optionally one or more images for multimodal context.",
inputs=[
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Text input to the model.",
),
IO.DynamicCombo.Input(
"model",
options=[IO.DynamicCombo.Option(label, _claude_model_inputs()) for label in CLAUDE_MODELS],
tooltip="The Claude model used to generate the response.",
),
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
control_after_generate=True,
tooltip="Seed controls whether the node should re-run; "
"results are non-deterministic regardless of seed.",
),
IO.Autogrow.Input(
"images",
template=IO.Autogrow.TemplateNames(
IO.Image.Input("image"),
names=[f"image_{i}" for i in range(1, CLAUDE_MAX_IMAGES + 1)],
min=0,
),
tooltip=f"Optional image(s) to use as context for the model. Up to {CLAUDE_MAX_IMAGES} images.",
),
IO.String.Input(
"system_prompt",
multiline=True,
default="",
optional=True,
advanced=True,
tooltip="Foundational instructions that dictate the model's behavior.",
),
],
outputs=[IO.String.Output()],
hidden=[
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
price_badge=IO.PriceBadge(
depends_on=IO.PriceBadgeDepends(widgets=["model"]),
expr="""
(
$m := widgets.model;
$contains($m, "opus") ? {
"type": "list_usd",
"usd": [0.015, 0.075],
"format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
}
: $contains($m, "sonnet") ? {
"type": "list_usd",
"usd": [0.003, 0.015],
"format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
}
: $contains($m, "haiku") ? {
"type": "list_usd",
"usd": [0.001, 0.005],
"format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
}
: {"type":"text", "text":"Token-based"}
)
""",
),
)
@classmethod
async def execute(
cls,
prompt: str,
model: dict,
seed: int,
images: dict | None = None,
system_prompt: str = "",
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=True, min_length=1)
model_label = model["model"]
max_tokens = model["max_tokens"]
temperature = model["temperature"]
image_tensors: list[Input.Image] = [t for t in (images or {}).values() if t is not None]
if sum(get_number_of_images(t) for t in image_tensors) > CLAUDE_MAX_IMAGES:
raise ValueError(f"Up to {CLAUDE_MAX_IMAGES} images are supported per request.")
content: list[AnthropicTextContent | AnthropicImageContent] = []
if image_tensors:
content.extend(_build_image_content_blocks(image_tensors))
content.append(AnthropicTextContent(text=prompt))
response = await sync_op(
cls,
ApiEndpoint(path=ANTHROPIC_MESSAGES_ENDPOINT, method="POST"),
response_model=AnthropicMessagesResponse,
data=AnthropicMessagesRequest(
model=CLAUDE_MODELS[model_label],
max_tokens=max_tokens,
messages=[AnthropicMessage(role=AnthropicRole.user, content=content)],
system=system_prompt or None,
temperature=temperature,
),
price_extractor=calculate_tokens_price,
)
return IO.NodeOutput(_get_text_from_response(response) or "Empty response from Claude model.")
class AnthropicExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [ClaudeNode]
async def comfy_entrypoint() -> AnthropicExtension:
return AnthropicExtension()

View File

@ -143,7 +143,7 @@ class QuiverTextToSVGNode(IO.ComfyNode):
if reference_images:
references = []
for key in reference_images:
url = await upload_image_to_comfyapi(cls, reference_images[key], mime_type="image/png")
url = await upload_image_to_comfyapi(cls, reference_images[key])
references.append(QuiverImageObject(url=url))
if len(references) > 4:
raise ValueError("Maximum 4 reference images are allowed.")
@ -252,7 +252,7 @@ class QuiverImageToSVGNode(IO.ComfyNode):
model: dict,
seed: int,
) -> IO.NodeOutput:
image_url = await upload_image_to_comfyapi(cls, image, mime_type="image/png")
image_url = await upload_image_to_comfyapi(cls, image)
response = await sync_op(
cls,

View File

@ -297,7 +297,6 @@ class LoadAudio(IO.ComfyNode):
@classmethod
def define_schema(cls):
input_dir = folder_paths.get_input_directory()
os.makedirs(input_dir, exist_ok=True)
files = folder_paths.filter_files_content_types(os.listdir(input_dir), ["audio", "video"])
return IO.Schema(
node_id="LoadAudio",

View File

@ -3,15 +3,23 @@ from __future__ import annotations
import nodes
import folder_paths
import av
import json
import os
import re
import math
import numpy as np
import struct
import torch
import zlib
import comfy.utils
from fractions import Fraction
from server import PromptServer
from comfy_api.latest import ComfyExtension, IO, UI
from comfy.cli_args import args
from typing_extensions import override
SVG = IO.SVG.Type # TODO: temporary solution for backward compatibility, will be removed later.
@ -830,6 +838,405 @@ class ImageMergeTileList(IO.ComfyNode):
return IO.NodeOutput(merged_image)
# ---------------------------------------------------------------------------
# Format specifications
# ---------------------------------------------------------------------------
# Maps (file_format, bit_depth, has_alpha) -> (numpy dtype scale, av pixel format,
# stream pix_fmt). Keeps the encode path declarative instead of branchy.
_FORMAT_SPECS = {
("png", "8-bit", False): {"scale": 255.0, "dtype": np.uint8, "frame_fmt": "rgb24", "stream_fmt": "rgb24"},
("png", "8-bit", True): {"scale": 255.0, "dtype": np.uint8, "frame_fmt": "rgba", "stream_fmt": "rgba"},
("png", "16-bit", False): {"scale": 65535.0, "dtype": np.uint16, "frame_fmt": "rgb48le", "stream_fmt": "rgb48be"},
("png", "16-bit", True): {"scale": 65535.0, "dtype": np.uint16, "frame_fmt": "rgba64le", "stream_fmt": "rgba64be"},
("exr", "32-bit float", False): {"scale": 1.0, "dtype": np.float32, "frame_fmt": "gbrpf32le", "stream_fmt": "gbrpf32le"},
("exr", "32-bit float", True): {"scale": 1.0, "dtype": np.float32, "frame_fmt": "gbrapf32le", "stream_fmt": "gbrapf32le"},
}
# ---------------------------------------------------------------------------
# Color transforms
# ---------------------------------------------------------------------------
def srgb_to_linear(t: torch.Tensor) -> torch.Tensor:
"""Inverse sRGB EOTF (IEC 61966-2-1). Operates on RGB channels only;
alpha (if present as the 4th channel) is passed through unchanged."""
if t.shape[-1] == 4:
rgb, alpha = t[..., :3], t[..., 3:]
return torch.cat([srgb_to_linear(rgb), alpha], dim=-1)
# Piecewise: linear toe below 0.04045, gamma curve above.
low = t / 12.92
high = ((t.clamp(min=0.0) + 0.055) / 1.055) ** 2.4
return torch.where(t <= 0.04045, low, high)
# HLG OETF constants from BT.2100 Table 5.
_HLG_A = 0.17883277
_HLG_B = 0.28466892
_HLG_C = 0.55991072928 # = 0.5 - a*ln(4*a)
def hlg_to_linear(t: torch.Tensor) -> torch.Tensor:
"""Inverse HLG OETF (BT.2100). Maps a non-linear HLG signal in [0, 1] to
*scene*-linear light in [0, 1]. Per BT.2100 Note 5a, this is the correct
transform when converting HLG to a linear scene-light representation
(rather than display-light, which would also involve the HLG OOTF).
Operates on RGB channels only; alpha is passed through unchanged."""
if t.shape[-1] == 4:
rgb, alpha = t[..., :3], t[..., 3:]
return torch.cat([hlg_to_linear(rgb), alpha], dim=-1)
# Piecewise: sqrt branch below 0.5, log branch above.
# Clamp inside the log branch so negative / out-of-range values don't blow up;
# values above 1.0 are allowed and extrapolate naturally.
low = (t ** 2) / 3.0
high = (torch.exp((t.clamp(min=_HLG_C) - _HLG_C) / _HLG_A) + _HLG_B) / 12.0
return torch.where(t <= 0.5, low, high)
# ---------------------------------------------------------------------------
# Metadata injection
# ---------------------------------------------------------------------------
_PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n"
def _png_chunk(chunk_type: bytes, data: bytes) -> bytes:
"""Build a single PNG chunk: length | type | data | CRC32(type+data)."""
crc = zlib.crc32(chunk_type + data) & 0xFFFFFFFF
return struct.pack(">I", len(data)) + chunk_type + data + struct.pack(">I", crc)
def _png_text_chunk(keyword: str, text: str) -> bytes:
"""tEXt chunk: latin-1 keyword + NUL + latin-1 text."""
payload = keyword.encode("latin-1") + b"\x00" + text.encode("latin-1", errors="replace")
return _png_chunk(b"tEXt", payload)
def inject_png_metadata(png_bytes: bytes, prompt: dict | None, extra_pnginfo: dict | None) -> bytes:
"""Insert ComfyUI prompt/workflow as tEXt chunks right after IHDR."""
if not png_bytes.startswith(_PNG_SIGNATURE):
return png_bytes
chunks: list[bytes] = []
if prompt is not None:
chunks.append(_png_text_chunk("prompt", json.dumps(prompt)))
if extra_pnginfo:
for key, value in extra_pnginfo.items():
chunks.append(_png_text_chunk(key, json.dumps(value)))
if not chunks:
return png_bytes
# IHDR is always the first chunk; insert ours immediately after it.
ihdr_length = struct.unpack(">I", png_bytes[8:12])[0]
ihdr_end = 8 + 8 + ihdr_length + 4 # signature + (len+type) + data + crc
return png_bytes[:ihdr_end] + b"".join(chunks) + png_bytes[ihdr_end:]
# Standard chromaticities (CIE 1931 xy) for the colorspaces this node writes.
# Each tuple is (Rx, Ry, Gx, Gy, Bx, By, Wx, Wy). All share D65 white point.
_CHROMATICITIES = {
# ITU-R BT.709 / sRGB primaries
"Rec.709": (0.6400, 0.3300, 0.3000, 0.6000, 0.1500, 0.0600, 0.3127, 0.3290),
# ITU-R BT.2020 (UHDTV / wide-gamut HDR) primaries
"Rec.2020": (0.7080, 0.2920, 0.1700, 0.7970, 0.1310, 0.0460, 0.3127, 0.3290),
}
def _pack_chromaticities(primaries: tuple) -> bytes:
"""Serialize 8 chromaticity floats into the EXR `chromaticities` payload."""
return struct.pack("<8f", *primaries)
def _exr_attribute(name: str, attr_type: str, value: bytes) -> bytes:
"""Serialize one EXR header attribute: name\\0 type\\0 size:int32 value."""
return (
name.encode("utf-8") + b"\x00"
+ attr_type.encode("utf-8") + b"\x00"
+ struct.pack("<i", len(value))
+ value
)
def inject_exr_metadata(
exr_bytes: bytes,
prompt: dict | None,
extra_pnginfo: dict | None,
colorspace: str | None = None,
) -> bytes:
"""Insert ComfyUI metadata and color-space info into an EXR header.
Color: EXR pixels are linear by convention. The standard way to describe
their RGB→XYZ relationship is the `chromaticities` attribute. We pick the
primaries that match what the user told us their input was:
colorspace="sRGB" → Rec. 709 / sRGB primaries (D65)
colorspace="HDR" → Rec. 2020 / BT.2100 primaries (D65)
Pixels are always converted to linear scene light upstream (sRGB EOTF
inverse for sRGB; HLG OETF inverse for HDR), so the file content is
scene-linear in the indicated gamut. OpenEXR has no standard transfer-
function attribute (the OpenEXR TSC has discussed adding one but it
doesn't exist), so we don't invent one — `chromaticities` plus the EXR
linear-by-convention rule fully specifies the color.
Prompt/workflow: written as plain `string` attributes using the same keys
(`prompt`, `workflow`, ...) that Comfy uses for PNG tEXt chunks, so the
same readers can pull them out symmetrically.
Implementation note: the chunk-offset table that follows the header stores
*absolute* byte offsets into the file. Inserting N bytes into the header
means every offset must be incremented by N or the file becomes unreadable.
"""
if len(exr_bytes) < 8 or exr_bytes[:4] != b"\x76\x2f\x31\x01":
return exr_bytes
new_blob = b""
if prompt is not None:
new_blob += _exr_attribute("prompt", "string", json.dumps(prompt).encode("utf-8"))
if extra_pnginfo:
for key, value in extra_pnginfo.items():
new_blob += _exr_attribute(key, "string", json.dumps(value).encode("utf-8"))
if colorspace is not None:
# Map each colorspace option to the RGB primaries the linear pixels
# are now in. "sRGB" and "linear" both produce Rec. 709 linear; "HDR"
# (HLG-encoded Rec. 2020 input) produces Rec. 2020 linear.
primaries_name = {
"sRGB": "Rec.709",
"linear": "Rec.709",
"HDR": "Rec.2020",
}.get(colorspace, "Rec.709")
new_blob += _exr_attribute(
"chromaticities",
"chromaticities",
_pack_chromaticities(_CHROMATICITIES[primaries_name]),
)
if not new_blob:
return exr_bytes
# Walk header attributes to find the terminating null byte, and pick up
# dataWindow + compression so we know how many chunks the offset table has.
pos = 8 # past magic (4) + version (4)
data_window = None
compression = 0
while pos < len(exr_bytes) and exr_bytes[pos] != 0:
name_end = exr_bytes.index(b"\x00", pos)
attr_name = exr_bytes[pos:name_end].decode("latin-1", errors="replace")
type_end = exr_bytes.index(b"\x00", name_end + 1)
attr_type = exr_bytes[name_end + 1:type_end].decode("latin-1", errors="replace")
size = struct.unpack("<i", exr_bytes[type_end + 1:type_end + 5])[0]
value_start = type_end + 5
value = exr_bytes[value_start:value_start + size]
if attr_name == "dataWindow" and attr_type == "box2i":
data_window = struct.unpack("<iiii", value) # xMin, yMin, xMax, yMax
elif attr_name == "compression" and attr_type == "compression":
compression = value[0]
pos = value_start + size
if data_window is None:
return exr_bytes # required attribute missing — don't risk corrupting
# Scanlines per chunk by compression, from the OpenEXR spec.
scanlines_per_block = {
0: 1, # NO_COMPRESSION
1: 1, # RLE
2: 1, # ZIPS
3: 16, # ZIP
4: 32, # PIZ
5: 16, # PXR24
6: 32, # B44
7: 32, # B44A
8: 256, # DWAA
9: 256, # DWAB
}.get(compression, 1)
_, y_min, _, y_max = data_window
height = y_max - y_min + 1
num_chunks = (height + scanlines_per_block - 1) // scanlines_per_block
header_end = pos # position of the terminating null byte
table_start = header_end + 1
pixel_start = table_start + num_chunks * 8
delta = len(new_blob)
old_offsets = struct.unpack(f"<{num_chunks}Q", exr_bytes[table_start:pixel_start])
new_table = struct.pack(f"<{num_chunks}Q", *(o + delta for o in old_offsets))
return (
exr_bytes[:header_end] # header attributes
+ new_blob # our new attributes
+ exr_bytes[header_end:table_start] # terminating null byte
+ new_table # shifted offset table
+ exr_bytes[pixel_start:] # pixel data, untouched
)
# ---------------------------------------------------------------------------
# Encoding
# ---------------------------------------------------------------------------
def _encode_image(
img_tensor: torch.Tensor,
file_format: str,
bit_depth: str,
colorspace: str,
) -> bytes:
"""Encode a single HxWxC tensor to PNG or EXR bytes in memory.
For EXR the input is interpreted according to `colorspace` and converted
to scene-linear (EXR's convention) before writing:
"sRGB" → input is sRGB-encoded Rec. 709; apply inverse sRGB EOTF.
"HDR" → input is HLG-encoded Rec. 2020 (BT.2100); apply inverse HLG
OETF to get scene-linear, per BT.2100 Note 5a.
"linear" → input is already scene-linear (Rec. 709 primaries); write
through unchanged. Use this for renderer/compositor output.
For PNG, colorspace selection does not modify pixels — PNG is delivered
sRGB-encoded and there is no PNG path for wide-gamut HDR in this node.
"""
height, width, num_channels = img_tensor.shape
has_alpha = num_channels == 4
spec = _FORMAT_SPECS[(file_format, bit_depth, has_alpha)]
if spec["dtype"] == np.float32:
# EXR path: preserve full range, no clamp.
if colorspace == "sRGB":
img_tensor = srgb_to_linear(img_tensor)
elif colorspace == "HDR":
img_tensor = hlg_to_linear(img_tensor)
img_np = img_tensor.cpu().numpy().astype(np.float32)
else:
# PNG path: quantize to integer range.
scaled = (img_tensor * spec["scale"]).clamp(0, spec["scale"])
img_np = scaled.to(torch.int32).cpu().numpy().astype(spec["dtype"])
# Encode directly via CodecContext. PyAV's `image2` muxer does NOT write to
# BytesIO (it expects a real file path), so we bypass the container entirely.
# For single-frame PNG/EXR the raw codec output IS the file.
codec = av.CodecContext.create(file_format, "w")
codec.width = width
codec.height = height
codec.pix_fmt = spec["stream_fmt"]
codec.time_base = Fraction(1, 1)
frame = av.VideoFrame.from_ndarray(img_np, format=spec["frame_fmt"])
if spec["frame_fmt"] != spec["stream_fmt"]:
frame = frame.reformat(format=spec["stream_fmt"])
frame.pts = 0
frame.time_base = codec.time_base
packets = list(codec.encode(frame)) + list(codec.encode(None)) # flush with None
return b"".join(bytes(p) for p in packets)
# ---------------------------------------------------------------------------
# Node
# ---------------------------------------------------------------------------
class SaveImageAdvanced(IO.ComfyNode):
@classmethod
def define_schema(cls):
return IO.Schema(
node_id="SaveImageAdvanced",
search_aliases=["save", "save image", "export image", "output image", "write image"],
display_name="Save Image (Advanced)",
description="Saves the input images to your ComfyUI output directory.",
category="image",
essentials_category="Basics",
inputs=[
IO.Image.Input("images", tooltip="The images to save."),
IO.String.Input(
"filename_prefix",
default="ComfyUI",
tooltip=(
"The prefix for the file to save. May include formatting tokens "
"such as %date:yyyy-MM-dd% or %Empty Latent Image.width%."
),
),
IO.DynamicCombo.Input(
"image_format",
options=[
IO.DynamicCombo.Option("png", [
IO.Combo.Input("bit_depth", options=["8-bit", "16-bit"],
default="8-bit", advanced=True),
IO.Combo.Input("colorspace", options=["sRGB"],
default="sRGB", advanced=True),
]),
IO.DynamicCombo.Option("exr", [
IO.Combo.Input("bit_depth", options=["32-bit float"],
default="32-bit float", advanced=True),
IO.Combo.Input(
"colorspace",
options=["sRGB", "HDR", "linear"],
default="sRGB",
advanced=True,
tooltip=(
"Colorspace of the input tensor. The EXR is "
"always written as scene-linear in the matching "
"gamut.\n"
" 'sRGB' — input is sRGB-encoded Rec.709; "
"the inverse sRGB EOTF is applied.\n"
" 'HDR' — input is HLG-encoded Rec.2020 "
"(BT.2100); the inverse HLG OETF is applied "
"to get scene-linear light.\n"
" 'linear' — input is already scene-linear "
"(Rec.709 primaries); written through unchanged. "
"Use this for renderer/compositor output."
),
),
]),
],
tooltip="The file format in which to save the image.",
),
],
hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo],
is_output_node=True,
)
@classmethod
def execute(cls, images, filename_prefix: str, image_format: dict) -> IO.NodeOutput:
file_format = image_format["image_format"]
bit_depth = image_format["bit_depth"]
colorspace = image_format.get("colorspace", "sRGB")
output_dir = folder_paths.get_output_directory()
full_output_folder, filename, counter, subfolder, filename_prefix = (
folder_paths.get_save_image_path(
filename_prefix, output_dir, images[0].shape[1], images[0].shape[0]
)
)
prompt = cls.hidden.prompt
extra_pnginfo = cls.hidden.extra_pnginfo
write_metadata = not args.disable_metadata
results = []
for batch_number, image in enumerate(images):
encoded = _encode_image(image, file_format, bit_depth, colorspace)
if write_metadata:
if file_format == "png":
encoded = inject_png_metadata(encoded, prompt, extra_pnginfo)
elif file_format == "exr":
encoded = inject_exr_metadata(encoded, prompt, extra_pnginfo, colorspace)
name = filename.replace("%batch_num%", str(batch_number))
file = f"{name}_{counter:05}.{file_format}"
with open(os.path.join(full_output_folder, file), "wb") as f:
f.write(encoded)
results.append({"filename": file, "subfolder": subfolder, "type": "output"})
counter += 1
return IO.NodeOutput(ui={"images": results})
class ImagesExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@ -842,6 +1249,7 @@ class ImagesExtension(ComfyExtension):
ImageAddNoise,
SaveAnimatedWEBP,
SaveAnimatedPNG,
SaveImageAdvanced,
SaveSVGNode,
ImageStitch,
ResizeAndPadImage,

View File

@ -338,25 +338,8 @@ class LTXVAddGuide(io.ComfyNode):
noise_mask = get_noise_mask(latent)
_, _, latent_length, latent_height, latent_width = latent_image.shape
# For mid-video multi-frame guides, prepend+strip a throwaway first frame so the VAE's "first latent = 1 pixel frame" asymmetry lands on the discarded slot
time_scale_factor = scale_factors[0]
num_frames_to_keep = ((image.shape[0] - 1) // time_scale_factor) * time_scale_factor + 1
resolved_frame_idx = frame_idx
if frame_idx < 0:
_, num_keyframes = get_keyframe_idxs(positive)
resolved_frame_idx = max((latent_length - num_keyframes - 1) * time_scale_factor + 1 + frame_idx, 0)
causal_fix = resolved_frame_idx == 0 or num_frames_to_keep == 1
if not causal_fix:
image = torch.cat([image[:1], image], dim=0)
image, t = cls.encode(vae, latent_width, latent_height, image, scale_factors)
if not causal_fix:
t = t[:, :, 1:, :, :]
image = image[1:]
frame_idx, latent_idx = cls.get_latent_index(positive, latent_length, len(image), frame_idx, scale_factors)
assert latent_idx + t.shape[2] <= latent_length, "Conditioning frames exceed the length of the latent sequence."
@ -369,7 +352,6 @@ class LTXVAddGuide(io.ComfyNode):
t,
strength,
scale_factors,
causal_fix=causal_fix,
)
# Track this guide for per-reference attention control.

View File

@ -40,13 +40,23 @@ def composite(destination, source, x, y, mask = None, multiplier = 8, resize_sou
inverse_mask = torch.ones_like(mask) - mask
source_portion = mask * source[..., :visible_height, :visible_width]
destination_portion = inverse_mask * destination[..., top:bottom, left:right]
source_rgb = source[:, :3, :visible_height, :visible_width]
dest_slice = destination[..., top:bottom, left:right]
if destination.shape[1] == 4:
if torch.max(dest_slice) == 0:
destination[:, :3, top:bottom, left:right] = source_rgb
destination[:, 3:4, top:bottom, left:right] = mask
else:
destination[:, :3, top:bottom, left:right] = (mask * source_rgb) + (inverse_mask * dest_slice[:, :3])
destination[:, 3:4, top:bottom, left:right] = torch.max(mask, dest_slice[:, 3:4])
else:
source_portion = mask * source_rgb
destination_portion = inverse_mask * dest_slice
destination[..., top:bottom, left:right] = source_portion + destination_portion
destination[..., top:bottom, left:right] = source_portion + destination_portion
return destination
class LatentCompositeMasked(IO.ComfyNode):
@classmethod
def define_schema(cls):
@ -85,18 +95,23 @@ class ImageCompositeMasked(IO.ComfyNode):
display_name="Image Composite Masked",
category="image",
inputs=[
IO.Image.Input("destination"),
IO.Image.Input("source"),
IO.Int.Input("x", default=0, min=0, max=nodes.MAX_RESOLUTION, step=1),
IO.Int.Input("y", default=0, min=0, max=nodes.MAX_RESOLUTION, step=1),
IO.Boolean.Input("resize_source", default=False),
IO.Image.Input("destination", optional=True),
IO.Mask.Input("mask", optional=True),
],
outputs=[IO.Image.Output()],
)
@classmethod
def execute(cls, destination, source, x, y, resize_source, mask = None) -> IO.NodeOutput:
def execute(cls, source, x, y, resize_source, destination = None, mask = None) -> IO.NodeOutput:
if destination is None: # transparent rgba
B, H, W, C = source.shape
destination = torch.zeros((B, H, W, 4), dtype=source.dtype, device=source.device)
if C == 3:
source = torch.nn.functional.pad(source, (0, 1), value=1.0)
destination, source = node_helpers.image_alpha_fix(destination, source)
destination = destination.clone().movedim(-1, 1)
output = composite(destination, source.movedim(-1, 1), x, y, mask, 1, resize_source).movedim(1, -1)

View File

@ -2071,6 +2071,7 @@ paths:
type: integer
description: Number of assets marked as missing
# ===========================================================================
# Cloud-runtime FE-facing operations
#
@ -2121,11 +2122,7 @@ paths:
operationId: getCloudJobStatus
tags: [queue]
summary: Get status of a cloud job
deprecated: true
description: |
**Deprecated.** This endpoint is superseded by `GET /api/jobs/{job_id}`.
Clients should migrate; the endpoint is retained for backward
compatibility but will be removed in a future release.
description: "[cloud-only] Returns the current execution status of a cloud job."
x-runtime: [cloud]
parameters:
- name: job_id
@ -2195,11 +2192,7 @@ paths:
operationId: getHistoryV2
tags: [history]
summary: Get paginated execution history (v2)
deprecated: true
description: |
**Deprecated.** This endpoint is superseded by `GET /api/jobs`.
Clients should migrate; the endpoint is retained for backward
compatibility but will be removed in a future release.
description: "[cloud-only] Returns a paginated list of execution history entries in the v2 format, with richer metadata than the legacy history endpoint."
x-runtime: [cloud]
parameters:
- name: limit
@ -2238,11 +2231,7 @@ paths:
operationId: getHistoryV2ByPromptId
tags: [history]
summary: Get v2 history for a specific prompt
deprecated: true
description: |
**Deprecated.** This endpoint is superseded by `GET /api/jobs/{prompt_id}`.
Clients should migrate; the endpoint is retained for backward
compatibility but will be removed in a future release.
description: "[cloud-only] Returns the v2 history entry for a specific prompt execution."
x-runtime: [cloud]
parameters:
- name: prompt_id
@ -2277,12 +2266,7 @@ paths:
operationId: getCloudLogs
tags: [system]
summary: Get cloud execution logs
deprecated: true
description: |
**Deprecated.** This endpoint returns a static placeholder response and
provides no real log data. It is retained only to avoid breaking clients
that still call it. Clients should remove their dependency; the endpoint
will be removed in a future release.
description: "[cloud-only] Returns execution logs for the authenticated user's cloud jobs."
x-runtime: [cloud]
parameters:
- name: job_id
@ -5386,12 +5370,7 @@ paths:
operationId: viewVideo
tags: [view]
summary: View or download a video file
deprecated: true
description: |
**Deprecated.** This endpoint is an alias of `GET /api/view` added for
legacy history-queue video playback. Callers should use `/api/view`
directly; the endpoint is retained for backward compatibility but will
be removed in a future release.
description: "[cloud-only] Serves a video file from the output directory. Used by the frontend video player."
x-runtime: [cloud]
parameters:
- name: filename
@ -5544,6 +5523,7 @@ paths:
schema:
$ref: "#/components/schemas/CloudError"
components:
parameters:
ComfyUserHeader:
@ -6895,6 +6875,7 @@ components:
error:
type: string
# -------------------------------------------------------------------
# Cloud-runtime schemas
#

View File

@ -1,6 +1,6 @@
comfyui-frontend-package==1.43.18
comfyui-workflow-templates==0.9.73
comfyui-embedded-docs==0.5.0
comfyui-embedded-docs==0.4.4
torch
torchsde
torchvision