Compare commits


1 Commit

Author SHA1 Message Date
0ac6480c7e Save Image advanced node. 2026-05-12 01:33:04 -04:00
20 changed files with 655 additions and 853 deletions

View File

@@ -89,12 +89,3 @@ rules:
then:
field: description
function: truthy
overrides:
# /ws uses HTTP 101 (Switching Protocols) — a legitimate response for a
# WebSocket upgrade, but not a 2xx, so operation-success-response fires
# as a false positive. OpenAPI 3.x has no native WebSocket support.
- files:
- "openapi.yaml#/paths/~1ws"
rules:
operation-success-response: off

View File

@@ -1443,7 +1443,7 @@ class HiDreamO1(supported_models_base.BASE):
}
latent_format = latent_formats.HiDreamO1Pixel
memory_usage_factor = 0.033
memory_usage_factor = 0.6
# fp16 not supported: LM MLP down_proj activations fp16 overflow, causing NaNs
supported_inference_dtypes = [torch.bfloat16, torch.float32]

View File

@@ -1164,18 +1164,12 @@ def tiled_scale_multidim(samples, function, tile=(64, 64), overlap=8, upscale_am
o = out
o_d = out_div
ps_view = ps
mask_view = mask
for d in range(dims):
l = min(ps_view.shape[d + 2], o.shape[d + 2] - upscaled[d])
o = o.narrow(d + 2, upscaled[d], l)
o_d = o_d.narrow(d + 2, upscaled[d], l)
if l < ps_view.shape[d + 2]:
ps_view = ps_view.narrow(d + 2, 0, l)
mask_view = mask_view.narrow(d + 2, 0, l)
o = o.narrow(d + 2, upscaled[d], mask.shape[d + 2])
o_d = o_d.narrow(d + 2, upscaled[d], mask.shape[d + 2])
o.add_(ps_view * mask_view)
o_d.add_(mask_view)
o.add_(ps * mask)
o_d.add_(mask)
if pbar is not None:
pbar.update(1)
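For reference, a self-contained sketch of the edge-clipping idiom on the removed side (shapes here are made up for illustration): narrow() produces views, so trimming the destination, tile, and mask to the overlapping extent lets the in-place add blend only the region that actually fits on the canvas.

import torch

out = torch.zeros(1, 3, 10, 10)       # destination canvas
ps = torch.ones(1, 3, 4, 4)           # upscaled tile
mask = torch.full((1, 1, 4, 4), 0.5)  # feathering mask
upscaled = (8, 8)                     # tile origin; overhangs by 2 per dim

o, ps_view, mask_view = out, ps, mask
for d in range(2):
    l = min(ps_view.shape[d + 2], o.shape[d + 2] - upscaled[d])
    o = o.narrow(d + 2, upscaled[d], l)
    if l < ps_view.shape[d + 2]:
        ps_view = ps_view.narrow(d + 2, 0, l)
        mask_view = mask_view.narrow(d + 2, 0, l)
o.add_(ps_view * mask_view)  # writes through the views into `out`
assert out[0, 0, 9, 9] == 0.5 and out[0, 0, 0, 0] == 0.0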

View File

@@ -12,24 +12,9 @@ class VOXEL:
class MESH:
def __init__(self, vertices: torch.Tensor, faces: torch.Tensor,
uvs: torch.Tensor | None = None,
vertex_colors: torch.Tensor | None = None,
texture: torch.Tensor | None = None,
vertex_counts: torch.Tensor | None = None,
face_counts: torch.Tensor | None = None):
assert (vertex_counts is None) == (face_counts is None), \
"vertex_counts and face_counts must be provided together (both or neither)"
self.vertices = vertices # vertices: (B, N, 3)
self.faces = faces # faces: (B, M, 3)
self.uvs = uvs # uvs: (B, N, 2)
self.vertex_colors = vertex_colors # vertex_colors: (B, N, 3 or 4)
self.texture = texture # texture: (B, H, W, 3)
# When vertices/faces are zero-padded to a common N/M across the batch (variable-size mesh batch),
# these hold the real per-item lengths (B,). None means rows are uniform and no slicing is needed.
self.vertex_counts = vertex_counts
self.face_counts = face_counts
def __init__(self, vertices: torch.Tensor, faces: torch.Tensor):
self.vertices = vertices
self.faces = faces
class File3D:

View File

@@ -1,75 +0,0 @@
from enum import Enum
from typing import Literal
from pydantic import BaseModel, Field
class AnthropicRole(str, Enum):
user = "user"
assistant = "assistant"
class AnthropicTextContent(BaseModel):
type: Literal["text"] = "text"
text: str = Field(...)
class AnthropicImageSourceBase64(BaseModel):
type: Literal["base64"] = "base64"
media_type: str = Field(..., description="MIME type of the image, e.g. image/png, image/jpeg")
data: str = Field(..., description="Base64-encoded image data")
class AnthropicImageSourceUrl(BaseModel):
type: Literal["url"] = "url"
url: str = Field(...)
class AnthropicImageContent(BaseModel):
type: Literal["image"] = "image"
source: AnthropicImageSourceBase64 | AnthropicImageSourceUrl = Field(...)
class AnthropicMessage(BaseModel):
role: AnthropicRole = Field(...)
content: list[AnthropicTextContent | AnthropicImageContent] = Field(...)
class AnthropicMessagesRequest(BaseModel):
model: str = Field(...)
messages: list[AnthropicMessage] = Field(...)
max_tokens: int = Field(..., ge=1)
system: str | None = Field(None, description="Top-level system prompt")
temperature: float | None = Field(None, ge=0.0, le=1.0)
top_p: float | None = Field(None, ge=0.0, le=1.0)
top_k: int | None = Field(None, ge=0)
stop_sequences: list[str] | None = Field(None)
class AnthropicResponseTextBlock(BaseModel):
type: Literal["text"] = "text"
text: str = Field(...)
class AnthropicCacheCreationUsage(BaseModel):
ephemeral_5m_input_tokens: int | None = Field(None)
ephemeral_1h_input_tokens: int | None = Field(None)
class AnthropicMessagesUsage(BaseModel):
input_tokens: int | None = Field(None)
output_tokens: int | None = Field(None)
cache_creation_input_tokens: int | None = Field(None)
cache_read_input_tokens: int | None = Field(None)
cache_creation: AnthropicCacheCreationUsage | None = Field(None)
class AnthropicMessagesResponse(BaseModel):
id: str | None = Field(None)
type: str | None = Field(None)
role: str | None = Field(None)
model: str | None = Field(None)
content: list[AnthropicResponseTextBlock] | None = Field(None)
stop_reason: str | None = Field(None)
stop_sequence: str | None = Field(None)
usage: AnthropicMessagesUsage | None = Field(None)

View File

@@ -1,245 +0,0 @@
"""API Nodes for Anthropic Claude (Messages API). See: https://docs.anthropic.com/en/api/messages"""
from typing_extensions import override
from comfy_api.latest import IO, ComfyExtension, Input
from comfy_api_nodes.apis.anthropic import (
AnthropicImageContent,
AnthropicImageSourceUrl,
AnthropicMessage,
AnthropicMessagesRequest,
AnthropicMessagesResponse,
AnthropicRole,
AnthropicTextContent,
)
from comfy_api_nodes.util import (
ApiEndpoint,
get_number_of_images,
sync_op,
upload_images_to_comfyapi,
validate_string,
)
ANTHROPIC_MESSAGES_ENDPOINT = "/proxy/anthropic/v1/messages"
ANTHROPIC_IMAGE_MAX_PIXELS = 1568 * 1568
CLAUDE_MAX_IMAGES = 20
CLAUDE_MODELS: dict[str, str] = {
"Opus 4.7": "claude-opus-4-7",
"Opus 4.6": "claude-opus-4-6",
"Sonnet 4.6": "claude-sonnet-4-6",
"Sonnet 4.5": "claude-sonnet-4-5-20250929",
"Haiku 4.5": "claude-haiku-4-5-20251001",
}
def _claude_model_inputs():
return [
IO.Int.Input(
"max_tokens",
default=16000,
min=32,
max=32000,
tooltip="Maximum number of tokens to generate before stopping.",
advanced=True,
),
IO.Float.Input(
"temperature",
default=1.0,
min=0.0,
max=1.0,
step=0.01,
tooltip="Controls randomness. 0.0 is deterministic, 1.0 is most random.",
advanced=True,
),
]
def _model_price_per_million(model: str) -> tuple[float, float] | None:
"""Return (input_per_1M, output_per_1M) USD for a Claude model, or None if unknown."""
if "opus-4-7" in model or "opus-4-6" in model or "opus-4-5" in model:
return 5.0, 25.0
if "sonnet-4" in model:
return 3.0, 15.0
if "haiku-4-5" in model:
return 1.0, 5.0
return None
def calculate_tokens_price(response: AnthropicMessagesResponse) -> float | None:
"""Compute approximate USD price from response usage. Server-side billing is authoritative."""
if not response.usage or not response.model:
return None
rates = _model_price_per_million(response.model)
if rates is None:
return None
input_rate, output_rate = rates
input_tokens = response.usage.input_tokens or 0
output_tokens = response.usage.output_tokens or 0
cache_read = response.usage.cache_read_input_tokens or 0
cache_5m = 0
cache_1h = 0
if response.usage.cache_creation:
cache_5m = response.usage.cache_creation.ephemeral_5m_input_tokens or 0
cache_1h = response.usage.cache_creation.ephemeral_1h_input_tokens or 0
total = (
input_tokens * input_rate
+ output_tokens * output_rate
+ cache_read * input_rate * 0.1
+ cache_5m * input_rate * 1.25
+ cache_1h * input_rate * 2.0
)
return total / 1_000_000.0
def _get_text_from_response(response: AnthropicMessagesResponse) -> str:
if not response.content:
return ""
return "\n".join(block.text for block in response.content if block.text)
async def _build_image_content_blocks(
cls: type[IO.ComfyNode],
image_tensors: list[Input.Image],
) -> list[AnthropicImageContent]:
urls = await upload_images_to_comfyapi(
cls,
image_tensors,
max_images=CLAUDE_MAX_IMAGES,
total_pixels=ANTHROPIC_IMAGE_MAX_PIXELS,
wait_label="Uploading reference images",
)
return [AnthropicImageContent(source=AnthropicImageSourceUrl(url=url)) for url in urls]
class ClaudeNode(IO.ComfyNode):
"""Generate text responses from an Anthropic Claude model."""
@classmethod
def define_schema(cls):
return IO.Schema(
node_id="ClaudeNode",
display_name="Anthropic Claude",
category="api node/text/Anthropic",
essentials_category="Text Generation",
description="Generate text responses with Anthropic's Claude models. "
"Provide a text prompt and optionally one or more images for multimodal context.",
inputs=[
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Text input to the model.",
),
IO.DynamicCombo.Input(
"model",
options=[IO.DynamicCombo.Option(label, _claude_model_inputs()) for label in CLAUDE_MODELS],
tooltip="The Claude model used to generate the response.",
),
IO.Int.Input(
"seed",
default=0,
min=0,
max=2147483647,
control_after_generate=True,
tooltip="Seed controls whether the node should re-run; "
"results are non-deterministic regardless of seed.",
),
IO.Autogrow.Input(
"images",
template=IO.Autogrow.TemplateNames(
IO.Image.Input("image"),
names=[f"image_{i}" for i in range(1, CLAUDE_MAX_IMAGES + 1)],
min=0,
),
tooltip=f"Optional image(s) to use as context for the model. Up to {CLAUDE_MAX_IMAGES} images.",
),
IO.String.Input(
"system_prompt",
multiline=True,
default="",
optional=True,
advanced=True,
tooltip="Foundational instructions that dictate the model's behavior.",
),
],
outputs=[IO.String.Output()],
hidden=[
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
price_badge=IO.PriceBadge(
depends_on=IO.PriceBadgeDepends(widgets=["model"]),
expr="""
(
$m := widgets.model;
$contains($m, "opus") ? {
"type": "list_usd",
"usd": [0.005, 0.025],
"format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
}
: $contains($m, "sonnet") ? {
"type": "list_usd",
"usd": [0.003, 0.015],
"format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
}
: $contains($m, "haiku") ? {
"type": "list_usd",
"usd": [0.001, 0.005],
"format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
}
: {"type":"text", "text":"Token-based"}
)
""",
),
)
@classmethod
async def execute(
cls,
prompt: str,
model: dict,
seed: int,
images: dict | None = None,
system_prompt: str = "",
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=True, min_length=1)
model_label = model["model"]
max_tokens = model["max_tokens"]
temperature = model["temperature"]
image_tensors: list[Input.Image] = [t for t in (images or {}).values() if t is not None]
if sum(get_number_of_images(t) for t in image_tensors) > CLAUDE_MAX_IMAGES:
raise ValueError(f"Up to {CLAUDE_MAX_IMAGES} images are supported per request.")
content: list[AnthropicTextContent | AnthropicImageContent] = []
if image_tensors:
content.extend(await _build_image_content_blocks(cls, image_tensors))
content.append(AnthropicTextContent(text=prompt))
response = await sync_op(
cls,
ApiEndpoint(path=ANTHROPIC_MESSAGES_ENDPOINT, method="POST"),
response_model=AnthropicMessagesResponse,
data=AnthropicMessagesRequest(
model=CLAUDE_MODELS[model_label],
max_tokens=max_tokens,
messages=[AnthropicMessage(role=AnthropicRole.user, content=content)],
system=system_prompt or None,
temperature=temperature,
),
price_extractor=calculate_tokens_price,
)
return IO.NodeOutput(_get_text_from_response(response) or "Empty response from Claude model.")
class AnthropicExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [ClaudeNode]
async def comfy_entrypoint() -> AnthropicExtension:
return AnthropicExtension()
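A quick sanity check of the pricing helper above, with illustrative token counts (Sonnet is billed at $3 / $15 per million tokens per _model_price_per_million):

from comfy_api_nodes.apis.anthropic import (
    AnthropicMessagesResponse,
    AnthropicMessagesUsage,
)

resp = AnthropicMessagesResponse(
    model="claude-sonnet-4-5-20250929",
    usage=AnthropicMessagesUsage(input_tokens=1200, output_tokens=400),
)
# (1200 * 3.0 + 400 * 15.0) / 1_000_000 = 0.0096 USD
assert abs(calculate_tokens_price(resp) - 0.0096) < 1e-12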

View File

@@ -143,7 +143,7 @@ class QuiverTextToSVGNode(IO.ComfyNode):
if reference_images:
references = []
for key in reference_images:
url = await upload_image_to_comfyapi(cls, reference_images[key], mime_type="image/png")
url = await upload_image_to_comfyapi(cls, reference_images[key])
references.append(QuiverImageObject(url=url))
if len(references) > 4:
raise ValueError("Maximum 4 reference images are allowed.")
@@ -252,7 +252,7 @@ class QuiverImageToSVGNode(IO.ComfyNode):
model: dict,
seed: int,
) -> IO.NodeOutput:
image_url = await upload_image_to_comfyapi(cls, image, mime_type="image/png")
image_url = await upload_image_to_comfyapi(cls, image)
response = await sync_op(
cls,

View File

@@ -297,7 +297,6 @@ class LoadAudio(IO.ComfyNode):
@classmethod
def define_schema(cls):
input_dir = folder_paths.get_input_directory()
os.makedirs(input_dir, exist_ok=True)
files = folder_paths.filter_files_content_types(os.listdir(input_dir), ["audio", "video"])
return IO.Schema(
node_id="LoadAudio",

View File

@@ -1,7 +1,12 @@
import torch
import os
import json
import struct
import numpy as np
from comfy.ldm.modules.diffusionmodules.mmdit import get_1d_sincos_pos_embed_from_grid_torch
import folder_paths
import comfy.model_management
from comfy_extras.nodes_save_3d import pack_variable_mesh_batch
from comfy.cli_args import args
from typing_extensions import override
from comfy_api.latest import ComfyExtension, IO, Types
from comfy_api.latest._util import MESH, VOXEL # only for backward compatibility if someone imports it from this file (will be removed later) # noqa
@@ -439,9 +444,7 @@ class VoxelToMeshBasic(IO.ComfyNode):
vertices.append(v)
faces.append(f)
if vertices and all(v.shape == vertices[0].shape for v in vertices) and all(f.shape == faces[0].shape for f in faces):
return IO.NodeOutput(Types.MESH(torch.stack(vertices), torch.stack(faces)))
return IO.NodeOutput(pack_variable_mesh_batch(vertices, faces))
return IO.NodeOutput(Types.MESH(torch.stack(vertices), torch.stack(faces)))
decode = execute # TODO: remove
@@ -478,13 +481,206 @@ class VoxelToMesh(IO.ComfyNode):
vertices.append(v)
faces.append(f)
if vertices and all(v.shape == vertices[0].shape for v in vertices) and all(f.shape == faces[0].shape for f in faces):
return IO.NodeOutput(Types.MESH(torch.stack(vertices), torch.stack(faces)))
return IO.NodeOutput(pack_variable_mesh_batch(vertices, faces))
return IO.NodeOutput(Types.MESH(torch.stack(vertices), torch.stack(faces)))
decode = execute # TODO: remove
def save_glb(vertices, faces, filepath, metadata=None):
"""
Save PyTorch tensor vertices and faces as a GLB file without external dependencies.
Parameters:
vertices: torch.Tensor of shape (N, 3) - The vertex coordinates
faces: torch.Tensor of shape (M, 3) - The face indices (triangle faces)
filepath: str - Output filepath (should end with .glb)
metadata: dict - Optional asset.extras metadata
"""
# Convert tensors to numpy arrays
vertices_np = vertices.cpu().numpy().astype(np.float32)
faces_np = faces.cpu().numpy().astype(np.uint32)
vertices_buffer = vertices_np.tobytes()
indices_buffer = faces_np.tobytes()
def pad_to_4_bytes(buffer):
padding_length = (4 - (len(buffer) % 4)) % 4
return buffer + b'\x00' * padding_length
vertices_buffer_padded = pad_to_4_bytes(vertices_buffer)
indices_buffer_padded = pad_to_4_bytes(indices_buffer)
buffer_data = vertices_buffer_padded + indices_buffer_padded
vertices_byte_length = len(vertices_buffer)
vertices_byte_offset = 0
indices_byte_length = len(indices_buffer)
indices_byte_offset = len(vertices_buffer_padded)
gltf = {
"asset": {"version": "2.0", "generator": "ComfyUI"},
"buffers": [
{
"byteLength": len(buffer_data)
}
],
"bufferViews": [
{
"buffer": 0,
"byteOffset": vertices_byte_offset,
"byteLength": vertices_byte_length,
"target": 34962 # ARRAY_BUFFER
},
{
"buffer": 0,
"byteOffset": indices_byte_offset,
"byteLength": indices_byte_length,
"target": 34963 # ELEMENT_ARRAY_BUFFER
}
],
"accessors": [
{
"bufferView": 0,
"byteOffset": 0,
"componentType": 5126, # FLOAT
"count": len(vertices_np),
"type": "VEC3",
"max": vertices_np.max(axis=0).tolist(),
"min": vertices_np.min(axis=0).tolist()
},
{
"bufferView": 1,
"byteOffset": 0,
"componentType": 5125, # UNSIGNED_INT
"count": faces_np.size,
"type": "SCALAR"
}
],
"meshes": [
{
"primitives": [
{
"attributes": {
"POSITION": 0
},
"indices": 1,
"mode": 4 # TRIANGLES
}
]
}
],
"nodes": [
{
"mesh": 0
}
],
"scenes": [
{
"nodes": [0]
}
],
"scene": 0
}
if metadata is not None:
gltf["asset"]["extras"] = metadata
# Convert the JSON to bytes
gltf_json = json.dumps(gltf).encode('utf8')
def pad_json_to_4_bytes(buffer):
padding_length = (4 - (len(buffer) % 4)) % 4
return buffer + b' ' * padding_length
gltf_json_padded = pad_json_to_4_bytes(gltf_json)
# Create the GLB header
# Magic glTF
glb_header = struct.pack('<4sII', b'glTF', 2, 12 + 8 + len(gltf_json_padded) + 8 + len(buffer_data))
# Create JSON chunk header (chunk type 0)
json_chunk_header = struct.pack('<II', len(gltf_json_padded), 0x4E4F534A) # "JSON" in little endian
# Create BIN chunk header (chunk type 1)
bin_chunk_header = struct.pack('<II', len(buffer_data), 0x004E4942) # "BIN\0" in little endian
# Write the GLB file
with open(filepath, 'wb') as f:
f.write(glb_header)
f.write(json_chunk_header)
f.write(gltf_json_padded)
f.write(bin_chunk_header)
f.write(buffer_data)
return filepath
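To make the container layout concrete, here is a minimal reader sketch (assuming a well-formed file as written above): a 12-byte header, then a length/type-prefixed JSON chunk, then the BIN chunk.

import json
import struct

def read_glb_json(filepath):
    # Parse just enough of a GLB to recover the JSON chunk.
    with open(filepath, "rb") as f:
        magic, version, _total_length = struct.unpack('<4sII', f.read(12))
        assert magic == b'glTF' and version == 2
        json_length, chunk_type = struct.unpack('<II', f.read(8))
        assert chunk_type == 0x4E4F534A  # "JSON"
        return json.loads(f.read(json_length))  # tolerates space padding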
class SaveGLB(IO.ComfyNode):
@classmethod
def define_schema(cls):
return IO.Schema(
node_id="SaveGLB",
display_name="Save 3D Model",
search_aliases=["export 3d model", "save mesh"],
category="3d",
essentials_category="Basics",
is_output_node=True,
inputs=[
IO.MultiType.Input(
IO.Mesh.Input("mesh"),
types=[
IO.File3DGLB,
IO.File3DGLTF,
IO.File3DOBJ,
IO.File3DFBX,
IO.File3DSTL,
IO.File3DUSDZ,
IO.File3DAny,
],
tooltip="Mesh or 3D file to save",
),
IO.String.Input("filename_prefix", default="3d/ComfyUI"),
],
hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo]
)
@classmethod
def execute(cls, mesh: Types.MESH | Types.File3D, filename_prefix: str) -> IO.NodeOutput:
full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, folder_paths.get_output_directory())
results = []
metadata = {}
if not args.disable_metadata:
if cls.hidden.prompt is not None:
metadata["prompt"] = json.dumps(cls.hidden.prompt)
if cls.hidden.extra_pnginfo is not None:
for x in cls.hidden.extra_pnginfo:
metadata[x] = json.dumps(cls.hidden.extra_pnginfo[x])
if isinstance(mesh, Types.File3D):
# Handle File3D input - save BytesIO data to output folder
ext = mesh.format or "glb"
f = f"{filename}_{counter:05}_.{ext}"
mesh.save_to(os.path.join(full_output_folder, f))
results.append({
"filename": f,
"subfolder": subfolder,
"type": "output"
})
else:
# Handle Mesh input - save vertices and faces as GLB
for i in range(mesh.vertices.shape[0]):
f = f"{filename}_{counter:05}_.glb"
save_glb(mesh.vertices[i], mesh.faces[i], os.path.join(full_output_folder, f), metadata)
results.append({
"filename": f,
"subfolder": subfolder,
"type": "output"
})
counter += 1
return IO.NodeOutput(ui={"3d": results})
class Hunyuan3dExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@@ -495,6 +691,7 @@ class Hunyuan3dExtension(ComfyExtension):
VAEDecodeHunyuan3D,
VoxelToMeshBasic,
VoxelToMesh,
SaveGLB,
]

View File

@@ -3,15 +3,23 @@ from __future__ import annotations
import nodes
import folder_paths
import av
import json
import os
import re
import math
import numpy as np
import struct
import torch
import zlib
import comfy.utils
from fractions import Fraction
from server import PromptServer
from comfy_api.latest import ComfyExtension, IO, UI
from comfy.cli_args import args
from typing_extensions import override
SVG = IO.SVG.Type # TODO: temporary solution for backward compatibility, will be removed later.
@@ -830,6 +838,405 @@ class ImageMergeTileList(IO.ComfyNode):
return IO.NodeOutput(merged_image)
# ---------------------------------------------------------------------------
# Format specifications
# ---------------------------------------------------------------------------
# Maps (file_format, bit_depth, has_alpha) -> (numpy dtype scale, av pixel format,
# stream pix_fmt). Keeps the encode path declarative instead of branchy.
_FORMAT_SPECS = {
("png", "8-bit", False): {"scale": 255.0, "dtype": np.uint8, "frame_fmt": "rgb24", "stream_fmt": "rgb24"},
("png", "8-bit", True): {"scale": 255.0, "dtype": np.uint8, "frame_fmt": "rgba", "stream_fmt": "rgba"},
("png", "16-bit", False): {"scale": 65535.0, "dtype": np.uint16, "frame_fmt": "rgb48le", "stream_fmt": "rgb48be"},
("png", "16-bit", True): {"scale": 65535.0, "dtype": np.uint16, "frame_fmt": "rgba64le", "stream_fmt": "rgba64be"},
("exr", "32-bit float", False): {"scale": 1.0, "dtype": np.float32, "frame_fmt": "gbrpf32le", "stream_fmt": "gbrpf32le"},
("exr", "32-bit float", True): {"scale": 1.0, "dtype": np.float32, "frame_fmt": "gbrapf32le", "stream_fmt": "gbrapf32le"},
}
# ---------------------------------------------------------------------------
# Color transforms
# ---------------------------------------------------------------------------
def srgb_to_linear(t: torch.Tensor) -> torch.Tensor:
"""Inverse sRGB EOTF (IEC 61966-2-1). Operates on RGB channels only;
alpha (if present as the 4th channel) is passed through unchanged."""
if t.shape[-1] == 4:
rgb, alpha = t[..., :3], t[..., 3:]
return torch.cat([srgb_to_linear(rgb), alpha], dim=-1)
# Piecewise: linear toe below 0.04045, gamma curve above.
low = t / 12.92
high = ((t.clamp(min=0.0) + 0.055) / 1.055) ** 2.4
return torch.where(t <= 0.04045, low, high)
# HLG OETF constants from BT.2100 Table 5.
_HLG_A = 0.17883277
_HLG_B = 0.28466892
_HLG_C = 0.55991072928 # = 0.5 - a*ln(4*a)
def hlg_to_linear(t: torch.Tensor) -> torch.Tensor:
"""Inverse HLG OETF (BT.2100). Maps a non-linear HLG signal in [0, 1] to
*scene*-linear light in [0, 1]. Per BT.2100 Note 5a, this is the correct
transform when converting HLG to a linear scene-light representation
(rather than display-light, which would also involve the HLG OOTF).
Operates on RGB channels only; alpha is passed through unchanged."""
if t.shape[-1] == 4:
rgb, alpha = t[..., :3], t[..., 3:]
return torch.cat([hlg_to_linear(rgb), alpha], dim=-1)
# Piecewise: sqrt branch below 0.5, log branch above. No clamp in the log
# branch: exp() is finite for any finite argument, and clamping to _HLG_C
# would flatten inputs in (0.5, _HLG_C], breaking continuity at 0.5.
# Values above 1.0 are allowed and extrapolate naturally.
low = (t ** 2) / 3.0
high = (torch.exp((t - _HLG_C) / _HLG_A) + _HLG_B) / 12.0
return torch.where(t <= 0.5, low, high)
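Illustrative spot checks for both transforms (using the un-clamped log branch above): each inverse is continuous at its piecewise breakpoint and hits the expected anchor values.

import torch

eps = 1e-6
x = torch.tensor([0.04045 - eps, 0.04045 + eps, 1.0])
lin = srgb_to_linear(x)
assert torch.allclose(lin[0], lin[1], atol=1e-5)  # continuous at 0.04045
assert torch.allclose(lin[2], torch.tensor(1.0))  # sRGB 1.0 -> linear 1.0

y = torch.tensor([0.5 - eps, 0.5, 0.5 + eps])
hl = hlg_to_linear(y)
assert torch.allclose(hl[0], hl[2], atol=1e-5)          # continuous at 0.5
assert torch.allclose(hl[1], torch.tensor(1.0 / 12.0))  # HLG 0.5 -> 1/12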
# ---------------------------------------------------------------------------
# Metadata injection
# ---------------------------------------------------------------------------
_PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n"
def _png_chunk(chunk_type: bytes, data: bytes) -> bytes:
"""Build a single PNG chunk: length | type | data | CRC32(type+data)."""
crc = zlib.crc32(chunk_type + data) & 0xFFFFFFFF
return struct.pack(">I", len(data)) + chunk_type + data + struct.pack(">I", crc)
def _png_text_chunk(keyword: str, text: str) -> bytes:
"""tEXt chunk: latin-1 keyword + NUL + latin-1 text."""
payload = keyword.encode("latin-1") + b"\x00" + text.encode("latin-1", errors="replace")
return _png_chunk(b"tEXt", payload)
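A layout check (illustrative) for one serialized chunk: big-endian length, type, payload, then CRC32 over type plus payload.

import struct
import zlib

chunk = _png_text_chunk("prompt", "{}")
payload = b"prompt\x00{}"
expected = (struct.pack(">I", len(payload)) + b"tEXt" + payload
            + struct.pack(">I", zlib.crc32(b"tEXt" + payload)))
assert chunk == expected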
def inject_png_metadata(png_bytes: bytes, prompt: dict | None, extra_pnginfo: dict | None) -> bytes:
"""Insert ComfyUI prompt/workflow as tEXt chunks right after IHDR."""
if not png_bytes.startswith(_PNG_SIGNATURE):
return png_bytes
chunks: list[bytes] = []
if prompt is not None:
chunks.append(_png_text_chunk("prompt", json.dumps(prompt)))
if extra_pnginfo:
for key, value in extra_pnginfo.items():
chunks.append(_png_text_chunk(key, json.dumps(value)))
if not chunks:
return png_bytes
# IHDR is always the first chunk; insert ours immediately after it.
ihdr_length = struct.unpack(">I", png_bytes[8:12])[0]
ihdr_end = 8 + 8 + ihdr_length + 4 # signature + (len+type) + data + crc
return png_bytes[:ihdr_end] + b"".join(chunks) + png_bytes[ihdr_end:]
# Standard chromaticities (CIE 1931 xy) for the colorspaces this node writes.
# Each tuple is (Rx, Ry, Gx, Gy, Bx, By, Wx, Wy). All share D65 white point.
_CHROMATICITIES = {
# ITU-R BT.709 / sRGB primaries
"Rec.709": (0.6400, 0.3300, 0.3000, 0.6000, 0.1500, 0.0600, 0.3127, 0.3290),
# ITU-R BT.2020 (UHDTV / wide-gamut HDR) primaries
"Rec.2020": (0.7080, 0.2920, 0.1700, 0.7970, 0.1310, 0.0460, 0.3127, 0.3290),
}
def _pack_chromaticities(primaries: tuple) -> bytes:
"""Serialize 8 chromaticity floats into the EXR `chromaticities` payload."""
return struct.pack("<8f", *primaries)
def _exr_attribute(name: str, attr_type: str, value: bytes) -> bytes:
"""Serialize one EXR header attribute: name\\0 type\\0 size:int32 value."""
return (
name.encode("utf-8") + b"\x00"
+ attr_type.encode("utf-8") + b"\x00"
+ struct.pack("<i", len(value))
+ value
)
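And the corresponding layout check for a single serialized EXR attribute (illustrative values):

import struct

blob = _exr_attribute("prompt", "string", b"{}")
assert blob == b"prompt\x00string\x00" + struct.pack("<i", 2) + b"{}"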
def inject_exr_metadata(
exr_bytes: bytes,
prompt: dict | None,
extra_pnginfo: dict | None,
colorspace: str | None = None,
) -> bytes:
"""Insert ComfyUI metadata and color-space info into an EXR header.
Color: EXR pixels are linear by convention. The standard way to describe
their RGB→XYZ relationship is the `chromaticities` attribute. We pick the
primaries that match what the user told us their input was:
colorspace="sRGB" → Rec. 709 / sRGB primaries (D65)
colorspace="HDR" → Rec. 2020 / BT.2100 primaries (D65)
Pixels are always converted to linear scene light upstream (sRGB EOTF
inverse for sRGB; HLG OETF inverse for HDR), so the file content is
scene-linear in the indicated gamut. OpenEXR has no standard transfer-
function attribute (the OpenEXR TSC has discussed adding one but it
doesn't exist), so we don't invent one — `chromaticities` plus the EXR
linear-by-convention rule fully specifies the color.
Prompt/workflow: written as plain `string` attributes using the same keys
(`prompt`, `workflow`, ...) that Comfy uses for PNG tEXt chunks, so the
same readers can pull them out symmetrically.
Implementation note: the chunk-offset table that follows the header stores
*absolute* byte offsets into the file. Inserting N bytes into the header
means every offset must be incremented by N or the file becomes unreadable.
"""
if len(exr_bytes) < 8 or exr_bytes[:4] != b"\x76\x2f\x31\x01":
return exr_bytes
new_blob = b""
if prompt is not None:
new_blob += _exr_attribute("prompt", "string", json.dumps(prompt).encode("utf-8"))
if extra_pnginfo:
for key, value in extra_pnginfo.items():
new_blob += _exr_attribute(key, "string", json.dumps(value).encode("utf-8"))
if colorspace is not None:
# Map each colorspace option to the RGB primaries the linear pixels
# are now in. "sRGB" and "linear" both produce Rec. 709 linear; "HDR"
# (HLG-encoded Rec. 2020 input) produces Rec. 2020 linear.
primaries_name = {
"sRGB": "Rec.709",
"linear": "Rec.709",
"HDR": "Rec.2020",
}.get(colorspace, "Rec.709")
new_blob += _exr_attribute(
"chromaticities",
"chromaticities",
_pack_chromaticities(_CHROMATICITIES[primaries_name]),
)
if not new_blob:
return exr_bytes
# Walk header attributes to find the terminating null byte, and pick up
# dataWindow + compression so we know how many chunks the offset table has.
pos = 8 # past magic (4) + version (4)
data_window = None
compression = 0
while pos < len(exr_bytes) and exr_bytes[pos] != 0:
name_end = exr_bytes.index(b"\x00", pos)
attr_name = exr_bytes[pos:name_end].decode("latin-1", errors="replace")
type_end = exr_bytes.index(b"\x00", name_end + 1)
attr_type = exr_bytes[name_end + 1:type_end].decode("latin-1", errors="replace")
size = struct.unpack("<i", exr_bytes[type_end + 1:type_end + 5])[0]
value_start = type_end + 5
value = exr_bytes[value_start:value_start + size]
if attr_name == "dataWindow" and attr_type == "box2i":
data_window = struct.unpack("<iiii", value) # xMin, yMin, xMax, yMax
elif attr_name == "compression" and attr_type == "compression":
compression = value[0]
pos = value_start + size
if data_window is None:
return exr_bytes # required attribute missing — don't risk corrupting
# Scanlines per chunk by compression, from the OpenEXR spec.
scanlines_per_block = {
0: 1, # NO_COMPRESSION
1: 1, # RLE
2: 1, # ZIPS
3: 16, # ZIP
4: 32, # PIZ
5: 16, # PXR24
6: 32, # B44
7: 32, # B44A
8: 256, # DWAA
9: 256, # DWAB
}.get(compression, 1)
_, y_min, _, y_max = data_window
height = y_max - y_min + 1
num_chunks = (height + scanlines_per_block - 1) // scanlines_per_block
header_end = pos # position of the terminating null byte
table_start = header_end + 1
pixel_start = table_start + num_chunks * 8
delta = len(new_blob)
old_offsets = struct.unpack(f"<{num_chunks}Q", exr_bytes[table_start:pixel_start])
new_table = struct.pack(f"<{num_chunks}Q", *(o + delta for o in old_offsets))
return (
exr_bytes[:header_end] # header attributes
+ new_blob # our new attributes
+ exr_bytes[header_end:table_start] # terminating null byte
+ new_table # shifted offset table
+ exr_bytes[pixel_start:] # pixel data, untouched
)
# ---------------------------------------------------------------------------
# Encoding
# ---------------------------------------------------------------------------
def _encode_image(
img_tensor: torch.Tensor,
file_format: str,
bit_depth: str,
colorspace: str,
) -> bytes:
"""Encode a single HxWxC tensor to PNG or EXR bytes in memory.
For EXR the input is interpreted according to `colorspace` and converted
to scene-linear (EXR's convention) before writing:
"sRGB" → input is sRGB-encoded Rec. 709; apply inverse sRGB EOTF.
"HDR" → input is HLG-encoded Rec. 2020 (BT.2100); apply inverse HLG
OETF to get scene-linear, per BT.2100 Note 5a.
"linear" → input is already scene-linear (Rec. 709 primaries); write
through unchanged. Use this for renderer/compositor output.
For PNG, colorspace selection does not modify pixels — PNG is delivered
sRGB-encoded and there is no PNG path for wide-gamut HDR in this node.
"""
height, width, num_channels = img_tensor.shape
has_alpha = num_channels == 4
spec = _FORMAT_SPECS[(file_format, bit_depth, has_alpha)]
if spec["dtype"] == np.float32:
# EXR path: preserve full range, no clamp.
if colorspace == "sRGB":
img_tensor = srgb_to_linear(img_tensor)
elif colorspace == "HDR":
img_tensor = hlg_to_linear(img_tensor)
img_np = img_tensor.cpu().numpy().astype(np.float32)
else:
# PNG path: quantize to integer range.
scaled = (img_tensor * spec["scale"]).clamp(0, spec["scale"])
img_np = scaled.to(torch.int32).cpu().numpy().astype(spec["dtype"])
# Encode directly via CodecContext. PyAV's `image2` muxer does NOT write to
# BytesIO (it expects a real file path), so we bypass the container entirely.
# For single-frame PNG/EXR the raw codec output IS the file.
codec = av.CodecContext.create(file_format, "w")
codec.width = width
codec.height = height
codec.pix_fmt = spec["stream_fmt"]
codec.time_base = Fraction(1, 1)
frame = av.VideoFrame.from_ndarray(img_np, format=spec["frame_fmt"])
if spec["frame_fmt"] != spec["stream_fmt"]:
frame = frame.reformat(format=spec["stream_fmt"])
frame.pts = 0
frame.time_base = codec.time_base
packets = list(codec.encode(frame)) + list(codec.encode(None)) # flush with None
return b"".join(bytes(p) for p in packets)
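A minimal usage sketch with random pixels, showing that for the single-frame case the concatenated packets already form a complete file:

import torch

img = torch.rand(64, 64, 3)  # H, W, C in [0, 1]
data = _encode_image(img, "png", "8-bit", "sRGB")
assert data.startswith(b"\x89PNG\r\n\x1a\n")  # raw codec output is the file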
# ---------------------------------------------------------------------------
# Node
# ---------------------------------------------------------------------------
class SaveImageAdvanced(IO.ComfyNode):
@classmethod
def define_schema(cls):
return IO.Schema(
node_id="SaveImageAdvanced",
search_aliases=["save", "save image", "export image", "output image", "write image"],
display_name="Save Image (Advanced)",
description="Saves the input images to your ComfyUI output directory.",
category="image",
essentials_category="Basics",
inputs=[
IO.Image.Input("images", tooltip="The images to save."),
IO.String.Input(
"filename_prefix",
default="ComfyUI",
tooltip=(
"The prefix for the file to save. May include formatting tokens "
"such as %date:yyyy-MM-dd% or %Empty Latent Image.width%."
),
),
IO.DynamicCombo.Input(
"image_format",
options=[
IO.DynamicCombo.Option("png", [
IO.Combo.Input("bit_depth", options=["8-bit", "16-bit"],
default="8-bit", advanced=True),
IO.Combo.Input("colorspace", options=["sRGB"],
default="sRGB", advanced=True),
]),
IO.DynamicCombo.Option("exr", [
IO.Combo.Input("bit_depth", options=["32-bit float"],
default="32-bit float", advanced=True),
IO.Combo.Input(
"colorspace",
options=["sRGB", "HDR", "linear"],
default="sRGB",
advanced=True,
tooltip=(
"Colorspace of the input tensor. The EXR is "
"always written as scene-linear in the matching "
"gamut.\n"
" 'sRGB' — input is sRGB-encoded Rec.709; "
"the inverse sRGB EOTF is applied.\n"
" 'HDR' — input is HLG-encoded Rec.2020 "
"(BT.2100); the inverse HLG OETF is applied "
"to get scene-linear light.\n"
" 'linear' — input is already scene-linear "
"(Rec.709 primaries); written through unchanged. "
"Use this for renderer/compositor output."
),
),
]),
],
tooltip="The file format in which to save the image.",
),
],
hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo],
is_output_node=True,
)
@classmethod
def execute(cls, images, filename_prefix: str, image_format: dict) -> IO.NodeOutput:
file_format = image_format["image_format"]
bit_depth = image_format["bit_depth"]
colorspace = image_format.get("colorspace", "sRGB")
output_dir = folder_paths.get_output_directory()
full_output_folder, filename, counter, subfolder, filename_prefix = (
folder_paths.get_save_image_path(
filename_prefix, output_dir, images[0].shape[1], images[0].shape[0]
)
)
prompt = cls.hidden.prompt
extra_pnginfo = cls.hidden.extra_pnginfo
write_metadata = not args.disable_metadata
results = []
for batch_number, image in enumerate(images):
encoded = _encode_image(image, file_format, bit_depth, colorspace)
if write_metadata:
if file_format == "png":
encoded = inject_png_metadata(encoded, prompt, extra_pnginfo)
elif file_format == "exr":
encoded = inject_exr_metadata(encoded, prompt, extra_pnginfo, colorspace)
name = filename.replace("%batch_num%", str(batch_number))
file = f"{name}_{counter:05}.{file_format}"
with open(os.path.join(full_output_folder, file), "wb") as f:
f.write(encoded)
results.append({"filename": file, "subfolder": subfolder, "type": "output"})
counter += 1
return IO.NodeOutput(ui={"images": results})
class ImagesExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@@ -842,6 +1249,7 @@ class ImagesExtension(ComfyExtension):
ImageAddNoise,
SaveAnimatedWEBP,
SaveAnimatedPNG,
SaveImageAdvanced,
SaveSVGNode,
ImageStitch,
ResizeAndPadImage,

View File

@@ -338,25 +338,8 @@ class LTXVAddGuide(io.ComfyNode):
noise_mask = get_noise_mask(latent)
_, _, latent_length, latent_height, latent_width = latent_image.shape
# For mid-video multi-frame guides, prepend+strip a throwaway first frame so the VAE's "first latent = 1 pixel frame" asymmetry lands on the discarded slot
time_scale_factor = scale_factors[0]
num_frames_to_keep = ((image.shape[0] - 1) // time_scale_factor) * time_scale_factor + 1
resolved_frame_idx = frame_idx
if frame_idx < 0:
_, num_keyframes = get_keyframe_idxs(positive)
resolved_frame_idx = max((latent_length - num_keyframes - 1) * time_scale_factor + 1 + frame_idx, 0)
causal_fix = resolved_frame_idx == 0 or num_frames_to_keep == 1
if not causal_fix:
image = torch.cat([image[:1], image], dim=0)
image, t = cls.encode(vae, latent_width, latent_height, image, scale_factors)
if not causal_fix:
t = t[:, :, 1:, :, :]
image = image[1:]
frame_idx, latent_idx = cls.get_latent_index(positive, latent_length, len(image), frame_idx, scale_factors)
assert latent_idx + t.shape[2] <= latent_length, "Conditioning frames exceed the length of the latent sequence."
@@ -369,7 +352,6 @@
t,
strength,
scale_factors,
causal_fix=causal_fix,
)
# Track this guide for per-reference attention control.

View File

@@ -40,13 +40,23 @@ def composite(destination, source, x, y, mask = None, multiplier = 8, resize_sou
inverse_mask = torch.ones_like(mask) - mask
source_portion = mask * source[..., :visible_height, :visible_width]
destination_portion = inverse_mask * destination[..., top:bottom, left:right]
source_rgb = source[:, :3, :visible_height, :visible_width]
dest_slice = destination[..., top:bottom, left:right]
if destination.shape[1] == 4:
if torch.max(dest_slice) == 0:
destination[:, :3, top:bottom, left:right] = source_rgb
destination[:, 3:4, top:bottom, left:right] = mask
else:
destination[:, :3, top:bottom, left:right] = (mask * source_rgb) + (inverse_mask * dest_slice[:, :3])
destination[:, 3:4, top:bottom, left:right] = torch.max(mask, dest_slice[:, 3:4])
else:
source_portion = mask * source_rgb
destination_portion = inverse_mask * dest_slice
destination[..., top:bottom, left:right] = source_portion + destination_portion
destination[..., top:bottom, left:right] = source_portion + destination_portion
return destination
class LatentCompositeMasked(IO.ComfyNode):
@classmethod
def define_schema(cls):
@@ -85,18 +95,23 @@ class ImageCompositeMasked(IO.ComfyNode):
display_name="Image Composite Masked",
category="image",
inputs=[
IO.Image.Input("destination"),
IO.Image.Input("source"),
IO.Int.Input("x", default=0, min=0, max=nodes.MAX_RESOLUTION, step=1),
IO.Int.Input("y", default=0, min=0, max=nodes.MAX_RESOLUTION, step=1),
IO.Boolean.Input("resize_source", default=False),
IO.Image.Input("destination", optional=True),
IO.Mask.Input("mask", optional=True),
],
outputs=[IO.Image.Output()],
)
@classmethod
def execute(cls, destination, source, x, y, resize_source, mask = None) -> IO.NodeOutput:
def execute(cls, source, x, y, resize_source, destination = None, mask = None) -> IO.NodeOutput:
if destination is None: # transparent rgba
B, H, W, C = source.shape
destination = torch.zeros((B, H, W, 4), dtype=source.dtype, device=source.device)
if C == 3:
source = torch.nn.functional.pad(source, (0, 1), value=1.0)
destination, source = node_helpers.image_alpha_fix(destination, source)
destination = destination.clone().movedim(-1, 1)
output = composite(destination, source.movedim(-1, 1), x, y, mask, 1, resize_source).movedim(1, -1)
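A small sketch (hypothetical shapes) of the new optional-destination behavior: with nothing wired into destination, the node composites onto a transparent RGBA canvas and promotes an RGB source to RGBA with opaque alpha.

import torch

source = torch.rand(1, 32, 32, 3)        # B, H, W, C
destination = torch.zeros(1, 32, 32, 4)  # transparent canvas
source = torch.nn.functional.pad(source, (0, 1), value=1.0)  # add alpha=1
assert source.shape == (1, 32, 32, 4) and bool((source[..., 3] == 1).all())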

View File

@@ -1,396 +0,0 @@
"""Save-side 3D nodes: mesh packing/slicing helpers + GLB writer + SaveGLB node."""
import json
import logging
import os
import struct
from io import BytesIO
import numpy as np
from PIL import Image
import torch
from typing_extensions import override
import folder_paths
from comfy.cli_args import args
from comfy_api.latest import ComfyExtension, IO, Types
def pack_variable_mesh_batch(vertices, faces, colors=None, uvs=None, texture=None):
# Pack lists of (Nᵢ, *) vertex/face/color/uv tensors into padded batched tensors,
# stashing per-item lengths as runtime attrs so consumers can recover the real slice.
# colors and uvs are 1:1 with vertices, so they're padded to max_vertices and read with vertex_counts.
# texture is (B, H, W, 3) — passed through unchanged
batch_size = len(vertices)
max_vertices = max(v.shape[0] for v in vertices)
max_faces = max(f.shape[0] for f in faces)
packed_vertices = vertices[0].new_zeros((batch_size, max_vertices, vertices[0].shape[1]))
packed_faces = faces[0].new_zeros((batch_size, max_faces, faces[0].shape[1]))
vertex_counts = torch.tensor([v.shape[0] for v in vertices], device=vertices[0].device, dtype=torch.int64)
face_counts = torch.tensor([f.shape[0] for f in faces], device=faces[0].device, dtype=torch.int64)
for i, (v, f) in enumerate(zip(vertices, faces)):
packed_vertices[i, :v.shape[0]] = v
packed_faces[i, :f.shape[0]] = f
packed_colors = None
if colors is not None:
packed_colors = colors[0].new_zeros((batch_size, max_vertices, colors[0].shape[1]))
for i, c in enumerate(colors):
assert c.shape[0] == vertices[i].shape[0], (
f"vertex_colors[{i}] has {c.shape[0]} entries, expected {vertices[i].shape[0]} (1:1 with vertices)"
)
packed_colors[i, :c.shape[0]] = c
packed_uvs = None
if uvs is not None:
packed_uvs = uvs[0].new_zeros((batch_size, max_vertices, uvs[0].shape[1]))
for i, u in enumerate(uvs):
assert u.shape[0] == vertices[i].shape[0], (
f"uvs[{i}] has {u.shape[0]} entries, expected {vertices[i].shape[0]} (1:1 with vertices)"
)
packed_uvs[i, :u.shape[0]] = u
return Types.MESH(packed_vertices, packed_faces,
uvs=packed_uvs, vertex_colors=packed_colors, texture=texture,
vertex_counts=vertex_counts, face_counts=face_counts)
def get_mesh_batch_item(mesh, index):
# Returns (vertices, faces, colors, uvs) for batch index, slicing to real lengths
# if the mesh carries per-item counts (variable-size batch).
v_colors = getattr(mesh, "vertex_colors", None)
v_uvs = getattr(mesh, "uvs", None)
if getattr(mesh, "vertex_counts", None) is not None:
vertex_count = int(mesh.vertex_counts[index].item())
face_count = int(mesh.face_counts[index].item())
vertices = mesh.vertices[index, :vertex_count]
faces = mesh.faces[index, :face_count]
colors = v_colors[index, :vertex_count] if v_colors is not None else None
uvs = v_uvs[index, :vertex_count] if v_uvs is not None else None
return vertices, faces, colors, uvs
colors = v_colors[index] if v_colors is not None else None
uvs = v_uvs[index] if v_uvs is not None else None
return mesh.vertices[index], mesh.faces[index], colors, uvs
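A round-trip sketch with made-up sizes (this relies on the richer MESH constructor shown earlier): pack two differently sized meshes, then recover the exact per-item slices.

import torch

v0, f0 = torch.rand(10, 3), torch.randint(0, 10, (6, 3))
v1, f1 = torch.rand(4, 3), torch.randint(0, 4, (2, 3))
mesh = pack_variable_mesh_batch([v0, v1], [f0, f1])
assert mesh.vertices.shape == (2, 10, 3) and mesh.faces.shape == (2, 6, 3)
v, f, colors, uvs = get_mesh_batch_item(mesh, 1)
assert v.shape == (4, 3) and f.shape == (2, 3) and colors is None and uvs is None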
def save_glb(vertices, faces, filepath, metadata=None,
uvs=None, vertex_colors=None, texture_image=None):
"""
Save PyTorch tensor vertices and faces as a GLB file without external dependencies.
Parameters:
vertices: torch.Tensor of shape (N, 3) - The vertex coordinates
faces: torch.Tensor of shape (M, 3) - The face indices (triangle faces)
filepath: str - Output filepath (should end with .glb)
metadata: dict - Optional asset.extras metadata
uvs: torch.Tensor of shape (N, 2) - Optional per-vertex texture coordinates
vertex_colors: torch.Tensor of shape (N, 3) or (N, 4) - Optional per-vertex colors in [0, 1]
texture_image: PIL.Image - Optional baseColor texture, embedded as PNG
"""
# Convert tensors to numpy arrays
vertices_np = vertices.cpu().numpy().astype(np.float32)
faces_signed = faces.cpu().numpy().astype(np.int64)
uvs_np = uvs.cpu().numpy().astype(np.float32) if uvs is not None else None
colors_np = vertex_colors.cpu().numpy().astype(np.float32) if vertex_colors is not None else None
if colors_np is not None:
colors_np = np.clip(colors_np, 0.0, 1.0)
n_verts = vertices_np.shape[0]
if n_verts == 0:
raise ValueError("save_glb: vertices is empty")
if faces_signed.size > 0:
fmin = int(faces_signed.min())
fmax = int(faces_signed.max())
if fmin < 0 or fmax >= n_verts:
raise ValueError(
f"save_glb: face index out of range [0, {n_verts}): min={fmin}, max={fmax}"
)
if uvs_np is not None and uvs_np.shape[0] != n_verts:
raise ValueError(
f"save_glb: uvs has {uvs_np.shape[0]} entries but vertex count is {n_verts}"
)
if colors_np is not None and colors_np.shape[0] != n_verts:
raise ValueError(
f"save_glb: vertex_colors has {colors_np.shape[0]} entries but vertex count is {n_verts}"
)
faces_np = faces_signed.astype(np.uint32)
texture_png_bytes = None
if texture_image is not None:
buf = BytesIO()
texture_image.save(buf, format="PNG")
texture_png_bytes = buf.getvalue()
vertices_buffer = vertices_np.tobytes()
indices_buffer = faces_np.tobytes()
uvs_buffer = uvs_np.tobytes() if uvs_np is not None else b""
colors_buffer = colors_np.tobytes() if colors_np is not None else b""
texture_buffer = texture_png_bytes if texture_png_bytes is not None else b""
def pad_to_4_bytes(buffer):
padding_length = (4 - (len(buffer) % 4)) % 4
return buffer + b'\x00' * padding_length
vertices_buffer_padded = pad_to_4_bytes(vertices_buffer)
indices_buffer_padded = pad_to_4_bytes(indices_buffer)
uvs_buffer_padded = pad_to_4_bytes(uvs_buffer)
colors_buffer_padded = pad_to_4_bytes(colors_buffer)
texture_buffer_padded = pad_to_4_bytes(texture_buffer)
buffer_data = b"".join([
vertices_buffer_padded,
indices_buffer_padded,
uvs_buffer_padded,
colors_buffer_padded,
texture_buffer_padded,
])
vertices_byte_length = len(vertices_buffer)
vertices_byte_offset = 0
indices_byte_length = len(indices_buffer)
indices_byte_offset = len(vertices_buffer_padded)
uvs_byte_offset = indices_byte_offset + len(indices_buffer_padded)
colors_byte_offset = uvs_byte_offset + len(uvs_buffer_padded)
texture_byte_offset = colors_byte_offset + len(colors_buffer_padded)
buffer_views = [
{
"buffer": 0,
"byteOffset": vertices_byte_offset,
"byteLength": vertices_byte_length,
"target": 34962 # ARRAY_BUFFER
},
{
"buffer": 0,
"byteOffset": indices_byte_offset,
"byteLength": indices_byte_length,
"target": 34963 # ELEMENT_ARRAY_BUFFER
}
]
accessors = [
{
"bufferView": 0,
"byteOffset": 0,
"componentType": 5126, # FLOAT
"count": len(vertices_np),
"type": "VEC3",
"max": vertices_np.max(axis=0).tolist(),
"min": vertices_np.min(axis=0).tolist()
},
{
"bufferView": 1,
"byteOffset": 0,
"componentType": 5125, # UNSIGNED_INT
"count": faces_np.size,
"type": "SCALAR"
}
]
primitive_attributes = {"POSITION": 0}
if uvs_np is not None and len(uvs_np) > 0:
buffer_views.append({
"buffer": 0,
"byteOffset": uvs_byte_offset,
"byteLength": len(uvs_buffer),
"target": 34962
})
accessor_idx = len(accessors)
accessors.append({
"bufferView": len(buffer_views) - 1,
"byteOffset": 0,
"componentType": 5126,
"count": len(uvs_np),
"type": "VEC2",
})
primitive_attributes["TEXCOORD_0"] = accessor_idx
if colors_np is not None and len(colors_np) > 0:
buffer_views.append({
"buffer": 0,
"byteOffset": colors_byte_offset,
"byteLength": len(colors_buffer),
"target": 34962
})
accessor_idx = len(accessors)
accessors.append({
"bufferView": len(buffer_views) - 1,
"byteOffset": 0,
"componentType": 5126,
"count": len(colors_np),
"type": "VEC3" if colors_np.shape[1] == 3 else "VEC4",
})
primitive_attributes["COLOR_0"] = accessor_idx
primitive = {
"attributes": primitive_attributes,
"indices": 1,
"mode": 4 # TRIANGLES
}
images = []
textures = []
samplers = []
materials = []
if texture_png_bytes is not None and "TEXCOORD_0" in primitive_attributes:
buffer_views.append({
"buffer": 0,
"byteOffset": texture_byte_offset,
"byteLength": len(texture_buffer),
})
images.append({"bufferView": len(buffer_views) - 1, "mimeType": "image/png"})
samplers.append({"magFilter": 9729, "minFilter": 9729, "wrapS": 33071, "wrapT": 33071})
textures.append({"source": 0, "sampler": 0})
materials.append({
"pbrMetallicRoughness": {
"baseColorTexture": {"index": 0, "texCoord": 0},
"metallicFactor": 0.0,
"roughnessFactor": 1.0,
},
"doubleSided": True,
})
primitive["material"] = 0
gltf = {
"asset": {"version": "2.0", "generator": "ComfyUI"},
"buffers": [{"byteLength": len(buffer_data)}],
"bufferViews": buffer_views,
"accessors": accessors,
"meshes": [{"primitives": [primitive]}],
"nodes": [{"mesh": 0}],
"scenes": [{"nodes": [0]}],
"scene": 0,
}
if images:
gltf["images"] = images
if samplers:
gltf["samplers"] = samplers
if textures:
gltf["textures"] = textures
if materials:
gltf["materials"] = materials
if metadata:
gltf["asset"]["extras"] = metadata
# Convert the JSON to bytes
gltf_json = json.dumps(gltf).encode('utf8')
def pad_json_to_4_bytes(buffer):
padding_length = (4 - (len(buffer) % 4)) % 4
return buffer + b' ' * padding_length
gltf_json_padded = pad_json_to_4_bytes(gltf_json)
# Create the GLB header (a 4-byte ASCII magic identifier glTF)
glb_header = struct.pack('<4sII', b'glTF', 2, 12 + 8 + len(gltf_json_padded) + 8 + len(buffer_data))
# Create JSON chunk header (chunk type 0)
json_chunk_header = struct.pack('<II', len(gltf_json_padded), 0x4E4F534A) # "JSON" in little endian
# Create BIN chunk header (chunk type 1)
bin_chunk_header = struct.pack('<II', len(buffer_data), 0x004E4942) # "BIN\0" in little endian
# Write the GLB file
with open(filepath, 'wb') as f:
f.write(glb_header)
f.write(json_chunk_header)
f.write(gltf_json_padded)
f.write(bin_chunk_header)
f.write(buffer_data)
return filepath
class SaveGLB(IO.ComfyNode):
@classmethod
def define_schema(cls):
return IO.Schema(
node_id="SaveGLB",
display_name="Save 3D Model",
search_aliases=["export 3d model", "save mesh"],
category="3d",
essentials_category="Basics",
is_output_node=True,
inputs=[
IO.MultiType.Input(
IO.Mesh.Input("mesh"),
types=[
IO.File3DGLB,
IO.File3DGLTF,
IO.File3DOBJ,
IO.File3DFBX,
IO.File3DSTL,
IO.File3DUSDZ,
IO.File3DAny,
],
tooltip="Mesh or 3D file to save",
),
IO.String.Input("filename_prefix", default="3d/ComfyUI"),
],
hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo]
)
@classmethod
def execute(cls, mesh: Types.MESH | Types.File3D, filename_prefix: str) -> IO.NodeOutput:
full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, folder_paths.get_output_directory())
results = []
metadata = {}
if not args.disable_metadata:
if cls.hidden.prompt is not None:
metadata["prompt"] = json.dumps(cls.hidden.prompt)
if cls.hidden.extra_pnginfo is not None:
for x in cls.hidden.extra_pnginfo:
metadata[x] = json.dumps(cls.hidden.extra_pnginfo[x])
if isinstance(mesh, Types.File3D):
# Handle File3D input - save BytesIO data to output folder
ext = mesh.format or "glb"
f = f"{filename}_{counter:05}_.{ext}"
mesh.save_to(os.path.join(full_output_folder, f))
results.append({
"filename": f,
"subfolder": subfolder,
"type": "output"
})
counter += 1
else:
# Handle Mesh input - save vertices and faces as GLB; carry optional UVs / colors / texture.
texture_b = getattr(mesh, "texture", None)
texture_np = None
if texture_b is not None:
texture_np = (texture_b.clamp(0.0, 1.0).cpu().numpy() * 255).astype(np.uint8)
assert texture_np.ndim == 4 and texture_np.shape[-1] == 3, (
f"texture must be (B, H, W, 3) RGB, got shape {tuple(texture_np.shape)}"
)
for i in range(mesh.vertices.shape[0]):
vertices_i, faces_i, v_colors, uvs_i = get_mesh_batch_item(mesh, i)
if vertices_i.shape[0] == 0 or faces_i.shape[0] == 0:
logging.warning(f"SaveGLB: skipping empty mesh at batch index {i}")
continue
tex_img = Image.fromarray(texture_np[i], mode="RGB") if texture_np is not None else None
f = f"{filename}_{counter:05}_.glb"
save_glb(vertices_i, faces_i, os.path.join(full_output_folder, f), metadata,
uvs=uvs_i,
vertex_colors=v_colors,
texture_image=tex_img)
results.append({
"filename": f,
"subfolder": subfolder,
"type": "output"
})
counter += 1
return IO.NodeOutput(ui={"3d": results})
class Save3DExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [SaveGLB]
async def comfy_entrypoint() -> Save3DExtension:
return Save3DExtension()

View File

@@ -123,7 +123,6 @@ class CreateVideo(io.ComfyNode):
search_aliases=["images to video"],
display_name="Create Video",
category="video",
essentials_category="Video Tools",
description="Create a video from images.",
inputs=[
io.Image.Input("images", tooltip="The images to create a video from."),

View File

@@ -1,3 +1,3 @@
# This file is automatically generated by the build process when version is
# updated in pyproject.toml.
__version__ = "0.21.1"
__version__ = "0.21.0"

View File

@@ -2436,7 +2436,6 @@ async def init_builtin_extra_nodes():
"nodes_void.py",
"nodes_wandancer.py",
"nodes_hidream_o1.py",
"nodes_save_3d.py",
]
import_failed = []

View File

@@ -2071,6 +2071,7 @@ paths:
type: integer
description: Number of assets marked as missing
# ===========================================================================
# Cloud-runtime FE-facing operations
#
@@ -2121,11 +2122,7 @@
operationId: getCloudJobStatus
tags: [queue]
summary: Get status of a cloud job
deprecated: true
description: |
**Deprecated.** This endpoint is superseded by `GET /api/jobs/{job_id}`.
Clients should migrate; the endpoint is retained for backward
compatibility but will be removed in a future release.
description: "[cloud-only] Returns the current execution status of a cloud job."
x-runtime: [cloud]
parameters:
- name: job_id
@@ -2195,11 +2192,7 @@
operationId: getHistoryV2
tags: [history]
summary: Get paginated execution history (v2)
deprecated: true
description: |
**Deprecated.** This endpoint is superseded by `GET /api/jobs`.
Clients should migrate; the endpoint is retained for backward
compatibility but will be removed in a future release.
description: "[cloud-only] Returns a paginated list of execution history entries in the v2 format, with richer metadata than the legacy history endpoint."
x-runtime: [cloud]
parameters:
- name: limit
@@ -2238,11 +2231,7 @@
operationId: getHistoryV2ByPromptId
tags: [history]
summary: Get v2 history for a specific prompt
deprecated: true
description: |
**Deprecated.** This endpoint is superseded by `GET /api/jobs/{prompt_id}`.
Clients should migrate; the endpoint is retained for backward
compatibility but will be removed in a future release.
description: "[cloud-only] Returns the v2 history entry for a specific prompt execution."
x-runtime: [cloud]
parameters:
- name: prompt_id
@@ -2277,12 +2266,7 @@
operationId: getCloudLogs
tags: [system]
summary: Get cloud execution logs
deprecated: true
description: |
**Deprecated.** This endpoint returns a static placeholder response and
provides no real log data. It is retained only to avoid breaking clients
that still call it. Clients should remove their dependency; the endpoint
will be removed in a future release.
description: "[cloud-only] Returns execution logs for the authenticated user's cloud jobs."
x-runtime: [cloud]
parameters:
- name: job_id
@@ -5386,12 +5370,7 @@
operationId: viewVideo
tags: [view]
summary: View or download a video file
deprecated: true
description: |
**Deprecated.** This endpoint is an alias of `GET /api/view` added for
legacy history-queue video playback. Callers should use `/api/view`
directly; the endpoint is retained for backward compatibility but will
be removed in a future release.
description: "[cloud-only] Serves a video file from the output directory. Used by the frontend video player."
x-runtime: [cloud]
parameters:
- name: filename
@@ -5544,6 +5523,7 @@
schema:
$ref: "#/components/schemas/CloudError"
components:
parameters:
ComfyUserHeader:
@@ -6895,6 +6875,7 @@
error:
type: string
# -------------------------------------------------------------------
# Cloud-runtime schemas
#

View File

@@ -1,6 +1,6 @@
[project]
name = "ComfyUI"
version = "0.21.1"
version = "0.21.0"
readme = "README.md"
license = { file = "LICENSE" }
requires-python = ">=3.10"

View File

@@ -1,6 +1,6 @@
comfyui-frontend-package==1.43.18
comfyui-workflow-templates==0.9.75
comfyui-embedded-docs==0.5.0
comfyui-workflow-templates==0.9.73
comfyui-embedded-docs==0.4.4
torch
torchsde
torchvision

View File

@@ -1,23 +1,9 @@ from collections import defaultdict
from collections import defaultdict
import torch
from comfy.model_detection import detect_unet_config, model_config_from_unet_config
import comfy.supported_models
def _freeze(value):
"""Recursively convert a value to a hashable form so configs can be
compared/used as dict keys or set members."""
if isinstance(value, dict):
return frozenset((k, _freeze(v)) for k, v in value.items())
if isinstance(value, (list, tuple)):
return tuple(_freeze(v) for v in value)
if isinstance(value, set):
return frozenset(_freeze(v) for v in value)
return value
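A quick illustration of why the freezing matters: frozen configs compare equal regardless of key order and are hashable, so they can serve as dict keys or set members.

a = _freeze({"x": [1, 2], "y": {"z": 3}})
b = _freeze({"y": {"z": 3}, "x": [1, 2]})
assert a == b and hash(a) == hash(b)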
def _make_longcat_comfyui_sd():
"""Minimal ComfyUI-format state dict for pre-converted LongCat-Image weights."""
sd = {}
@@ -124,21 +110,3 @@
model_config = model_config_from_unet_config(unet_config, sd)
assert model_config is not None
assert type(model_config).__name__ == "FluxSchnell"
def test_unet_config_and_required_keys_combination_is_unique(self):
"""Each model in the registry must have a unique combination of
``unet_config`` and ``required_keys``. If two models share the same
combination, ``BASE.matches`` cannot disambiguate between them and the
first one in the list will always win."""
models = comfy.supported_models.models
groups = defaultdict(list)
for model in models:
key = (_freeze(model.unet_config), _freeze(model.required_keys))
groups[key].append(model.__name__)
duplicates = {k: names for k, names in groups.items() if len(names) > 1}
assert not duplicates, (
"Found models sharing the same (unet_config, required_keys) "
"combination, which makes detection ambiguous: "
+ "; ".join(", ".join(names) for names in duplicates.values())
)