Mirror of https://github.com/comfyanonymous/ComfyUI.git (synced 2026-05-13 21:37:16 +08:00)

Compare commits: feat/api-n ... temp_pr (1 commit, SHA 0ac6480c7e)
@ -89,12 +89,3 @@ rules:
    then:
      field: description
      function: truthy
-
-overrides:
-  # /ws uses HTTP 101 (Switching Protocols) — a legitimate response for a
-  # WebSocket upgrade, but not a 2xx, so operation-success-response fires
-  # as a false positive. OpenAPI 3.x has no native WebSocket support.
-  - files:
-      - "openapi.yaml#/paths/~1ws"
-    rules:
-      operation-success-response: off

@ -1443,7 +1443,7 @@ class HiDreamO1(supported_models_base.BASE):
    }

    latent_format = latent_formats.HiDreamO1Pixel
-   memory_usage_factor = 0.033
+   memory_usage_factor = 0.6
    # fp16 not supported: the LM MLP down_proj activations overflow in fp16, causing NaNs
    supported_inference_dtypes = [torch.bfloat16, torch.float32]

@ -1,70 +0,0 @@
from enum import Enum
from typing import Literal

from pydantic import BaseModel, Field


class AnthropicRole(str, Enum):
    user = "user"
    assistant = "assistant"


class AnthropicTextContent(BaseModel):
    type: Literal["text"] = "text"
    text: str = Field(...)


class AnthropicImageSourceBase64(BaseModel):
    type: Literal["base64"] = "base64"
    media_type: str = Field(..., description="MIME type of the image, e.g. image/png, image/jpeg")
    data: str = Field(..., description="Base64-encoded image data")


class AnthropicImageContent(BaseModel):
    type: Literal["image"] = "image"
    source: AnthropicImageSourceBase64 = Field(...)


class AnthropicMessage(BaseModel):
    role: AnthropicRole = Field(...)
    content: list[AnthropicTextContent | AnthropicImageContent] = Field(...)


class AnthropicMessagesRequest(BaseModel):
    model: str = Field(...)
    messages: list[AnthropicMessage] = Field(...)
    max_tokens: int = Field(..., ge=1)
    system: str | None = Field(None, description="Top-level system prompt")
    temperature: float | None = Field(None, ge=0.0, le=1.0)
    top_p: float | None = Field(None, ge=0.0, le=1.0)
    top_k: int | None = Field(None, ge=0)
    stop_sequences: list[str] | None = Field(None)


class AnthropicResponseTextBlock(BaseModel):
    type: Literal["text"] = "text"
    text: str = Field(...)


class AnthropicCacheCreationUsage(BaseModel):
    ephemeral_5m_input_tokens: int | None = Field(None)
    ephemeral_1h_input_tokens: int | None = Field(None)


class AnthropicMessagesUsage(BaseModel):
    input_tokens: int | None = Field(None)
    output_tokens: int | None = Field(None)
    cache_creation_input_tokens: int | None = Field(None)
    cache_read_input_tokens: int | None = Field(None)
    cache_creation: AnthropicCacheCreationUsage | None = Field(None)


class AnthropicMessagesResponse(BaseModel):
    id: str | None = Field(None)
    type: str | None = Field(None)
    role: str | None = Field(None)
    model: str | None = Field(None)
    content: list[AnthropicResponseTextBlock] | None = Field(None)
    stop_reason: str | None = Field(None)
    stop_sequence: str | None = Field(None)
    usage: AnthropicMessagesUsage | None = Field(None)
@ -1,250 +0,0 @@
"""API Nodes for Anthropic Claude (Messages API). See: https://docs.anthropic.com/en/api/messages"""

from typing_extensions import override

from comfy_api.latest import IO, ComfyExtension, Input
from comfy_api_nodes.apis.anthropic import (
    AnthropicImageContent,
    AnthropicImageSourceBase64,
    AnthropicMessage,
    AnthropicMessagesRequest,
    AnthropicMessagesResponse,
    AnthropicRole,
    AnthropicTextContent,
)
from comfy_api_nodes.util import (
    ApiEndpoint,
    downscale_image_tensor,
    get_number_of_images,
    sync_op,
    tensor_to_base64_string,
    validate_string,
)

ANTHROPIC_MESSAGES_ENDPOINT = "/proxy/anthropic/v1/messages"
ANTHROPIC_IMAGE_MAX_PIXELS = 1568 * 1568  # Anthropic recommends max ~1568px on the longest edge
CLAUDE_MAX_IMAGES = 20  # Anthropic supports up to 20 images per request

CLAUDE_MODELS: dict[str, str] = {
    "Opus 4.7": "claude-opus-4-7",
    "Opus 4.6": "claude-opus-4-6",
    "Sonnet 4.6": "claude-sonnet-4-6",
    "Sonnet 4.5": "claude-sonnet-4-5-20250929",
    "Haiku 4.5": "claude-haiku-4-5-20251001",
}


def _claude_model_inputs():
    return [
        IO.Int.Input(
            "max_tokens",
            default=16000,
            min=32,
            max=32000,
            tooltip="Maximum number of tokens to generate before stopping.",
            advanced=True,
        ),
        IO.Float.Input(
            "temperature",
            default=1.0,
            min=0.0,
            max=1.0,
            step=0.01,
            tooltip="Controls randomness. 0.0 is deterministic, 1.0 is most random.",
            advanced=True,
        ),
    ]


def _model_price_per_million(model: str) -> tuple[float, float] | None:
    """Return (input_per_1M, output_per_1M) USD for a Claude model, or None if unknown."""
    if "opus-4" in model:
        return 15.0, 75.0
    if "sonnet-4" in model:
        return 3.0, 15.0
    if "haiku-4-5" in model:
        return 1.0, 5.0
    return None


def calculate_tokens_price(response: AnthropicMessagesResponse) -> float | None:
    """Compute approximate USD price from response usage. Server-side billing is authoritative."""
    if not response.usage or not response.model:
        return None
    rates = _model_price_per_million(response.model)
    if rates is None:
        return None
    input_rate, output_rate = rates
    input_tokens = response.usage.input_tokens or 0
    output_tokens = response.usage.output_tokens or 0
    cache_read = response.usage.cache_read_input_tokens or 0
    cache_5m = 0
    cache_1h = 0
    if response.usage.cache_creation:
        cache_5m = response.usage.cache_creation.ephemeral_5m_input_tokens or 0
        cache_1h = response.usage.cache_creation.ephemeral_1h_input_tokens or 0
    total = (
        input_tokens * input_rate
        + output_tokens * output_rate
        + cache_read * input_rate * 0.1
        + cache_5m * input_rate * 1.25
        + cache_1h * input_rate * 2.0
    )
    return total / 1_000_000.0
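
# A quick sanity check of the formula above (hypothetical token counts, for
# illustration only): a Sonnet response with 1,000 input and 500 output tokens
# should price at (1_000 * 3.0 + 500 * 15.0) / 1e6 = $0.0105.
_example = AnthropicMessagesResponse(
    model="claude-sonnet-4-5-20250929",
    usage={"input_tokens": 1_000, "output_tokens": 500},
)
assert abs(calculate_tokens_price(_example) - 0.0105) < 1e-9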


def _get_text_from_response(response: AnthropicMessagesResponse) -> str:
    if not response.content:
        return ""
    return "\n".join(block.text for block in response.content if block.text)


def _build_image_content_blocks(image_tensors: list[Input.Image]) -> list[AnthropicImageContent]:
    """Convert image tensors (possibly batched) into Anthropic content blocks (base64 PNG)."""
    blocks: list[AnthropicImageContent] = []
    for tensor in image_tensors:
        batch = tensor if len(tensor.shape) == 4 else tensor.unsqueeze(0)
        for i in range(batch.shape[0]):
            scaled = downscale_image_tensor(batch[i : i + 1], total_pixels=ANTHROPIC_IMAGE_MAX_PIXELS)
            blocks.append(
                AnthropicImageContent(
                    source=AnthropicImageSourceBase64(
                        media_type="image/png",
                        data=tensor_to_base64_string(scaled),
                    ),
                )
            )
    return blocks
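
# Each block serializes (via the pydantic models above) to the Messages API
# content shape, e.g. with a hypothetical payload:
#   {"type": "image",
#    "source": {"type": "base64", "media_type": "image/png", "data": "..."}}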


class ClaudeNode(IO.ComfyNode):
    """Generate text responses from an Anthropic Claude model."""

    @classmethod
    def define_schema(cls):
        return IO.Schema(
            node_id="ClaudeNode",
            display_name="Anthropic Claude",
            category="api node/text/Anthropic",
            essentials_category="Text Generation",
            description="Generate text responses with Anthropic's Claude models. "
            "Provide a text prompt and optionally one or more images for multimodal context.",
            inputs=[
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="Text input to the model.",
                ),
                IO.DynamicCombo.Input(
                    "model",
                    options=[IO.DynamicCombo.Option(label, _claude_model_inputs()) for label in CLAUDE_MODELS],
                    tooltip="The Claude model used to generate the response.",
                ),
                IO.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    control_after_generate=True,
                    tooltip="Seed controls whether the node should re-run; "
                    "results are non-deterministic regardless of seed.",
                ),
                IO.Autogrow.Input(
                    "images",
                    template=IO.Autogrow.TemplateNames(
                        IO.Image.Input("image"),
                        names=[f"image_{i}" for i in range(1, CLAUDE_MAX_IMAGES + 1)],
                        min=0,
                    ),
                    tooltip=f"Optional image(s) to use as context for the model. Up to {CLAUDE_MAX_IMAGES} images.",
                ),
                IO.String.Input(
                    "system_prompt",
                    multiline=True,
                    default="",
                    optional=True,
                    advanced=True,
                    tooltip="Foundational instructions that dictate the model's behavior.",
                ),
            ],
            outputs=[IO.String.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
                depends_on=IO.PriceBadgeDepends(widgets=["model"]),
                expr="""
                (
                    $m := widgets.model;
                    $contains($m, "opus") ? {
                        "type": "list_usd",
                        "usd": [0.015, 0.075],
                        "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
                    }
                    : $contains($m, "sonnet") ? {
                        "type": "list_usd",
                        "usd": [0.003, 0.015],
                        "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
                    }
                    : $contains($m, "haiku") ? {
                        "type": "list_usd",
                        "usd": [0.001, 0.005],
                        "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
                    }
                    : {"type": "text", "text": "Token-based"}
                )
                """,
            ),
        )

    @classmethod
    async def execute(
        cls,
        prompt: str,
        model: dict,
        seed: int,
        images: dict | None = None,
        system_prompt: str = "",
    ) -> IO.NodeOutput:
        validate_string(prompt, strip_whitespace=True, min_length=1)
        model_label = model["model"]
        max_tokens = model["max_tokens"]
        temperature = model["temperature"]

        image_tensors: list[Input.Image] = [t for t in (images or {}).values() if t is not None]
        if sum(get_number_of_images(t) for t in image_tensors) > CLAUDE_MAX_IMAGES:
            raise ValueError(f"Up to {CLAUDE_MAX_IMAGES} images are supported per request.")

        content: list[AnthropicTextContent | AnthropicImageContent] = []
        if image_tensors:
            content.extend(_build_image_content_blocks(image_tensors))
        content.append(AnthropicTextContent(text=prompt))

        response = await sync_op(
            cls,
            ApiEndpoint(path=ANTHROPIC_MESSAGES_ENDPOINT, method="POST"),
            response_model=AnthropicMessagesResponse,
            data=AnthropicMessagesRequest(
                model=CLAUDE_MODELS[model_label],
                max_tokens=max_tokens,
                messages=[AnthropicMessage(role=AnthropicRole.user, content=content)],
                system=system_prompt or None,
                temperature=temperature,
            ),
            price_extractor=calculate_tokens_price,
        )
        return IO.NodeOutput(_get_text_from_response(response) or "Empty response from Claude model.")


class AnthropicExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
        return [ClaudeNode]


async def comfy_entrypoint() -> AnthropicExtension:
    return AnthropicExtension()

@ -143,7 +143,7 @@ class QuiverTextToSVGNode(IO.ComfyNode):
        if reference_images:
            references = []
            for key in reference_images:
-               url = await upload_image_to_comfyapi(cls, reference_images[key], mime_type="image/png")
+               url = await upload_image_to_comfyapi(cls, reference_images[key])
                references.append(QuiverImageObject(url=url))
            if len(references) > 4:
                raise ValueError("Maximum 4 reference images are allowed.")

@ -252,7 +252,7 @@ class QuiverImageToSVGNode(IO.ComfyNode):
        model: dict,
        seed: int,
    ) -> IO.NodeOutput:
-       image_url = await upload_image_to_comfyapi(cls, image, mime_type="image/png")
+       image_url = await upload_image_to_comfyapi(cls, image)

        response = await sync_op(
            cls,

@ -297,7 +297,6 @@ class LoadAudio(IO.ComfyNode):
    @classmethod
    def define_schema(cls):
        input_dir = folder_paths.get_input_directory()
-       os.makedirs(input_dir, exist_ok=True)
        files = folder_paths.filter_files_content_types(os.listdir(input_dir), ["audio", "video"])
        return IO.Schema(
            node_id="LoadAudio",

@ -3,15 +3,23 @@ from __future__ import annotations
import nodes
import folder_paths

+import av
+import json

import os
import re
import math
import numpy as np
+import struct
import torch

+import zlib
import comfy.utils
+from fractions import Fraction

from server import PromptServer
from comfy_api.latest import ComfyExtension, IO, UI
from comfy.cli_args import args
from typing_extensions import override

SVG = IO.SVG.Type  # TODO: temporary solution for backward compatibility, will be removed later.

@ -830,6 +838,405 @@ class ImageMergeTileList(IO.ComfyNode):
        return IO.NodeOutput(merged_image)


# ---------------------------------------------------------------------------
# Format specifications
# ---------------------------------------------------------------------------

# Maps (file_format, bit_depth, has_alpha) -> (numpy dtype scale, av pixel format,
# stream pix_fmt). Keeps the encode path declarative instead of branchy.
_FORMAT_SPECS = {
    ("png", "8-bit", False): {"scale": 255.0, "dtype": np.uint8, "frame_fmt": "rgb24", "stream_fmt": "rgb24"},
    ("png", "8-bit", True): {"scale": 255.0, "dtype": np.uint8, "frame_fmt": "rgba", "stream_fmt": "rgba"},
    ("png", "16-bit", False): {"scale": 65535.0, "dtype": np.uint16, "frame_fmt": "rgb48le", "stream_fmt": "rgb48be"},
    ("png", "16-bit", True): {"scale": 65535.0, "dtype": np.uint16, "frame_fmt": "rgba64le", "stream_fmt": "rgba64be"},
    ("exr", "32-bit float", False): {"scale": 1.0, "dtype": np.float32, "frame_fmt": "gbrpf32le", "stream_fmt": "gbrpf32le"},
    ("exr", "32-bit float", True): {"scale": 1.0, "dtype": np.float32, "frame_fmt": "gbrapf32le", "stream_fmt": "gbrapf32le"},
}
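
# Note on the 16-bit PNG rows: frames are created little-endian (rgb48le, the
# native numpy layout) while the stream formats are big-endian (rgb48be),
# because PNG stores 16-bit samples big-endian; the frame.reformat(...) call
# in _encode_image bridges the two.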


# ---------------------------------------------------------------------------
# Color transforms
# ---------------------------------------------------------------------------

def srgb_to_linear(t: torch.Tensor) -> torch.Tensor:
    """Inverse sRGB EOTF (IEC 61966-2-1). Operates on RGB channels only;
    alpha (if present as the 4th channel) is passed through unchanged."""
    if t.shape[-1] == 4:
        rgb, alpha = t[..., :3], t[..., 3:]
        return torch.cat([srgb_to_linear(rgb), alpha], dim=-1)

    # Piecewise: linear toe below 0.04045, gamma curve above.
    low = t / 12.92
    high = ((t.clamp(min=0.0) + 0.055) / 1.055) ** 2.4
    return torch.where(t <= 0.04045, low, high)
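
# Spot check (values from IEC 61966-2-1, illustrative): the two pieces meet at
# t = 0.04045, where 0.04045 / 12.92 ≈ 0.00313, and mid-gray 0.5 maps to
# ((0.555 / 1.055) ** 2.4) ≈ 0.214 linear.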


# HLG OETF constants from BT.2100 Table 5.
_HLG_A = 0.17883277
_HLG_B = 0.28466892
_HLG_C = 0.55991072928  # = 0.5 - a*ln(4*a)


def hlg_to_linear(t: torch.Tensor) -> torch.Tensor:
    """Inverse HLG OETF (BT.2100). Maps a non-linear HLG signal in [0, 1] to
    *scene*-linear light in [0, 1]. Per BT.2100 Note 5a, this is the correct
    transform when converting HLG to a linear scene-light representation
    (rather than display-light, which would also involve the HLG OOTF).

    Operates on RGB channels only; alpha is passed through unchanged."""
    if t.shape[-1] == 4:
        rgb, alpha = t[..., :3], t[..., 3:]
        return torch.cat([hlg_to_linear(rgb), alpha], dim=-1)

    # Piecewise: sqrt branch below 0.5, log branch above.
    # Clamp at the 0.5 branch point (not at _HLG_C, which would flatten the
    # log branch over (0.5, _HLG_C] and break continuity at the knee) so
    # out-of-range negative values can't blow up; values above 1.0 are
    # allowed and extrapolate naturally.
    low = (t ** 2) / 3.0
    high = (torch.exp((t.clamp(min=0.5) - _HLG_C) / _HLG_A) + _HLG_B) / 12.0
    return torch.where(t <= 0.5, low, high)
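
# Continuity check at the knee (illustrative): both pieces evaluate to ~1/12
# at t = 0.5, so the torch.where seam is smooth.
#   (0.5 ** 2) / 3.0                                      -> 0.0833333...
#   (math.exp((0.5 - _HLG_C) / _HLG_A) + _HLG_B) / 12.0   -> 0.0833335...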


# ---------------------------------------------------------------------------
# Metadata injection
# ---------------------------------------------------------------------------

_PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n"


def _png_chunk(chunk_type: bytes, data: bytes) -> bytes:
    """Build a single PNG chunk: length | type | data | CRC32(type+data)."""
    crc = zlib.crc32(chunk_type + data) & 0xFFFFFFFF
    return struct.pack(">I", len(data)) + chunk_type + data + struct.pack(">I", crc)


def _png_text_chunk(keyword: str, text: str) -> bytes:
    """tEXt chunk: latin-1 keyword + NUL + latin-1 text."""
    payload = keyword.encode("latin-1") + b"\x00" + text.encode("latin-1", errors="replace")
    return _png_chunk(b"tEXt", payload)


def inject_png_metadata(png_bytes: bytes, prompt: dict | None, extra_pnginfo: dict | None) -> bytes:
    """Insert ComfyUI prompt/workflow as tEXt chunks right after IHDR."""
    if not png_bytes.startswith(_PNG_SIGNATURE):
        return png_bytes

    chunks: list[bytes] = []
    if prompt is not None:
        chunks.append(_png_text_chunk("prompt", json.dumps(prompt)))
    if extra_pnginfo:
        for key, value in extra_pnginfo.items():
            chunks.append(_png_text_chunk(key, json.dumps(value)))
    if not chunks:
        return png_bytes

    # IHDR is always the first chunk; insert ours immediately after it.
    ihdr_length = struct.unpack(">I", png_bytes[8:12])[0]
    ihdr_end = 8 + 8 + ihdr_length + 4  # signature + (len+type) + data + crc
    return png_bytes[:ihdr_end] + b"".join(chunks) + png_bytes[ihdr_end:]
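
# For symmetry, a minimal reader sketch for the chunks written above (a
# hypothetical helper, shown for illustration; any standards-compliant PNG
# reader recovers the same keys):
def _read_png_text_chunks(png_bytes: bytes) -> dict[str, str]:
    out: dict[str, str] = {}
    pos = 8  # skip the signature
    while pos + 8 <= len(png_bytes):
        length = struct.unpack(">I", png_bytes[pos:pos + 4])[0]
        chunk_type = png_bytes[pos + 4:pos + 8]
        if chunk_type == b"tEXt":
            keyword, _, text = png_bytes[pos + 8:pos + 8 + length].partition(b"\x00")
            out[keyword.decode("latin-1")] = text.decode("latin-1")
        pos += 12 + length  # length + type + data + CRC
        if chunk_type == b"IEND":
            break
    return out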


# Standard chromaticities (CIE 1931 xy) for the colorspaces this node writes.
# Each tuple is (Rx, Ry, Gx, Gy, Bx, By, Wx, Wy). All share D65 white point.
_CHROMATICITIES = {
    # ITU-R BT.709 / sRGB primaries
    "Rec.709": (0.6400, 0.3300, 0.3000, 0.6000, 0.1500, 0.0600, 0.3127, 0.3290),
    # ITU-R BT.2020 (UHDTV / wide-gamut HDR) primaries
    "Rec.2020": (0.7080, 0.2920, 0.1700, 0.7970, 0.1310, 0.0460, 0.3127, 0.3290),
}


def _pack_chromaticities(primaries: tuple) -> bytes:
    """Serialize 8 chromaticity floats into the EXR `chromaticities` payload."""
    return struct.pack("<8f", *primaries)


def _exr_attribute(name: str, attr_type: str, value: bytes) -> bytes:
    """Serialize one EXR header attribute: name\\0 type\\0 size:int32 value."""
    return (
        name.encode("utf-8") + b"\x00"
        + attr_type.encode("utf-8") + b"\x00"
        + struct.pack("<i", len(value))
        + value
    )
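
# Byte layout of one serialized attribute (hypothetical value, for
# illustration):
#   _exr_attribute("prompt", "string", b"{}")
#   == b"prompt\x00" + b"string\x00" + struct.pack("<i", 2) + b"{}"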


def inject_exr_metadata(
    exr_bytes: bytes,
    prompt: dict | None,
    extra_pnginfo: dict | None,
    colorspace: str | None = None,
) -> bytes:
    """Insert ComfyUI metadata and color-space info into an EXR header.

    Color: EXR pixels are linear by convention. The standard way to describe
    their RGB→XYZ relationship is the `chromaticities` attribute. We pick the
    primaries that match what the user told us their input was:

        colorspace="sRGB" → Rec. 709 / sRGB primaries (D65)
        colorspace="HDR"  → Rec. 2020 / BT.2100 primaries (D65)

    Pixels are always converted to linear scene light upstream (sRGB EOTF
    inverse for sRGB; HLG OETF inverse for HDR), so the file content is
    scene-linear in the indicated gamut. OpenEXR has no standard transfer-
    function attribute (the OpenEXR TSC has discussed adding one, but none
    exists yet), so we don't invent one — `chromaticities` plus the EXR
    linear-by-convention rule fully specifies the color.

    Prompt/workflow: written as plain `string` attributes using the same keys
    (`prompt`, `workflow`, ...) that Comfy uses for PNG tEXt chunks, so the
    same readers can pull them out symmetrically.

    Implementation note: the chunk-offset table that follows the header stores
    *absolute* byte offsets into the file. Inserting N bytes into the header
    means every offset must be incremented by N or the file becomes unreadable.
    """
    if len(exr_bytes) < 8 or exr_bytes[:4] != b"\x76\x2f\x31\x01":
        return exr_bytes

    new_blob = b""
    if prompt is not None:
        new_blob += _exr_attribute("prompt", "string", json.dumps(prompt).encode("utf-8"))
    if extra_pnginfo:
        for key, value in extra_pnginfo.items():
            new_blob += _exr_attribute(key, "string", json.dumps(value).encode("utf-8"))
    if colorspace is not None:
        # Map each colorspace option to the RGB primaries the linear pixels
        # are now in. "sRGB" and "linear" both produce Rec. 709 linear; "HDR"
        # (HLG-encoded Rec. 2020 input) produces Rec. 2020 linear.
        primaries_name = {
            "sRGB": "Rec.709",
            "linear": "Rec.709",
            "HDR": "Rec.2020",
        }.get(colorspace, "Rec.709")
        new_blob += _exr_attribute(
            "chromaticities",
            "chromaticities",
            _pack_chromaticities(_CHROMATICITIES[primaries_name]),
        )
    if not new_blob:
        return exr_bytes

    # Walk header attributes to find the terminating null byte, and pick up
    # dataWindow + compression so we know how many chunks the offset table has.
    pos = 8  # past magic (4) + version (4)
    data_window = None
    compression = 0
    while pos < len(exr_bytes) and exr_bytes[pos] != 0:
        name_end = exr_bytes.index(b"\x00", pos)
        attr_name = exr_bytes[pos:name_end].decode("latin-1", errors="replace")
        type_end = exr_bytes.index(b"\x00", name_end + 1)
        attr_type = exr_bytes[name_end + 1:type_end].decode("latin-1", errors="replace")
        size = struct.unpack("<i", exr_bytes[type_end + 1:type_end + 5])[0]
        value_start = type_end + 5
        value = exr_bytes[value_start:value_start + size]

        if attr_name == "dataWindow" and attr_type == "box2i":
            data_window = struct.unpack("<iiii", value)  # xMin, yMin, xMax, yMax
        elif attr_name == "compression" and attr_type == "compression":
            compression = value[0]

        pos = value_start + size

    if data_window is None:
        return exr_bytes  # required attribute missing — don't risk corrupting

    # Scanlines per chunk by compression, from the OpenEXR spec.
    scanlines_per_block = {
        0: 1,    # NO_COMPRESSION
        1: 1,    # RLE
        2: 1,    # ZIPS
        3: 16,   # ZIP
        4: 32,   # PIZ
        5: 16,   # PXR24
        6: 32,   # B44
        7: 32,   # B44A
        8: 256,  # DWAA
        9: 256,  # DWAB
    }.get(compression, 1)

    _, y_min, _, y_max = data_window
    height = y_max - y_min + 1
    num_chunks = (height + scanlines_per_block - 1) // scanlines_per_block
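    # e.g. a 1080-row dataWindow with ZIP compression (16 scanlines/block)
    # yields (1080 + 15) // 16 = 68 chunk offsets.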

    header_end = pos  # position of the terminating null byte
    table_start = header_end + 1
    pixel_start = table_start + num_chunks * 8
    delta = len(new_blob)

    old_offsets = struct.unpack(f"<{num_chunks}Q", exr_bytes[table_start:pixel_start])
    new_table = struct.pack(f"<{num_chunks}Q", *(o + delta for o in old_offsets))

    return (
        exr_bytes[:header_end]               # header attributes
        + new_blob                           # our new attributes
        + exr_bytes[header_end:table_start]  # terminating null byte
        + new_table                          # shifted offset table
        + exr_bytes[pixel_start:]            # pixel data, untouched
    )


# ---------------------------------------------------------------------------
# Encoding
# ---------------------------------------------------------------------------

def _encode_image(
    img_tensor: torch.Tensor,
    file_format: str,
    bit_depth: str,
    colorspace: str,
) -> bytes:
    """Encode a single HxWxC tensor to PNG or EXR bytes in memory.

    For EXR the input is interpreted according to `colorspace` and converted
    to scene-linear (EXR's convention) before writing:

        "sRGB"   → input is sRGB-encoded Rec. 709; apply inverse sRGB EOTF.
        "HDR"    → input is HLG-encoded Rec. 2020 (BT.2100); apply inverse HLG
                   OETF to get scene-linear, per BT.2100 Note 5a.
        "linear" → input is already scene-linear (Rec. 709 primaries); write
                   through unchanged. Use this for renderer/compositor output.

    For PNG, colorspace selection does not modify pixels — PNG is delivered
    sRGB-encoded and there is no PNG path for wide-gamut HDR in this node.
    """
    height, width, num_channels = img_tensor.shape
    has_alpha = num_channels == 4

    spec = _FORMAT_SPECS[(file_format, bit_depth, has_alpha)]

    if spec["dtype"] == np.float32:
        # EXR path: preserve full range, no clamp.
        if colorspace == "sRGB":
            img_tensor = srgb_to_linear(img_tensor)
        elif colorspace == "HDR":
            img_tensor = hlg_to_linear(img_tensor)
        img_np = img_tensor.cpu().numpy().astype(np.float32)
    else:
        # PNG path: quantize to integer range.
        scaled = (img_tensor * spec["scale"]).clamp(0, spec["scale"])
        img_np = scaled.to(torch.int32).cpu().numpy().astype(spec["dtype"])

    # Encode directly via CodecContext. PyAV's `image2` muxer does NOT write to
    # BytesIO (it expects a real file path), so we bypass the container entirely.
    # For single-frame PNG/EXR the raw codec output IS the file.
    codec = av.CodecContext.create(file_format, "w")
    codec.width = width
    codec.height = height
    codec.pix_fmt = spec["stream_fmt"]
    codec.time_base = Fraction(1, 1)

    frame = av.VideoFrame.from_ndarray(img_np, format=spec["frame_fmt"])
    if spec["frame_fmt"] != spec["stream_fmt"]:
        frame = frame.reformat(format=spec["stream_fmt"])
    frame.pts = 0
    frame.time_base = codec.time_base

    packets = list(codec.encode(frame)) + list(codec.encode(None))  # flush with None
    return b"".join(bytes(p) for p in packets)


# ---------------------------------------------------------------------------
# Node
# ---------------------------------------------------------------------------

class SaveImageAdvanced(IO.ComfyNode):
    @classmethod
    def define_schema(cls):
        return IO.Schema(
            node_id="SaveImageAdvanced",
            search_aliases=["save", "save image", "export image", "output image", "write image"],
            display_name="Save Image (Advanced)",
            description="Saves the input images to your ComfyUI output directory.",
            category="image",
            essentials_category="Basics",
            inputs=[
                IO.Image.Input("images", tooltip="The images to save."),
                IO.String.Input(
                    "filename_prefix",
                    default="ComfyUI",
                    tooltip=(
                        "The prefix for the file to save. May include formatting tokens "
                        "such as %date:yyyy-MM-dd% or %Empty Latent Image.width%."
                    ),
                ),
                IO.DynamicCombo.Input(
                    "image_format",
                    options=[
                        IO.DynamicCombo.Option("png", [
                            IO.Combo.Input("bit_depth", options=["8-bit", "16-bit"],
                                           default="8-bit", advanced=True),
                            IO.Combo.Input("colorspace", options=["sRGB"],
                                           default="sRGB", advanced=True),
                        ]),
                        IO.DynamicCombo.Option("exr", [
                            IO.Combo.Input("bit_depth", options=["32-bit float"],
                                           default="32-bit float", advanced=True),
                            IO.Combo.Input(
                                "colorspace",
                                options=["sRGB", "HDR", "linear"],
                                default="sRGB",
                                advanced=True,
                                tooltip=(
                                    "Colorspace of the input tensor. The EXR is "
                                    "always written as scene-linear in the matching "
                                    "gamut.\n"
                                    " 'sRGB' — input is sRGB-encoded Rec.709; "
                                    "the inverse sRGB EOTF is applied.\n"
                                    " 'HDR' — input is HLG-encoded Rec.2020 "
                                    "(BT.2100); the inverse HLG OETF is applied "
                                    "to get scene-linear light.\n"
                                    " 'linear' — input is already scene-linear "
                                    "(Rec.709 primaries); written through unchanged. "
                                    "Use this for renderer/compositor output."
                                ),
                            ),
                        ]),
                    ],
                    tooltip="The file format in which to save the image.",
                ),
            ],
            hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo],
            is_output_node=True,
        )

    @classmethod
    def execute(cls, images, filename_prefix: str, image_format: dict) -> IO.NodeOutput:
        file_format = image_format["image_format"]
        bit_depth = image_format["bit_depth"]
        colorspace = image_format.get("colorspace", "sRGB")

        output_dir = folder_paths.get_output_directory()
        full_output_folder, filename, counter, subfolder, filename_prefix = (
            folder_paths.get_save_image_path(
                filename_prefix, output_dir, images[0].shape[1], images[0].shape[0]
            )
        )

        prompt = cls.hidden.prompt
        extra_pnginfo = cls.hidden.extra_pnginfo
        write_metadata = not args.disable_metadata

        results = []
        for batch_number, image in enumerate(images):
            encoded = _encode_image(image, file_format, bit_depth, colorspace)

            if write_metadata:
                if file_format == "png":
                    encoded = inject_png_metadata(encoded, prompt, extra_pnginfo)
                elif file_format == "exr":
                    encoded = inject_exr_metadata(encoded, prompt, extra_pnginfo, colorspace)

            name = filename.replace("%batch_num%", str(batch_number))
            file = f"{name}_{counter:05}.{file_format}"
            with open(os.path.join(full_output_folder, file), "wb") as f:
                f.write(encoded)

            results.append({"filename": file, "subfolder": subfolder, "type": "output"})
            counter += 1

        return IO.NodeOutput(ui={"images": results})


class ImagesExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:

@ -842,6 +1249,7 @@ class ImagesExtension(ComfyExtension):
            ImageAddNoise,
            SaveAnimatedWEBP,
            SaveAnimatedPNG,
+           SaveImageAdvanced,
            SaveSVGNode,
            ImageStitch,
            ResizeAndPadImage,

@ -338,25 +338,8 @@ class LTXVAddGuide(io.ComfyNode):
        noise_mask = get_noise_mask(latent)

        _, _, latent_length, latent_height, latent_width = latent_image.shape

-       # For mid-video multi-frame guides, prepend+strip a throwaway first frame so the VAE's "first latent = 1 pixel frame" asymmetry lands on the discarded slot
-       time_scale_factor = scale_factors[0]
-       num_frames_to_keep = ((image.shape[0] - 1) // time_scale_factor) * time_scale_factor + 1
-       resolved_frame_idx = frame_idx
-       if frame_idx < 0:
-           _, num_keyframes = get_keyframe_idxs(positive)
-           resolved_frame_idx = max((latent_length - num_keyframes - 1) * time_scale_factor + 1 + frame_idx, 0)
-       causal_fix = resolved_frame_idx == 0 or num_frames_to_keep == 1
-
-       if not causal_fix:
-           image = torch.cat([image[:1], image], dim=0)
-
        image, t = cls.encode(vae, latent_width, latent_height, image, scale_factors)

-       if not causal_fix:
-           t = t[:, :, 1:, :, :]
-           image = image[1:]
-
        frame_idx, latent_idx = cls.get_latent_index(positive, latent_length, len(image), frame_idx, scale_factors)
        assert latent_idx + t.shape[2] <= latent_length, "Conditioning frames exceed the length of the latent sequence."

@ -369,7 +352,6 @@ class LTXVAddGuide(io.ComfyNode):
            t,
            strength,
            scale_factors,
-           causal_fix=causal_fix,
        )

        # Track this guide for per-reference attention control.

@ -40,13 +40,23 @@ def composite(destination, source, x, y, mask = None, multiplier = 8, resize_source = False):

    inverse_mask = torch.ones_like(mask) - mask

-   source_portion = mask * source[..., :visible_height, :visible_width]
-   destination_portion = inverse_mask * destination[..., top:bottom, left:right]
+   source_rgb = source[:, :3, :visible_height, :visible_width]
+   dest_slice = destination[..., top:bottom, left:right]
+
+   if destination.shape[1] == 4:
+       if torch.max(dest_slice) == 0:
+           destination[:, :3, top:bottom, left:right] = source_rgb
+           destination[:, 3:4, top:bottom, left:right] = mask
+       else:
+           destination[:, :3, top:bottom, left:right] = (mask * source_rgb) + (inverse_mask * dest_slice[:, :3])
+           destination[:, 3:4, top:bottom, left:right] = torch.max(mask, dest_slice[:, 3:4])
+   else:
+       source_portion = mask * source_rgb
+       destination_portion = inverse_mask * dest_slice
+       destination[..., top:bottom, left:right] = source_portion + destination_portion

-   destination[..., top:bottom, left:right] = source_portion + destination_portion
    return destination
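
# A minimal sketch of the new RGBA branch, mirroring the channels-first call in
# ImageCompositeMasked.execute below (shapes and values are hypothetical):
#
#     dst = torch.zeros(1, 4, 64, 64)        # empty RGBA canvas
#     src = torch.rand(1, 3, 16, 16)         # RGB source, channels-first
#     m = torch.ones(1, 1, 16, 16)           # fully opaque mask
#     out = composite(dst, src, 8, 8, m, 1)  # RGB pasted in, mask becomes alpha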


class LatentCompositeMasked(IO.ComfyNode):
    @classmethod
    def define_schema(cls):

@ -85,18 +95,23 @@ class ImageCompositeMasked(IO.ComfyNode):
            display_name="Image Composite Masked",
            category="image",
            inputs=[
-               IO.Image.Input("destination"),
                IO.Image.Input("source"),
                IO.Int.Input("x", default=0, min=0, max=nodes.MAX_RESOLUTION, step=1),
                IO.Int.Input("y", default=0, min=0, max=nodes.MAX_RESOLUTION, step=1),
                IO.Boolean.Input("resize_source", default=False),
+               IO.Image.Input("destination", optional=True),
                IO.Mask.Input("mask", optional=True),
            ],
            outputs=[IO.Image.Output()],
        )

    @classmethod
-   def execute(cls, destination, source, x, y, resize_source, mask = None) -> IO.NodeOutput:
+   def execute(cls, source, x, y, resize_source, destination = None, mask = None) -> IO.NodeOutput:
+       if destination is None:  # transparent rgba
+           B, H, W, C = source.shape
+           destination = torch.zeros((B, H, W, 4), dtype=source.dtype, device=source.device)
+           if C == 3:
+               source = torch.nn.functional.pad(source, (0, 1), value=1.0)
        destination, source = node_helpers.image_alpha_fix(destination, source)
        destination = destination.clone().movedim(-1, 1)
        output = composite(destination, source.movedim(-1, 1), x, y, mask, 1, resize_source).movedim(1, -1)

openapi.yaml (35 changed lines)
@ -2071,6 +2071,7 @@ paths:
                type: integer
                description: Number of assets marked as missing


  # ===========================================================================
  # Cloud-runtime FE-facing operations
  #

@ -2121,11 +2122,7 @@ paths:
      operationId: getCloudJobStatus
      tags: [queue]
      summary: Get status of a cloud job
-     deprecated: true
-     description: |
-       **Deprecated.** This endpoint is superseded by `GET /api/jobs/{job_id}`.
-       Clients should migrate; the endpoint is retained for backward
-       compatibility but will be removed in a future release.
+     description: "[cloud-only] Returns the current execution status of a cloud job."
      x-runtime: [cloud]
      parameters:
        - name: job_id

@ -2195,11 +2192,7 @@ paths:
      operationId: getHistoryV2
      tags: [history]
      summary: Get paginated execution history (v2)
-     deprecated: true
-     description: |
-       **Deprecated.** This endpoint is superseded by `GET /api/jobs`.
-       Clients should migrate; the endpoint is retained for backward
-       compatibility but will be removed in a future release.
+     description: "[cloud-only] Returns a paginated list of execution history entries in the v2 format, with richer metadata than the legacy history endpoint."
      x-runtime: [cloud]
      parameters:
        - name: limit

@ -2238,11 +2231,7 @@ paths:
      operationId: getHistoryV2ByPromptId
      tags: [history]
      summary: Get v2 history for a specific prompt
-     deprecated: true
-     description: |
-       **Deprecated.** This endpoint is superseded by `GET /api/jobs/{prompt_id}`.
-       Clients should migrate; the endpoint is retained for backward
-       compatibility but will be removed in a future release.
+     description: "[cloud-only] Returns the v2 history entry for a specific prompt execution."
      x-runtime: [cloud]
      parameters:
        - name: prompt_id

@ -2277,12 +2266,7 @@ paths:
      operationId: getCloudLogs
      tags: [system]
      summary: Get cloud execution logs
-     deprecated: true
-     description: |
-       **Deprecated.** This endpoint returns a static placeholder response and
-       provides no real log data. It is retained only to avoid breaking clients
-       that still call it. Clients should remove their dependency; the endpoint
-       will be removed in a future release.
+     description: "[cloud-only] Returns execution logs for the authenticated user's cloud jobs."
      x-runtime: [cloud]
      parameters:
        - name: job_id

@ -5386,12 +5370,7 @@ paths:
      operationId: viewVideo
      tags: [view]
      summary: View or download a video file
-     deprecated: true
-     description: |
-       **Deprecated.** This endpoint is an alias of `GET /api/view` added for
-       legacy history-queue video playback. Callers should use `/api/view`
-       directly; the endpoint is retained for backward compatibility but will
-       be removed in a future release.
+     description: "[cloud-only] Serves a video file from the output directory. Used by the frontend video player."
      x-runtime: [cloud]
      parameters:
        - name: filename

@ -5544,6 +5523,7 @@ paths:
          schema:
            $ref: "#/components/schemas/CloudError"


components:
  parameters:
    ComfyUserHeader:

@ -6895,6 +6875,7 @@ components:
        error:
          type: string


  # -------------------------------------------------------------------
  # Cloud-runtime schemas
  #


@ -1,6 +1,6 @@
comfyui-frontend-package==1.43.18
comfyui-workflow-templates==0.9.73
-comfyui-embedded-docs==0.5.0
+comfyui-embedded-docs==0.4.4
torch
torchsde
torchvision
