mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-05-14 13:48:05 +08:00
Compare commits
15 Commits
cursor/mar
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| 1f28908d6e | |||
| fb51a988b6 | |||
| 26515acd23 | |||
| 74c17a25e5 | |||
| afb4fa15d5 | |||
| b94941d8d3 | |||
| 8505abf52e | |||
| a5189fed51 | |||
| 240363f11e | |||
| 2bd65f2091 | |||
| cccb697aa3 | |||
| 300b6c8c91 | |||
| 1d95ed211e | |||
| a5f7bc5658 | |||
| fb097bedc2 |
@ -89,3 +89,12 @@ rules:
|
||||
then:
|
||||
field: description
|
||||
function: truthy
|
||||
|
||||
overrides:
|
||||
# /ws uses HTTP 101 (Switching Protocols) — a legitimate response for a
|
||||
# WebSocket upgrade, but not a 2xx, so operation-success-response fires
|
||||
# as a false positive. OpenAPI 3.x has no native WebSocket support.
|
||||
- files:
|
||||
- "openapi.yaml#/paths/~1ws"
|
||||
rules:
|
||||
operation-success-response: off
|
||||
|
||||
@ -1443,7 +1443,7 @@ class HiDreamO1(supported_models_base.BASE):
|
||||
}
|
||||
|
||||
latent_format = latent_formats.HiDreamO1Pixel
|
||||
memory_usage_factor = 0.6
|
||||
memory_usage_factor = 0.033
|
||||
# fp16 not supported: LM MLP down_proj activations fp16 overflow, causing NaNs
|
||||
supported_inference_dtypes = [torch.bfloat16, torch.float32]
|
||||
|
||||
|
||||
@ -1164,12 +1164,18 @@ def tiled_scale_multidim(samples, function, tile=(64, 64), overlap=8, upscale_am
|
||||
|
||||
o = out
|
||||
o_d = out_div
|
||||
ps_view = ps
|
||||
mask_view = mask
|
||||
for d in range(dims):
|
||||
o = o.narrow(d + 2, upscaled[d], mask.shape[d + 2])
|
||||
o_d = o_d.narrow(d + 2, upscaled[d], mask.shape[d + 2])
|
||||
l = min(ps_view.shape[d + 2], o.shape[d + 2] - upscaled[d])
|
||||
o = o.narrow(d + 2, upscaled[d], l)
|
||||
o_d = o_d.narrow(d + 2, upscaled[d], l)
|
||||
if l < ps_view.shape[d + 2]:
|
||||
ps_view = ps_view.narrow(d + 2, 0, l)
|
||||
mask_view = mask_view.narrow(d + 2, 0, l)
|
||||
|
||||
o.add_(ps * mask)
|
||||
o_d.add_(mask)
|
||||
o.add_(ps_view * mask_view)
|
||||
o_d.add_(mask_view)
|
||||
|
||||
if pbar is not None:
|
||||
pbar.update(1)
|
||||
|
||||
@ -12,9 +12,24 @@ class VOXEL:
|
||||
|
||||
|
||||
class MESH:
|
||||
def __init__(self, vertices: torch.Tensor, faces: torch.Tensor):
|
||||
self.vertices = vertices
|
||||
self.faces = faces
|
||||
def __init__(self, vertices: torch.Tensor, faces: torch.Tensor,
|
||||
uvs: torch.Tensor | None = None,
|
||||
vertex_colors: torch.Tensor | None = None,
|
||||
texture: torch.Tensor | None = None,
|
||||
vertex_counts: torch.Tensor | None = None,
|
||||
face_counts: torch.Tensor | None = None):
|
||||
|
||||
assert (vertex_counts is None) == (face_counts is None), \
|
||||
"vertex_counts and face_counts must be provided together (both or neither)"
|
||||
self.vertices = vertices # vertices: (B, N, 3)
|
||||
self.faces = faces # faces: (B, M, 3)
|
||||
self.uvs = uvs # uvs: (B, N, 2)
|
||||
self.vertex_colors = vertex_colors # vertex_colors: (B, N, 3 or 4)
|
||||
self.texture = texture # texture: (B, H, W, 3)
|
||||
# When vertices/faces are zero-padded to a common N/M across the batch (variable-size mesh batch),
|
||||
# these hold the real per-item lengths (B,). None means rows are uniform and no slicing is needed.
|
||||
self.vertex_counts = vertex_counts
|
||||
self.face_counts = face_counts
|
||||
|
||||
|
||||
class File3D:
|
||||
|
||||
75
comfy_api_nodes/apis/anthropic.py
Normal file
75
comfy_api_nodes/apis/anthropic.py
Normal file
@ -0,0 +1,75 @@
|
||||
from enum import Enum
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class AnthropicRole(str, Enum):
|
||||
user = "user"
|
||||
assistant = "assistant"
|
||||
|
||||
|
||||
class AnthropicTextContent(BaseModel):
|
||||
type: Literal["text"] = "text"
|
||||
text: str = Field(...)
|
||||
|
||||
|
||||
class AnthropicImageSourceBase64(BaseModel):
|
||||
type: Literal["base64"] = "base64"
|
||||
media_type: str = Field(..., description="MIME type of the image, e.g. image/png, image/jpeg")
|
||||
data: str = Field(..., description="Base64-encoded image data")
|
||||
|
||||
|
||||
class AnthropicImageSourceUrl(BaseModel):
|
||||
type: Literal["url"] = "url"
|
||||
url: str = Field(...)
|
||||
|
||||
|
||||
class AnthropicImageContent(BaseModel):
|
||||
type: Literal["image"] = "image"
|
||||
source: AnthropicImageSourceBase64 | AnthropicImageSourceUrl = Field(...)
|
||||
|
||||
|
||||
class AnthropicMessage(BaseModel):
|
||||
role: AnthropicRole = Field(...)
|
||||
content: list[AnthropicTextContent | AnthropicImageContent] = Field(...)
|
||||
|
||||
|
||||
class AnthropicMessagesRequest(BaseModel):
|
||||
model: str = Field(...)
|
||||
messages: list[AnthropicMessage] = Field(...)
|
||||
max_tokens: int = Field(..., ge=1)
|
||||
system: str | None = Field(None, description="Top-level system prompt")
|
||||
temperature: float | None = Field(None, ge=0.0, le=1.0)
|
||||
top_p: float | None = Field(None, ge=0.0, le=1.0)
|
||||
top_k: int | None = Field(None, ge=0)
|
||||
stop_sequences: list[str] | None = Field(None)
|
||||
|
||||
|
||||
class AnthropicResponseTextBlock(BaseModel):
|
||||
type: Literal["text"] = "text"
|
||||
text: str = Field(...)
|
||||
|
||||
|
||||
class AnthropicCacheCreationUsage(BaseModel):
|
||||
ephemeral_5m_input_tokens: int | None = Field(None)
|
||||
ephemeral_1h_input_tokens: int | None = Field(None)
|
||||
|
||||
|
||||
class AnthropicMessagesUsage(BaseModel):
|
||||
input_tokens: int | None = Field(None)
|
||||
output_tokens: int | None = Field(None)
|
||||
cache_creation_input_tokens: int | None = Field(None)
|
||||
cache_read_input_tokens: int | None = Field(None)
|
||||
cache_creation: AnthropicCacheCreationUsage | None = Field(None)
|
||||
|
||||
|
||||
class AnthropicMessagesResponse(BaseModel):
|
||||
id: str | None = Field(None)
|
||||
type: str | None = Field(None)
|
||||
role: str | None = Field(None)
|
||||
model: str | None = Field(None)
|
||||
content: list[AnthropicResponseTextBlock] | None = Field(None)
|
||||
stop_reason: str | None = Field(None)
|
||||
stop_sequence: str | None = Field(None)
|
||||
usage: AnthropicMessagesUsage | None = Field(None)
|
||||
245
comfy_api_nodes/nodes_anthropic.py
Normal file
245
comfy_api_nodes/nodes_anthropic.py
Normal file
@ -0,0 +1,245 @@
|
||||
"""API Nodes for Anthropic Claude (Messages API). See: https://docs.anthropic.com/en/api/messages"""
|
||||
|
||||
from typing_extensions import override
|
||||
|
||||
from comfy_api.latest import IO, ComfyExtension, Input
|
||||
from comfy_api_nodes.apis.anthropic import (
|
||||
AnthropicImageContent,
|
||||
AnthropicImageSourceUrl,
|
||||
AnthropicMessage,
|
||||
AnthropicMessagesRequest,
|
||||
AnthropicMessagesResponse,
|
||||
AnthropicRole,
|
||||
AnthropicTextContent,
|
||||
)
|
||||
from comfy_api_nodes.util import (
|
||||
ApiEndpoint,
|
||||
get_number_of_images,
|
||||
sync_op,
|
||||
upload_images_to_comfyapi,
|
||||
validate_string,
|
||||
)
|
||||
|
||||
ANTHROPIC_MESSAGES_ENDPOINT = "/proxy/anthropic/v1/messages"
|
||||
ANTHROPIC_IMAGE_MAX_PIXELS = 1568 * 1568
|
||||
CLAUDE_MAX_IMAGES = 20
|
||||
|
||||
CLAUDE_MODELS: dict[str, str] = {
|
||||
"Opus 4.7": "claude-opus-4-7",
|
||||
"Opus 4.6": "claude-opus-4-6",
|
||||
"Sonnet 4.6": "claude-sonnet-4-6",
|
||||
"Sonnet 4.5": "claude-sonnet-4-5-20250929",
|
||||
"Haiku 4.5": "claude-haiku-4-5-20251001",
|
||||
}
|
||||
|
||||
|
||||
def _claude_model_inputs():
|
||||
return [
|
||||
IO.Int.Input(
|
||||
"max_tokens",
|
||||
default=16000,
|
||||
min=32,
|
||||
max=32000,
|
||||
tooltip="Maximum number of tokens to generate before stopping.",
|
||||
advanced=True,
|
||||
),
|
||||
IO.Float.Input(
|
||||
"temperature",
|
||||
default=1.0,
|
||||
min=0.0,
|
||||
max=1.0,
|
||||
step=0.01,
|
||||
tooltip="Controls randomness. 0.0 is deterministic, 1.0 is most random.",
|
||||
advanced=True,
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
def _model_price_per_million(model: str) -> tuple[float, float] | None:
|
||||
"""Return (input_per_1M, output_per_1M) USD for a Claude model, or None if unknown."""
|
||||
if "opus-4-7" in model or "opus-4-6" in model or "opus-4-5" in model:
|
||||
return 5.0, 25.0
|
||||
if "sonnet-4" in model:
|
||||
return 3.0, 15.0
|
||||
if "haiku-4-5" in model:
|
||||
return 1.0, 5.0
|
||||
return None
|
||||
|
||||
|
||||
def calculate_tokens_price(response: AnthropicMessagesResponse) -> float | None:
|
||||
"""Compute approximate USD price from response usage. Server-side billing is authoritative."""
|
||||
if not response.usage or not response.model:
|
||||
return None
|
||||
rates = _model_price_per_million(response.model)
|
||||
if rates is None:
|
||||
return None
|
||||
input_rate, output_rate = rates
|
||||
input_tokens = response.usage.input_tokens or 0
|
||||
output_tokens = response.usage.output_tokens or 0
|
||||
cache_read = response.usage.cache_read_input_tokens or 0
|
||||
cache_5m = 0
|
||||
cache_1h = 0
|
||||
if response.usage.cache_creation:
|
||||
cache_5m = response.usage.cache_creation.ephemeral_5m_input_tokens or 0
|
||||
cache_1h = response.usage.cache_creation.ephemeral_1h_input_tokens or 0
|
||||
total = (
|
||||
input_tokens * input_rate
|
||||
+ output_tokens * output_rate
|
||||
+ cache_read * input_rate * 0.1
|
||||
+ cache_5m * input_rate * 1.25
|
||||
+ cache_1h * input_rate * 2.0
|
||||
)
|
||||
return total / 1_000_000.0
|
||||
|
||||
|
||||
def _get_text_from_response(response: AnthropicMessagesResponse) -> str:
|
||||
if not response.content:
|
||||
return ""
|
||||
return "\n".join(block.text for block in response.content if block.text)
|
||||
|
||||
|
||||
async def _build_image_content_blocks(
|
||||
cls: type[IO.ComfyNode],
|
||||
image_tensors: list[Input.Image],
|
||||
) -> list[AnthropicImageContent]:
|
||||
urls = await upload_images_to_comfyapi(
|
||||
cls,
|
||||
image_tensors,
|
||||
max_images=CLAUDE_MAX_IMAGES,
|
||||
total_pixels=ANTHROPIC_IMAGE_MAX_PIXELS,
|
||||
wait_label="Uploading reference images",
|
||||
)
|
||||
return [AnthropicImageContent(source=AnthropicImageSourceUrl(url=url)) for url in urls]
|
||||
|
||||
|
||||
class ClaudeNode(IO.ComfyNode):
|
||||
"""Generate text responses from an Anthropic Claude model."""
|
||||
|
||||
@classmethod
|
||||
def define_schema(cls):
|
||||
return IO.Schema(
|
||||
node_id="ClaudeNode",
|
||||
display_name="Anthropic Claude",
|
||||
category="api node/text/Anthropic",
|
||||
essentials_category="Text Generation",
|
||||
description="Generate text responses with Anthropic's Claude models. "
|
||||
"Provide a text prompt and optionally one or more images for multimodal context.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
"prompt",
|
||||
multiline=True,
|
||||
default="",
|
||||
tooltip="Text input to the model.",
|
||||
),
|
||||
IO.DynamicCombo.Input(
|
||||
"model",
|
||||
options=[IO.DynamicCombo.Option(label, _claude_model_inputs()) for label in CLAUDE_MODELS],
|
||||
tooltip="The Claude model used to generate the response.",
|
||||
),
|
||||
IO.Int.Input(
|
||||
"seed",
|
||||
default=0,
|
||||
min=0,
|
||||
max=2147483647,
|
||||
control_after_generate=True,
|
||||
tooltip="Seed controls whether the node should re-run; "
|
||||
"results are non-deterministic regardless of seed.",
|
||||
),
|
||||
IO.Autogrow.Input(
|
||||
"images",
|
||||
template=IO.Autogrow.TemplateNames(
|
||||
IO.Image.Input("image"),
|
||||
names=[f"image_{i}" for i in range(1, CLAUDE_MAX_IMAGES + 1)],
|
||||
min=0,
|
||||
),
|
||||
tooltip=f"Optional image(s) to use as context for the model. Up to {CLAUDE_MAX_IMAGES} images.",
|
||||
),
|
||||
IO.String.Input(
|
||||
"system_prompt",
|
||||
multiline=True,
|
||||
default="",
|
||||
optional=True,
|
||||
advanced=True,
|
||||
tooltip="Foundational instructions that dictate the model's behavior.",
|
||||
),
|
||||
],
|
||||
outputs=[IO.String.Output()],
|
||||
hidden=[
|
||||
IO.Hidden.auth_token_comfy_org,
|
||||
IO.Hidden.api_key_comfy_org,
|
||||
IO.Hidden.unique_id,
|
||||
],
|
||||
is_api_node=True,
|
||||
price_badge=IO.PriceBadge(
|
||||
depends_on=IO.PriceBadgeDepends(widgets=["model"]),
|
||||
expr="""
|
||||
(
|
||||
$m := widgets.model;
|
||||
$contains($m, "opus") ? {
|
||||
"type": "list_usd",
|
||||
"usd": [0.005, 0.025],
|
||||
"format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
|
||||
}
|
||||
: $contains($m, "sonnet") ? {
|
||||
"type": "list_usd",
|
||||
"usd": [0.003, 0.015],
|
||||
"format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
|
||||
}
|
||||
: $contains($m, "haiku") ? {
|
||||
"type": "list_usd",
|
||||
"usd": [0.001, 0.005],
|
||||
"format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
|
||||
}
|
||||
: {"type":"text", "text":"Token-based"}
|
||||
)
|
||||
""",
|
||||
),
|
||||
)
|
||||
|
||||
@classmethod
|
||||
async def execute(
|
||||
cls,
|
||||
prompt: str,
|
||||
model: dict,
|
||||
seed: int,
|
||||
images: dict | None = None,
|
||||
system_prompt: str = "",
|
||||
) -> IO.NodeOutput:
|
||||
validate_string(prompt, strip_whitespace=True, min_length=1)
|
||||
model_label = model["model"]
|
||||
max_tokens = model["max_tokens"]
|
||||
temperature = model["temperature"]
|
||||
|
||||
image_tensors: list[Input.Image] = [t for t in (images or {}).values() if t is not None]
|
||||
if sum(get_number_of_images(t) for t in image_tensors) > CLAUDE_MAX_IMAGES:
|
||||
raise ValueError(f"Up to {CLAUDE_MAX_IMAGES} images are supported per request.")
|
||||
|
||||
content: list[AnthropicTextContent | AnthropicImageContent] = []
|
||||
if image_tensors:
|
||||
content.extend(await _build_image_content_blocks(cls, image_tensors))
|
||||
content.append(AnthropicTextContent(text=prompt))
|
||||
|
||||
response = await sync_op(
|
||||
cls,
|
||||
ApiEndpoint(path=ANTHROPIC_MESSAGES_ENDPOINT, method="POST"),
|
||||
response_model=AnthropicMessagesResponse,
|
||||
data=AnthropicMessagesRequest(
|
||||
model=CLAUDE_MODELS[model_label],
|
||||
max_tokens=max_tokens,
|
||||
messages=[AnthropicMessage(role=AnthropicRole.user, content=content)],
|
||||
system=system_prompt or None,
|
||||
temperature=temperature,
|
||||
),
|
||||
price_extractor=calculate_tokens_price,
|
||||
)
|
||||
return IO.NodeOutput(_get_text_from_response(response) or "Empty response from Claude model.")
|
||||
|
||||
|
||||
class AnthropicExtension(ComfyExtension):
|
||||
@override
|
||||
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
|
||||
return [ClaudeNode]
|
||||
|
||||
|
||||
async def comfy_entrypoint() -> AnthropicExtension:
|
||||
return AnthropicExtension()
|
||||
@ -82,6 +82,8 @@ class VAEEncodeAudio(IO.ComfyNode):
|
||||
|
||||
@classmethod
|
||||
def execute(cls, vae, audio) -> IO.NodeOutput:
|
||||
if audio is None:
|
||||
raise ValueError("VAEEncodeAudio: input audio is None (source video may have no audio track).")
|
||||
sample_rate = audio["sample_rate"]
|
||||
vae_sample_rate = getattr(vae, "audio_sample_rate", 44100)
|
||||
if vae_sample_rate != sample_rate:
|
||||
@ -171,6 +173,8 @@ class SaveAudio(IO.ComfyNode):
|
||||
|
||||
@classmethod
|
||||
def execute(cls, audio, filename_prefix="ComfyUI", format="flac") -> IO.NodeOutput:
|
||||
if audio is None:
|
||||
raise ValueError("SaveAudio: input audio is None (source video may have no audio track).")
|
||||
return IO.NodeOutput(
|
||||
ui=UI.AudioSaveHelper.get_save_audio_ui(audio, filename_prefix=filename_prefix, cls=cls, format=format)
|
||||
)
|
||||
@ -198,6 +202,8 @@ class SaveAudioMP3(IO.ComfyNode):
|
||||
|
||||
@classmethod
|
||||
def execute(cls, audio, filename_prefix="ComfyUI", format="mp3", quality="128k") -> IO.NodeOutput:
|
||||
if audio is None:
|
||||
raise ValueError("SaveAudioMP3: input audio is None (source video may have no audio track).")
|
||||
return IO.NodeOutput(
|
||||
ui=UI.AudioSaveHelper.get_save_audio_ui(
|
||||
audio, filename_prefix=filename_prefix, cls=cls, format=format, quality=quality
|
||||
@ -226,6 +232,8 @@ class SaveAudioOpus(IO.ComfyNode):
|
||||
|
||||
@classmethod
|
||||
def execute(cls, audio, filename_prefix="ComfyUI", format="opus", quality="V3") -> IO.NodeOutput:
|
||||
if audio is None:
|
||||
raise ValueError("SaveAudioOpus: input audio is None (source video may have no audio track).")
|
||||
return IO.NodeOutput(
|
||||
ui=UI.AudioSaveHelper.get_save_audio_ui(
|
||||
audio, filename_prefix=filename_prefix, cls=cls, format=format, quality=quality
|
||||
@ -252,6 +260,8 @@ class PreviewAudio(IO.ComfyNode):
|
||||
|
||||
@classmethod
|
||||
def execute(cls, audio) -> IO.NodeOutput:
|
||||
if audio is None:
|
||||
raise ValueError("PreviewAudio: input audio is None (source video may have no audio track).")
|
||||
return IO.NodeOutput(ui=UI.PreviewAudio(audio, cls=cls))
|
||||
|
||||
save_flac = execute # TODO: remove
|
||||
@ -297,6 +307,7 @@ class LoadAudio(IO.ComfyNode):
|
||||
@classmethod
|
||||
def define_schema(cls):
|
||||
input_dir = folder_paths.get_input_directory()
|
||||
os.makedirs(input_dir, exist_ok=True)
|
||||
files = folder_paths.filter_files_content_types(os.listdir(input_dir), ["audio", "video"])
|
||||
return IO.Schema(
|
||||
node_id="LoadAudio",
|
||||
@ -391,21 +402,26 @@ class TrimAudioDuration(IO.ComfyNode):
|
||||
|
||||
@classmethod
|
||||
def execute(cls, audio, start_index, duration) -> IO.NodeOutput:
|
||||
if audio is None:
|
||||
return IO.NodeOutput(None)
|
||||
waveform = audio["waveform"]
|
||||
sample_rate = audio["sample_rate"]
|
||||
audio_length = waveform.shape[-1]
|
||||
|
||||
if audio_length == 0:
|
||||
return IO.NodeOutput(audio)
|
||||
|
||||
if start_index < 0:
|
||||
start_frame = audio_length + int(round(start_index * sample_rate))
|
||||
else:
|
||||
start_frame = int(round(start_index * sample_rate))
|
||||
start_frame = max(0, min(start_frame, audio_length - 1))
|
||||
start_frame = max(0, min(start_frame, audio_length))
|
||||
|
||||
end_frame = start_frame + int(round(duration * sample_rate))
|
||||
end_frame = max(0, min(end_frame, audio_length))
|
||||
|
||||
if start_frame >= end_frame:
|
||||
raise ValueError("AudioTrim: Start time must be less than end time and be within the audio length.")
|
||||
raise ValueError("TrimAudioDuration: Start time must be less than end time and be within the audio length.")
|
||||
|
||||
return IO.NodeOutput({"waveform": waveform[..., start_frame:end_frame], "sample_rate": sample_rate})
|
||||
|
||||
@ -432,11 +448,13 @@ class SplitAudioChannels(IO.ComfyNode):
|
||||
|
||||
@classmethod
|
||||
def execute(cls, audio) -> IO.NodeOutput:
|
||||
if audio is None:
|
||||
return IO.NodeOutput(None, None)
|
||||
waveform = audio["waveform"]
|
||||
sample_rate = audio["sample_rate"]
|
||||
|
||||
if waveform.shape[1] != 2:
|
||||
raise ValueError("AudioSplit: Input audio has only one channel.")
|
||||
raise ValueError(f"AudioSplit: Input audio must be stereo (2 channels), got {waveform.shape[1]} channel(s).")
|
||||
|
||||
left_channel = waveform[..., 0:1, :]
|
||||
right_channel = waveform[..., 1:2, :]
|
||||
@ -464,6 +482,12 @@ class JoinAudioChannels(IO.ComfyNode):
|
||||
|
||||
@classmethod
|
||||
def execute(cls, audio_left, audio_right) -> IO.NodeOutput:
|
||||
if audio_left is None and audio_right is None:
|
||||
return IO.NodeOutput(None)
|
||||
if audio_left is None:
|
||||
return IO.NodeOutput(audio_right)
|
||||
if audio_right is None:
|
||||
return IO.NodeOutput(audio_left)
|
||||
waveform_left = audio_left["waveform"]
|
||||
sample_rate_left = audio_left["sample_rate"]
|
||||
waveform_right = audio_right["waveform"]
|
||||
@ -537,6 +561,12 @@ class AudioConcat(IO.ComfyNode):
|
||||
|
||||
@classmethod
|
||||
def execute(cls, audio1, audio2, direction) -> IO.NodeOutput:
|
||||
if audio1 is None and audio2 is None:
|
||||
return IO.NodeOutput(None)
|
||||
if audio1 is None:
|
||||
return IO.NodeOutput(audio2)
|
||||
if audio2 is None:
|
||||
return IO.NodeOutput(audio1)
|
||||
waveform_1 = audio1["waveform"]
|
||||
waveform_2 = audio2["waveform"]
|
||||
sample_rate_1 = audio1["sample_rate"]
|
||||
@ -584,6 +614,12 @@ class AudioMerge(IO.ComfyNode):
|
||||
|
||||
@classmethod
|
||||
def execute(cls, audio1, audio2, merge_method) -> IO.NodeOutput:
|
||||
if audio1 is None and audio2 is None:
|
||||
return IO.NodeOutput(None)
|
||||
if audio1 is None:
|
||||
return IO.NodeOutput(audio2)
|
||||
if audio2 is None:
|
||||
return IO.NodeOutput(audio1)
|
||||
waveform_1 = audio1["waveform"]
|
||||
waveform_2 = audio2["waveform"]
|
||||
sample_rate_1 = audio1["sample_rate"]
|
||||
@ -594,6 +630,9 @@ class AudioMerge(IO.ComfyNode):
|
||||
length_1 = waveform_1.shape[-1]
|
||||
length_2 = waveform_2.shape[-1]
|
||||
|
||||
if length_1 == 0 or length_2 == 0:
|
||||
return IO.NodeOutput({"waveform": waveform_1, "sample_rate": output_sample_rate})
|
||||
|
||||
if length_2 > length_1:
|
||||
logging.info(f"AudioMerge: Trimming audio2 from {length_2} to {length_1} samples to match audio1 length.")
|
||||
waveform_2 = waveform_2[..., :length_1]
|
||||
@ -645,6 +684,8 @@ class AudioAdjustVolume(IO.ComfyNode):
|
||||
|
||||
@classmethod
|
||||
def execute(cls, audio, volume) -> IO.NodeOutput:
|
||||
if audio is None:
|
||||
return IO.NodeOutput(None)
|
||||
if volume == 0:
|
||||
return IO.NodeOutput(audio)
|
||||
waveform = audio["waveform"]
|
||||
@ -728,8 +769,14 @@ class AudioEqualizer3Band(IO.ComfyNode):
|
||||
|
||||
@classmethod
|
||||
def execute(cls, audio, low_gain_dB, low_freq, mid_gain_dB, mid_freq, mid_q, high_gain_dB, high_freq) -> IO.NodeOutput:
|
||||
if audio is None:
|
||||
return IO.NodeOutput(None)
|
||||
waveform = audio["waveform"]
|
||||
sample_rate = audio["sample_rate"]
|
||||
|
||||
if waveform.shape[-1] == 0:
|
||||
return IO.NodeOutput(audio)
|
||||
|
||||
eq_waveform = waveform.clone()
|
||||
|
||||
# 1. Apply Low Shelf (Bass)
|
||||
|
||||
@ -1,12 +1,7 @@
|
||||
import torch
|
||||
import os
|
||||
import json
|
||||
import struct
|
||||
import numpy as np
|
||||
from comfy.ldm.modules.diffusionmodules.mmdit import get_1d_sincos_pos_embed_from_grid_torch
|
||||
import folder_paths
|
||||
import comfy.model_management
|
||||
from comfy.cli_args import args
|
||||
from comfy_extras.nodes_save_3d import pack_variable_mesh_batch
|
||||
from typing_extensions import override
|
||||
from comfy_api.latest import ComfyExtension, IO, Types
|
||||
from comfy_api.latest._util import MESH, VOXEL # only for backward compatibility if someone import it from this file (will be removed later) # noqa
|
||||
@ -444,7 +439,9 @@ class VoxelToMeshBasic(IO.ComfyNode):
|
||||
vertices.append(v)
|
||||
faces.append(f)
|
||||
|
||||
return IO.NodeOutput(Types.MESH(torch.stack(vertices), torch.stack(faces)))
|
||||
if vertices and all(v.shape == vertices[0].shape for v in vertices) and all(f.shape == faces[0].shape for f in faces):
|
||||
return IO.NodeOutput(Types.MESH(torch.stack(vertices), torch.stack(faces)))
|
||||
return IO.NodeOutput(pack_variable_mesh_batch(vertices, faces))
|
||||
|
||||
decode = execute # TODO: remove
|
||||
|
||||
@ -481,206 +478,13 @@ class VoxelToMesh(IO.ComfyNode):
|
||||
vertices.append(v)
|
||||
faces.append(f)
|
||||
|
||||
return IO.NodeOutput(Types.MESH(torch.stack(vertices), torch.stack(faces)))
|
||||
if vertices and all(v.shape == vertices[0].shape for v in vertices) and all(f.shape == faces[0].shape for f in faces):
|
||||
return IO.NodeOutput(Types.MESH(torch.stack(vertices), torch.stack(faces)))
|
||||
return IO.NodeOutput(pack_variable_mesh_batch(vertices, faces))
|
||||
|
||||
decode = execute # TODO: remove
|
||||
|
||||
|
||||
def save_glb(vertices, faces, filepath, metadata=None):
|
||||
"""
|
||||
Save PyTorch tensor vertices and faces as a GLB file without external dependencies.
|
||||
|
||||
Parameters:
|
||||
vertices: torch.Tensor of shape (N, 3) - The vertex coordinates
|
||||
faces: torch.Tensor of shape (M, 3) - The face indices (triangle faces)
|
||||
filepath: str - Output filepath (should end with .glb)
|
||||
"""
|
||||
|
||||
# Convert tensors to numpy arrays
|
||||
vertices_np = vertices.cpu().numpy().astype(np.float32)
|
||||
faces_np = faces.cpu().numpy().astype(np.uint32)
|
||||
|
||||
vertices_buffer = vertices_np.tobytes()
|
||||
indices_buffer = faces_np.tobytes()
|
||||
|
||||
def pad_to_4_bytes(buffer):
|
||||
padding_length = (4 - (len(buffer) % 4)) % 4
|
||||
return buffer + b'\x00' * padding_length
|
||||
|
||||
vertices_buffer_padded = pad_to_4_bytes(vertices_buffer)
|
||||
indices_buffer_padded = pad_to_4_bytes(indices_buffer)
|
||||
|
||||
buffer_data = vertices_buffer_padded + indices_buffer_padded
|
||||
|
||||
vertices_byte_length = len(vertices_buffer)
|
||||
vertices_byte_offset = 0
|
||||
indices_byte_length = len(indices_buffer)
|
||||
indices_byte_offset = len(vertices_buffer_padded)
|
||||
|
||||
gltf = {
|
||||
"asset": {"version": "2.0", "generator": "ComfyUI"},
|
||||
"buffers": [
|
||||
{
|
||||
"byteLength": len(buffer_data)
|
||||
}
|
||||
],
|
||||
"bufferViews": [
|
||||
{
|
||||
"buffer": 0,
|
||||
"byteOffset": vertices_byte_offset,
|
||||
"byteLength": vertices_byte_length,
|
||||
"target": 34962 # ARRAY_BUFFER
|
||||
},
|
||||
{
|
||||
"buffer": 0,
|
||||
"byteOffset": indices_byte_offset,
|
||||
"byteLength": indices_byte_length,
|
||||
"target": 34963 # ELEMENT_ARRAY_BUFFER
|
||||
}
|
||||
],
|
||||
"accessors": [
|
||||
{
|
||||
"bufferView": 0,
|
||||
"byteOffset": 0,
|
||||
"componentType": 5126, # FLOAT
|
||||
"count": len(vertices_np),
|
||||
"type": "VEC3",
|
||||
"max": vertices_np.max(axis=0).tolist(),
|
||||
"min": vertices_np.min(axis=0).tolist()
|
||||
},
|
||||
{
|
||||
"bufferView": 1,
|
||||
"byteOffset": 0,
|
||||
"componentType": 5125, # UNSIGNED_INT
|
||||
"count": faces_np.size,
|
||||
"type": "SCALAR"
|
||||
}
|
||||
],
|
||||
"meshes": [
|
||||
{
|
||||
"primitives": [
|
||||
{
|
||||
"attributes": {
|
||||
"POSITION": 0
|
||||
},
|
||||
"indices": 1,
|
||||
"mode": 4 # TRIANGLES
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"nodes": [
|
||||
{
|
||||
"mesh": 0
|
||||
}
|
||||
],
|
||||
"scenes": [
|
||||
{
|
||||
"nodes": [0]
|
||||
}
|
||||
],
|
||||
"scene": 0
|
||||
}
|
||||
|
||||
if metadata is not None:
|
||||
gltf["asset"]["extras"] = metadata
|
||||
|
||||
# Convert the JSON to bytes
|
||||
gltf_json = json.dumps(gltf).encode('utf8')
|
||||
|
||||
def pad_json_to_4_bytes(buffer):
|
||||
padding_length = (4 - (len(buffer) % 4)) % 4
|
||||
return buffer + b' ' * padding_length
|
||||
|
||||
gltf_json_padded = pad_json_to_4_bytes(gltf_json)
|
||||
|
||||
# Create the GLB header
|
||||
# Magic glTF
|
||||
glb_header = struct.pack('<4sII', b'glTF', 2, 12 + 8 + len(gltf_json_padded) + 8 + len(buffer_data))
|
||||
|
||||
# Create JSON chunk header (chunk type 0)
|
||||
json_chunk_header = struct.pack('<II', len(gltf_json_padded), 0x4E4F534A) # "JSON" in little endian
|
||||
|
||||
# Create BIN chunk header (chunk type 1)
|
||||
bin_chunk_header = struct.pack('<II', len(buffer_data), 0x004E4942) # "BIN\0" in little endian
|
||||
|
||||
# Write the GLB file
|
||||
with open(filepath, 'wb') as f:
|
||||
f.write(glb_header)
|
||||
f.write(json_chunk_header)
|
||||
f.write(gltf_json_padded)
|
||||
f.write(bin_chunk_header)
|
||||
f.write(buffer_data)
|
||||
|
||||
return filepath
|
||||
|
||||
|
||||
class SaveGLB(IO.ComfyNode):
|
||||
@classmethod
|
||||
def define_schema(cls):
|
||||
return IO.Schema(
|
||||
node_id="SaveGLB",
|
||||
display_name="Save 3D Model",
|
||||
search_aliases=["export 3d model", "save mesh"],
|
||||
category="3d",
|
||||
essentials_category="Basics",
|
||||
is_output_node=True,
|
||||
inputs=[
|
||||
IO.MultiType.Input(
|
||||
IO.Mesh.Input("mesh"),
|
||||
types=[
|
||||
IO.File3DGLB,
|
||||
IO.File3DGLTF,
|
||||
IO.File3DOBJ,
|
||||
IO.File3DFBX,
|
||||
IO.File3DSTL,
|
||||
IO.File3DUSDZ,
|
||||
IO.File3DAny,
|
||||
],
|
||||
tooltip="Mesh or 3D file to save",
|
||||
),
|
||||
IO.String.Input("filename_prefix", default="3d/ComfyUI"),
|
||||
],
|
||||
hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo]
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def execute(cls, mesh: Types.MESH | Types.File3D, filename_prefix: str) -> IO.NodeOutput:
|
||||
full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, folder_paths.get_output_directory())
|
||||
results = []
|
||||
|
||||
metadata = {}
|
||||
if not args.disable_metadata:
|
||||
if cls.hidden.prompt is not None:
|
||||
metadata["prompt"] = json.dumps(cls.hidden.prompt)
|
||||
if cls.hidden.extra_pnginfo is not None:
|
||||
for x in cls.hidden.extra_pnginfo:
|
||||
metadata[x] = json.dumps(cls.hidden.extra_pnginfo[x])
|
||||
|
||||
if isinstance(mesh, Types.File3D):
|
||||
# Handle File3D input - save BytesIO data to output folder
|
||||
ext = mesh.format or "glb"
|
||||
f = f"{filename}_{counter:05}_.{ext}"
|
||||
mesh.save_to(os.path.join(full_output_folder, f))
|
||||
results.append({
|
||||
"filename": f,
|
||||
"subfolder": subfolder,
|
||||
"type": "output"
|
||||
})
|
||||
else:
|
||||
# Handle Mesh input - save vertices and faces as GLB
|
||||
for i in range(mesh.vertices.shape[0]):
|
||||
f = f"{filename}_{counter:05}_.glb"
|
||||
save_glb(mesh.vertices[i], mesh.faces[i], os.path.join(full_output_folder, f), metadata)
|
||||
results.append({
|
||||
"filename": f,
|
||||
"subfolder": subfolder,
|
||||
"type": "output"
|
||||
})
|
||||
counter += 1
|
||||
return IO.NodeOutput(ui={"3d": results})
|
||||
|
||||
|
||||
class Hunyuan3dExtension(ComfyExtension):
|
||||
@override
|
||||
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
|
||||
@ -691,7 +495,6 @@ class Hunyuan3dExtension(ComfyExtension):
|
||||
VAEDecodeHunyuan3D,
|
||||
VoxelToMeshBasic,
|
||||
VoxelToMesh,
|
||||
SaveGLB,
|
||||
]
|
||||
|
||||
|
||||
|
||||
@ -338,8 +338,25 @@ class LTXVAddGuide(io.ComfyNode):
|
||||
noise_mask = get_noise_mask(latent)
|
||||
|
||||
_, _, latent_length, latent_height, latent_width = latent_image.shape
|
||||
|
||||
# For mid-video multi-frame guides, prepend+strip a throwaway first frame so the VAE's "first latent = 1 pixel frame" asymmetry lands on the discarded slot
|
||||
time_scale_factor = scale_factors[0]
|
||||
num_frames_to_keep = ((image.shape[0] - 1) // time_scale_factor) * time_scale_factor + 1
|
||||
resolved_frame_idx = frame_idx
|
||||
if frame_idx < 0:
|
||||
_, num_keyframes = get_keyframe_idxs(positive)
|
||||
resolved_frame_idx = max((latent_length - num_keyframes - 1) * time_scale_factor + 1 + frame_idx, 0)
|
||||
causal_fix = resolved_frame_idx == 0 or num_frames_to_keep == 1
|
||||
|
||||
if not causal_fix:
|
||||
image = torch.cat([image[:1], image], dim=0)
|
||||
|
||||
image, t = cls.encode(vae, latent_width, latent_height, image, scale_factors)
|
||||
|
||||
if not causal_fix:
|
||||
t = t[:, :, 1:, :, :]
|
||||
image = image[1:]
|
||||
|
||||
frame_idx, latent_idx = cls.get_latent_index(positive, latent_length, len(image), frame_idx, scale_factors)
|
||||
assert latent_idx + t.shape[2] <= latent_length, "Conditioning frames exceed the length of the latent sequence."
|
||||
|
||||
@ -352,6 +369,7 @@ class LTXVAddGuide(io.ComfyNode):
|
||||
t,
|
||||
strength,
|
||||
scale_factors,
|
||||
causal_fix=causal_fix,
|
||||
)
|
||||
|
||||
# Track this guide for per-reference attention control.
|
||||
|
||||
@ -40,23 +40,13 @@ def composite(destination, source, x, y, mask = None, multiplier = 8, resize_sou
|
||||
|
||||
inverse_mask = torch.ones_like(mask) - mask
|
||||
|
||||
source_rgb = source[:, :3, :visible_height, :visible_width]
|
||||
dest_slice = destination[..., top:bottom, left:right]
|
||||
|
||||
if destination.shape[1] == 4:
|
||||
if torch.max(dest_slice) == 0:
|
||||
destination[:, :3, top:bottom, left:right] = source_rgb
|
||||
destination[:, 3:4, top:bottom, left:right] = mask
|
||||
else:
|
||||
destination[:, :3, top:bottom, left:right] = (mask * source_rgb) + (inverse_mask * dest_slice[:, :3])
|
||||
destination[:, 3:4, top:bottom, left:right] = torch.max(mask, dest_slice[:, 3:4])
|
||||
else:
|
||||
source_portion = mask * source_rgb
|
||||
destination_portion = inverse_mask * dest_slice
|
||||
destination[..., top:bottom, left:right] = source_portion + destination_portion
|
||||
source_portion = mask * source[..., :visible_height, :visible_width]
|
||||
destination_portion = inverse_mask * destination[..., top:bottom, left:right]
|
||||
|
||||
destination[..., top:bottom, left:right] = source_portion + destination_portion
|
||||
return destination
|
||||
|
||||
|
||||
class LatentCompositeMasked(IO.ComfyNode):
|
||||
@classmethod
|
||||
def define_schema(cls):
|
||||
@ -95,23 +85,18 @@ class ImageCompositeMasked(IO.ComfyNode):
|
||||
display_name="Image Composite Masked",
|
||||
category="image",
|
||||
inputs=[
|
||||
IO.Image.Input("destination"),
|
||||
IO.Image.Input("source"),
|
||||
IO.Int.Input("x", default=0, min=0, max=nodes.MAX_RESOLUTION, step=1),
|
||||
IO.Int.Input("y", default=0, min=0, max=nodes.MAX_RESOLUTION, step=1),
|
||||
IO.Boolean.Input("resize_source", default=False),
|
||||
IO.Image.Input("destination", optional=True),
|
||||
IO.Mask.Input("mask", optional=True),
|
||||
],
|
||||
outputs=[IO.Image.Output()],
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def execute(cls, source, x, y, resize_source, destination = None, mask = None) -> IO.NodeOutput:
|
||||
if destination is None: # transparent rgba
|
||||
B, H, W, C = source.shape
|
||||
destination = torch.zeros((B, H, W, 4), dtype=source.dtype, device=source.device)
|
||||
if C == 3:
|
||||
source = torch.nn.functional.pad(source, (0, 1), value=1.0)
|
||||
def execute(cls, destination, source, x, y, resize_source, mask = None) -> IO.NodeOutput:
|
||||
destination, source = node_helpers.image_alpha_fix(destination, source)
|
||||
destination = destination.clone().movedim(-1, 1)
|
||||
output = composite(destination, source.movedim(-1, 1), x, y, mask, 1, resize_source).movedim(1, -1)
|
||||
|
||||
396
comfy_extras/nodes_save_3d.py
Normal file
396
comfy_extras/nodes_save_3d.py
Normal file
@ -0,0 +1,396 @@
|
||||
"""Save-side 3D nodes: mesh packing/slicing helpers + GLB writer + SaveGLB node."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import struct
|
||||
from io import BytesIO
|
||||
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
import torch
|
||||
from typing_extensions import override
|
||||
|
||||
import folder_paths
|
||||
from comfy.cli_args import args
|
||||
from comfy_api.latest import ComfyExtension, IO, Types
|
||||
|
||||
|
||||
def pack_variable_mesh_batch(vertices, faces, colors=None, uvs=None, texture=None):
|
||||
# Pack lists of (Nᵢ, *) vertex/face/color/uv tensors into padded batched tensors,
|
||||
# stashing per-item lengths as runtime attrs so consumers can recover the real slice.
|
||||
# colors and uvs are 1:1 with vertices, so they're padded to max_vertices and read with vertex_counts.
|
||||
# texture is (B, H, W, 3) — passed through unchanged
|
||||
batch_size = len(vertices)
|
||||
max_vertices = max(v.shape[0] for v in vertices)
|
||||
max_faces = max(f.shape[0] for f in faces)
|
||||
|
||||
packed_vertices = vertices[0].new_zeros((batch_size, max_vertices, vertices[0].shape[1]))
|
||||
packed_faces = faces[0].new_zeros((batch_size, max_faces, faces[0].shape[1]))
|
||||
vertex_counts = torch.tensor([v.shape[0] for v in vertices], device=vertices[0].device, dtype=torch.int64)
|
||||
face_counts = torch.tensor([f.shape[0] for f in faces], device=faces[0].device, dtype=torch.int64)
|
||||
|
||||
for i, (v, f) in enumerate(zip(vertices, faces)):
|
||||
packed_vertices[i, :v.shape[0]] = v
|
||||
packed_faces[i, :f.shape[0]] = f
|
||||
|
||||
packed_colors = None
|
||||
if colors is not None:
|
||||
packed_colors = colors[0].new_zeros((batch_size, max_vertices, colors[0].shape[1]))
|
||||
for i, c in enumerate(colors):
|
||||
assert c.shape[0] == vertices[i].shape[0], (
|
||||
f"vertex_colors[{i}] has {c.shape[0]} entries, expected {vertices[i].shape[0]} (1:1 with vertices)"
|
||||
)
|
||||
packed_colors[i, :c.shape[0]] = c
|
||||
|
||||
packed_uvs = None
|
||||
if uvs is not None:
|
||||
packed_uvs = uvs[0].new_zeros((batch_size, max_vertices, uvs[0].shape[1]))
|
||||
for i, u in enumerate(uvs):
|
||||
assert u.shape[0] == vertices[i].shape[0], (
|
||||
f"uvs[{i}] has {u.shape[0]} entries, expected {vertices[i].shape[0]} (1:1 with vertices)"
|
||||
)
|
||||
packed_uvs[i, :u.shape[0]] = u
|
||||
|
||||
return Types.MESH(packed_vertices, packed_faces,
|
||||
uvs=packed_uvs, vertex_colors=packed_colors, texture=texture,
|
||||
vertex_counts=vertex_counts, face_counts=face_counts)
|
||||
|
||||
|
||||
def get_mesh_batch_item(mesh, index):
|
||||
# Returns (vertices, faces, colors, uvs) for batch index, slicing to real lengths
|
||||
# if the mesh carries per-item counts (variable-size batch).
|
||||
v_colors = getattr(mesh, "vertex_colors", None)
|
||||
v_uvs = getattr(mesh, "uvs", None)
|
||||
if getattr(mesh, "vertex_counts", None) is not None:
|
||||
vertex_count = int(mesh.vertex_counts[index].item())
|
||||
face_count = int(mesh.face_counts[index].item())
|
||||
vertices = mesh.vertices[index, :vertex_count]
|
||||
faces = mesh.faces[index, :face_count]
|
||||
colors = v_colors[index, :vertex_count] if v_colors is not None else None
|
||||
uvs = v_uvs[index, :vertex_count] if v_uvs is not None else None
|
||||
return vertices, faces, colors, uvs
|
||||
|
||||
colors = v_colors[index] if v_colors is not None else None
|
||||
uvs = v_uvs[index] if v_uvs is not None else None
|
||||
return mesh.vertices[index], mesh.faces[index], colors, uvs
|
||||
|
||||
|
||||
def save_glb(vertices, faces, filepath, metadata=None,
|
||||
uvs=None, vertex_colors=None, texture_image=None):
|
||||
"""
|
||||
Save PyTorch tensor vertices and faces as a GLB file without external dependencies.
|
||||
|
||||
Parameters:
|
||||
vertices: torch.Tensor of shape (N, 3) - The vertex coordinates
|
||||
faces: torch.Tensor of shape (M, 3) - The face indices (triangle faces)
|
||||
filepath: str - Output filepath (should end with .glb)
|
||||
metadata: dict - Optional asset.extras metadata
|
||||
uvs: torch.Tensor of shape (N, 2) - Optional per-vertex texture coordinates
|
||||
vertex_colors: torch.Tensor of shape (N, 3) or (N, 4) - Optional per-vertex colors in [0, 1]
|
||||
texture_image: PIL.Image - Optional baseColor texture, embedded as PNG
|
||||
"""
|
||||
|
||||
# Convert tensors to numpy arrays
|
||||
vertices_np = vertices.cpu().numpy().astype(np.float32)
|
||||
faces_signed = faces.cpu().numpy().astype(np.int64)
|
||||
uvs_np = uvs.cpu().numpy().astype(np.float32) if uvs is not None else None
|
||||
colors_np = vertex_colors.cpu().numpy().astype(np.float32) if vertex_colors is not None else None
|
||||
if colors_np is not None:
|
||||
colors_np = np.clip(colors_np, 0.0, 1.0)
|
||||
|
||||
n_verts = vertices_np.shape[0]
|
||||
if n_verts == 0:
|
||||
raise ValueError("save_glb: vertices is empty")
|
||||
if faces_signed.size > 0:
|
||||
fmin = int(faces_signed.min())
|
||||
fmax = int(faces_signed.max())
|
||||
if fmin < 0 or fmax >= n_verts:
|
||||
raise ValueError(
|
||||
f"save_glb: face index out of range [0, {n_verts}): min={fmin}, max={fmax}"
|
||||
)
|
||||
if uvs_np is not None and uvs_np.shape[0] != n_verts:
|
||||
raise ValueError(
|
||||
f"save_glb: uvs has {uvs_np.shape[0]} entries but vertex count is {n_verts}"
|
||||
)
|
||||
if colors_np is not None and colors_np.shape[0] != n_verts:
|
||||
raise ValueError(
|
||||
f"save_glb: vertex_colors has {colors_np.shape[0]} entries but vertex count is {n_verts}"
|
||||
)
|
||||
faces_np = faces_signed.astype(np.uint32)
|
||||
texture_png_bytes = None
|
||||
if texture_image is not None:
|
||||
buf = BytesIO()
|
||||
texture_image.save(buf, format="PNG")
|
||||
texture_png_bytes = buf.getvalue()
|
||||
|
||||
vertices_buffer = vertices_np.tobytes()
|
||||
indices_buffer = faces_np.tobytes()
|
||||
uvs_buffer = uvs_np.tobytes() if uvs_np is not None else b""
|
||||
colors_buffer = colors_np.tobytes() if colors_np is not None else b""
|
||||
texture_buffer = texture_png_bytes if texture_png_bytes is not None else b""
|
||||
|
||||
def pad_to_4_bytes(buffer):
|
||||
padding_length = (4 - (len(buffer) % 4)) % 4
|
||||
return buffer + b'\x00' * padding_length
|
||||
|
||||
vertices_buffer_padded = pad_to_4_bytes(vertices_buffer)
|
||||
indices_buffer_padded = pad_to_4_bytes(indices_buffer)
|
||||
uvs_buffer_padded = pad_to_4_bytes(uvs_buffer)
|
||||
colors_buffer_padded = pad_to_4_bytes(colors_buffer)
|
||||
texture_buffer_padded = pad_to_4_bytes(texture_buffer)
|
||||
|
||||
buffer_data = b"".join([
|
||||
vertices_buffer_padded,
|
||||
indices_buffer_padded,
|
||||
uvs_buffer_padded,
|
||||
colors_buffer_padded,
|
||||
texture_buffer_padded,
|
||||
])
|
||||
|
||||
vertices_byte_length = len(vertices_buffer)
|
||||
vertices_byte_offset = 0
|
||||
indices_byte_length = len(indices_buffer)
|
||||
indices_byte_offset = len(vertices_buffer_padded)
|
||||
uvs_byte_offset = indices_byte_offset + len(indices_buffer_padded)
|
||||
colors_byte_offset = uvs_byte_offset + len(uvs_buffer_padded)
|
||||
texture_byte_offset = colors_byte_offset + len(colors_buffer_padded)
|
||||
|
||||
buffer_views = [
|
||||
{
|
||||
"buffer": 0,
|
||||
"byteOffset": vertices_byte_offset,
|
||||
"byteLength": vertices_byte_length,
|
||||
"target": 34962 # ARRAY_BUFFER
|
||||
},
|
||||
{
|
||||
"buffer": 0,
|
||||
"byteOffset": indices_byte_offset,
|
||||
"byteLength": indices_byte_length,
|
||||
"target": 34963 # ELEMENT_ARRAY_BUFFER
|
||||
}
|
||||
]
|
||||
accessors = [
|
||||
{
|
||||
"bufferView": 0,
|
||||
"byteOffset": 0,
|
||||
"componentType": 5126, # FLOAT
|
||||
"count": len(vertices_np),
|
||||
"type": "VEC3",
|
||||
"max": vertices_np.max(axis=0).tolist(),
|
||||
"min": vertices_np.min(axis=0).tolist()
|
||||
},
|
||||
{
|
||||
"bufferView": 1,
|
||||
"byteOffset": 0,
|
||||
"componentType": 5125, # UNSIGNED_INT
|
||||
"count": faces_np.size,
|
||||
"type": "SCALAR"
|
||||
}
|
||||
]
|
||||
primitive_attributes = {"POSITION": 0}
|
||||
|
||||
if uvs_np is not None and len(uvs_np) > 0:
|
||||
buffer_views.append({
|
||||
"buffer": 0,
|
||||
"byteOffset": uvs_byte_offset,
|
||||
"byteLength": len(uvs_buffer),
|
||||
"target": 34962
|
||||
})
|
||||
accessor_idx = len(accessors)
|
||||
accessors.append({
|
||||
"bufferView": len(buffer_views) - 1,
|
||||
"byteOffset": 0,
|
||||
"componentType": 5126,
|
||||
"count": len(uvs_np),
|
||||
"type": "VEC2",
|
||||
})
|
||||
primitive_attributes["TEXCOORD_0"] = accessor_idx
|
||||
|
||||
if colors_np is not None and len(colors_np) > 0:
|
||||
buffer_views.append({
|
||||
"buffer": 0,
|
||||
"byteOffset": colors_byte_offset,
|
||||
"byteLength": len(colors_buffer),
|
||||
"target": 34962
|
||||
})
|
||||
accessor_idx = len(accessors)
|
||||
accessors.append({
|
||||
"bufferView": len(buffer_views) - 1,
|
||||
"byteOffset": 0,
|
||||
"componentType": 5126,
|
||||
"count": len(colors_np),
|
||||
"type": "VEC3" if colors_np.shape[1] == 3 else "VEC4",
|
||||
})
|
||||
primitive_attributes["COLOR_0"] = accessor_idx
|
||||
|
||||
primitive = {
|
||||
"attributes": primitive_attributes,
|
||||
"indices": 1,
|
||||
"mode": 4 # TRIANGLES
|
||||
}
|
||||
|
||||
images = []
|
||||
textures = []
|
||||
samplers = []
|
||||
materials = []
|
||||
if texture_png_bytes is not None and "TEXCOORD_0" in primitive_attributes:
|
||||
buffer_views.append({
|
||||
"buffer": 0,
|
||||
"byteOffset": texture_byte_offset,
|
||||
"byteLength": len(texture_buffer),
|
||||
})
|
||||
images.append({"bufferView": len(buffer_views) - 1, "mimeType": "image/png"})
|
||||
samplers.append({"magFilter": 9729, "minFilter": 9729, "wrapS": 33071, "wrapT": 33071})
|
||||
textures.append({"source": 0, "sampler": 0})
|
||||
materials.append({
|
||||
"pbrMetallicRoughness": {
|
||||
"baseColorTexture": {"index": 0, "texCoord": 0},
|
||||
"metallicFactor": 0.0,
|
||||
"roughnessFactor": 1.0,
|
||||
},
|
||||
"doubleSided": True,
|
||||
})
|
||||
primitive["material"] = 0
|
||||
|
||||
gltf = {
|
||||
"asset": {"version": "2.0", "generator": "ComfyUI"},
|
||||
"buffers": [{"byteLength": len(buffer_data)}],
|
||||
"bufferViews": buffer_views,
|
||||
"accessors": accessors,
|
||||
"meshes": [{"primitives": [primitive]}],
|
||||
"nodes": [{"mesh": 0}],
|
||||
"scenes": [{"nodes": [0]}],
|
||||
"scene": 0,
|
||||
}
|
||||
if images:
|
||||
gltf["images"] = images
|
||||
if samplers:
|
||||
gltf["samplers"] = samplers
|
||||
if textures:
|
||||
gltf["textures"] = textures
|
||||
if materials:
|
||||
gltf["materials"] = materials
|
||||
|
||||
if metadata:
|
||||
gltf["asset"]["extras"] = metadata
|
||||
|
||||
# Convert the JSON to bytes
|
||||
gltf_json = json.dumps(gltf).encode('utf8')
|
||||
|
||||
def pad_json_to_4_bytes(buffer):
|
||||
padding_length = (4 - (len(buffer) % 4)) % 4
|
||||
return buffer + b' ' * padding_length
|
||||
|
||||
gltf_json_padded = pad_json_to_4_bytes(gltf_json)
|
||||
|
||||
# Create the GLB header (a 4-byte ASCII magic identifier glTF)
|
||||
glb_header = struct.pack('<4sII', b'glTF', 2, 12 + 8 + len(gltf_json_padded) + 8 + len(buffer_data))
|
||||
|
||||
# Create JSON chunk header (chunk type 0)
|
||||
json_chunk_header = struct.pack('<II', len(gltf_json_padded), 0x4E4F534A) # "JSON" in little endian
|
||||
|
||||
# Create BIN chunk header (chunk type 1)
|
||||
bin_chunk_header = struct.pack('<II', len(buffer_data), 0x004E4942) # "BIN\0" in little endian
|
||||
|
||||
# Write the GLB file
|
||||
with open(filepath, 'wb') as f:
|
||||
f.write(glb_header)
|
||||
f.write(json_chunk_header)
|
||||
f.write(gltf_json_padded)
|
||||
f.write(bin_chunk_header)
|
||||
f.write(buffer_data)
|
||||
|
||||
return filepath
|
||||
|
||||
|
||||
class SaveGLB(IO.ComfyNode):
|
||||
@classmethod
|
||||
def define_schema(cls):
|
||||
return IO.Schema(
|
||||
node_id="SaveGLB",
|
||||
display_name="Save 3D Model",
|
||||
search_aliases=["export 3d model", "save mesh"],
|
||||
category="3d",
|
||||
essentials_category="Basics",
|
||||
is_output_node=True,
|
||||
inputs=[
|
||||
IO.MultiType.Input(
|
||||
IO.Mesh.Input("mesh"),
|
||||
types=[
|
||||
IO.File3DGLB,
|
||||
IO.File3DGLTF,
|
||||
IO.File3DOBJ,
|
||||
IO.File3DFBX,
|
||||
IO.File3DSTL,
|
||||
IO.File3DUSDZ,
|
||||
IO.File3DAny,
|
||||
],
|
||||
tooltip="Mesh or 3D file to save",
|
||||
),
|
||||
IO.String.Input("filename_prefix", default="3d/ComfyUI"),
|
||||
],
|
||||
hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo]
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def execute(cls, mesh: Types.MESH | Types.File3D, filename_prefix: str) -> IO.NodeOutput:
|
||||
full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, folder_paths.get_output_directory())
|
||||
results = []
|
||||
|
||||
metadata = {}
|
||||
if not args.disable_metadata:
|
||||
if cls.hidden.prompt is not None:
|
||||
metadata["prompt"] = json.dumps(cls.hidden.prompt)
|
||||
if cls.hidden.extra_pnginfo is not None:
|
||||
for x in cls.hidden.extra_pnginfo:
|
||||
metadata[x] = json.dumps(cls.hidden.extra_pnginfo[x])
|
||||
|
||||
if isinstance(mesh, Types.File3D):
|
||||
# Handle File3D input - save BytesIO data to output folder
|
||||
ext = mesh.format or "glb"
|
||||
f = f"{filename}_{counter:05}_.{ext}"
|
||||
mesh.save_to(os.path.join(full_output_folder, f))
|
||||
results.append({
|
||||
"filename": f,
|
||||
"subfolder": subfolder,
|
||||
"type": "output"
|
||||
})
|
||||
counter += 1
|
||||
else:
|
||||
# Handle Mesh input - save vertices and faces as GLB; carry optional UVs / colors / texture.
|
||||
texture_b = getattr(mesh, "texture", None)
|
||||
texture_np = None
|
||||
if texture_b is not None:
|
||||
texture_np = (texture_b.clamp(0.0, 1.0).cpu().numpy() * 255).astype(np.uint8)
|
||||
assert texture_np.ndim == 4 and texture_np.shape[-1] == 3, (
|
||||
f"texture must be (B, H, W, 3) RGB, got shape {tuple(texture_np.shape)}"
|
||||
)
|
||||
for i in range(mesh.vertices.shape[0]):
|
||||
vertices_i, faces_i, v_colors, uvs_i = get_mesh_batch_item(mesh, i)
|
||||
if vertices_i.shape[0] == 0 or faces_i.shape[0] == 0:
|
||||
logging.warning(f"SaveGLB: skipping empty mesh at batch index {i}")
|
||||
continue
|
||||
tex_img = Image.fromarray(texture_np[i], mode="RGB") if texture_np is not None else None
|
||||
f = f"{filename}_{counter:05}_.glb"
|
||||
save_glb(vertices_i, faces_i, os.path.join(full_output_folder, f), metadata,
|
||||
uvs=uvs_i,
|
||||
vertex_colors=v_colors,
|
||||
texture_image=tex_img)
|
||||
results.append({
|
||||
"filename": f,
|
||||
"subfolder": subfolder,
|
||||
"type": "output"
|
||||
})
|
||||
counter += 1
|
||||
return IO.NodeOutput(ui={"3d": results})
|
||||
|
||||
|
||||
class Save3DExtension(ComfyExtension):
|
||||
@override
|
||||
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
|
||||
return [SaveGLB]
|
||||
|
||||
|
||||
async def comfy_entrypoint() -> Save3DExtension:
|
||||
return Save3DExtension()
|
||||
@ -123,6 +123,7 @@ class CreateVideo(io.ComfyNode):
|
||||
search_aliases=["images to video"],
|
||||
display_name="Create Video",
|
||||
category="video",
|
||||
essentials_category="Video Tools",
|
||||
description="Create a video from images.",
|
||||
inputs=[
|
||||
io.Image.Input("images", tooltip="The images to create a video from."),
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
# This file is automatically generated by the build process when version is
|
||||
# updated in pyproject.toml.
|
||||
__version__ = "0.21.0"
|
||||
__version__ = "0.21.1"
|
||||
|
||||
1
nodes.py
1
nodes.py
@ -2436,6 +2436,7 @@ async def init_builtin_extra_nodes():
|
||||
"nodes_void.py",
|
||||
"nodes_wandancer.py",
|
||||
"nodes_hidream_o1.py",
|
||||
"nodes_save_3d.py",
|
||||
]
|
||||
|
||||
import_failed = []
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "ComfyUI"
|
||||
version = "0.21.0"
|
||||
version = "0.21.1"
|
||||
readme = "README.md"
|
||||
license = { file = "LICENSE" }
|
||||
requires-python = ">=3.10"
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
comfyui-frontend-package==1.43.18
|
||||
comfyui-workflow-templates==0.9.73
|
||||
comfyui-embedded-docs==0.4.4
|
||||
comfyui-workflow-templates==0.9.75
|
||||
comfyui-embedded-docs==0.5.0
|
||||
torch
|
||||
torchsde
|
||||
torchvision
|
||||
|
||||
@ -1,9 +1,23 @@
|
||||
from collections import defaultdict
|
||||
|
||||
import torch
|
||||
|
||||
from comfy.model_detection import detect_unet_config, model_config_from_unet_config
|
||||
import comfy.supported_models
|
||||
|
||||
|
||||
def _freeze(value):
|
||||
"""Recursively convert a value to a hashable form so configs can be
|
||||
compared/used as dict keys or set members."""
|
||||
if isinstance(value, dict):
|
||||
return frozenset((k, _freeze(v)) for k, v in value.items())
|
||||
if isinstance(value, (list, tuple)):
|
||||
return tuple(_freeze(v) for v in value)
|
||||
if isinstance(value, set):
|
||||
return frozenset(_freeze(v) for v in value)
|
||||
return value
|
||||
|
||||
|
||||
def _make_longcat_comfyui_sd():
|
||||
"""Minimal ComfyUI-format state dict for pre-converted LongCat-Image weights."""
|
||||
sd = {}
|
||||
@ -110,3 +124,21 @@ class TestModelDetection:
|
||||
model_config = model_config_from_unet_config(unet_config, sd)
|
||||
assert model_config is not None
|
||||
assert type(model_config).__name__ == "FluxSchnell"
|
||||
|
||||
def test_unet_config_and_required_keys_combination_is_unique(self):
|
||||
"""Each model in the registry must have a unique combination of
|
||||
``unet_config`` and ``required_keys``. If two models share the same
|
||||
combination, ``BASE.matches`` cannot disambiguate between them and the
|
||||
first one in the list will always win."""
|
||||
models = comfy.supported_models.models
|
||||
groups = defaultdict(list)
|
||||
for model in models:
|
||||
key = (_freeze(model.unet_config), _freeze(model.required_keys))
|
||||
groups[key].append(model.__name__)
|
||||
|
||||
duplicates = {k: names for k, names in groups.items() if len(names) > 1}
|
||||
assert not duplicates, (
|
||||
"Found models sharing the same (unet_config, required_keys) "
|
||||
"combination, which makes detection ambiguous: "
|
||||
+ "; ".join(", ".join(names) for names in duplicates.values())
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user