Compare commits

..

9 Commits

Author SHA1 Message Date
cd45f42a83 fix(multigpu): replace hardcoded torch.cuda.set_device with device-agnostic set_torch_device (#14191) 2026-05-30 21:18:42 -04:00
81aa5a38b2 Speed up ernie model by a bit on nvidia and use higher quality rope. (#14192) 2026-05-30 17:53:37 -07:00
ea73d3b2ea chore: update embedded docs to v0.5.2 (#14193) 2026-05-31 07:49:59 +08:00
08e93a31a3 feat: add Preview3DAdvanced node (#14175)
Co-authored-by: Alexis Rolland <alexisrolland@hotmail.com>
2026-05-30 17:57:36 -04:00
f7297bc5a9 Revert deprecation of non-dynamic smart memory (CORE-152 (revert)) (#14183)
* mm: re-instantate smart memory for VRAM

* mm: restore non-dynamic smart memory

By popular demand. We aren't quite ready for the deprecation as non
dynamic enabled GPUs and some high-vram custom model loader setups
prefer the old full hands on.
2026-05-30 15:20:33 -04:00
e154da83b1 Threaded Loader performance fixes / improvements (+ Aimdo 0.4.6) (#14116)
* memory_management: Add direct to read GPU mode

Make destination optional (or make it optionally GPU) and use aimdo
to file_read direct to GPU.

* ops: Remove stream pin buffers and use aimdo reads

This consumed too much RAM and its better to just take the hit on
the CPU syncing back the stream on a short ring buffer. Aimdo
implements this so just rip the stream pin buffer from comfy.

* model_management: all active pin registration movement

Its better to just let the active model load past the pin limit as
pins and let the pins move around. The saves the HDD and SATA
people disk traffic while only costing a few GPU syncs.

* utils: use aimdo file handle

This opens on windows with more favourable flags

* mp: only count the model proper for loaded_ram and vram

Exclude live loras from the numbers to avoid the case where the reported
loaded memory exceeds the size of the model.

This causes me confusion in the Kijai visualizer when it looked fully
loaded but was hitting disk due to this accounding disrepency.

* utils: add bit reverse utility

useful for max scattering something ordered.

* pinned_memory: Implement offload balancing

Use a max scatter alogorithm to prioritize pins of the same size such
that when doing a little bit of offloading it gets scattered, allowing
the prefetcher to more evenly swollow the offload.

* comfy-aimdo 0.4.7

Aimdo 0.4.7 implement VRAM buffer exhaustion predection to avoid
early speculative load of weights that definately wont fix once the
inference gets further in.

* model-prefetch: consolidate pin ensures on the sync point

This could happen mid prefetch block, cause a sync of the entire
block and lose overlap. Get ahead of the problem with a free down
at the natural compute stream sync point.

* mm: Put a 2GB min on the pin ceiling

This is reasonably bad if it starts causing swap pressure, moreso than
during normal ram-cache proceedings. Clamp it.

* add --fast-disk
2026-05-30 15:20:04 -04:00
bb560036b9 feat(io): add File3DPLY / File3DSPLAT / File3DSPZ / File3DKSPLAT types (#14185) 2026-05-30 09:39:26 -04:00
0b04660ba3 Speed up anima a bit on nvidia. (#14181) 2026-05-29 22:47:10 -07:00
6e1ef2311b Remove useless code. (#14178) 2026-05-29 16:26:46 -07:00
18 changed files with 11980 additions and 10090 deletions

View File

@ -149,6 +149,7 @@ parser.add_argument("--async-offload", nargs='?', const=2, type=int, default=Non
parser.add_argument("--disable-async-offload", action="store_true", help="Disable async weight offloading.") parser.add_argument("--disable-async-offload", action="store_true", help="Disable async weight offloading.")
parser.add_argument("--disable-dynamic-vram", action="store_true", help="Disable dynamic VRAM and use estimate based model loading.") parser.add_argument("--disable-dynamic-vram", action="store_true", help="Disable dynamic VRAM and use estimate based model loading.")
parser.add_argument("--enable-dynamic-vram", action="store_true", help="Enable dynamic VRAM on systems where it's not enabled by default.") parser.add_argument("--enable-dynamic-vram", action="store_true", help="Enable dynamic VRAM on systems where it's not enabled by default.")
parser.add_argument("--fast-disk", action="store_true", help="Prefer disk-backed dynamic loading and offload over unpinned RAM. Can be faster for users with fast NVME disks.")
parser.add_argument("--force-non-blocking", action="store_true", help="Force ComfyUI to use non-blocking operations for all applicable tensors. This may improve performance on some non-Nvidia systems but can cause issues with some workflows.") parser.add_argument("--force-non-blocking", action="store_true", help="Force ComfyUI to use non-blocking operations for all applicable tensors. This may improve performance on some non-Nvidia systems but can cause issues with some workflows.")

View File

@ -14,15 +14,7 @@ from torchvision import transforms
import comfy.patcher_extension import comfy.patcher_extension
from comfy.ldm.modules.attention import optimized_attention from comfy.ldm.modules.attention import optimized_attention
import comfy.ldm.common_dit import comfy.ldm.common_dit
import comfy.quant_ops
def apply_rotary_pos_emb(
t: torch.Tensor,
freqs: torch.Tensor,
) -> torch.Tensor:
t_ = t.reshape(*t.shape[:-1], 2, -1).movedim(-2, -1).unsqueeze(-2).float()
t_out = freqs[..., 0] * t_[..., 0] + freqs[..., 1] * t_[..., 1]
t_out = t_out.movedim(-1, -2).reshape(*t.shape).type_as(t)
return t_out
# ---------------------- Feed Forward Network ----------------------- # ---------------------- Feed Forward Network -----------------------
@ -173,8 +165,7 @@ class Attention(nn.Module):
k = self.k_norm(k) k = self.k_norm(k)
v = self.v_norm(v) v = self.v_norm(v)
if self.is_selfattn and rope_emb is not None: # only apply to self-attention! if self.is_selfattn and rope_emb is not None: # only apply to self-attention!
q = apply_rotary_pos_emb(q, rope_emb) q, k = comfy.quant_ops.ck.apply_rope_split_half(q, k, rope_emb)
k = apply_rotary_pos_emb(k, rope_emb)
return q, k, v return q, k, v
q, k, v = apply_norm_and_rotary_pos_emb(q, k, v, rope_emb) q, k, v = apply_norm_and_rotary_pos_emb(q, k, v, rope_emb)

View File

@ -5,6 +5,7 @@ import torch.nn.functional as F
from comfy.ldm.modules.attention import optimized_attention from comfy.ldm.modules.attention import optimized_attention
import comfy.model_management import comfy.model_management
import comfy.quant_ops
def rope(pos: torch.Tensor, dim: int, theta: int) -> torch.Tensor: def rope(pos: torch.Tensor, dim: int, theta: int) -> torch.Tensor:
assert dim % 2 == 0 assert dim % 2 == 0
@ -19,15 +20,6 @@ def rope(pos: torch.Tensor, dim: int, theta: int) -> torch.Tensor:
out = torch.stack([torch.cos(out), torch.sin(out)], dim=0) out = torch.stack([torch.cos(out), torch.sin(out)], dim=0)
return out.to(dtype=torch.float32, device=pos.device) return out.to(dtype=torch.float32, device=pos.device)
def apply_rotary_emb(x_in: torch.Tensor, freqs_cis: torch.Tensor) -> torch.Tensor:
rot_dim = freqs_cis.shape[-1]
x, x_pass = x_in[..., :rot_dim], x_in[..., rot_dim:]
cos_ = freqs_cis[0]
sin_ = freqs_cis[1]
x1, x2 = x.chunk(2, dim=-1)
x_rotated = torch.cat((-x2, x1), dim=-1)
return torch.cat((x * cos_ + x_rotated * sin_, x_pass), dim=-1)
class ErnieImageEmbedND3(nn.Module): class ErnieImageEmbedND3(nn.Module):
def __init__(self, dim: int, theta: int, axes_dim: tuple): def __init__(self, dim: int, theta: int, axes_dim: tuple):
super().__init__() super().__init__()
@ -37,8 +29,16 @@ class ErnieImageEmbedND3(nn.Module):
def forward(self, ids: torch.Tensor) -> torch.Tensor: def forward(self, ids: torch.Tensor) -> torch.Tensor:
emb = torch.cat([rope(ids[..., i], self.axes_dim[i], self.theta) for i in range(3)], dim=-1) emb = torch.cat([rope(ids[..., i], self.axes_dim[i], self.theta) for i in range(3)], dim=-1)
emb = emb.unsqueeze(3) # [2, B, S, 1, head_dim//2] cos_ = emb[0]
return torch.stack([emb, emb], dim=-1).reshape(*emb.shape[:-1], -1) # [B, S, 1, head_dim] sin_ = emb[1]
N = cos_.shape[-1]
half = N // 2
cos_top = cos_[..., :half].repeat_interleave(2, dim=-1)
sin_top = sin_[..., :half].repeat_interleave(2, dim=-1)
cos_bot = cos_[..., half:].repeat_interleave(2, dim=-1)
sin_bot = sin_[..., half:].repeat_interleave(2, dim=-1)
rot = torch.stack([cos_top, -sin_top, sin_bot, cos_bot], dim=-1)
return rot.reshape(*rot.shape[:-1], 2, 2).unsqueeze(2)
class ErnieImagePatchEmbedDynamic(nn.Module): class ErnieImagePatchEmbedDynamic(nn.Module):
def __init__(self, in_channels: int, embed_dim: int, patch_size: int, operations, device=None, dtype=None): def __init__(self, in_channels: int, embed_dim: int, patch_size: int, operations, device=None, dtype=None):
@ -115,8 +115,7 @@ class ErnieImageAttention(nn.Module):
key = self.norm_k(key) key = self.norm_k(key)
if image_rotary_emb is not None: if image_rotary_emb is not None:
query = apply_rotary_emb(query, image_rotary_emb) query, key = comfy.quant_ops.ck.apply_rope_split_half(query, key, image_rotary_emb)
key = apply_rotary_emb(key, image_rotary_emb)
q_flat = query.reshape(B, S, -1) q_flat = query.reshape(B, S, -1)
k_flat = key.reshape(B, S, -1) k_flat = key.reshape(B, S, -1)
@ -274,7 +273,7 @@ class ErnieImageModel(nn.Module):
image_ids = image_ids.view(1, N_img, 3).expand(B, -1, -1) image_ids = image_ids.view(1, N_img, 3).expand(B, -1, -1)
rotary_pos_emb = self.pos_embed(torch.cat([image_ids, text_ids], dim=1)).to(x.dtype) rotary_pos_emb = self.pos_embed(torch.cat([image_ids, text_ids], dim=1))
del image_ids, text_ids del image_ids, text_ids
sample = self.time_proj(timesteps).to(dtype) sample = self.time_proj(timesteps).to(dtype)

View File

@ -51,15 +51,6 @@ class FeedForward(nn.Module):
return hidden_states return hidden_states
def apply_rotary_emb(x, freqs_cis):
if x.shape[1] == 0:
return x
t_ = x.reshape(*x.shape[:-1], -1, 1, 2)
t_out = freqs_cis[..., 0] * t_[..., 0] + freqs_cis[..., 1] * t_[..., 1]
return t_out.reshape(*x.shape)
class QwenTimestepProjEmbeddings(nn.Module): class QwenTimestepProjEmbeddings(nn.Module):
def __init__(self, embedding_dim, pooled_projection_dim, use_additional_t_cond=False, dtype=None, device=None, operations=None): def __init__(self, embedding_dim, pooled_projection_dim, use_additional_t_cond=False, dtype=None, device=None, operations=None):
super().__init__() super().__init__()

View File

@ -4,6 +4,7 @@ import dataclasses
import torch import torch
from typing import NamedTuple from typing import NamedTuple
import comfy_aimdo.host_buffer
from comfy.quant_ops import QuantizedTensor from comfy.quant_ops import QuantizedTensor
@ -17,21 +18,18 @@ class TensorFileSlice(NamedTuple):
def read_tensor_file_slice_into(tensor, destination, stream=None, destination2=None): def read_tensor_file_slice_into(tensor, destination, stream=None, destination2=None):
if isinstance(tensor, QuantizedTensor): if isinstance(tensor, QuantizedTensor):
if not isinstance(destination, QuantizedTensor): if not read_tensor_file_slice_into(tensor._qdata,
return False destination._qdata if destination is not None else None, stream=stream,
if tensor._layout_cls != destination._layout_cls:
return False
if not read_tensor_file_slice_into(tensor._qdata, destination._qdata, stream=stream,
destination2=(destination2._qdata if destination2 is not None else None)): destination2=(destination2._qdata if destination2 is not None else None)):
return False return False
dst_orig_dtype = destination._params.orig_dtype if destination is not None:
destination._params.copy_from(tensor._params, non_blocking=False) dst_orig_dtype = destination._params.orig_dtype
destination._params = dataclasses.replace(destination._params, orig_dtype=dst_orig_dtype) destination._params.copy_from(tensor._params, non_blocking=False)
destination._params = dataclasses.replace(destination._params, orig_dtype=dst_orig_dtype)
if destination2 is not None: if destination2 is not None:
dst_orig_dtype = destination2._params.orig_dtype dst_orig_dtype = destination2._params.orig_dtype
destination2._params.copy_from(destination._params, non_blocking=True) destination2._params.copy_from(destination._params if destination is not None else tensor._params, non_blocking=True)
destination2._params = dataclasses.replace(destination2._params, orig_dtype=dst_orig_dtype) destination2._params = dataclasses.replace(destination2._params, orig_dtype=dst_orig_dtype)
return True return True
@ -39,10 +37,15 @@ def read_tensor_file_slice_into(tensor, destination, stream=None, destination2=N
if info is None: if info is None:
return False return False
if destination is not None and destination.device.type != "cpu" and destination2 is None:
destination2 = destination
destination = None
file_obj = info.file_ref file_obj = info.file_ref
if (destination.device.type != "cpu" if (file_obj is None
or file_obj is None or (destination is None and destination2 is None)
or destination.numel() * destination.element_size() < info.size or (destination is not None and (destination.device.type != "cpu" or destination.numel() * destination.element_size() < info.size))
or (destination2 is not None and (destination2.device.type == "cpu" or destination2.numel() * destination2.element_size() < info.size))
or tensor.numel() * tensor.element_size() != info.size or tensor.numel() * tensor.element_size() != info.size
or tensor.storage_offset() != 0 or tensor.storage_offset() != 0
or not tensor.is_contiguous()): or not tensor.is_contiguous()):
@ -51,6 +54,14 @@ def read_tensor_file_slice_into(tensor, destination, stream=None, destination2=N
if info.size == 0: if info.size == 0:
return True return True
if destination is None:
stream_ptr = getattr(stream, "cuda_stream", 0) if stream is not None else 0
comfy_aimdo.host_buffer.read_file_to_device(file_obj, info.offset, info.size,
stream_ptr, destination2.data_ptr(),
destination2.device.index,
mark_cold=False)
return True
hostbuf = getattr(destination.untyped_storage(), "_comfy_hostbuf", None) hostbuf = getattr(destination.untyped_storage(), "_comfy_hostbuf", None)
if hostbuf is not None: if hostbuf is not None:
stream_ptr = getattr(stream, "cuda_stream", 0) if stream is not None else 0 stream_ptr = getattr(stream, "cuda_stream", 0) if stream is not None else 0
@ -63,6 +74,9 @@ def read_tensor_file_slice_into(tensor, destination, stream=None, destination2=N
device=None if destination2 is None else destination2.device.index) device=None if destination2 is None else destination2.device.index)
return True return True
if not hasattr(file_obj, "seek") or not hasattr(file_obj, "readinto"):
return False
buf_type = ctypes.c_ubyte * info.size buf_type = ctypes.c_ubyte * info.size
view = memoryview(buf_type.from_address(destination.data_ptr())) view = memoryview(buf_type.from_address(destination.data_ptr()))

View File

@ -641,14 +641,17 @@ def free_pins(size, evict_active=False):
return freed_total return freed_total
def ensure_pin_budget(size, evict_active=False): def ensure_pin_budget(size, evict_active=False):
shortfall = size + comfy.memory_management.RAM_CACHE_HEADROOM / 2 - psutil.virtual_memory().available if args.fast_disk:
shortfall = TOTAL_PINNED_MEMORY + size - MAX_PINNED_MEMORY
else:
shortfall = size + max(comfy.memory_management.RAM_CACHE_HEADROOM / 2, 2048 * 1024 ** 2) - psutil.virtual_memory().available
if shortfall <= 0: if shortfall <= 0:
return True return True
to_free = shortfall + PIN_PRESSURE_HYSTERESIS to_free = shortfall + PIN_PRESSURE_HYSTERESIS
return free_pins(to_free, evict_active=evict_active) >= shortfall return free_pins(to_free, evict_active=evict_active) >= shortfall
def ensure_pin_registerable(size, evict_active=False): def ensure_pin_registerable(size, evict_active=True):
shortfall = TOTAL_PINNED_MEMORY + size - MAX_PINNED_MEMORY shortfall = TOTAL_PINNED_MEMORY + size - MAX_PINNED_MEMORY
if MAX_PINNED_MEMORY <= 0: if MAX_PINNED_MEMORY <= 0:
return False return False
@ -658,10 +661,17 @@ def ensure_pin_registerable(size, evict_active=False):
shortfall += REGISTERABLE_PIN_HYSTERESIS shortfall += REGISTERABLE_PIN_HYSTERESIS
for loaded_model in reversed(current_loaded_models): for loaded_model in reversed(current_loaded_models):
model = loaded_model.model model = loaded_model.model
if model is not None and model.is_dynamic() and (evict_active or not model.model.dynamic_pins[model.load_device]["active"]): if model is not None and model.is_dynamic() and not model.model.dynamic_pins[model.load_device]["active"]:
shortfall -= model.unregister_inactive_pins(shortfall) shortfall -= model.unregister_inactive_pins(shortfall)
if shortfall <= 0: if shortfall <= 0:
return True return True
if evict_active:
for loaded_model in current_loaded_models:
model = loaded_model.model
if model is not None and model.is_dynamic() and model.model.dynamic_pins[model.load_device]["active"]:
shortfall -= model.unregister_inactive_pins(shortfall)
if shortfall <= 0:
return True
return shortfall <= REGISTERABLE_PIN_HYSTERESIS return shortfall <= REGISTERABLE_PIN_HYSTERESIS
class LoadedModel: class LoadedModel:
@ -803,9 +813,9 @@ def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, pins
for x in can_unload_sorted: for x in can_unload_sorted:
i = x[-1] i = x[-1]
memory_to_free = 1e32 memory_to_free = 1e32
if current_loaded_models[i].model.is_dynamic() and (not DISABLE_SMART_MEMORY or device is None): if not DISABLE_SMART_MEMORY or device is None:
memory_to_free = 0 if device is None else memory_required - get_free_memory(device) memory_to_free = 0 if device is None else memory_required - get_free_memory(device)
if for_dynamic: if current_loaded_models[i].model.is_dynamic() and for_dynamic:
#don't actually unload dynamic models for the sake of other dynamic models #don't actually unload dynamic models for the sake of other dynamic models
#as that works on-demand. #as that works on-demand.
memory_required -= current_loaded_models[i].model.loaded_size() memory_required -= current_loaded_models[i].model.loaded_size()
@ -817,6 +827,10 @@ def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, pins
for i in sorted(unloaded_model, reverse=True): for i in sorted(unloaded_model, reverse=True):
unloaded_models.append(current_loaded_models.pop(i)) unloaded_models.append(current_loaded_models.pop(i))
if not for_dynamic and pins_required > 0:
ensure_pin_budget(pins_required)
ensure_pin_registerable(pins_required)
if len(unloaded_model) > 0: if len(unloaded_model) > 0:
soft_empty_cache() soft_empty_cache()
elif device is not None: elif device is not None:
@ -879,15 +893,19 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu
model_to_unload.model_finalizer.detach() model_to_unload.model_finalizer.detach()
total_memory_required = {} total_memory_required = {}
total_pins_required = {}
for loaded_model in models_to_load: for loaded_model in models_to_load:
device = loaded_model.device device = loaded_model.device
total_memory_required[device] = total_memory_required.get(device, 0) + loaded_model.model_memory_required(device) total_memory_required[device] = total_memory_required.get(device, 0) + loaded_model.model_memory_required(device)
if not loaded_model.model.is_dynamic():
total_pins_required[device] = total_pins_required.get(device, 0) + loaded_model.model_memory()
for device in total_memory_required: for device in total_memory_required:
if device != torch.device("cpu"): if device != torch.device("cpu"):
free_memory(total_memory_required[device] * 1.1 + extra_mem, free_memory(total_memory_required[device] * 1.1 + extra_mem,
device, device,
for_dynamic=free_for_dynamic) for_dynamic=free_for_dynamic,
pins_required=total_pins_required.get(device, 0))
for device in total_memory_required: for device in total_memory_required:
if device != torch.device("cpu"): if device != torch.device("cpu"):
@ -1283,7 +1301,6 @@ STREAM_CAST_BUFFERS = {}
LARGEST_CASTED_WEIGHT = (None, 0) LARGEST_CASTED_WEIGHT = (None, 0)
STREAM_AIMDO_CAST_BUFFERS = {} STREAM_AIMDO_CAST_BUFFERS = {}
LARGEST_AIMDO_CASTED_WEIGHT = (None, 0) LARGEST_AIMDO_CASTED_WEIGHT = (None, 0)
STREAM_PIN_BUFFERS = {}
DEFAULT_AIMDO_CAST_BUFFER_RESERVATION_SIZE = 16 * 1024 ** 3 DEFAULT_AIMDO_CAST_BUFFER_RESERVATION_SIZE = 16 * 1024 ** 3
@ -1326,42 +1343,13 @@ def get_aimdo_cast_buffer(offload_stream, device):
STREAM_AIMDO_CAST_BUFFERS[offload_stream] = cast_buffer STREAM_AIMDO_CAST_BUFFERS[offload_stream] = cast_buffer
return cast_buffer return cast_buffer
def get_pin_buffer(offload_stream):
pin_buffer = STREAM_PIN_BUFFERS.get(offload_stream, None)
if pin_buffer is None:
pin_buffer = comfy_aimdo.host_buffer.HostBuffer(0, 0, pinned_hostbuf_size(8 * 1024**3), mark_cold=False)
STREAM_PIN_BUFFERS[offload_stream] = pin_buffer
elif offload_stream is not None:
event = getattr(pin_buffer, "_comfy_event", None)
if event is not None:
event.synchronize()
delattr(pin_buffer, "_comfy_event")
return pin_buffer
def resize_pin_buffer(pin_buffer, size):
global TOTAL_PINNED_MEMORY
old_size = pin_buffer.size
if size <= old_size:
return True
growth = size - old_size
comfy.memory_management.extra_ram_release(comfy.memory_management.RAM_CACHE_HEADROOM)
ensure_pin_budget(growth, evict_active=True)
ensure_pin_registerable(growth, evict_active=True)
try:
pin_buffer.extend(size=size, reallocate=True)
except RuntimeError:
return False
TOTAL_PINNED_MEMORY += pin_buffer.size - old_size
return True
def reset_cast_buffers(): def reset_cast_buffers():
global TOTAL_PINNED_MEMORY
global LARGEST_CASTED_WEIGHT global LARGEST_CASTED_WEIGHT
global LARGEST_AIMDO_CASTED_WEIGHT global LARGEST_AIMDO_CASTED_WEIGHT
LARGEST_CASTED_WEIGHT = (None, 0) LARGEST_CASTED_WEIGHT = (None, 0)
LARGEST_AIMDO_CASTED_WEIGHT = (None, 0) LARGEST_AIMDO_CASTED_WEIGHT = (None, 0)
for offload_stream in set(STREAM_CAST_BUFFERS) | set(STREAM_AIMDO_CAST_BUFFERS) | set(STREAM_PIN_BUFFERS): for offload_stream in set(STREAM_CAST_BUFFERS) | set(STREAM_AIMDO_CAST_BUFFERS):
if offload_stream is not None: if offload_stream is not None:
offload_stream.synchronize() offload_stream.synchronize()
synchronize() synchronize()
@ -1370,20 +1358,24 @@ def reset_cast_buffers():
mmap_obj.bounce() mmap_obj.bounce()
DIRTY_MMAPS.clear() DIRTY_MMAPS.clear()
for pin_buffer in STREAM_PIN_BUFFERS.values():
TOTAL_PINNED_MEMORY -= pin_buffer.size
TOTAL_PINNED_MEMORY = max(0, TOTAL_PINNED_MEMORY)
for loaded_model in current_loaded_models: for loaded_model in current_loaded_models:
model = loaded_model.model model = loaded_model.model
if model is not None and model.is_dynamic(): if model is not None and model.is_dynamic():
model.model.dynamic_pins[model.load_device]["active"] = False pin_state = model.model.dynamic_pins[model.load_device]
if pin_state["active"]:
*_, buckets = pin_state["weights"]
for size, bucket in list(buckets.items()):
bucket[:] = [ entry for entry in bucket if entry[-1] is not None ]
if not bucket:
del buckets[size]
pin_state["active"] = False
model.partially_unload_ram(1e30, subsets=[ "patches" ]) model.partially_unload_ram(1e30, subsets=[ "patches" ])
model.model.dynamic_pins[model.load_device]["patches"] = (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024, pinned_hostbuf_size(model.model_size())), [], [-1], [0]) model.model.dynamic_pins[model.load_device]["patches"] = (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024, pinned_hostbuf_size(model.model_size())), [], [-1], [0], [0], {})
STREAM_CAST_BUFFERS.clear() STREAM_CAST_BUFFERS.clear()
STREAM_AIMDO_CAST_BUFFERS.clear() STREAM_AIMDO_CAST_BUFFERS.clear()
STREAM_PIN_BUFFERS.clear()
soft_empty_cache() soft_empty_cache()
def get_offload_stream(device): def get_offload_stream(device):
@ -1436,7 +1428,7 @@ def cast_to_gathered(tensors, r, non_blocking=False, stream=None, r2=None):
if hasattr(wf_context, "as_context"): if hasattr(wf_context, "as_context"):
wf_context = wf_context.as_context(stream) wf_context = wf_context.as_context(stream)
dest_views = comfy.memory_management.interpret_gathered_like(tensors, r) dest_views = comfy.memory_management.interpret_gathered_like(tensors, r) if r is not None else [None] * len(tensors)
dest2_views = comfy.memory_management.interpret_gathered_like(tensors, r2) if r2 is not None else None dest2_views = comfy.memory_management.interpret_gathered_like(tensors, r2) if r2 is not None else None
with wf_context: with wf_context:
for tensor in tensors: for tensor in tensors:
@ -1448,9 +1440,10 @@ def cast_to_gathered(tensors, r, non_blocking=False, stream=None, r2=None):
continue continue
storage = tensor._qdata.untyped_storage() if isinstance(tensor, comfy.quant_ops.QuantizedTensor) else tensor.untyped_storage() storage = tensor._qdata.untyped_storage() if isinstance(tensor, comfy.quant_ops.QuantizedTensor) else tensor.untyped_storage()
mark_mmap_dirty(storage) mark_mmap_dirty(storage)
dest_view.copy_(tensor, non_blocking=non_blocking) if dest_view is not None:
dest_view.copy_(tensor, non_blocking=non_blocking)
if dest2_view is not None: if dest2_view is not None:
dest2_view.copy_(dest_view, non_blocking=non_blocking) dest2_view.copy_(tensor if dest_view is None else dest_view, non_blocking=non_blocking)
def cast_to(weight, dtype=None, device=None, non_blocking=False, copy=False, stream=None, r=None): def cast_to(weight, dtype=None, device=None, non_blocking=False, copy=False, stream=None, r=None):
@ -1723,6 +1716,13 @@ def is_device_xpu(device):
def is_device_cuda(device): def is_device_cuda(device):
return is_device_type(device, 'cuda') return is_device_type(device, 'cuda')
def set_torch_device(device):
"""Set the current device for the given torch device. Supports CUDA and XPU."""
if is_device_cuda(device):
torch.cuda.set_device(device)
elif is_device_xpu(device):
torch.xpu.set_device(device)
def is_directml_enabled(): def is_directml_enabled():
global directml_enabled global directml_enabled
if directml_enabled: if directml_enabled:

View File

@ -1721,8 +1721,8 @@ class ModelPatcherDynamic(ModelPatcher):
""" """
if device not in self.model.dynamic_pins: if device not in self.model.dynamic_pins:
self.model.dynamic_pins[device] = { self.model.dynamic_pins[device] = {
"weights": (comfy_aimdo.host_buffer.HostBuffer(0, 0, 0), [], [-1], [0]), "weights": (comfy_aimdo.host_buffer.HostBuffer(0, 0, 0), [], [-1], [0], [0], {}),
"patches": (comfy_aimdo.host_buffer.HostBuffer(0, 0, 0), [], [-1], [0]), "patches": (comfy_aimdo.host_buffer.HostBuffer(0, 0, 0), [], [-1], [0], [0], {}),
"hostbufs_initialized": False, "hostbufs_initialized": False,
"failed": False, "failed": False,
"active": False, "active": False,
@ -1799,8 +1799,8 @@ class ModelPatcherDynamic(ModelPatcher):
pin_state = self.model.dynamic_pins[self.load_device] pin_state = self.model.dynamic_pins[self.load_device]
if not pin_state["hostbufs_initialized"]: if not pin_state["hostbufs_initialized"]:
hostbuf_size = comfy.model_management.pinned_hostbuf_size(self.model_size()) hostbuf_size = comfy.model_management.pinned_hostbuf_size(self.model_size())
pin_state["weights"] = (comfy_aimdo.host_buffer.HostBuffer(0, 64 * 1024 * 1024, hostbuf_size), [], [-1], [0]) pin_state["weights"] = (comfy_aimdo.host_buffer.HostBuffer(0, 64 * 1024 * 1024, hostbuf_size), [], [-1], [0], [0], {})
pin_state["patches"] = (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024, hostbuf_size), [], [-1], [0]) pin_state["patches"] = (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024, hostbuf_size), [], [-1], [0], [0], {})
pin_state["hostbufs_initialized"] = True pin_state["hostbufs_initialized"] = True
pin_state["failed"] = False pin_state["failed"] = False
pin_state["active"] = True pin_state["active"] = True
@ -1942,18 +1942,16 @@ class ModelPatcherDynamic(ModelPatcher):
return freed return freed
def loaded_ram_size(self): def loaded_ram_size(self):
return (self.model.dynamic_pins[self.load_device]["weights"][0].size + return (self.model.dynamic_pins[self.load_device]["weights"][0].size)
self.model.dynamic_pins[self.load_device]["patches"][0].size)
def pinned_memory_size(self): def pinned_memory_size(self):
return (self.model.dynamic_pins[self.load_device]["weights"][3][0] + return (self.model.dynamic_pins[self.load_device]["weights"][3][0])
self.model.dynamic_pins[self.load_device]["patches"][3][0])
def unregister_inactive_pins(self, ram_to_unload, subsets=[ "weights", "patches" ]): def unregister_inactive_pins(self, ram_to_unload, subsets=[ "weights", "patches" ]):
freed = 0 freed = 0
pin_state = self.model.dynamic_pins[self.load_device] pin_state = self.model.dynamic_pins[self.load_device]
for subset in subsets: for subset in subsets:
hostbuf, stack, stack_split, pinned_size = pin_state[subset] hostbuf, stack, stack_split, pinned_size, *_ = pin_state[subset]
split = stack_split[0] split = stack_split[0]
while split >= 0: while split >= 0:
module, offset = stack[split] module, offset = stack[split]
@ -1978,10 +1976,12 @@ class ModelPatcherDynamic(ModelPatcher):
freed = 0 freed = 0
pin_state = self.model.dynamic_pins[self.load_device] pin_state = self.model.dynamic_pins[self.load_device]
for subset in subsets: for subset in subsets:
hostbuf, stack, stack_split, pinned_size = pin_state[subset] hostbuf, stack, stack_split, pinned_size, *_ = pin_state[subset]
while len(stack) > 0: while len(stack) > 0:
module, offset = stack.pop() module, offset = stack.pop()
size = module._pin.numel() * module._pin.element_size() size = module._pin.numel() * module._pin.element_size()
module._pin_balancer_entry[-1] = None
del module._pin_balancer_entry
del module._pin del module._pin
hostbuf.truncate(offset, do_unregister=module._pin_registered) hostbuf.truncate(offset, do_unregister=module._pin_registered)
stack_split[0] = min(stack_split[0], len(stack) - 1) stack_split[0] = min(stack_split[0], len(stack) - 1)

View File

@ -1,4 +1,5 @@
import comfy_aimdo.model_vbar import comfy_aimdo.model_vbar
import comfy.memory_management
import comfy.model_management import comfy.model_management
import comfy.ops import comfy.ops
@ -50,7 +51,17 @@ def prefetch_queue_pop(queue, device, module):
if hasattr(s, "_v"): if hasattr(s, "_v"):
comfy_modules.append(s) comfy_modules.append(s)
registerable_size = 0
for s in comfy_modules:
registerable_size += comfy.memory_management.vram_aligned_size([s.weight, s.bias])
for param_key in ("weight", "bias"):
lowvram_fn = getattr(s, param_key + "_lowvram_function", None)
if lowvram_fn is not None:
registerable_size += lowvram_fn.memory_required()
offload_stream = comfy.ops.cast_modules_with_vbar(comfy_modules, None, device, None, True) offload_stream = comfy.ops.cast_modules_with_vbar(comfy_modules, None, device, None, True)
if not comfy.model_management.args.fast_disk:
comfy.model_management.ensure_pin_registerable(registerable_size)
comfy.model_management.sync_stream(device, offload_stream) comfy.model_management.sync_stream(device, offload_stream)
queue[0] = (offload_stream, (prefetch, comfy_modules)) queue[0] = (offload_stream, (prefetch, comfy_modules))

View File

@ -17,7 +17,7 @@ class MultiGPUThreadPool:
"""Persistent thread pool for multi-GPU work distribution. """Persistent thread pool for multi-GPU work distribution.
Maintains one worker thread per extra GPU device. Each thread calls Maintains one worker thread per extra GPU device. Each thread calls
torch.cuda.set_device() once at startup so that compiled kernel caches set_torch_device() once at startup so that compiled kernel caches
(inductor/triton) stay warm across diffusion steps. (inductor/triton) stay warm across diffusion steps.
""" """
@ -37,7 +37,7 @@ class MultiGPUThreadPool:
def _worker_loop(self, device: torch.device, work_q: queue.Queue, result_q: queue.Queue): def _worker_loop(self, device: torch.device, work_q: queue.Queue, result_q: queue.Queue):
try: try:
torch.cuda.set_device(device) comfy.model_management.set_torch_device(device)
except Exception as e: except Exception as e:
logging.error(f"MultiGPUThreadPool: failed to set device {device}: {e}") logging.error(f"MultiGPUThreadPool: failed to set device {device}: {e}")
while True: while True:

View File

@ -76,8 +76,6 @@ except:
cast_to = comfy.model_management.cast_to #TODO: remove once no more references cast_to = comfy.model_management.cast_to #TODO: remove once no more references
STREAM_PIN_BUFFER_HEADROOM = 8 * 1024 * 1024
def cast_to_input(weight, input, non_blocking=False, copy=True): def cast_to_input(weight, input, non_blocking=False, copy=True):
return comfy.model_management.cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy) return comfy.model_management.cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy)
@ -94,9 +92,6 @@ def cast_modules_with_vbar(comfy_modules, dtype, device, bias_dtype, non_blockin
offload_stream = None offload_stream = None
cast_buffer = None cast_buffer = None
cast_buffer_offset = 0 cast_buffer_offset = 0
stream_pin_hostbuf = None
stream_pin_offset = 0
stream_pin_queue = []
def ensure_offload_stream(module, required_size, check_largest): def ensure_offload_stream(module, required_size, check_largest):
nonlocal offload_stream nonlocal offload_stream
@ -130,22 +125,6 @@ def cast_modules_with_vbar(comfy_modules, dtype, device, bias_dtype, non_blockin
cast_buffer_offset += buffer_size cast_buffer_offset += buffer_size
return buffer return buffer
def get_stream_pin_buffer_offset(buffer_size):
nonlocal stream_pin_hostbuf
nonlocal stream_pin_offset
if buffer_size == 0 or offload_stream is None:
return None
if stream_pin_hostbuf is None:
stream_pin_hostbuf = comfy.model_management.get_pin_buffer(offload_stream)
if stream_pin_hostbuf is None:
return None
offset = stream_pin_offset
stream_pin_offset += buffer_size
return offset
for s in comfy_modules: for s in comfy_modules:
signature = comfy_aimdo.model_vbar.vbar_fault(s._v) signature = comfy_aimdo.model_vbar.vbar_fault(s._v)
resident = comfy_aimdo.model_vbar.vbar_signature_compare(signature, s._v_signature) resident = comfy_aimdo.model_vbar.vbar_signature_compare(signature, s._v_signature)
@ -184,12 +163,18 @@ def cast_modules_with_vbar(comfy_modules, dtype, device, bias_dtype, non_blockin
if xfer_dest is None: if xfer_dest is None:
xfer_dest = get_cast_buffer(dest_size) xfer_dest = get_cast_buffer(dest_size)
def cast_maybe_lowvram_patch(xfer_source, xfer_dest, stream): def cast_maybe_lowvram_patch(xfer_source, xfer_dest, stream, xfer_dest2=None):
if xfer_source is not None: if xfer_source is not None:
if getattr(xfer_source, "is_lowvram_patch", False): if getattr(xfer_source, "is_lowvram_patch", False):
xfer_source.prepare(xfer_dest, stream, copy=True, commit=False) if xfer_dest is not None:
else: xfer_source.prepare(xfer_dest, stream, copy=True, commit=False)
comfy.model_management.cast_to_gathered(xfer_source, xfer_dest, non_blocking=non_blocking, stream=stream) xfer_source = [ xfer_dest ]
xfer_dest = xfer_dest2
xfer_dest2 = None
elif xfer_dest2 is not None:
xfer_source.prepare(xfer_dest2, stream, copy=True, commit=False)
return
comfy.model_management.cast_to_gathered(xfer_source, xfer_dest, non_blocking=non_blocking, stream=stream, r2=xfer_dest2)
def handle_pin(m, pin, source, dest, subset="weights", size=None): def handle_pin(m, pin, source, dest, subset="weights", size=None):
if pin is not None: if pin is not None:
@ -198,19 +183,7 @@ def cast_modules_with_vbar(comfy_modules, dtype, device, bias_dtype, non_blockin
if signature is None: if signature is None:
comfy.pinned_memory.pin_memory(m, subset=subset, size=size) comfy.pinned_memory.pin_memory(m, subset=subset, size=size)
pin = comfy.pinned_memory.get_pin(m, subset=subset) pin = comfy.pinned_memory.get_pin(m, subset=subset)
if pin is not None: cast_maybe_lowvram_patch(source, pin, offload_stream, xfer_dest2=dest)
if isinstance(source, list):
comfy.model_management.cast_to_gathered(source, pin, non_blocking=non_blocking, stream=offload_stream, r2=dest)
else:
cast_maybe_lowvram_patch(source, pin, None)
cast_maybe_lowvram_patch([ pin ], dest, offload_stream)
return
if pin is None:
pin_offset = get_stream_pin_buffer_offset(size)
if pin_offset is not None:
stream_pin_queue.append((source, pin_offset, size, dest))
return
cast_maybe_lowvram_patch(source, dest, offload_stream)
handle_pin(s, pin, xfer_source, xfer_dest, size=dest_size) handle_pin(s, pin, xfer_source, xfer_dest, size=dest_size)
@ -232,23 +205,6 @@ def cast_modules_with_vbar(comfy_modules, dtype, device, bias_dtype, non_blockin
prefetch["needs_cast"] = needs_cast prefetch["needs_cast"] = needs_cast
s._prefetch = prefetch s._prefetch = prefetch
if stream_pin_offset > 0:
if stream_pin_hostbuf.size < stream_pin_offset:
if not comfy.model_management.resize_pin_buffer(stream_pin_hostbuf, stream_pin_offset + STREAM_PIN_BUFFER_HEADROOM):
for xfer_source, _, _, xfer_dest in stream_pin_queue:
cast_maybe_lowvram_patch(xfer_source, xfer_dest, offload_stream)
return offload_stream
stream_pin_tensor = comfy_aimdo.torch.hostbuf_to_tensor(stream_pin_hostbuf)
stream_pin_tensor.untyped_storage()._comfy_hostbuf = stream_pin_hostbuf
for xfer_source, pin_offset, pin_size, xfer_dest in stream_pin_queue:
pin = stream_pin_tensor[pin_offset:pin_offset + pin_size]
if isinstance(xfer_source, list):
comfy.model_management.cast_to_gathered(xfer_source, pin, non_blocking=non_blocking, stream=offload_stream, r2=xfer_dest)
else:
cast_maybe_lowvram_patch(xfer_source, pin, None)
comfy.model_management.cast_to_gathered([ pin ], xfer_dest, non_blocking=non_blocking, stream=offload_stream)
stream_pin_hostbuf._comfy_event = offload_stream.record_event()
return offload_stream return offload_stream

View File

@ -1,17 +1,55 @@
import bisect
import comfy.model_management import comfy.model_management
import comfy.memory_management import comfy.memory_management
import comfy.utils
import comfy_aimdo.host_buffer import comfy_aimdo.host_buffer
import comfy_aimdo.torch import comfy_aimdo.torch
import torch import torch
from comfy.cli_args import args from comfy.cli_args import args
def _add_to_bucket(module, buckets, size, priority):
bucket = buckets.setdefault(size, [])
entry = [-priority, 0, module]
entry[1] = id(entry)
bisect.insort(bucket, entry)
module._pin_balancer_entry = entry
def _steal_pin(module, stack, buckets, size, priority):
bucket = buckets.get(size)
if bucket is None:
return False
while bucket and bucket[-1][-1] is None:
bucket.pop()
if not bucket:
del buckets[size]
return False
if priority <= -bucket[-1][0]:
return False
*_, victim = bucket.pop()
module._pin = victim._pin
module._pin_registered = victim._pin_registered
module._pin_stack_index = victim._pin_stack_index
stack[module._pin_stack_index] = (module, stack[module._pin_stack_index][1])
victim._pin_registered = False
del victim._pin
del victim._pin_stack_index
del victim._pin_balancer_entry
_add_to_bucket(module, buckets, size, priority)
return True
def get_pin(module, subset="weights"): def get_pin(module, subset="weights"):
pin = getattr(module, "_pin", None) pin = getattr(module, "_pin", None)
if pin is None or module._pin_registered or args.disable_pinned_memory: if pin is None or module._pin_registered or args.disable_pinned_memory:
return pin return pin
_, _, stack_split, pinned_size = module._pin_state[subset] _, _, stack_split, pinned_size, *_ = module._pin_state[subset]
size = pin.nbytes size = pin.nbytes
comfy.model_management.ensure_pin_registerable(size) comfy.model_management.ensure_pin_registerable(size)
@ -31,26 +69,30 @@ def pin_memory(module, subset="weights", size=None):
return return
pin = get_pin(module, subset) pin = get_pin(module, subset)
if pin is not None or pin_state["failed"]: if pin is not None:
return return
hostbuf, stack, stack_split, pinned_size = pin_state[subset] hostbuf, stack, stack_split, pinned_size, counter, buckets = pin_state[subset]
if size is None: if size is None:
size = comfy.memory_management.vram_aligned_size([ module.weight, module.bias ]) size = comfy.memory_management.vram_aligned_size([ module.weight, module.bias ])
offset = hostbuf.size offset = hostbuf.size
registerable_size = size + max(0, hostbuf.size - pinned_size[0]) registerable_size = size
priority = getattr(module, "_pin_balancer_priority", None)
if priority is None:
priority = comfy.utils.bit_reverse_range(counter[0], 16)
counter[0] += 1
module._pin_balancer_priority = priority
comfy.memory_management.extra_ram_release(comfy.memory_management.RAM_CACHE_HEADROOM) comfy.memory_management.extra_ram_release(comfy.memory_management.RAM_CACHE_HEADROOM)
if (not comfy.model_management.ensure_pin_budget(size) or if (not comfy.model_management.ensure_pin_budget(size) or
not comfy.model_management.ensure_pin_registerable(registerable_size)): not comfy.model_management.ensure_pin_registerable(registerable_size)):
pin_state["failed"] = True return _steal_pin(module, stack, buckets, size, priority)
return False
try: try:
hostbuf.extend(size=size) hostbuf.extend(size=size)
except RuntimeError: except RuntimeError:
pin_state["failed"] = True return _steal_pin(module, stack, buckets, size, priority)
return False
module._pin = comfy_aimdo.torch.hostbuf_to_tensor(hostbuf)[offset:offset + size] module._pin = comfy_aimdo.torch.hostbuf_to_tensor(hostbuf)[offset:offset + size]
module._pin.untyped_storage()._comfy_hostbuf = hostbuf module._pin.untyped_storage()._comfy_hostbuf = hostbuf
@ -60,4 +102,5 @@ def pin_memory(module, subset="weights", size=None):
stack_split[0] = max(stack_split[0], module._pin_stack_index) stack_split[0] = max(stack_split[0], module._pin_stack_index)
comfy.model_management.TOTAL_PINNED_MEMORY += size comfy.model_management.TOTAL_PINNED_MEMORY += size
pinned_size[0] += size pinned_size[0] += size
_add_to_bucket(module, buckets, size, priority)
return True return True

View File

@ -464,10 +464,7 @@ def _calc_cond_batch_multigpu(model: BaseModel, conds: list[list[dict]], x_in: t
def _handle_batch(device: torch.device, batch_tuple: tuple[comfy.hooks.HookGroup, tuple], results: list[thread_result]): def _handle_batch(device: torch.device, batch_tuple: tuple[comfy.hooks.HookGroup, tuple], results: list[thread_result]):
try: try:
# TODO: non-NVIDIA support -- guard with `if device.type == "cuda":` once comfy.model_management.set_torch_device(device)
# we extend multigpu QA beyond CUDA. Unconditional call crashes on
# XPU/NPU/MPS/CPU/DirectML backends.
torch.cuda.set_device(device)
model_current: BaseModel = model_options["multigpu_clones"][device].model model_current: BaseModel = model_options["multigpu_clones"][device].model
# run every hooked_to_run separately # run every hooked_to_run separately
with torch.no_grad(): with torch.no_grad():

View File

@ -85,9 +85,9 @@ _TYPES = {
def load_safetensors(ckpt): def load_safetensors(ckpt):
import comfy_aimdo.model_mmap import comfy_aimdo.model_mmap
f = open(ckpt, "rb", buffering=0)
file_lock = threading.Lock() file_lock = threading.Lock()
model_mmap = comfy_aimdo.model_mmap.ModelMMAP(ckpt) model_mmap = comfy_aimdo.model_mmap.ModelMMAP(ckpt)
f = model_mmap.get_file_handle()
file_size = os.path.getsize(ckpt) file_size = os.path.getsize(ckpt)
mv = memoryview((ctypes.c_uint8 * file_size).from_address(model_mmap.get())) mv = memoryview((ctypes.c_uint8 * file_size).from_address(model_mmap.get()))
@ -1452,3 +1452,10 @@ def deepcopy_list_dict(obj, memo=None):
memo[obj_id] = res memo[obj_id] = res
return res return res
def bit_reverse_range(index, bits):
result = 0
for _ in range(bits):
result = (result << 1) | (index & 1)
index >>= 1
return result

View File

@ -727,6 +727,30 @@ class File3DUSDZ(ComfyTypeIO):
Type = File3D Type = File3D
@comfytype(io_type="FILE_3D_PLY")
class File3DPLY(ComfyTypeIO):
"""PLY format 3D file - point cloud or Gaussian splat."""
Type = File3D
@comfytype(io_type="FILE_3D_SPLAT")
class File3DSPLAT(ComfyTypeIO):
"""SPLAT format 3D file - 3D Gaussian splat."""
Type = File3D
@comfytype(io_type="FILE_3D_SPZ")
class File3DSPZ(ComfyTypeIO):
"""SPZ format 3D file - compressed 3D Gaussian splat."""
Type = File3D
@comfytype(io_type="FILE_3D_KSPLAT")
class File3DKSPLAT(ComfyTypeIO):
"""KSPLAT format 3D file - 3D Gaussian splat."""
Type = File3D
@comfytype(io_type="HOOKS") @comfytype(io_type="HOOKS")
class Hooks(ComfyTypeIO): class Hooks(ComfyTypeIO):
if TYPE_CHECKING: if TYPE_CHECKING:
@ -2303,6 +2327,10 @@ __all__ = [
"File3DOBJ", "File3DOBJ",
"File3DSTL", "File3DSTL",
"File3DUSDZ", "File3DUSDZ",
"File3DPLY",
"File3DSPLAT",
"File3DSPZ",
"File3DKSPLAT",
"Hooks", "Hooks",
"HookKeyframes", "HookKeyframes",
"TimestepsRange", "TimestepsRange",

View File

@ -452,6 +452,16 @@ class PreviewUI3D(_UIOutput):
return {"result": [self.model_file, self.camera_info, self.bg_image_path]} return {"result": [self.model_file, self.camera_info, self.bg_image_path]}
class PreviewUI3DAdvanced(_UIOutput):
def __init__(self, model_file, camera_info, model_3d_info):
self.model_file = model_file
self.camera_info = camera_info
self.model_3d_info = model_3d_info
def as_dict(self):
return {"result": [self.model_file, self.camera_info, self.model_3d_info]}
class PreviewText(_UIOutput): class PreviewText(_UIOutput):
def __init__(self, value: str, **kwargs): def __init__(self, value: str, **kwargs):
self.value = value self.value = value
@ -471,5 +481,6 @@ __all__ = [
"PreviewAudio", "PreviewAudio",
"PreviewVideo", "PreviewVideo",
"PreviewUI3D", "PreviewUI3D",
"PreviewUI3DAdvanced",
"PreviewText", "PreviewText",
] ]

View File

@ -124,12 +124,71 @@ class Preview3D(IO.ComfyNode):
process = execute # TODO: remove process = execute # TODO: remove
class Preview3DAdvanced(IO.ComfyNode):
@classmethod
def define_schema(cls):
return IO.Schema(
node_id="Preview3DAdvanced",
display_name="Preview 3D (Advanced)",
search_aliases=["preview 3d", "3d viewer", "view mesh", "frame 3d", "3d camera output"],
category="3d",
is_experimental=True,
is_output_node=True,
inputs=[
IO.MultiType.Input(
"model_file",
types=[
IO.File3DGLB,
IO.File3DGLTF,
IO.File3DFBX,
IO.File3DOBJ,
IO.File3DSTL,
IO.File3DUSDZ,
IO.File3DAny,
],
tooltip="3D model file from an upstream 3D node.",
),
IO.Load3D.Input("image"),
IO.Load3DCamera.Input("camera_info", optional=True, advanced=True),
IO.Load3DModelInfo.Input("model_3d_info", optional=True, advanced=True),
IO.Int.Input("width", default=1024, min=1, max=4096, step=1),
IO.Int.Input("height", default=1024, min=1, max=4096, step=1),
],
outputs=[
IO.File3DAny.Output(display_name="model_file"),
IO.Load3DCamera.Output(display_name="camera_info"),
IO.Load3DModelInfo.Output(display_name="model_3d_info"),
IO.Int.Output(display_name="width"),
IO.Int.Output(display_name="height"),
],
)
@classmethod
def execute(cls, model_file: Types.File3D, image, width: int, height: int, **kwargs) -> IO.NodeOutput:
filename = f"preview3d_advanced_{uuid.uuid4().hex}.{model_file.format}"
model_file.save_to(os.path.join(folder_paths.get_output_directory(), filename))
camera_info_input = kwargs.get("camera_info", None)
camera_info = camera_info_input if camera_info_input is not None else image['camera_info']
model_3d_info_input = kwargs.get("model_3d_info", None)
model_3d_info = model_3d_info_input if model_3d_info_input is not None else image.get('model_3d_info', [])
return IO.NodeOutput(
model_file,
camera_info,
model_3d_info,
width,
height,
ui=UI.PreviewUI3DAdvanced(filename, camera_info, model_3d_info),
)
class Load3DExtension(ComfyExtension): class Load3DExtension(ComfyExtension):
@override @override
async def get_node_list(self) -> list[type[IO.ComfyNode]]: async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [ return [
Load3D, Load3D,
Preview3D, Preview3D,
Preview3DAdvanced,
] ]

21608
openapi.yaml

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
comfyui-frontend-package==1.44.19 comfyui-frontend-package==1.44.19
comfyui-workflow-templates==0.9.91 comfyui-workflow-templates==0.9.91
comfyui-embedded-docs==0.5.1 comfyui-embedded-docs==0.5.2
torch torch
torchsde torchsde
torchvision torchvision
@ -22,8 +22,8 @@ alembic
SQLAlchemy>=2.0.0 SQLAlchemy>=2.0.0
filelock filelock
av>=16.0.0 av>=16.0.0
comfy-kitchen==0.2.9 comfy-kitchen==0.2.10
comfy-aimdo==0.4.5 comfy-aimdo==0.4.7
requests requests
simpleeval>=1.0.0 simpleeval>=1.0.0
blake3 blake3