mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-06-26 07:56:54 +08:00
Compare commits
4 Commits
xpu-aimdo-
...
cloud-open
| Author | SHA1 | Date | |
|---|---|---|---|
| 7f253cbb08 | |||
| 64e1d740b8 | |||
| 5a48a054f4 | |||
| b22d0fb9c0 |
@ -1274,148 +1274,13 @@ def force_channels_last():
|
||||
return False
|
||||
|
||||
|
||||
_INTEL_XPU_DISCRETE = None


def is_intel_xpu_discrete():
    """Tell whether the active Intel XPU is a discrete GPU.

    torch.xpu does not expose the integrated-vs-discrete distinction, so the
    Level Zero loader is queried directly through ctypes (ze_loader.dll on
    Windows, libze_loader.so.1 on Linux). The answer is computed once and
    cached in ``_INTEL_XPU_DISCRETE``. Any failure or ambiguity yields False
    so that a discrete-only fast path is never enabled by mistake.
    """
    global _INTEL_XPU_DISCRETE
    if _INTEL_XPU_DISCRETE is not None:
        return _INTEL_XPU_DISCRETE

    # Cache the pessimistic answer first; every early exit below relies on it.
    _INTEL_XPU_DISCRETE = False
    if not is_intel_xpu():
        return False

    try:
        import ctypes
        import ctypes.util

        ZE_RESULT_SUCCESS = 0
        ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES = 0x3
        ZE_DEVICE_TYPE_GPU = 1
        ZE_DEVICE_PROPERTY_FLAG_INTEGRATED = 1 << 0
        ZE_MAX_DEVICE_NAME = 256

        u32 = ctypes.c_uint32
        u64 = ctypes.c_uint64
        handle = ctypes.c_void_p

        class ze_device_uuid_t(ctypes.Structure):
            _fields_ = [("id", ctypes.c_ubyte * 16)]

        class ze_device_properties_t(ctypes.Structure):
            # Filled in by zeDeviceGetProperties; field order and widths matter.
            _fields_ = [
                ("stype", u32),
                ("pNext", handle),
                ("type", u32),
                ("vendorId", u32),
                ("deviceId", u32),
                ("flags", u32),
                ("subdeviceId", u32),
                ("coreClockRate", u32),
                ("maxMemAllocSize", u64),
                ("maxHardwareContexts", u32),
                ("maxCommandQueuePriority", u32),
                ("numThreadsPerEU", u32),
                ("physicalEUSimdWidth", u32),
                ("numEUsPerSubslice", u32),
                ("numSubslicesPerSlice", u32),
                ("numSlices", u32),
                ("timerResolution", u64),
                ("timestampValidBits", u32),
                ("kernelTimestampValidBits", u32),
                ("uuid", ze_device_uuid_t),
                ("name", ctypes.c_char * ZE_MAX_DEVICE_NAME),
            ]

        if sys.platform == "win32":
            candidates = ["ze_loader.dll"]
        else:
            candidates = [ctypes.util.find_library("ze_loader"), "libze_loader.so.1", "libze_loader.so"]

        # Take the first candidate that loads; find_library may have returned None.
        loader = None
        for candidate in candidates:
            if candidate:
                try:
                    loader = ctypes.CDLL(candidate)
                except OSError:
                    continue
                break
        if loader is None:
            return False

        # Declare the prototypes of the four entry points used below.
        prototypes = (
            ("zeInit", [u32]),
            ("zeDriverGet", [ctypes.POINTER(u32), ctypes.POINTER(handle)]),
            ("zeDeviceGet", [handle, ctypes.POINTER(u32), ctypes.POINTER(handle)]),
            ("zeDeviceGetProperties", [handle, ctypes.POINTER(ze_device_properties_t)]),
        )
        for symbol, signature in prototypes:
            entry = getattr(loader, symbol)
            entry.argtypes = signature
            entry.restype = u32

        if loader.zeInit(0) != ZE_RESULT_SUCCESS:
            return False

        # Device id of the XPU torch is currently using, when torch can report it.
        try:
            active_props = torch.xpu.get_device_properties(torch.xpu.current_device())
            torch_device_id = int(active_props.device_id)
        except Exception:
            torch_device_id = None

        # Two-step enumeration: ask for the count, then fetch that many handles.
        n_drivers = u32(0)
        if loader.zeDriverGet(ctypes.byref(n_drivers), None) != ZE_RESULT_SUCCESS or n_drivers.value == 0:
            return False
        driver_slots = n_drivers.value
        drivers = (handle * driver_slots)()
        if loader.zeDriverGet(ctypes.byref(n_drivers), drivers) != ZE_RESULT_SUCCESS:
            return False

        gpu_devices = []  # (deviceId, is_integrated)
        for driver in drivers[:min(n_drivers.value, driver_slots)]:
            n_devices = u32(0)
            if loader.zeDeviceGet(driver, ctypes.byref(n_devices), None) != ZE_RESULT_SUCCESS:
                return False
            device_slots = n_devices.value
            if device_slots == 0:
                continue
            devices = (handle * device_slots)()
            if loader.zeDeviceGet(driver, ctypes.byref(n_devices), devices) != ZE_RESULT_SUCCESS:
                return False
            for device in devices[:min(n_devices.value, device_slots)]:
                props = ze_device_properties_t()
                props.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES
                props.pNext = None
                if loader.zeDeviceGetProperties(device, ctypes.byref(props)) != ZE_RESULT_SUCCESS:
                    return False
                if props.type == ZE_DEVICE_TYPE_GPU:
                    is_integrated = bool(props.flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED)
                    gpu_devices.append((int(props.deviceId), is_integrated))

        if not gpu_devices:
            return False

        if torch_device_id is not None:
            active_flags = [integrated for dev_id, integrated in gpu_devices if dev_id == torch_device_id]
            if active_flags:
                # Fail closed if a duplicate PCI device id somehow mixes flags.
                _INTEL_XPU_DISCRETE = not any(active_flags)
                return _INTEL_XPU_DISCRETE

        # No reliable match: only enable when every visible GPU is discrete so a
        # mixed iGPU+dGPU system never enables streams while running on the iGPU.
        _INTEL_XPU_DISCRETE = not any(integrated for _, integrated in gpu_devices)
        return _INTEL_XPU_DISCRETE
    except Exception as e:
        logging.info("Could not determine Intel XPU type via Level Zero: {}".format(e))
        _INTEL_XPU_DISCRETE = False
        return False
|
||||
|
||||
|
||||
STREAMS = {}
|
||||
NUM_STREAMS = 0
|
||||
if args.async_offload is not None:
|
||||
NUM_STREAMS = args.async_offload
|
||||
else:
|
||||
# Enable by default on Nvidia, AMD, and discrete Intel XPU
|
||||
if not args.disable_async_offload and (is_nvidia() or is_amd() or is_intel_xpu_discrete()):
|
||||
# Enable by default on Nvidia and AMD
|
||||
if is_nvidia() or is_amd():
|
||||
NUM_STREAMS = 2
|
||||
|
||||
if args.disable_async_offload:
|
||||
@ -1622,7 +1487,7 @@ PINNED_MEMORY = {}
|
||||
TOTAL_PINNED_MEMORY = 0
|
||||
MAX_PINNED_MEMORY = -1
|
||||
if not args.disable_pinned_memory:
|
||||
if is_nvidia() or is_amd() or is_intel_xpu():
|
||||
if is_nvidia() or is_amd():
|
||||
ram = get_total_memory(torch.device("cpu"))
|
||||
if WINDOWS:
|
||||
MAX_PINNED_MEMORY = ram * 0.40 # Windows limit is apparently 50%
|
||||
@ -1647,20 +1512,6 @@ def discard_cuda_async_error():
|
||||
#Dump it! We already know about it from the synchronous return
|
||||
pass
|
||||
|
||||
def host_register(ptr, size):
    """Register ``size`` bytes of host memory at ``ptr``; 0 means success.

    On Intel XPU this is a no-op that reports success. Everywhere else the
    request is forwarded to ``cudaHostRegister`` with flags value 1 and its
    return code is passed back unchanged.
    """
    if not is_intel_xpu():
        return torch.cuda.cudart().cudaHostRegister(ptr, size, 1)
    # Intel XPU has no CUDA host-registration API. The pinnable buffers used by
    # the DynamicVRAM path are already Level Zero host USM (allocated through the
    # aimdo hostbuf / zeMemAllocHost), and pageable host memory is still usable
    # for transfers, so registration is a no-op success on XPU.
    return 0
|
||||
|
||||
def host_unregister(ptr):
    """Undo a host-memory registration for ``ptr``; 0 means success.

    On Intel XPU this reports success without doing anything; otherwise the
    return code of ``cudaHostUnregister`` is passed back unchanged.
    """
    if not is_intel_xpu():
        return torch.cuda.cudart().cudaHostUnregister(ptr)
    return 0
|
||||
|
||||
def pin_memory(tensor):
|
||||
global TOTAL_PINNED_MEMORY
|
||||
if MAX_PINNED_MEMORY <= 0:
|
||||
@ -1689,7 +1540,7 @@ def pin_memory(tensor):
|
||||
if ptr == 0:
|
||||
return False
|
||||
|
||||
if host_register(ptr, size) == 0:
|
||||
if torch.cuda.cudart().cudaHostRegister(ptr, size, 1) == 0:
|
||||
PINNED_MEMORY[ptr] = size
|
||||
TOTAL_PINNED_MEMORY += size
|
||||
return True
|
||||
@ -1719,7 +1570,7 @@ def unpin_memory(tensor):
|
||||
logging.warning("Size of pinned tensor changed")
|
||||
return False
|
||||
|
||||
if host_unregister(ptr) == 0:
|
||||
if torch.cuda.cudart().cudaHostUnregister(ptr) == 0:
|
||||
size = PINNED_MEMORY.pop(ptr)
|
||||
TOTAL_PINNED_MEMORY -= size
|
||||
return True
|
||||
|
||||
@ -1961,7 +1961,7 @@ class ModelPatcherDynamic(ModelPatcher):
|
||||
if not module._pin_registered:
|
||||
continue
|
||||
size = module._pin.numel() * module._pin.element_size()
|
||||
if comfy.model_management.host_unregister(module._pin.data_ptr()) != 0:
|
||||
if torch.cuda.cudart().cudaHostUnregister(module._pin.data_ptr()) != 0:
|
||||
comfy.model_management.discard_cuda_async_error()
|
||||
continue
|
||||
module._pin_registered = False
|
||||
|
||||
@ -53,7 +53,7 @@ def get_pin(module, subset="weights"):
|
||||
size = pin.nbytes
|
||||
comfy.model_management.ensure_pin_registerable(size)
|
||||
|
||||
if comfy.model_management.host_register(pin.data_ptr(), size) != 0:
|
||||
if torch.cuda.cudart().cudaHostRegister(pin.data_ptr(), size, 1) != 0:
|
||||
comfy.model_management.discard_cuda_async_error()
|
||||
return pin
|
||||
|
||||
@ -95,10 +95,10 @@ def pin_memory(module, subset="weights", size=None):
|
||||
extended = True
|
||||
pin = comfy_aimdo.torch.hostbuf_to_tensor(hostbuf)[offset:offset + size]
|
||||
pin.untyped_storage()._comfy_hostbuf = hostbuf
|
||||
if comfy.model_management.host_register(pin.data_ptr(), size) != 0:
|
||||
if torch.cuda.cudart().cudaHostRegister(pin.data_ptr(), size, 1) != 0:
|
||||
comfy.model_management.discard_cuda_async_error()
|
||||
comfy.model_management.free_registrations(size)
|
||||
if comfy.model_management.host_register(pin.data_ptr(), size) != 0:
|
||||
if torch.cuda.cudart().cudaHostRegister(pin.data_ptr(), size, 1) != 0:
|
||||
comfy.model_management.discard_cuda_async_error()
|
||||
del pin
|
||||
hostbuf.truncate(offset, do_unregister=False)
|
||||
|
||||
@ -337,6 +337,36 @@ class ModelMergeQwenImage(comfy_extras.nodes_model_merging.ModelMergeBlocks):
|
||||
|
||||
return {"required": arg_dict}
|
||||
|
||||
class ModelMergeKrea2(comfy_extras.nodes_model_merging.ModelMergeBlocks):
    # Declares two MODEL inputs plus one FLOAT input per key prefix listed below.
    # NOTE(review): presumably the base class treats each FLOAT as the merge
    # ratio for weights under that prefix - confirm against ModelMergeBlocks.
    CATEGORY = "model/merging/model specific"

    @classmethod
    def INPUT_TYPES(s):
        ratio = ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01})

        # Insertion order is kept deliberately: it is the order the inputs are declared in.
        prefixes = ["first.", "tmlp.", "txtmlp.", "tproj."]
        prefixes += ["txtfusion.layerwise_blocks.{}.".format(i) for i in range(2)]
        prefixes.append("txtfusion.projector.")
        prefixes += ["txtfusion.refiner_blocks.{}.".format(i) for i in range(2)]
        prefixes += ["blocks.{}.".format(i) for i in range(28)]
        prefixes.append("last.")

        required = {"model1": ("MODEL",), "model2": ("MODEL",)}
        # Every prefix shares the very same spec tuple.
        required.update(dict.fromkeys(prefixes, ratio))
        return {"required": required}
|
||||
|
||||
NODE_CLASS_MAPPINGS = {
|
||||
"ModelMergeSD1": ModelMergeSD1,
|
||||
"ModelMergeSD2": ModelMergeSD1, #SD1 and SD2 have the same blocks
|
||||
@ -353,4 +383,5 @@ NODE_CLASS_MAPPINGS = {
|
||||
"ModelMergeCosmosPredict2_2B": ModelMergeCosmosPredict2_2B,
|
||||
"ModelMergeCosmosPredict2_14B": ModelMergeCosmosPredict2_14B,
|
||||
"ModelMergeQwenImage": ModelMergeQwenImage,
|
||||
"ModelMergeKrea2": ModelMergeKrea2,
|
||||
}
|
||||
|
||||
33
comfy_extras/nodes_seed.py
Normal file
33
comfy_extras/nodes_seed.py
Normal file
@ -0,0 +1,33 @@
|
||||
import sys
|
||||
from typing_extensions import override
|
||||
|
||||
from comfy_api.latest import ComfyExtension, io
|
||||
|
||||
|
||||
class SeedNode(io.ComfyNode):
    """Utility node that hands its integer ``seed`` input back as its output."""

    @classmethod
    def define_schema(cls):
        # Non-negative integer, capped at sys.maxsize.
        seed_input = io.Int.Input(
            "seed",
            min=0,
            max=sys.maxsize,
            control_after_generate=io.ControlAfterGenerate.fixed,
        )
        seed_output = io.Int.Output(display_name="seed")
        return io.Schema(
            node_id="SeedNode",
            display_name="Seed",
            search_aliases=["seed", "random"],
            category="utilities",
            inputs=[seed_input],
            outputs=[seed_output],
        )

    @classmethod
    def execute(cls, seed: int) -> io.NodeOutput:
        # Pass-through: the value is forwarded unchanged.
        return io.NodeOutput(seed)
|
||||
|
||||
|
||||
class SeedExtension(ComfyExtension):
    """Extension whose node list contains only SeedNode."""

    @override
    async def get_node_list(self) -> list[type[io.ComfyNode]]:
        node_types = [SeedNode]
        return node_types
|
||||
|
||||
|
||||
async def comfy_entrypoint() -> SeedExtension:
    """Create and return a fresh SeedExtension instance."""
    extension = SeedExtension()
    return extension
|
||||
2
main.py
2
main.py
@ -236,7 +236,7 @@ import hook_breaker_ac10a0
|
||||
import comfy.memory_management
|
||||
import comfy.model_patcher
|
||||
|
||||
if args.enable_dynamic_vram or (enables_dynamic_vram() and (comfy.model_management.is_nvidia() or comfy.model_management.is_intel_xpu()) and not comfy.model_management.is_wsl()):
|
||||
if args.enable_dynamic_vram or (enables_dynamic_vram() and comfy.model_management.is_nvidia() and not comfy.model_management.is_wsl()):
|
||||
if (not args.enable_dynamic_vram) and (comfy.model_management.torch_version_numeric < (2, 8)):
|
||||
logging.warning("Unsupported Pytorch detected. DynamicVRAM support requires Pytorch version 2.8 or later. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows")
|
||||
else:
|
||||
|
||||
1
nodes.py
1
nodes.py
@ -2473,6 +2473,7 @@ async def init_builtin_extra_nodes():
|
||||
"nodes_gaussian_splat.py",
|
||||
"nodes_triposplat.py",
|
||||
"nodes_depth_anything_3.py",
|
||||
"nodes_seed.py",
|
||||
]
|
||||
|
||||
import_failed = []
|
||||
|
||||
14
openapi.yaml
14
openapi.yaml
@ -1692,6 +1692,12 @@ paths:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ErrorResponse'
|
||||
description: Unsupported media type
|
||||
"422":
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ErrorResponse'
|
||||
description: Validation error (e.g., disallowed model_type tag)
|
||||
"500":
|
||||
content:
|
||||
application/json:
|
||||
@ -2137,6 +2143,12 @@ paths:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ErrorResponse'
|
||||
description: Source asset with given hash not found
|
||||
"422":
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ErrorResponse'
|
||||
description: Validation error (e.g., disallowed model_type tag)
|
||||
"500":
|
||||
content:
|
||||
application/json:
|
||||
@ -2992,7 +3004,7 @@ paths:
|
||||
format: uuid
|
||||
type: string
|
||||
- description: |
|
||||
When present, each output item in the response receives a `short_url` field containing an owner-gated durable link for that asset. Omit this parameter (the default) to receive a response identical to the no-param baseline. The value selects the link's lifetime: use `ephemeral_tool_chain` for short-lived machine-to-machine handoffs (~15 minutes); use `default` for durable human-revisitable links (30 days). Links are minted only for the authenticated request owner and are not resolvable by other users.
|
||||
When present, each output item in the response receives a `short_url` field containing a short link for that asset. Omit this parameter (the default) to receive a response identical to the no-param baseline. The value selects the link's lifetime and auth model: use `ephemeral_tool_chain` for short-lived (≤5 minute) machine-to-machine handoffs — these are public bearer links where the link ID itself is the credential, so anyone holding the link can resolve it (intended for pasting into an agent/MCP tool chain); use `default` for durable (30 day) human-revisitable links, which are owner-gated and resolvable only by the authenticated owner. Links are always minted under the authenticated request owner's identity; the auth model is selected by the server and is never settable by the caller.
|
||||
in: query
|
||||
name: short_link
|
||||
schema:
|
||||
|
||||
Reference in New Issue
Block a user