mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-06-25 23:47:00 +08:00
Compare commits
8 Commits
xpu-aimdo-
...
release/v0
| Author | SHA1 | Date | |
|---|---|---|---|
| 7c8450ef2b | |||
| c39ba98848 | |||
| b25396e6c9 | |||
| 8ceb6fa8d7 | |||
| f2eb8dc846 | |||
| a8a93bec53 | |||
| c7c2c440cc | |||
| 299d6c50c1 |
@ -1274,148 +1274,13 @@ def force_channels_last():
|
||||
return False
|
||||
|
||||
|
||||
_INTEL_XPU_DISCRETE = None
|
||||
def is_intel_xpu_discrete():
    # Tells whether the active Intel XPU is a discrete (non-integrated) GPU.
    # torch.xpu does not expose that distinction, so the Level Zero loader is
    # opened through ctypes (ze_loader.dll on Windows, libze_loader.so.1 /
    # libze_loader.so elsewhere) and the device properties are inspected.
    # The answer is computed once and memoised in _INTEL_XPU_DISCRETE. Any
    # failure or ambiguity yields False so a discrete-only fast path is never
    # switched on by mistake.
    global _INTEL_XPU_DISCRETE
    if _INTEL_XPU_DISCRETE is not None:
        return _INTEL_XPU_DISCRETE

    # Memoise the pessimistic answer first: every early exit below then leaves
    # False cached without having to assign it again.
    _INTEL_XPU_DISCRETE = False
    if not is_intel_xpu():
        return False

    try:
        import ctypes
        import ctypes.util

        ZE_RESULT_SUCCESS = 0
        ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES = 0x3
        ZE_DEVICE_TYPE_GPU = 1
        ZE_DEVICE_PROPERTY_FLAG_INTEGRATED = 1 << 0
        ZE_MAX_DEVICE_NAME = 256

        u32 = ctypes.c_uint32
        u64 = ctypes.c_uint64
        handle_t = ctypes.c_void_p

        class ze_device_uuid_t(ctypes.Structure):
            _fields_ = [("id", ctypes.c_ubyte * 16)]

        # Field order and widths mirror the C struct; only type, deviceId and
        # flags are read afterwards, the rest keeps the layout correct.
        class ze_device_properties_t(ctypes.Structure):
            _fields_ = [
                ("stype", u32),
                ("pNext", handle_t),
                ("type", u32),
                ("vendorId", u32),
                ("deviceId", u32),
                ("flags", u32),
                ("subdeviceId", u32),
                ("coreClockRate", u32),
                ("maxMemAllocSize", u64),
                ("maxHardwareContexts", u32),
                ("maxCommandQueuePriority", u32),
                ("numThreadsPerEU", u32),
                ("physicalEUSimdWidth", u32),
                ("numEUsPerSubslice", u32),
                ("numSubslicesPerSlice", u32),
                ("numSlices", u32),
                ("timerResolution", u64),
                ("timestampValidBits", u32),
                ("kernelTimestampValidBits", u32),
                ("uuid", ze_device_uuid_t),
                ("name", ctypes.c_char * ZE_MAX_DEVICE_NAME),
            ]

        # find_library is only consulted off Windows; it may return None,
        # which is filtered out below.
        candidates = (
            ["ze_loader.dll"]
            if sys.platform == "win32"
            else [ctypes.util.find_library("ze_loader"), "libze_loader.so.1", "libze_loader.so"]
        )

        ze = None
        for lib_name in filter(None, candidates):
            try:
                ze = ctypes.CDLL(lib_name)
            except OSError:
                continue
            break
        if ze is None:
            return False

        # Declare the prototypes of the four entry points used; all of them
        # return a 32-bit result code.
        count_ptr = ctypes.POINTER(u32)
        handle_ptr = ctypes.POINTER(handle_t)
        prototypes = {
            "zeInit": [u32],
            "zeDriverGet": [count_ptr, handle_ptr],
            "zeDeviceGet": [handle_t, count_ptr, handle_ptr],
            "zeDeviceGetProperties": [handle_t, ctypes.POINTER(ze_device_properties_t)],
        }
        for symbol, argtypes in prototypes.items():
            entry = getattr(ze, symbol)
            entry.argtypes = argtypes
            entry.restype = u32

        if ze.zeInit(0) != ZE_RESULT_SUCCESS:
            return False

        # Device id of the XPU torch is currently using; None when torch
        # cannot report it.
        try:
            active_index = torch.xpu.current_device()
            torch_device_id = int(torch.xpu.get_device_properties(active_index).device_id)
        except Exception:
            torch_device_id = None

        # Two-call pattern: the first call reports the count, the second fills
        # the handle array.
        n_drivers = u32(0)
        status = ze.zeDriverGet(ctypes.byref(n_drivers), None)
        if status != ZE_RESULT_SUCCESS or n_drivers.value == 0:
            return False
        driver_capacity = n_drivers.value
        driver_handles = (handle_t * driver_capacity)()
        if ze.zeDriverGet(ctypes.byref(n_drivers), driver_handles) != ZE_RESULT_SUCCESS:
            return False

        gpu_devices = []  # (deviceId, is_integrated)
        # Never read past what was allocated, even if the count changed
        # between the two calls.
        for driver in driver_handles[:min(n_drivers.value, driver_capacity)]:
            n_devices = u32(0)
            if ze.zeDeviceGet(driver, ctypes.byref(n_devices), None) != ZE_RESULT_SUCCESS:
                return False
            device_capacity = n_devices.value
            if device_capacity == 0:
                continue
            device_handles = (handle_t * device_capacity)()
            if ze.zeDeviceGet(driver, ctypes.byref(n_devices), device_handles) != ZE_RESULT_SUCCESS:
                return False

            for device in device_handles[:min(n_devices.value, device_capacity)]:
                props = ze_device_properties_t(
                    stype=ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES,
                    pNext=None,
                )
                if ze.zeDeviceGetProperties(device, ctypes.byref(props)) != ZE_RESULT_SUCCESS:
                    return False
                if props.type == ZE_DEVICE_TYPE_GPU:
                    is_integrated = bool(props.flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED)
                    gpu_devices.append((int(props.deviceId), is_integrated))

        if not gpu_devices:
            return False

        # Prefer the entries whose device id matches the one torch reports.
        # Fail closed if a duplicated device id somehow mixes flags.
        integrated_flags = []
        if torch_device_id is not None:
            integrated_flags = [flag for dev_id, flag in gpu_devices if dev_id == torch_device_id]
        if not integrated_flags:
            # No reliable match: require every visible GPU to be discrete so
            # a mixed iGPU+dGPU system never enables streams while running on
            # the iGPU.
            integrated_flags = [flag for _, flag in gpu_devices]

        _INTEL_XPU_DISCRETE = not any(integrated_flags)
        return _INTEL_XPU_DISCRETE
    except Exception as e:
        logging.info("Could not determine Intel XPU type via Level Zero: {}".format(e))
        _INTEL_XPU_DISCRETE = False
        return False
|
||||
|
||||
|
||||
STREAMS = {}
|
||||
NUM_STREAMS = 0
|
||||
if args.async_offload is not None:
|
||||
NUM_STREAMS = args.async_offload
|
||||
else:
|
||||
# Enable by default on Nvidia, AMD, and discrete Intel XPU
|
||||
if not args.disable_async_offload and (is_nvidia() or is_amd() or is_intel_xpu_discrete()):
|
||||
# Enable by default on Nvidia and AMD
|
||||
if is_nvidia() or is_amd():
|
||||
NUM_STREAMS = 2
|
||||
|
||||
if args.disable_async_offload:
|
||||
@ -1622,7 +1487,7 @@ PINNED_MEMORY = {}
|
||||
TOTAL_PINNED_MEMORY = 0
|
||||
MAX_PINNED_MEMORY = -1
|
||||
if not args.disable_pinned_memory:
|
||||
if is_nvidia() or is_amd() or is_intel_xpu():
|
||||
if is_nvidia() or is_amd():
|
||||
ram = get_total_memory(torch.device("cpu"))
|
||||
if WINDOWS:
|
||||
MAX_PINNED_MEMORY = ram * 0.40 # Windows limit is apparently 50%
|
||||
@ -1647,20 +1512,6 @@ def discard_cuda_async_error():
|
||||
#Dump it! We already know about it from the synchronous return
|
||||
pass
|
||||
|
||||
def host_register(ptr, size):
    # Registers the host range starting at ptr (size bytes) via the CUDA
    # runtime and returns its result code; callers treat 0 as success.
    if not is_intel_xpu():
        return torch.cuda.cudart().cudaHostRegister(ptr, size, 1)
    # Intel XPU has no CUDA host-registration API. The pinnable buffers used by
    # the DynamicVRAM path are already Level Zero host USM (allocated through
    # the aimdo hostbuf / zeMemAllocHost), and pageable host memory is still
    # usable for transfers, so on XPU this reports success without doing
    # anything.
    return 0
|
||||
|
||||
def host_unregister(ptr):
    # Undoes a host registration for ptr via the CUDA runtime and returns its
    # result code; callers treat 0 as success. On Intel XPU nothing is ever
    # registered, so this reports success without calling into CUDA.
    if not is_intel_xpu():
        return torch.cuda.cudart().cudaHostUnregister(ptr)
    return 0
|
||||
|
||||
def pin_memory(tensor):
|
||||
global TOTAL_PINNED_MEMORY
|
||||
if MAX_PINNED_MEMORY <= 0:
|
||||
@ -1689,7 +1540,7 @@ def pin_memory(tensor):
|
||||
if ptr == 0:
|
||||
return False
|
||||
|
||||
if host_register(ptr, size) == 0:
|
||||
if torch.cuda.cudart().cudaHostRegister(ptr, size, 1) == 0:
|
||||
PINNED_MEMORY[ptr] = size
|
||||
TOTAL_PINNED_MEMORY += size
|
||||
return True
|
||||
@ -1719,7 +1570,7 @@ def unpin_memory(tensor):
|
||||
logging.warning("Size of pinned tensor changed")
|
||||
return False
|
||||
|
||||
if host_unregister(ptr) == 0:
|
||||
if torch.cuda.cudart().cudaHostUnregister(ptr) == 0:
|
||||
size = PINNED_MEMORY.pop(ptr)
|
||||
TOTAL_PINNED_MEMORY -= size
|
||||
return True
|
||||
|
||||
@ -1961,7 +1961,7 @@ class ModelPatcherDynamic(ModelPatcher):
|
||||
if not module._pin_registered:
|
||||
continue
|
||||
size = module._pin.numel() * module._pin.element_size()
|
||||
if comfy.model_management.host_unregister(module._pin.data_ptr()) != 0:
|
||||
if torch.cuda.cudart().cudaHostUnregister(module._pin.data_ptr()) != 0:
|
||||
comfy.model_management.discard_cuda_async_error()
|
||||
continue
|
||||
module._pin_registered = False
|
||||
|
||||
@ -53,7 +53,7 @@ def get_pin(module, subset="weights"):
|
||||
size = pin.nbytes
|
||||
comfy.model_management.ensure_pin_registerable(size)
|
||||
|
||||
if comfy.model_management.host_register(pin.data_ptr(), size) != 0:
|
||||
if torch.cuda.cudart().cudaHostRegister(pin.data_ptr(), size, 1) != 0:
|
||||
comfy.model_management.discard_cuda_async_error()
|
||||
return pin
|
||||
|
||||
@ -95,10 +95,10 @@ def pin_memory(module, subset="weights", size=None):
|
||||
extended = True
|
||||
pin = comfy_aimdo.torch.hostbuf_to_tensor(hostbuf)[offset:offset + size]
|
||||
pin.untyped_storage()._comfy_hostbuf = hostbuf
|
||||
if comfy.model_management.host_register(pin.data_ptr(), size) != 0:
|
||||
if torch.cuda.cudart().cudaHostRegister(pin.data_ptr(), size, 1) != 0:
|
||||
comfy.model_management.discard_cuda_async_error()
|
||||
comfy.model_management.free_registrations(size)
|
||||
if comfy.model_management.host_register(pin.data_ptr(), size) != 0:
|
||||
if torch.cuda.cudart().cudaHostRegister(pin.data_ptr(), size, 1) != 0:
|
||||
comfy.model_management.discard_cuda_async_error()
|
||||
del pin
|
||||
hostbuf.truncate(offset, do_unregister=False)
|
||||
|
||||
@ -177,6 +177,10 @@ SEEDANCE2_PRICE_PER_1K_TOKENS = {
|
||||
("dreamina-seedance-2-0-fast-260128", True, "480p"): 0.0033,
|
||||
("dreamina-seedance-2-0-fast-260128", False, "720p"): 0.0056,
|
||||
("dreamina-seedance-2-0-fast-260128", True, "720p"): 0.0033,
|
||||
("dreamina-seedance-2-0-mini", False, "480p"): 0.0035,
|
||||
("dreamina-seedance-2-0-mini", True, "480p"): 0.0021,
|
||||
("dreamina-seedance-2-0-mini", False, "720p"): 0.0035,
|
||||
("dreamina-seedance-2-0-mini", True, "720p"): 0.0021,
|
||||
}
|
||||
|
||||
|
||||
@ -278,6 +282,10 @@ SEEDANCE2_REF_VIDEO_PIXEL_LIMITS = {
|
||||
"480p": {"min": 409_600, "max": 927_408},
|
||||
"720p": {"min": 409_600, "max": 927_408},
|
||||
},
|
||||
"dreamina-seedance-2-0-mini": {
|
||||
"480p": {"min": 409_600, "max": 927_408},
|
||||
"720p": {"min": 409_600, "max": 927_408},
|
||||
},
|
||||
}
|
||||
|
||||
# The time in this dictionary are given for 10 seconds duration.
|
||||
|
||||
@ -89,6 +89,7 @@ BYTEPLUS_SEEDANCE2_TASK_STATUS_ENDPOINT = "/proxy/byteplus-seedance2/api/v3/cont
|
||||
SEEDANCE_MODELS = {
|
||||
"Seedance 2.0": "dreamina-seedance-2-0-260128",
|
||||
"Seedance 2.0 Fast": "dreamina-seedance-2-0-fast-260128",
|
||||
"Seedance 2.0 Mini": "dreamina-seedance-2-0-mini",
|
||||
}
|
||||
|
||||
DEPRECATED_MODELS = {"seedance-1-0-lite-t2v-250428", "seedance-1-0-lite-i2v-250428"}
|
||||
@ -1623,8 +1624,10 @@ class ByteDance2TextToVideoNode(IO.ComfyNode):
|
||||
options=[
|
||||
IO.DynamicCombo.Option("Seedance 2.0", _seedance2_text_inputs(["480p", "720p", "1080p", "4k"])),
|
||||
IO.DynamicCombo.Option("Seedance 2.0 Fast", _seedance2_text_inputs(["480p", "720p"])),
|
||||
IO.DynamicCombo.Option("Seedance 2.0 Mini", _seedance2_text_inputs(["480p", "720p"])),
|
||||
],
|
||||
tooltip="Seedance 2.0 for maximum quality; Seedance 2.0 Fast for speed optimization.",
|
||||
tooltip="Seedance 2.0 for maximum quality; Fast for speed optimization; "
|
||||
"Mini for the fastest, lowest-cost generation.",
|
||||
),
|
||||
IO.Int.Input(
|
||||
"seed",
|
||||
@ -1666,6 +1669,7 @@ class ByteDance2TextToVideoNode(IO.ComfyNode):
|
||||
$dur := $lookup(widgets, "model.duration");
|
||||
$pricePer1K := $res = "4k" ? 0.00572 :
|
||||
$res = "1080p" ? 0.011011 :
|
||||
$contains($m, "mini") ? 0.005005 :
|
||||
$contains($m, "fast") ? 0.008008 : 0.01001;
|
||||
$rate := $res = "4k" ? $rate4k :
|
||||
$res = "1080p" ? $rate1080 :
|
||||
@ -1734,8 +1738,13 @@ class ByteDance2FirstLastFrameNode(IO.ComfyNode):
|
||||
"Seedance 2.0 Fast",
|
||||
_seedance2_text_inputs(["480p", "720p"], default_ratio="adaptive"),
|
||||
),
|
||||
IO.DynamicCombo.Option(
|
||||
"Seedance 2.0 Mini",
|
||||
_seedance2_text_inputs(["480p", "720p"], default_ratio="adaptive"),
|
||||
),
|
||||
],
|
||||
tooltip="Seedance 2.0 for maximum quality; Seedance 2.0 Fast for speed optimization.",
|
||||
tooltip="Seedance 2.0 for maximum quality; Fast for speed optimization; "
|
||||
"Mini for the fastest, lowest-cost generation.",
|
||||
),
|
||||
IO.Image.Input(
|
||||
"first_frame",
|
||||
@ -1801,6 +1810,7 @@ class ByteDance2FirstLastFrameNode(IO.ComfyNode):
|
||||
$dur := $lookup(widgets, "model.duration");
|
||||
$pricePer1K := $res = "4k" ? 0.00572 :
|
||||
$res = "1080p" ? 0.011011 :
|
||||
$contains($m, "mini") ? 0.005005 :
|
||||
$contains($m, "fast") ? 0.008008 : 0.01001;
|
||||
$rate := $res = "4k" ? $rate4k :
|
||||
$res = "1080p" ? $rate1080 :
|
||||
@ -2024,8 +2034,13 @@ class ByteDance2ReferenceNode(IO.ComfyNode):
|
||||
"Seedance 2.0 Fast",
|
||||
_seedance2_reference_inputs(["480p", "720p"], default_ratio="adaptive"),
|
||||
),
|
||||
IO.DynamicCombo.Option(
|
||||
"Seedance 2.0 Mini",
|
||||
_seedance2_reference_inputs(["480p", "720p"], default_ratio="adaptive"),
|
||||
),
|
||||
],
|
||||
tooltip="Seedance 2.0 for maximum quality; Seedance 2.0 Fast for speed optimization.",
|
||||
tooltip="Seedance 2.0 for maximum quality; Fast for speed optimization; "
|
||||
"Mini for the fastest, lowest-cost generation.",
|
||||
),
|
||||
IO.Int.Input(
|
||||
"seed",
|
||||
@ -2071,9 +2086,11 @@ class ByteDance2ReferenceNode(IO.ComfyNode):
|
||||
$dur := $lookup(widgets, "model.duration");
|
||||
$noVideoPricePer1K := $res = "4k" ? 0.00572 :
|
||||
$res = "1080p" ? 0.011011 :
|
||||
$contains($m, "mini") ? 0.005005 :
|
||||
$contains($m, "fast") ? 0.008008 : 0.01001;
|
||||
$videoPricePer1K := $res = "4k" ? 0.003432 :
|
||||
$res = "1080p" ? 0.006721 :
|
||||
$contains($m, "mini") ? 0.003003 :
|
||||
$contains($m, "fast") ? 0.004719 : 0.006149;
|
||||
$rate := $res = "4k" ? $rate4k :
|
||||
$res = "1080p" ? $rate1080 :
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
# This file is automatically generated by the build process when version is
|
||||
# updated in pyproject.toml.
|
||||
__version__ = "0.26.0"
|
||||
__version__ = "0.26.2"
|
||||
|
||||
2
main.py
2
main.py
@ -236,7 +236,7 @@ import hook_breaker_ac10a0
|
||||
import comfy.memory_management
|
||||
import comfy.model_patcher
|
||||
|
||||
if args.enable_dynamic_vram or (enables_dynamic_vram() and (comfy.model_management.is_nvidia() or comfy.model_management.is_intel_xpu()) and not comfy.model_management.is_wsl()):
|
||||
if args.enable_dynamic_vram or (enables_dynamic_vram() and comfy.model_management.is_nvidia() and not comfy.model_management.is_wsl()):
|
||||
if (not args.enable_dynamic_vram) and (comfy.model_management.torch_version_numeric < (2, 8)):
|
||||
logging.warning("Unsupported Pytorch detected. DynamicVRAM support requires Pytorch version 2.8 or later. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows")
|
||||
else:
|
||||
|
||||
@ -2357,10 +2357,6 @@ paths:
|
||||
description: |
|
||||
Returns a list of model folders available in the system.
|
||||
This is an experimental endpoint that replaces the legacy /models endpoint.
|
||||
Each folder's name is the identifier to pass to /api/experiment/models/{folder}.
|
||||
Once the model_type migration is active the names are model_type folder_names
|
||||
(e.g. `ultralytics_bbox`); a folder with no folder_name mapping is returned by
|
||||
its directory path.
|
||||
operationId: getModelFolders
|
||||
responses:
|
||||
"200":
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "ComfyUI"
|
||||
version = "0.26.0"
|
||||
version = "0.26.2"
|
||||
readme = "README.md"
|
||||
license = { file = "LICENSE" }
|
||||
requires-python = ">=3.10"
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
comfyui-frontend-package==1.45.19
|
||||
comfyui-workflow-templates==0.10.2
|
||||
comfyui-workflow-templates==0.10.7
|
||||
comfyui-embedded-docs==0.5.5
|
||||
torch
|
||||
torchsde
|
||||
|
||||
Reference in New Issue
Block a user