Merge branch 'master' into cloud-openapi-projection

Add advanced krea 2 model merging node. (#14621 )
chore(openapi): sync shared API contract from cloud@4118910
2026-06-26 07:56:54 +08:00 · 2026-06-25 13:10:52 +08:00 · 2026-06-24 20:37:30 -07:00 · 2026-06-25 01:50:24 +00:00 · 2026-06-25 09:39:10 +08:00
8 changed files with 88 additions and 160 deletions
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -1274,148 +1274,13 @@ def force_channels_last():
    return False


-_INTEL_XPU_DISCRETE = None
-def is_intel_xpu_discrete():
-    # Returns True only if the active Intel XPU is a discrete GPU. torch.xpu does
-    # not expose the integrated-vs-discrete distinction, so we query Level Zero
-    # directly via ctypes. Works on Windows (ze_loader.dll) and Linux
-    # (libze_loader.so.1). Any failure or ambiguity returns False so a
-    # discrete-only fast path is never enabled by mistake.
-    global _INTEL_XPU_DISCRETE
-    if _INTEL_XPU_DISCRETE is not None:
-        return _INTEL_XPU_DISCRETE
-    _INTEL_XPU_DISCRETE = False
-    if not is_intel_xpu():
-        return False
-
-    try:
-        import ctypes
-        import ctypes.util
-
-        ZE_RESULT_SUCCESS = 0
-        ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES = 0x3
-        ZE_DEVICE_TYPE_GPU = 1
-        ZE_DEVICE_PROPERTY_FLAG_INTEGRATED = 1 << 0
-        ZE_MAX_DEVICE_NAME = 256
-
-        class ze_device_uuid_t(ctypes.Structure):
-            _fields_ = [("id", ctypes.c_ubyte * 16)]
-
-        class ze_device_properties_t(ctypes.Structure):
-            _fields_ = [
-                ("stype", ctypes.c_uint32),
-                ("pNext", ctypes.c_void_p),
-                ("type", ctypes.c_uint32),
-                ("vendorId", ctypes.c_uint32),
-                ("deviceId", ctypes.c_uint32),
-                ("flags", ctypes.c_uint32),
-                ("subdeviceId", ctypes.c_uint32),
-                ("coreClockRate", ctypes.c_uint32),
-                ("maxMemAllocSize", ctypes.c_uint64),
-                ("maxHardwareContexts", ctypes.c_uint32),
-                ("maxCommandQueuePriority", ctypes.c_uint32),
-                ("numThreadsPerEU", ctypes.c_uint32),
-                ("physicalEUSimdWidth", ctypes.c_uint32),
-                ("numEUsPerSubslice", ctypes.c_uint32),
-                ("numSubslicesPerSlice", ctypes.c_uint32),
-                ("numSlices", ctypes.c_uint32),
-                ("timerResolution", ctypes.c_uint64),
-                ("timestampValidBits", ctypes.c_uint32),
-                ("kernelTimestampValidBits", ctypes.c_uint32),
-                ("uuid", ze_device_uuid_t),
-                ("name", ctypes.c_char * ZE_MAX_DEVICE_NAME),
-            ]
-
-        if sys.platform == "win32":
-            loader_names = ["ze_loader.dll"]
-        else:
-            loader_names = [ctypes.util.find_library("ze_loader"), "libze_loader.so.1", "libze_loader.so"]
-
-        ze = None
-        for name in loader_names:
-            if not name:
-                continue
-            try:
-                ze = ctypes.CDLL(name)
-                break
-            except OSError:
-                pass
-        if ze is None:
-            return False
-
-        ze.zeInit.argtypes = [ctypes.c_uint32]
-        ze.zeInit.restype = ctypes.c_uint32
-        ze.zeDriverGet.argtypes = [ctypes.POINTER(ctypes.c_uint32), ctypes.POINTER(ctypes.c_void_p)]
-        ze.zeDriverGet.restype = ctypes.c_uint32
-        ze.zeDeviceGet.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.c_uint32), ctypes.POINTER(ctypes.c_void_p)]
-        ze.zeDeviceGet.restype = ctypes.c_uint32
-        ze.zeDeviceGetProperties.argtypes = [ctypes.c_void_p, ctypes.POINTER(ze_device_properties_t)]
-        ze.zeDeviceGetProperties.restype = ctypes.c_uint32
-
-        if ze.zeInit(0) != ZE_RESULT_SUCCESS:
-            return False
-
-        try:
-            torch_device_id = int(torch.xpu.get_device_properties(torch.xpu.current_device()).device_id)
-        except Exception:
-            torch_device_id = None
-
-        driver_count = ctypes.c_uint32(0)
-        if ze.zeDriverGet(ctypes.byref(driver_count), None) != ZE_RESULT_SUCCESS or driver_count.value == 0:
-            return False
-        allocated_drivers = driver_count.value
-        drivers = (ctypes.c_void_p * allocated_drivers)()
-        if ze.zeDriverGet(ctypes.byref(driver_count), drivers) != ZE_RESULT_SUCCESS:
-            return False
-
-        gpu_devices = []  # (deviceId, is_integrated)
-        for i in range(min(driver_count.value, allocated_drivers)):
-            device_count = ctypes.c_uint32(0)
-            if ze.zeDeviceGet(drivers[i], ctypes.byref(device_count), None) != ZE_RESULT_SUCCESS:
-                return False
-            if device_count.value == 0:
-                continue
-            allocated_devices = device_count.value
-            devices = (ctypes.c_void_p * allocated_devices)()
-            if ze.zeDeviceGet(drivers[i], ctypes.byref(device_count), devices) != ZE_RESULT_SUCCESS:
-                return False
-            for j in range(min(device_count.value, allocated_devices)):
-                props = ze_device_properties_t()
-                props.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES
-                props.pNext = None
-                if ze.zeDeviceGetProperties(devices[j], ctypes.byref(props)) != ZE_RESULT_SUCCESS:
-                    return False
-                if props.type != ZE_DEVICE_TYPE_GPU:
-                    continue
-                gpu_devices.append((int(props.deviceId), bool(props.flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED)))
-
-        if not gpu_devices:
-            return False
-
-        if torch_device_id is not None:
-            matches = [integrated for device_id, integrated in gpu_devices if device_id == torch_device_id]
-            if matches:
-                # Fail closed if a duplicate PCI device id somehow mixes flags.
-                _INTEL_XPU_DISCRETE = not any(matches)
-                return _INTEL_XPU_DISCRETE
-
-        # No reliable match: only enable when every visible GPU is discrete so a
-        # mixed iGPU+dGPU system never enables streams while running on the iGPU.
-        _INTEL_XPU_DISCRETE = all(not integrated for _, integrated in gpu_devices)
-        return _INTEL_XPU_DISCRETE
-    except Exception as e:
-        logging.info("Could not determine Intel XPU type via Level Zero: {}".format(e))
-        _INTEL_XPU_DISCRETE = False
-        return False
-
-
 STREAMS = {}
 NUM_STREAMS = 0
 if args.async_offload is not None:
    NUM_STREAMS = args.async_offload
 else:
-    #  Enable by default on Nvidia, AMD, and discrete Intel XPU
-    if not args.disable_async_offload and (is_nvidia() or is_amd() or is_intel_xpu_discrete()):
+    #  Enable by default on Nvidia and AMD
+    if is_nvidia() or is_amd():
        NUM_STREAMS = 2

 if args.disable_async_offload:
@@ -1622,7 +1487,7 @@ PINNED_MEMORY = {}
 TOTAL_PINNED_MEMORY = 0
 MAX_PINNED_MEMORY = -1
 if not args.disable_pinned_memory:
-    if is_nvidia() or is_amd() or is_intel_xpu():
+    if is_nvidia() or is_amd():
        ram = get_total_memory(torch.device("cpu"))
        if WINDOWS:
            MAX_PINNED_MEMORY = ram * 0.40  # Windows limit is apparently 50%
@@ -1647,20 +1512,6 @@ def discard_cuda_async_error():
        #Dump it! We already know about it from the synchronous return
        pass

-def host_register(ptr, size):
-    # Intel XPU has no CUDA host-registration API. The pinnable buffers used by
-    # the DynamicVRAM path are already Level Zero host USM (allocated through the
-    # aimdo hostbuf / zeMemAllocHost), and pageable host memory is still usable
-    # for transfers, so registration is a no-op success on XPU.
-    if is_intel_xpu():
-        return 0
-    return torch.cuda.cudart().cudaHostRegister(ptr, size, 1)
-
-def host_unregister(ptr):
-    if is_intel_xpu():
-        return 0
-    return torch.cuda.cudart().cudaHostUnregister(ptr)
-
 def pin_memory(tensor):
    global TOTAL_PINNED_MEMORY
    if MAX_PINNED_MEMORY <= 0:
@@ -1689,7 +1540,7 @@ def pin_memory(tensor):
    if ptr == 0:
        return False

-    if host_register(ptr, size) == 0:
+    if torch.cuda.cudart().cudaHostRegister(ptr, size, 1) == 0:
        PINNED_MEMORY[ptr] = size
        TOTAL_PINNED_MEMORY += size
        return True
@@ -1719,7 +1570,7 @@ def unpin_memory(tensor):
        logging.warning("Size of pinned tensor changed")
        return False

-    if host_unregister(ptr) == 0:
+    if torch.cuda.cudart().cudaHostUnregister(ptr) == 0:
        size = PINNED_MEMORY.pop(ptr)
        TOTAL_PINNED_MEMORY -= size
        return True
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -1961,7 +1961,7 @@ class ModelPatcherDynamic(ModelPatcher):
                if not module._pin_registered:
                    continue
                size = module._pin.numel() * module._pin.element_size()
-                if comfy.model_management.host_unregister(module._pin.data_ptr()) != 0:
+                if torch.cuda.cudart().cudaHostUnregister(module._pin.data_ptr()) != 0:
                    comfy.model_management.discard_cuda_async_error()
                    continue
                module._pin_registered = False
--- a/comfy/pinned_memory.py
+++ b/comfy/pinned_memory.py
@@ -53,7 +53,7 @@ def get_pin(module, subset="weights"):
    size = pin.nbytes
    comfy.model_management.ensure_pin_registerable(size)

-    if comfy.model_management.host_register(pin.data_ptr(), size) != 0:
+    if torch.cuda.cudart().cudaHostRegister(pin.data_ptr(), size, 1) != 0:
        comfy.model_management.discard_cuda_async_error()
        return pin

@@ -95,10 +95,10 @@ def pin_memory(module, subset="weights", size=None):
        extended = True
        pin = comfy_aimdo.torch.hostbuf_to_tensor(hostbuf)[offset:offset + size]
        pin.untyped_storage()._comfy_hostbuf = hostbuf
-        if comfy.model_management.host_register(pin.data_ptr(), size) != 0:
+        if torch.cuda.cudart().cudaHostRegister(pin.data_ptr(), size, 1) != 0:
            comfy.model_management.discard_cuda_async_error()
            comfy.model_management.free_registrations(size)
-            if comfy.model_management.host_register(pin.data_ptr(), size) != 0:
+            if torch.cuda.cudart().cudaHostRegister(pin.data_ptr(), size, 1) != 0:
                comfy.model_management.discard_cuda_async_error()
                del pin
                hostbuf.truncate(offset, do_unregister=False)
--- a/comfy_extras/nodes_model_merging_model_specific.py
+++ b/comfy_extras/nodes_model_merging_model_specific.py
@@ -337,6 +337,36 @@ class ModelMergeQwenImage(comfy_extras.nodes_model_merging.ModelMergeBlocks):

        return {"required": arg_dict}

+class ModelMergeKrea2(comfy_extras.nodes_model_merging.ModelMergeBlocks):
+    CATEGORY = "model/merging/model specific"
+
+    @classmethod
+    def INPUT_TYPES(s):
+        arg_dict = { "model1": ("MODEL",),
+                              "model2": ("MODEL",)}
+
+        argument = ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01})
+
+        arg_dict["first."] = argument
+        arg_dict["tmlp."] = argument
+        arg_dict["txtmlp."] = argument
+        arg_dict["tproj."] = argument
+
+        for i in range(2):
+            arg_dict["txtfusion.layerwise_blocks.{}.".format(i)] = argument
+
+        arg_dict["txtfusion.projector."] = argument
+
+        for i in range(2):
+            arg_dict["txtfusion.refiner_blocks.{}.".format(i)] = argument
+
+        for i in range(28):
+            arg_dict["blocks.{}.".format(i)] = argument
+
+        arg_dict["last."] = argument
+
+        return {"required": arg_dict}
+
 NODE_CLASS_MAPPINGS = {
    "ModelMergeSD1": ModelMergeSD1,
    "ModelMergeSD2": ModelMergeSD1, #SD1 and SD2 have the same blocks
@@ -353,4 +383,5 @@ NODE_CLASS_MAPPINGS = {
    "ModelMergeCosmosPredict2_2B": ModelMergeCosmosPredict2_2B,
    "ModelMergeCosmosPredict2_14B": ModelMergeCosmosPredict2_14B,
    "ModelMergeQwenImage": ModelMergeQwenImage,
+    "ModelMergeKrea2": ModelMergeKrea2,
 }
--- a/comfy_extras/nodes_seed.py
+++ b/comfy_extras/nodes_seed.py
@@ -0,0 +1,33 @@
+import sys
+from typing_extensions import override
+
+from comfy_api.latest import ComfyExtension, io
+
+
+class SeedNode(io.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SeedNode",
+            display_name="Seed",
+            search_aliases=["seed", "random"],
+            category="utilities",
+            inputs=[
+                io.Int.Input("seed", min=0, max=sys.maxsize, control_after_generate=io.ControlAfterGenerate.fixed),
+            ],
+            outputs=[io.Int.Output(display_name="seed")],
+        )
+
+    @classmethod
+    def execute(cls, seed: int) -> io.NodeOutput:
+        return io.NodeOutput(seed)
+
+
+class SeedExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [SeedNode]
+
+
+async def comfy_entrypoint() -> SeedExtension:
+    return SeedExtension()
--- a/main.py
+++ b/main.py
@@ -236,7 +236,7 @@ import hook_breaker_ac10a0
 import comfy.memory_management
 import comfy.model_patcher

-if args.enable_dynamic_vram or (enables_dynamic_vram() and (comfy.model_management.is_nvidia() or comfy.model_management.is_intel_xpu()) and not comfy.model_management.is_wsl()):
+if args.enable_dynamic_vram or (enables_dynamic_vram() and comfy.model_management.is_nvidia() and not comfy.model_management.is_wsl()):
    if (not args.enable_dynamic_vram) and (comfy.model_management.torch_version_numeric < (2, 8)):
        logging.warning("Unsupported Pytorch detected. DynamicVRAM support requires Pytorch version 2.8 or later. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows")
    else:
--- a/nodes.py
+++ b/nodes.py
@@ -2473,6 +2473,7 @@ async def init_builtin_extra_nodes():
        "nodes_gaussian_splat.py",
        "nodes_triposplat.py",
        "nodes_depth_anything_3.py",
+        "nodes_seed.py",
    ]

    import_failed = []
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -1692,6 +1692,12 @@ paths:
                            schema:
                                $ref: '#/components/schemas/ErrorResponse'
                    description: Unsupported media type
+                "422":
+                    content:
+                        application/json:
+                            schema:
+                                $ref: '#/components/schemas/ErrorResponse'
+                    description: Validation error (e.g., disallowed model_type tag)
                "500":
                    content:
                        application/json:
@@ -2137,6 +2143,12 @@ paths:
                            schema:
                                $ref: '#/components/schemas/ErrorResponse'
                    description: Source asset with given hash not found
+                "422":
+                    content:
+                        application/json:
+                            schema:
+                                $ref: '#/components/schemas/ErrorResponse'
+                    description: Validation error (e.g., disallowed model_type tag)
                "500":
                    content:
                        application/json:
@@ -2992,7 +3004,7 @@ paths:
                    format: uuid
                    type: string
                - description: |
-                    When present, each output item in the response receives a `short_url` field containing an owner-gated durable link for that asset. Omit this parameter (the default) to receive a response identical to the no-param baseline. The value selects the link's lifetime: use `ephemeral_tool_chain` for short-lived machine-to-machine handoffs (~15 minutes); use `default` for durable human-revisitable links (30 days). Links are minted only for the authenticated request owner and are not resolvable by other users.
+                    When present, each output item in the response receives a `short_url` field containing a short link for that asset. Omit this parameter (the default) to receive a response identical to the no-param baseline. The value selects the link's lifetime and auth model: use `ephemeral_tool_chain` for short-lived (≤5 minute) machine-to-machine handoffs — these are public bearer links where the link ID itself is the credential, so anyone holding the link can resolve it (intended for pasting into an agent/MCP tool chain); use `default` for durable (30 day) human-revisitable links, which are owner-gated and resolvable only by the authenticated owner. Links are always minted under the authenticated request owner's identity; the auth model is selected by the server and is never settable by the caller.
                  in: query
                  name: short_link
                  schema:
Author	SHA1	Message	Date
Alexis Rolland	7f253cbb08	Merge branch 'master' into cloud-openapi-projection	2026-06-25 13:10:52 +08:00
comfyanonymous	64e1d740b8	Add advanced krea 2 model merging node. (#14621 )	2026-06-24 20:37:30 -07:00
mattmillerai	5a48a054f4	chore(openapi): sync shared API contract from cloud@4118910	2026-06-25 01:50:24 +00:00
Yousef R. Gamaleldin	b22d0fb9c0	feat: Add Support For Simple Seed (CORE-295) (#14616 )	2026-06-25 09:39:10 +08:00