feat(api-nodes): add Bria RMBG nodes

Add working Qwen 2512 ControlNet (Fun ControlNet) support (#12359 )
Add left padding to LTXAV text encoder. (#12456 )
2026-02-14 23:45:18 +08:00 · 2026-02-14 14:13:27 +02:00 · 2026-02-13 22:23:52 -05:00 · 2026-02-13 21:56:54 -05:00 · 2026-02-13 20:21:10 -05:00 · 2026-02-13 20:15:23 -05:00
29 changed files with 587 additions and 428 deletions
--- a/app/node_replace_manager.py
+++ b/app/node_replace_manager.py
@ -1,105 +0,0 @@
-from __future__ import annotations
-
-from aiohttp import web
-
-from typing import TYPE_CHECKING, TypedDict
-if TYPE_CHECKING:
-    from comfy_api.latest._node_replace import NodeReplace
-
-from nodes import NODE_CLASS_MAPPINGS
-
-class NodeStruct(TypedDict):
-    inputs: dict[str, str | int | float | bool | tuple[str, int]]
-    class_type: str
-    _meta: dict[str, str]
-
-def copy_node_struct(node_struct: NodeStruct, empty_inputs: bool = False) -> NodeStruct:
-    new_node_struct = node_struct.copy()
-    if empty_inputs:
-        new_node_struct["inputs"] = {}
-    else:
-        new_node_struct["inputs"] = node_struct["inputs"].copy()
-    new_node_struct["_meta"] = node_struct["_meta"].copy()
-    return new_node_struct
-
-
-class NodeReplaceManager:
-    """Manages node replacement registrations."""
-
-    def __init__(self):
-        self._replacements: dict[str, list[NodeReplace]] = {}
-
-    def register(self, node_replace: NodeReplace):
-        """Register a node replacement mapping."""
-        self._replacements.setdefault(node_replace.old_node_id, []).append(node_replace)
-
-    def get_replacement(self, old_node_id: str) -> list[NodeReplace] | None:
-        """Get replacements for an old node ID."""
-        return self._replacements.get(old_node_id)
-
-    def has_replacement(self, old_node_id: str) -> bool:
-        """Check if a replacement exists for an old node ID."""
-        return old_node_id in self._replacements
-
-    def apply_replacements(self, prompt: dict[str, NodeStruct]):
-        connections: dict[str, list[tuple[str, str, int]]] = {}
-        need_replacement: set[str] = set()
-        for node_number, node_struct in prompt.items():
-            class_type = node_struct["class_type"]
-            # need replacement if not in NODE_CLASS_MAPPINGS and has replacement
-            if class_type not in NODE_CLASS_MAPPINGS.keys() and self.has_replacement(class_type):
-                need_replacement.add(node_number)
-            # keep track of connections
-            for input_id, input_value in node_struct["inputs"].items():
-                if isinstance(input_value, list):
-                    conn_number = input_value[0]
-                    connections.setdefault(conn_number, []).append((node_number, input_id, input_value[1]))
-        if len(need_replacement) > 0:
-            for node_number in need_replacement:
-                node_struct = prompt[node_number]
-                class_type = node_struct["class_type"]
-                replacements = self.get_replacement(class_type)
-                if replacements is None:
-                    continue
-                # just use the first replacement
-                replacement = replacements[0]
-                new_node_id = replacement.new_node_id
-                # if replacement is not a valid node, skip trying to replace it as will only cause confusion
-                if new_node_id not in NODE_CLASS_MAPPINGS.keys():
-                    continue
-                # first, replace node id (class_type)
-                new_node_struct = copy_node_struct(node_struct, empty_inputs=True)
-                new_node_struct["class_type"] = new_node_id
-                # TODO: consider replacing display_name in _meta as well for error reporting purposes; would need to query node schema
-                # second, replace inputs
-                if replacement.input_mapping is not None:
-                    for input_map in replacement.input_mapping:
-                        if "set_value" in input_map:
-                            new_node_struct["inputs"][input_map["new_id"]] = input_map["set_value"]
-                        elif "old_id" in input_map:
-                            new_node_struct["inputs"][input_map["new_id"]] = node_struct["inputs"][input_map["old_id"]]
-                # finalize input replacement
-                prompt[node_number] = new_node_struct
-                # third, replace outputs
-                if replacement.output_mapping is not None:
-                    # re-mapping outputs requires changing the input values of nodes that receive connections from this one
-                    if node_number in connections:
-                        for conns in connections[node_number]:
-                            conn_node_number, conn_input_id, old_output_idx = conns
-                            for output_map in replacement.output_mapping:
-                                if output_map["old_idx"] == old_output_idx:
-                                    new_output_idx = output_map["new_idx"]
-                                    previous_input = prompt[conn_node_number]["inputs"][conn_input_id]
-                                    previous_input[1] = new_output_idx
-
-    def as_dict(self):
-        """Serialize all replacements to dict."""
-        return {
-            k: [v.as_dict() for v in v_list]
-            for k, v_list in self._replacements.items()
-        }
-
-    def add_routes(self, routes):
-        @routes.get("/node_replacements")
-        async def get_node_replacements(request):
-            return web.json_response(self.as_dict())
--- a/comfy/controlnet.py
+++ b/comfy/controlnet.py
@ -297,6 +297,30 @@ class ControlNet(ControlBase):
        self.model_sampling_current = None
        super().cleanup()

+
+class QwenFunControlNet(ControlNet):
+    def get_control(self, x_noisy, t, cond, batched_number, transformer_options):
+        # Fun checkpoints are more sensitive to high strengths in the generic
+        # ControlNet merge path. Use a soft response curve so strength=1.0 stays
+        # unchanged while >1 grows more gently.
+        original_strength = self.strength
+        self.strength = math.sqrt(max(self.strength, 0.0))
+        try:
+            return super().get_control(x_noisy, t, cond, batched_number, transformer_options)
+        finally:
+            self.strength = original_strength
+
+    def pre_run(self, model, percent_to_timestep_function):
+        super().pre_run(model, percent_to_timestep_function)
+        self.set_extra_arg("base_model", model.diffusion_model)
+
+    def copy(self):
+        c = QwenFunControlNet(None, global_average_pooling=self.global_average_pooling, load_device=self.load_device, manual_cast_dtype=self.manual_cast_dtype)
+        c.control_model = self.control_model
+        c.control_model_wrapped = self.control_model_wrapped
+        self.copy_to(c)
+        return c
+
 class ControlLoraOps:
    class Linear(torch.nn.Module, comfy.ops.CastWeightBiasOp):
        def __init__(self, in_features: int, out_features: int, bias: bool = True,
@ -560,6 +584,7 @@ def load_controlnet_hunyuandit(controlnet_data, model_options={}):
 def load_controlnet_flux_xlabs_mistoline(sd, mistoline=False, model_options={}):
    model_config, operations, load_device, unet_dtype, manual_cast_dtype, offload_device = controlnet_config(sd, model_options=model_options)
    control_model = comfy.ldm.flux.controlnet.ControlNetFlux(mistoline=mistoline, operations=operations, device=offload_device, dtype=unet_dtype, **model_config.unet_config)
+    sd = model_config.process_unet_state_dict(sd)
    control_model = controlnet_load_state_dict(control_model, sd)
    extra_conds = ['y', 'guidance']
    control = ControlNet(control_model, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds)
@ -605,6 +630,53 @@ def load_controlnet_qwen_instantx(sd, model_options={}):
    control = ControlNet(control_model, compression_ratio=1, latent_format=latent_format, concat_mask=concat_mask, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds)
    return control

+
+def load_controlnet_qwen_fun(sd, model_options={}):
+    load_device = comfy.model_management.get_torch_device()
+    weight_dtype = comfy.utils.weight_dtype(sd)
+    unet_dtype = model_options.get("dtype", weight_dtype)
+    manual_cast_dtype = comfy.model_management.unet_manual_cast(unet_dtype, load_device)
+
+    operations = model_options.get("custom_operations", None)
+    if operations is None:
+        operations = comfy.ops.pick_operations(unet_dtype, manual_cast_dtype, disable_fast_fp8=True)
+
+    in_features = sd["control_img_in.weight"].shape[1]
+    inner_dim = sd["control_img_in.weight"].shape[0]
+
+    block_weight = sd["control_blocks.0.attn.to_q.weight"]
+    attention_head_dim = sd["control_blocks.0.attn.norm_q.weight"].shape[0]
+    num_attention_heads = max(1, block_weight.shape[0] // max(1, attention_head_dim))
+
+    model = comfy.ldm.qwen_image.controlnet.QwenImageFunControlNetModel(
+        control_in_features=in_features,
+        inner_dim=inner_dim,
+        num_attention_heads=num_attention_heads,
+        attention_head_dim=attention_head_dim,
+        num_control_blocks=5,
+        main_model_double=60,
+        injection_layers=(0, 12, 24, 36, 48),
+        operations=operations,
+        device=comfy.model_management.unet_offload_device(),
+        dtype=unet_dtype,
+    )
+    model = controlnet_load_state_dict(model, sd)
+
+    latent_format = comfy.latent_formats.Wan21()
+    control = QwenFunControlNet(
+        model,
+        compression_ratio=1,
+        latent_format=latent_format,
+        # Fun checkpoints already expect their own 33-channel context handling.
+        # Enabling generic concat_mask injects an extra mask channel at apply-time
+        # and breaks the intended fallback packing path.
+        concat_mask=False,
+        load_device=load_device,
+        manual_cast_dtype=manual_cast_dtype,
+        extra_conds=[],
+    )
+    return control
+
 def convert_mistoline(sd):
    return comfy.utils.state_dict_prefix_replace(sd, {"single_controlnet_blocks.": "controlnet_single_blocks."})

@ -682,6 +754,8 @@ def load_controlnet_state_dict(state_dict, model=None, model_options={}):
            return load_controlnet_qwen_instantx(controlnet_data, model_options=model_options)
        elif "controlnet_x_embedder.weight" in controlnet_data:
            return load_controlnet_flux_instantx(controlnet_data, model_options=model_options)
+    elif "control_blocks.0.after_proj.weight" in controlnet_data and "control_img_in.weight" in controlnet_data:
+        return load_controlnet_qwen_fun(controlnet_data, model_options=model_options)

    elif "controlnet_blocks.0.linear.weight" in controlnet_data: #mistoline flux
        return load_controlnet_flux_xlabs_mistoline(convert_mistoline(controlnet_data), mistoline=True, model_options=model_options)
--- a/comfy/ldm/chroma/layers.py
+++ b/comfy/ldm/chroma/layers.py
@ -3,7 +3,6 @@ from torch import Tensor, nn

 from comfy.ldm.flux.layers import (
    MLPEmbedder,
-    RMSNorm,
    ModulationOut,
 )

@ -29,7 +28,7 @@ class Approximator(nn.Module):
        super().__init__()
        self.in_proj = operations.Linear(in_dim, hidden_dim, bias=True, dtype=dtype, device=device)
        self.layers = nn.ModuleList([MLPEmbedder(hidden_dim, hidden_dim, dtype=dtype, device=device, operations=operations) for x in range( n_layers)])
-        self.norms = nn.ModuleList([RMSNorm(hidden_dim, dtype=dtype, device=device, operations=operations) for x in range( n_layers)])
+        self.norms = nn.ModuleList([operations.RMSNorm(hidden_dim, dtype=dtype, device=device) for x in range( n_layers)])
        self.out_proj = operations.Linear(hidden_dim, out_dim, dtype=dtype, device=device)

    @property
--- a/comfy/ldm/chroma_radiance/layers.py
+++ b/comfy/ldm/chroma_radiance/layers.py
@ -4,8 +4,6 @@ from functools import lru_cache
 import torch
 from torch import nn

-from comfy.ldm.flux.layers import RMSNorm
-

 class NerfEmbedder(nn.Module):
    """
@ -145,7 +143,7 @@ class NerfGLUBlock(nn.Module):
        # We now need to generate parameters for 3 matrices.
        total_params = 3 * hidden_size_x**2 * mlp_ratio
        self.param_generator = operations.Linear(hidden_size_s, total_params, dtype=dtype, device=device)
-        self.norm = RMSNorm(hidden_size_x, dtype=dtype, device=device, operations=operations)
+        self.norm = operations.RMSNorm(hidden_size_x, dtype=dtype, device=device)
        self.mlp_ratio = mlp_ratio


@ -178,7 +176,7 @@ class NerfGLUBlock(nn.Module):
 class NerfFinalLayer(nn.Module):
    def __init__(self, hidden_size, out_channels, dtype=None, device=None, operations=None):
        super().__init__()
-        self.norm = RMSNorm(hidden_size, dtype=dtype, device=device, operations=operations)
+        self.norm = operations.RMSNorm(hidden_size, dtype=dtype, device=device)
        self.linear = operations.Linear(hidden_size, out_channels, dtype=dtype, device=device)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
@ -190,7 +188,7 @@ class NerfFinalLayer(nn.Module):
 class NerfFinalLayerConv(nn.Module):
    def __init__(self, hidden_size: int, out_channels: int, dtype=None, device=None, operations=None):
        super().__init__()
-        self.norm = RMSNorm(hidden_size, dtype=dtype, device=device, operations=operations)
+        self.norm = operations.RMSNorm(hidden_size, dtype=dtype, device=device)
        self.conv = operations.Conv2d(
            in_channels=hidden_size,
            out_channels=out_channels,
--- a/comfy/ldm/flux/layers.py
+++ b/comfy/ldm/flux/layers.py
@ -5,9 +5,9 @@ import torch
 from torch import Tensor, nn

 from .math import attention, rope
-import comfy.ops
-import comfy.ldm.common_dit

+# Fix import for some custom nodes, TODO: delete eventually.
+RMSNorm = None

 class EmbedND(nn.Module):
    def __init__(self, dim: int, theta: int, axes_dim: list):
@ -87,20 +87,12 @@ def build_mlp(hidden_size, mlp_hidden_dim, mlp_silu_act=False, yak_mlp=False, dt
            operations.Linear(mlp_hidden_dim, hidden_size, bias=True, dtype=dtype, device=device),
        )

-class RMSNorm(torch.nn.Module):
-    def __init__(self, dim: int, dtype=None, device=None, operations=None):
-        super().__init__()
-        self.scale = nn.Parameter(torch.empty((dim), dtype=dtype, device=device))
-
-    def forward(self, x: Tensor):
-        return comfy.ldm.common_dit.rms_norm(x, self.scale, 1e-6)
-

 class QKNorm(torch.nn.Module):
    def __init__(self, dim: int, dtype=None, device=None, operations=None):
        super().__init__()
-        self.query_norm = RMSNorm(dim, dtype=dtype, device=device, operations=operations)
-        self.key_norm = RMSNorm(dim, dtype=dtype, device=device, operations=operations)
+        self.query_norm = operations.RMSNorm(dim, dtype=dtype, device=device)
+        self.key_norm = operations.RMSNorm(dim, dtype=dtype, device=device)

    def forward(self, q: Tensor, k: Tensor, v: Tensor) -> tuple:
        q = self.query_norm(q)
@ -169,7 +161,7 @@ class SiLUActivation(nn.Module):


 class DoubleStreamBlock(nn.Module):
-    def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False, flipped_img_txt=False, modulation=True, mlp_silu_act=False, proj_bias=True, yak_mlp=False, dtype=None, device=None, operations=None):
+    def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False, modulation=True, mlp_silu_act=False, proj_bias=True, yak_mlp=False, dtype=None, device=None, operations=None):
        super().__init__()

        mlp_hidden_dim = int(hidden_size * mlp_ratio)
@ -197,8 +189,6 @@ class DoubleStreamBlock(nn.Module):

        self.txt_mlp = build_mlp(hidden_size, mlp_hidden_dim, mlp_silu_act=mlp_silu_act, yak_mlp=yak_mlp, dtype=dtype, device=device, operations=operations)

-        self.flipped_img_txt = flipped_img_txt
-
    def forward(self, img: Tensor, txt: Tensor, vec: Tensor, pe: Tensor, attn_mask=None, modulation_dims_img=None, modulation_dims_txt=None, transformer_options={}):
        if self.modulation:
            img_mod1, img_mod2 = self.img_mod(vec)
@ -224,32 +214,17 @@ class DoubleStreamBlock(nn.Module):
        del txt_qkv
        txt_q, txt_k = self.txt_attn.norm(txt_q, txt_k, txt_v)

-        if self.flipped_img_txt:
-            q = torch.cat((img_q, txt_q), dim=2)
-            del img_q, txt_q
-            k = torch.cat((img_k, txt_k), dim=2)
-            del img_k, txt_k
-            v = torch.cat((img_v, txt_v), dim=2)
-            del img_v, txt_v
-            # run actual attention
-            attn = attention(q, k, v,
-                             pe=pe, mask=attn_mask, transformer_options=transformer_options)
-            del q, k, v
+        q = torch.cat((txt_q, img_q), dim=2)
+        del txt_q, img_q
+        k = torch.cat((txt_k, img_k), dim=2)
+        del txt_k, img_k
+        v = torch.cat((txt_v, img_v), dim=2)
+        del txt_v, img_v
+        # run actual attention
+        attn = attention(q, k, v, pe=pe, mask=attn_mask, transformer_options=transformer_options)
+        del q, k, v

-            img_attn, txt_attn = attn[:, : img.shape[1]], attn[:, img.shape[1]:]
-        else:
-            q = torch.cat((txt_q, img_q), dim=2)
-            del txt_q, img_q
-            k = torch.cat((txt_k, img_k), dim=2)
-            del txt_k, img_k
-            v = torch.cat((txt_v, img_v), dim=2)
-            del txt_v, img_v
-            # run actual attention
-            attn = attention(q, k, v,
-                             pe=pe, mask=attn_mask, transformer_options=transformer_options)
-            del q, k, v
-
-            txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1]:]
+        txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1]:]

        # calculate the img bloks
        img += apply_mod(self.img_attn.proj(img_attn), img_mod1.gate, None, modulation_dims_img)
--- a/comfy/ldm/flux/model.py
+++ b/comfy/ldm/flux/model.py
@ -16,7 +16,6 @@ from .layers import (
    SingleStreamBlock,
    timestep_embedding,
    Modulation,
-    RMSNorm
 )

@dataclass
@ -81,7 +80,7 @@ class Flux(nn.Module):
        self.txt_in = operations.Linear(params.context_in_dim, self.hidden_size, bias=params.ops_bias, dtype=dtype, device=device)

        if params.txt_norm:
-            self.txt_norm = RMSNorm(params.context_in_dim, dtype=dtype, device=device, operations=operations)
+            self.txt_norm = operations.RMSNorm(params.context_in_dim, dtype=dtype, device=device)
        else:
            self.txt_norm = None

--- a/comfy/ldm/hunyuan_video/model.py
+++ b/comfy/ldm/hunyuan_video/model.py
@ -241,7 +241,6 @@ class HunyuanVideo(nn.Module):
                    self.num_heads,
                    mlp_ratio=params.mlp_ratio,
                    qkv_bias=params.qkv_bias,
-                    flipped_img_txt=True,
                    dtype=dtype, device=device, operations=operations
                )
                for _ in range(params.depth)
@ -378,14 +377,14 @@ class HunyuanVideo(nn.Module):
            extra_txt_ids = torch.zeros((txt_ids.shape[0], txt_vision_states.shape[1], txt_ids.shape[-1]), device=txt_ids.device, dtype=txt_ids.dtype)
            txt_ids = torch.cat((txt_ids, extra_txt_ids), dim=1)

-        ids = torch.cat((img_ids, txt_ids), dim=1)
+        ids = torch.cat((txt_ids, img_ids), dim=1)
        pe = self.pe_embedder(ids)

        img_len = img.shape[1]
        if txt_mask is not None:
            attn_mask_len = img_len + txt.shape[1]
            attn_mask = torch.zeros((1, 1, attn_mask_len), dtype=img.dtype, device=img.device)
-            attn_mask[:, 0, img_len:] = txt_mask
+            attn_mask[:, 0, :txt.shape[1]] = txt_mask
        else:
            attn_mask = None

@ -413,7 +412,7 @@ class HunyuanVideo(nn.Module):
                    if add is not None:
                        img += add

-        img = torch.cat((img, txt), 1)
+        img = torch.cat((txt, img), 1)

        transformer_options["total_blocks"] = len(self.single_blocks)
        transformer_options["block_type"] = "single"
@ -435,9 +434,9 @@ class HunyuanVideo(nn.Module):
                if i < len(control_o):
                    add = control_o[i]
                    if add is not None:
-                        img[:, : img_len] += add
+                        img[:, txt.shape[1]: img_len + txt.shape[1]] += add

-        img = img[:, : img_len]
+        img = img[:, txt.shape[1]: img_len + txt.shape[1]]
        if ref_latent is not None:
            img = img[:, ref_latent.shape[1]:]

--- a/comfy/ldm/qwen_image/controlnet.py
+++ b/comfy/ldm/qwen_image/controlnet.py
@ -2,6 +2,196 @@ import torch
 import math

 from .model import QwenImageTransformer2DModel
+from .model import QwenImageTransformerBlock
+
+
+class QwenImageFunControlBlock(QwenImageTransformerBlock):
+    def __init__(self, dim, num_attention_heads, attention_head_dim, has_before_proj=False, dtype=None, device=None, operations=None):
+        super().__init__(
+            dim=dim,
+            num_attention_heads=num_attention_heads,
+            attention_head_dim=attention_head_dim,
+            dtype=dtype,
+            device=device,
+            operations=operations,
+        )
+        self.has_before_proj = has_before_proj
+        if has_before_proj:
+            self.before_proj = operations.Linear(dim, dim, device=device, dtype=dtype)
+        self.after_proj = operations.Linear(dim, dim, device=device, dtype=dtype)
+
+
+class QwenImageFunControlNetModel(torch.nn.Module):
+    def __init__(
+        self,
+        control_in_features=132,
+        inner_dim=3072,
+        num_attention_heads=24,
+        attention_head_dim=128,
+        num_control_blocks=5,
+        main_model_double=60,
+        injection_layers=(0, 12, 24, 36, 48),
+        dtype=None,
+        device=None,
+        operations=None,
+    ):
+        super().__init__()
+        self.dtype = dtype
+        self.main_model_double = main_model_double
+        self.injection_layers = tuple(injection_layers)
+        # Keep base hint scaling at 1.0 so user-facing strength behaves similarly
+        # to the reference Gen2/VideoX implementation around strength=1.
+        self.hint_scale = 1.0
+        self.control_img_in = operations.Linear(control_in_features, inner_dim, device=device, dtype=dtype)
+
+        self.control_blocks = torch.nn.ModuleList([])
+        for i in range(num_control_blocks):
+            self.control_blocks.append(
+                QwenImageFunControlBlock(
+                    dim=inner_dim,
+                    num_attention_heads=num_attention_heads,
+                    attention_head_dim=attention_head_dim,
+                    has_before_proj=(i == 0),
+                    dtype=dtype,
+                    device=device,
+                    operations=operations,
+                )
+            )
+
+    def _process_hint_tokens(self, hint):
+        if hint is None:
+            return None
+        if hint.ndim == 4:
+            hint = hint.unsqueeze(2)
+
+        # Fun checkpoints are trained with 33 latent channels before 2x2 packing:
+        # [control_latent(16), mask(1), inpaint_latent(16)] -> 132 features.
+        # Default behavior (no inpaint input in stock Apply ControlNet) should use
+        # zeros for mask/inpaint branches, matching VideoX fallback semantics.
+        expected_c = self.control_img_in.weight.shape[1] // 4
+        if hint.shape[1] == 16 and expected_c == 33:
+            zeros_mask = torch.zeros_like(hint[:, :1])
+            zeros_inpaint = torch.zeros_like(hint)
+            hint = torch.cat([hint, zeros_mask, zeros_inpaint], dim=1)
+
+        bs, c, t, h, w = hint.shape
+        hidden_states = torch.nn.functional.pad(hint, (0, w % 2, 0, h % 2))
+        orig_shape = hidden_states.shape
+        hidden_states = hidden_states.view(
+            orig_shape[0],
+            orig_shape[1],
+            orig_shape[-3],
+            orig_shape[-2] // 2,
+            2,
+            orig_shape[-1] // 2,
+            2,
+        )
+        hidden_states = hidden_states.permute(0, 2, 3, 5, 1, 4, 6)
+        hidden_states = hidden_states.reshape(
+            bs,
+            t * ((h + 1) // 2) * ((w + 1) // 2),
+            c * 4,
+        )
+
+        expected_in = self.control_img_in.weight.shape[1]
+        cur_in = hidden_states.shape[-1]
+        if cur_in < expected_in:
+            pad = torch.zeros(
+                (hidden_states.shape[0], hidden_states.shape[1], expected_in - cur_in),
+                device=hidden_states.device,
+                dtype=hidden_states.dtype,
+            )
+            hidden_states = torch.cat([hidden_states, pad], dim=-1)
+        elif cur_in > expected_in:
+            hidden_states = hidden_states[:, :, :expected_in]
+
+        return hidden_states
+
+    def forward(
+        self,
+        x,
+        timesteps,
+        context,
+        attention_mask=None,
+        guidance: torch.Tensor = None,
+        hint=None,
+        transformer_options={},
+        base_model=None,
+        **kwargs,
+    ):
+        if base_model is None:
+            raise RuntimeError("Qwen Fun ControlNet requires a QwenImage base model at runtime.")
+
+        encoder_hidden_states_mask = attention_mask
+        # Keep attention mask disabled inside Fun control blocks to mirror
+        # VideoX behavior (they rely on seq lengths for RoPE, not masked attention).
+        encoder_hidden_states_mask = None
+
+        hidden_states, img_ids, _ = base_model.process_img(x)
+        hint_tokens = self._process_hint_tokens(hint)
+        if hint_tokens is None:
+            raise RuntimeError("Qwen Fun ControlNet requires a control hint image.")
+
+        if hint_tokens.shape[1] != hidden_states.shape[1]:
+            max_tokens = min(hint_tokens.shape[1], hidden_states.shape[1])
+            hint_tokens = hint_tokens[:, :max_tokens]
+            hidden_states = hidden_states[:, :max_tokens]
+            img_ids = img_ids[:, :max_tokens]
+
+        txt_start = round(
+            max(
+                ((x.shape[-1] + (base_model.patch_size // 2)) // base_model.patch_size) // 2,
+                ((x.shape[-2] + (base_model.patch_size // 2)) // base_model.patch_size) // 2,
+            )
+        )
+        txt_ids = torch.arange(txt_start, txt_start + context.shape[1], device=x.device).reshape(1, -1, 1).repeat(x.shape[0], 1, 3)
+        ids = torch.cat((txt_ids, img_ids), dim=1)
+        image_rotary_emb = base_model.pe_embedder(ids).to(x.dtype).contiguous()
+
+        hidden_states = base_model.img_in(hidden_states)
+        encoder_hidden_states = base_model.txt_norm(context)
+        encoder_hidden_states = base_model.txt_in(encoder_hidden_states)
+
+        if guidance is not None:
+            guidance = guidance * 1000
+
+        temb = (
+            base_model.time_text_embed(timesteps, hidden_states)
+            if guidance is None
+            else base_model.time_text_embed(timesteps, guidance, hidden_states)
+        )
+
+        c = self.control_img_in(hint_tokens)
+
+        for i, block in enumerate(self.control_blocks):
+            if i == 0:
+                c_in = block.before_proj(c) + hidden_states
+                all_c = []
+            else:
+                all_c = list(torch.unbind(c, dim=0))
+                c_in = all_c.pop(-1)
+
+            encoder_hidden_states, c_out = block(
+                hidden_states=c_in,
+                encoder_hidden_states=encoder_hidden_states,
+                encoder_hidden_states_mask=encoder_hidden_states_mask,
+                temb=temb,
+                image_rotary_emb=image_rotary_emb,
+                transformer_options=transformer_options,
+            )
+
+            c_skip = block.after_proj(c_out) * self.hint_scale
+            all_c += [c_skip, c_out]
+            c = torch.stack(all_c, dim=0)
+
+        hints = torch.unbind(c, dim=0)[:-1]
+
+        controlnet_block_samples = [None] * self.main_model_double
+        for local_idx, base_idx in enumerate(self.injection_layers):
+            if local_idx < len(hints) and base_idx < len(controlnet_block_samples):
+                controlnet_block_samples[base_idx] = hints[local_idx]
+
+        return {"input": controlnet_block_samples}


 class QwenImageControlNetModel(QwenImageTransformer2DModel):
--- a/comfy/lora_convert.py
+++ b/comfy/lora_convert.py
@ -5,7 +5,7 @@ import comfy.utils
 def convert_lora_bfl_control(sd): #BFL loras for Flux
    sd_out = {}
    for k in sd:
-        k_to = "diffusion_model.{}".format(k.replace(".lora_B.bias", ".diff_b").replace("_norm.scale", "_norm.scale.set_weight"))
+        k_to = "diffusion_model.{}".format(k.replace(".lora_B.bias", ".diff_b").replace("_norm.scale", "_norm.set_weight"))
        sd_out[k_to] = sd[k]

    sd_out["diffusion_model.img_in.reshape_weight"] = torch.tensor([sd["img_in.lora_B.weight"].shape[0], sd["img_in.lora_A.weight"].shape[1]])
--- a/comfy/model_detection.py
+++ b/comfy/model_detection.py
@ -19,6 +19,12 @@ def count_blocks(state_dict_keys, prefix_string):
        count += 1
    return count

+def any_suffix_in(keys, prefix, main, suffix_list=[]):
+    for x in suffix_list:
+        if "{}{}{}".format(prefix, main, x) in keys:
+            return True
+    return False
+
 def calculate_transformer_depth(prefix, state_dict_keys, state_dict):
    context_dim = None
    use_linear_in_transformer = False
@ -186,7 +192,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
            dit_config["meanflow_sum"] = False
        return dit_config

-    if '{}double_blocks.0.img_attn.norm.key_norm.scale'.format(key_prefix) in state_dict_keys and ('{}img_in.weight'.format(key_prefix) in state_dict_keys or f"{key_prefix}distilled_guidance_layer.norms.0.scale" in state_dict_keys): #Flux, Chroma or Chroma Radiance (has no img_in.weight)
+    if any_suffix_in(state_dict_keys, key_prefix, 'double_blocks.0.img_attn.norm.key_norm.', ["weight", "scale"]) and ('{}img_in.weight'.format(key_prefix) in state_dict_keys or any_suffix_in(state_dict_keys, key_prefix, 'distilled_guidance_layer.norms.0.', ["weight", "scale"])): #Flux, Chroma or Chroma Radiance (has no img_in.weight)
        dit_config = {}
        if '{}double_stream_modulation_img.lin.weight'.format(key_prefix) in state_dict_keys:
            dit_config["image_model"] = "flux2"
@ -241,7 +247,8 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):

        dit_config["depth"] = count_blocks(state_dict_keys, '{}double_blocks.'.format(key_prefix) + '{}.')
        dit_config["depth_single_blocks"] = count_blocks(state_dict_keys, '{}single_blocks.'.format(key_prefix) + '{}.')
-        if '{}distilled_guidance_layer.0.norms.0.scale'.format(key_prefix) in state_dict_keys or '{}distilled_guidance_layer.norms.0.scale'.format(key_prefix) in state_dict_keys: #Chroma
+
+        if any_suffix_in(state_dict_keys, key_prefix, 'distilled_guidance_layer.0.norms.0.', ["weight", "scale"]) or any_suffix_in(state_dict_keys, key_prefix, 'distilled_guidance_layer.norms.0.', ["weight", "scale"]): #Chroma
            dit_config["image_model"] = "chroma"
            dit_config["in_channels"] = 64
            dit_config["out_channels"] = 64
@ -249,7 +256,8 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
            dit_config["out_dim"] = 3072
            dit_config["hidden_dim"] = 5120
            dit_config["n_layers"] = 5
-            if f"{key_prefix}nerf_blocks.0.norm.scale" in state_dict_keys: #Chroma Radiance
+
+            if any_suffix_in(state_dict_keys, key_prefix, 'nerf_blocks.0.norm.', ["weight", "scale"]): #Chroma Radiance
                dit_config["image_model"] = "chroma_radiance"
                dit_config["in_channels"] = 3
                dit_config["out_channels"] = 3
@ -259,7 +267,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
                dit_config["nerf_depth"] = 4
                dit_config["nerf_max_freqs"] = 8
                dit_config["nerf_tile_size"] = 512
-                dit_config["nerf_final_head_type"] = "conv" if f"{key_prefix}nerf_final_layer_conv.norm.scale" in state_dict_keys else "linear"
+                dit_config["nerf_final_head_type"] = "conv" if any_suffix_in(state_dict_keys, key_prefix, 'nerf_final_layer_conv.norm.', ["weight", "scale"]) else "linear"
                dit_config["nerf_embedder_dtype"] = torch.float32
                if "{}__x0__".format(key_prefix) in state_dict_keys: # x0 pred
                    dit_config["use_x0"] = True
@ -268,7 +276,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
        else:
            dit_config["guidance_embed"] = "{}guidance_in.in_layer.weight".format(key_prefix) in state_dict_keys
            dit_config["yak_mlp"] = '{}double_blocks.0.img_mlp.gate_proj.weight'.format(key_prefix) in state_dict_keys
-            dit_config["txt_norm"] = "{}txt_norm.scale".format(key_prefix) in state_dict_keys
+            dit_config["txt_norm"] = any_suffix_in(state_dict_keys, key_prefix, 'txt_norm.', ["weight", "scale"])
            if dit_config["yak_mlp"] and dit_config["txt_norm"]:  # Ovis model
                dit_config["txt_ids_dims"] = [1, 2]

--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@ -1561,6 +1561,8 @@ class ModelPatcherDynamic(ModelPatcher):
                        allocated_size += weight_size
                    vbar.set_watermark_limit(allocated_size)

+                move_weight_functions(m, device_to)
+
            logging.info(f"Model {self.model.__class__.__name__} prepared for dynamic VRAM loading. {allocated_size // (1024 ** 2)}MB Staged. {num_patches} patches attached.")

            self.model.device = device_to
@ -1601,6 +1603,8 @@ class ModelPatcherDynamic(ModelPatcher):
        if unpatch_weights:
            self.partially_unload_ram(1e32)
            self.partially_unload(None, 1e32)
+            for m in self.model.modules():
+                move_weight_functions(m, device_to)

    def partially_load(self, device_to, extra_memory=0, force_patch_weights=False):
        assert not force_patch_weights #See above
--- a/comfy/sd1_clip.py
+++ b/comfy/sd1_clip.py
@ -171,8 +171,9 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):

    def process_tokens(self, tokens, device):
        end_token = self.special_tokens.get("end", None)
+        pad_token = self.special_tokens.get("pad", -1)
        if end_token is None:
-            cmp_token = self.special_tokens.get("pad", -1)
+            cmp_token = pad_token
        else:
            cmp_token = end_token

@ -186,15 +187,21 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
            other_embeds = []
            eos = False
            index = 0
+            left_pad = False
            for y in x:
                if isinstance(y, numbers.Integral):
-                    if eos:
+                    token = int(y)
+                    if index == 0 and token == pad_token:
+                        left_pad = True
+
+                    if eos or (left_pad and token == pad_token):
                        attention_mask.append(0)
                    else:
                        attention_mask.append(1)
-                    token = int(y)
+                        left_pad = False
+
                    tokens_temp += [token]
-                    if not eos and token == cmp_token:
+                    if not eos and token == cmp_token and not left_pad:
                        if end_token is None:
                            attention_mask[-1] = 0
                        eos = True
--- a/comfy/supported_models.py
+++ b/comfy/supported_models.py
@ -710,6 +710,15 @@ class Flux(supported_models_base.BASE):

    supported_inference_dtypes = [torch.bfloat16, torch.float16, torch.float32]

+    def process_unet_state_dict(self, state_dict):
+        out_sd = {}
+        for k in list(state_dict.keys()):
+            key_out = k
+            if key_out.endswith("_norm.scale"):
+                key_out = "{}.weight".format(key_out[:-len(".scale")])
+            out_sd[key_out] = state_dict[k]
+        return out_sd
+
    vae_key_prefix = ["vae."]
    text_encoder_key_prefix = ["text_encoders."]

@ -898,11 +907,13 @@ class HunyuanVideo(supported_models_base.BASE):
            key_out = key_out.replace("txt_in.c_embedder.linear_1.", "txt_in.c_embedder.in_layer.").replace("txt_in.c_embedder.linear_2.", "txt_in.c_embedder.out_layer.")
            key_out = key_out.replace("_mod.linear.", "_mod.lin.").replace("_attn_qkv.", "_attn.qkv.")
            key_out = key_out.replace("mlp.fc1.", "mlp.0.").replace("mlp.fc2.", "mlp.2.")
-            key_out = key_out.replace("_attn_q_norm.weight", "_attn.norm.query_norm.scale").replace("_attn_k_norm.weight", "_attn.norm.key_norm.scale")
-            key_out = key_out.replace(".q_norm.weight", ".norm.query_norm.scale").replace(".k_norm.weight", ".norm.key_norm.scale")
+            key_out = key_out.replace("_attn_q_norm.weight", "_attn.norm.query_norm.weight").replace("_attn_k_norm.weight", "_attn.norm.key_norm.weight")
+            key_out = key_out.replace(".q_norm.weight", ".norm.query_norm.weight").replace(".k_norm.weight", ".norm.key_norm.weight")
            key_out = key_out.replace("_attn_proj.", "_attn.proj.")
            key_out = key_out.replace(".modulation.linear.", ".modulation.lin.")
            key_out = key_out.replace("_in.mlp.2.", "_in.out_layer.").replace("_in.mlp.0.", "_in.in_layer.")
+            if key_out.endswith(".scale"):
+                key_out = "{}.weight".format(key_out[:-len(".scale")])
            out_sd[key_out] = state_dict[k]
        return out_sd

@ -1264,6 +1275,15 @@ class Hunyuan3Dv2(supported_models_base.BASE):

    latent_format = latent_formats.Hunyuan3Dv2

+    def process_unet_state_dict(self, state_dict):
+        out_sd = {}
+        for k in list(state_dict.keys()):
+            key_out = k
+            if key_out.endswith(".scale"):
+                key_out = "{}.weight".format(key_out[:-len(".scale")])
+            out_sd[key_out] = state_dict[k]
+        return out_sd
+
    def process_unet_state_dict_for_saving(self, state_dict):
        replace_prefix = {"": "model."}
        return utils.state_dict_prefix_replace(state_dict, replace_prefix)
@ -1341,6 +1361,14 @@ class Chroma(supported_models_base.BASE):

    supported_inference_dtypes = [torch.bfloat16, torch.float16, torch.float32]

+    def process_unet_state_dict(self, state_dict):
+        out_sd = {}
+        for k in list(state_dict.keys()):
+            key_out = k
+            if key_out.endswith(".scale"):
+                key_out = "{}.weight".format(key_out[:-len(".scale")])
+            out_sd[key_out] = state_dict[k]
+        return out_sd

    def get_model(self, state_dict, prefix="", device=None):
        out = model_base.Chroma(self, device=device)
--- a/comfy/text_encoders/ace15.py
+++ b/comfy/text_encoders/ace15.py
@ -10,7 +10,6 @@ import comfy.utils
 def sample_manual_loop_no_classes(
    model,
    ids=None,
-    paddings=[],
    execution_dtype=None,
    cfg_scale: float = 2.0,
    temperature: float = 0.85,
@ -36,9 +35,6 @@ def sample_manual_loop_no_classes(

    embeds, attention_mask, num_tokens, embeds_info = model.process_tokens(ids, device)
    embeds_batch = embeds.shape[0]
-    for i, t in enumerate(paddings):
-        attention_mask[i, :t] = 0
-        attention_mask[i, t:] = 1

    output_audio_codes = []
    past_key_values = []
@ -135,13 +131,11 @@ def generate_audio_codes(model, positive, negative, min_tokens=1, max_tokens=102
            pos_pad = (len(negative) - len(positive))
            positive = [model.special_tokens["pad"]] * pos_pad + positive

-        paddings = [pos_pad, neg_pad]
        ids = [positive, negative]
    else:
-        paddings = []
        ids = [positive]

-    return sample_manual_loop_no_classes(model, ids, paddings, cfg_scale=cfg_scale, temperature=temperature, top_p=top_p, top_k=top_k, min_p=min_p, seed=seed, min_tokens=min_tokens, max_new_tokens=max_tokens)
+    return sample_manual_loop_no_classes(model, ids, cfg_scale=cfg_scale, temperature=temperature, top_p=top_p, top_k=top_k, min_p=min_p, seed=seed, min_tokens=min_tokens, max_new_tokens=max_tokens)


 class ACE15Tokenizer(sd1_clip.SD1Tokenizer):
--- a/comfy/text_encoders/lt.py
+++ b/comfy/text_encoders/lt.py
@ -25,7 +25,7 @@ def ltxv_te(*args, **kwargs):
 class Gemma3_12BTokenizer(sd1_clip.SDTokenizer):
    def __init__(self, embedding_directory=None, tokenizer_data={}):
        tokenizer = tokenizer_data.get("spiece_model", None)
-        super().__init__(tokenizer, pad_with_end=False, embedding_size=3840, embedding_key='gemma3_12b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, disable_weights=True, tokenizer_args={"add_bos": True, "add_eos": False}, tokenizer_data=tokenizer_data)
+        super().__init__(tokenizer, pad_with_end=False, embedding_size=3840, embedding_key='gemma3_12b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=512, pad_left=True, disable_weights=True, tokenizer_args={"add_bos": True, "add_eos": False}, tokenizer_data=tokenizer_data)

    def state_dict(self):
        return {"spiece_model": self.tokenizer.serialize_model()}
@ -97,6 +97,7 @@ class LTXAVTEModel(torch.nn.Module):
        token_weight_pairs = token_weight_pairs["gemma3_12b"]

        out, pooled, extra = self.gemma3_12b.encode_token_weights(token_weight_pairs)
+        out = out[:, :, -torch.sum(extra["attention_mask"]).item():]
        out_device = out.device
        if comfy.model_management.should_use_bf16(self.execution_device):
            out = out.to(device=self.execution_device, dtype=torch.bfloat16)
@ -138,6 +139,7 @@ class LTXAVTEModel(torch.nn.Module):

        token_weight_pairs = token_weight_pairs.get("gemma3_12b", [])
        num_tokens = sum(map(lambda a: len(a), token_weight_pairs))
+        num_tokens = max(num_tokens, 64)
        return num_tokens * constant * 1024 * 1024

 def ltxav_te(dtype_llama=None, llama_quantization_metadata=None):
--- a/comfy/utils.py
+++ b/comfy/utils.py
@ -675,10 +675,10 @@ def flux_to_diffusers(mmdit_config, output_prefix=""):
                        "ff_context.linear_in.bias": "txt_mlp.0.bias",
                        "ff_context.linear_out.weight": "txt_mlp.2.weight",
                        "ff_context.linear_out.bias": "txt_mlp.2.bias",
-                        "attn.norm_q.weight": "img_attn.norm.query_norm.scale",
-                        "attn.norm_k.weight": "img_attn.norm.key_norm.scale",
-                        "attn.norm_added_q.weight": "txt_attn.norm.query_norm.scale",
-                        "attn.norm_added_k.weight": "txt_attn.norm.key_norm.scale",
+                        "attn.norm_q.weight": "img_attn.norm.query_norm.weight",
+                        "attn.norm_k.weight": "img_attn.norm.key_norm.weight",
+                        "attn.norm_added_q.weight": "txt_attn.norm.query_norm.weight",
+                        "attn.norm_added_k.weight": "txt_attn.norm.key_norm.weight",
                    }

        for k in block_map:
@ -701,8 +701,8 @@ def flux_to_diffusers(mmdit_config, output_prefix=""):
                        "norm.linear.bias": "modulation.lin.bias",
                        "proj_out.weight": "linear2.weight",
                        "proj_out.bias": "linear2.bias",
-                        "attn.norm_q.weight": "norm.query_norm.scale",
-                        "attn.norm_k.weight": "norm.key_norm.scale",
+                        "attn.norm_q.weight": "norm.query_norm.weight",
+                        "attn.norm_k.weight": "norm.key_norm.weight",
                        "attn.to_qkv_mlp_proj.weight": "linear1.weight", # Flux 2
                        "attn.to_out.weight": "linear2.weight", # Flux 2
                    }
--- a/comfy_api/feature_flags.py
+++ b/comfy_api/feature_flags.py
@ -14,7 +14,6 @@ SERVER_FEATURE_FLAGS: dict[str, Any] = {
    "supports_preview_metadata": True,
    "max_upload_size": args.max_upload_size * 1024 * 1024, # Convert MB to bytes
    "extension": {"manager": {"supports_v4": True}},
-    "node_replacements": True,
 }


--- a/comfy_api/latest/init.py
+++ b/comfy_api/latest/init.py
@ -10,7 +10,6 @@ from ._input_impl import VideoFromFile, VideoFromComponents
 from ._util import VideoCodec, VideoContainer, VideoComponents, MESH, VOXEL, File3D
 from . import _io_public as io
 from . import _ui_public as ui
-from . import _node_replace_public as node_replace
 from comfy_execution.utils import get_executing_context
 from comfy_execution.progress import get_progress_state, PreviewImageTuple
 from PIL import Image
@ -22,14 +21,6 @@ class ComfyAPI_latest(ComfyAPIBase):
    VERSION = "latest"
    STABLE = False

-    class NodeReplacement(ProxiedSingleton):
-        async def register(self, node_replace: 'node_replace.NodeReplace') -> None:
-            """Register a node replacement mapping."""
-            from server import PromptServer
-            PromptServer.instance.node_replace_manager.register(node_replace)
-
-    node_replacement: NodeReplacement
-
    class Execution(ProxiedSingleton):
        async def set_progress(
            self,
@ -140,5 +131,4 @@ __all__ = [
    "IO",
    "ui",
    "UI",
-    "node_replace",
 ]
--- a/comfy_api/latest/_node_replace.py
+++ b/comfy_api/latest/_node_replace.py
@ -1,69 +0,0 @@
-from __future__ import annotations
-
-from typing import Any, TypedDict
-
-
-class InputMapOldId(TypedDict):
-    """Map an old node input to a new node input by ID."""
-    new_id: str
-    old_id: str
-
-
-class InputMapSetValue(TypedDict):
-    """Set a specific value for a new node input."""
-    new_id: str
-    set_value: Any
-
-
-InputMap = InputMapOldId | InputMapSetValue
-"""
-Input mapping for node replacement. Type is inferred by dictionary keys:
- {"new_id": str, "old_id": str} - maps old input to new input
- {"new_id": str, "set_value": Any} - sets a specific value for new input
-"""
-
-
-class OutputMap(TypedDict):
-    """Map outputs of node replacement via indexes."""
-    new_idx: int
-    old_idx: int
-
-
-class NodeReplace:
-    """
-    Defines a possible node replacement, mapping inputs and outputs of the old node to the new node.
-
-    Also supports assigning specific values to the input widgets of the new node.
-
-    Args:
-        new_node_id: The class name of the new replacement node.
-        old_node_id: The class name of the deprecated node.
-        old_widget_ids: Ordered list of input IDs for widgets that may not have an input slot
-            connected. The workflow JSON stores widget values by their relative position index,
-            not by ID. This list maps those positional indexes to input IDs, enabling the
-            replacement system to correctly identify widget values during node migration.
-        input_mapping: List of input mappings from old node to new node.
-        output_mapping: List of output mappings from old node to new node.
-    """
-    def __init__(self,
-        new_node_id: str,
-        old_node_id: str,
-        old_widget_ids: list[str] | None=None,
-        input_mapping: list[InputMap] | None=None,
-        output_mapping: list[OutputMap] | None=None,
-    ):
-        self.new_node_id = new_node_id
-        self.old_node_id = old_node_id
-        self.old_widget_ids = old_widget_ids
-        self.input_mapping = input_mapping
-        self.output_mapping = output_mapping
-
-    def as_dict(self):
-        """Create serializable representation of the node replacement."""
-        return {
-            "new_node_id": self.new_node_id,
-            "old_node_id": self.old_node_id,
-            "old_widget_ids": self.old_widget_ids,
-            "input_mapping": list(self.input_mapping) if self.input_mapping else None,
-            "output_mapping": list(self.output_mapping) if self.output_mapping else None,
-        }
--- a/comfy_api/latest/_node_replace_public.py
+++ b/comfy_api/latest/_node_replace_public.py
@ -1 +0,0 @@
-from ._node_replace import *  # noqa: F403
--- a/comfy_api/v0_0_2/init.py
+++ b/comfy_api/v0_0_2/init.py
@ -6,7 +6,7 @@ from comfy_api.latest import (
 )
 from typing import Type, TYPE_CHECKING
 from comfy_api.internal.async_to_sync import create_sync_class
-from comfy_api.latest import io, ui, IO, UI, ComfyExtension, node_replace  #noqa: F401
+from comfy_api.latest import io, ui, IO, UI, ComfyExtension  #noqa: F401


 class ComfyAPIAdapter_v0_0_2(ComfyAPI_latest):
@ -46,5 +46,4 @@ __all__ = [
    "IO",
    "ui",
    "UI",
-    "node_replace",
 ]
--- a/comfy_api_nodes/apis/bria.py
+++ b/comfy_api_nodes/apis/bria.py
@ -45,17 +45,55 @@ class BriaEditImageRequest(BaseModel):
    )


+class BriaRemoveBackgroundRequest(BaseModel):
+    image: str = Field(...)
+    sync: bool = Field(False)
+    visual_input_content_moderation: bool = Field(
+        False, description="If true, returns 422 on input image moderation failure."
+    )
+    visual_output_content_moderation: bool = Field(
+        False, description="If true, returns 422 on visual output moderation failure."
+    )
+    seed: int = Field(...)
+
+
 class BriaStatusResponse(BaseModel):
    request_id: str = Field(...)
    status_url: str = Field(...)
    warning: str | None = Field(None)


-class BriaResult(BaseModel):
+class BriaRemoveBackgroundResult(BaseModel):
+    image_url: str = Field(...)
+
+
+class BriaRemoveBackgroundResponse(BaseModel):
+    status: str = Field(...)
+    result: BriaRemoveBackgroundResult | None = Field(None)
+
+
+class BriaImageEditResult(BaseModel):
    structured_prompt: str = Field(...)
    image_url: str = Field(...)


-class BriaResponse(BaseModel):
+class BriaImageEditResponse(BaseModel):
    status: str = Field(...)
-    result: BriaResult | None = Field(None)
+    result: BriaImageEditResult | None = Field(None)
+
+
+class BriaRemoveVideoBackgroundRequest(BaseModel):
+    video: str = Field(...)
+    background_color: str = Field(default="transparent", description="Background color for the output video.")
+    output_container_and_codec: str = Field(...)
+    preserve_audio: bool = Field(True)
+    seed: int = Field(...)
+
+
+class BriaRemoveVideoBackgroundResult(BaseModel):
+    video_url: str = Field(...)
+
+
+class BriaRemoveVideoBackgroundResponse(BaseModel):
+    status: str = Field(...)
+    result: BriaRemoveVideoBackgroundResult | None = Field(None)
--- a/comfy_api_nodes/nodes_bria.py
+++ b/comfy_api_nodes/nodes_bria.py
@ -3,7 +3,11 @@ from typing_extensions import override
 from comfy_api.latest import IO, ComfyExtension, Input
 from comfy_api_nodes.apis.bria import (
    BriaEditImageRequest,
-    BriaResponse,
+    BriaRemoveBackgroundRequest,
+    BriaRemoveBackgroundResponse,
+    BriaRemoveVideoBackgroundRequest,
+    BriaRemoveVideoBackgroundResponse,
+    BriaImageEditResponse,
    BriaStatusResponse,
    InputModerationSettings,
 )
@ -11,10 +15,12 @@ from comfy_api_nodes.util import (
    ApiEndpoint,
    convert_mask_to_image,
    download_url_to_image_tensor,
-    get_number_of_images,
+    download_url_to_video_output,
    poll_op,
    sync_op,
-    upload_images_to_comfyapi,
+    upload_image_to_comfyapi,
+    upload_video_to_comfyapi,
+    validate_video_duration,
 )


@ -73,21 +79,15 @@ class BriaImageEditNode(IO.ComfyNode):
                IO.DynamicCombo.Input(
                    "moderation",
                    options=[
+                        IO.DynamicCombo.Option("false", []),
                        IO.DynamicCombo.Option(
                            "true",
                            [
-                                IO.Boolean.Input(
-                                    "prompt_content_moderation", default=False
-                                ),
-                                IO.Boolean.Input(
-                                    "visual_input_moderation", default=False
-                                ),
-                                IO.Boolean.Input(
-                                    "visual_output_moderation", default=True
-                                ),
+                                IO.Boolean.Input("prompt_content_moderation", default=False),
+                                IO.Boolean.Input("visual_input_moderation", default=False),
+                                IO.Boolean.Input("visual_output_moderation", default=True),
                            ],
                        ),
-                        IO.DynamicCombo.Option("false", []),
                    ],
                    tooltip="Moderation settings",
                ),
@ -127,50 +127,26 @@ class BriaImageEditNode(IO.ComfyNode):
        mask: Input.Image | None = None,
    ) -> IO.NodeOutput:
        if not prompt and not structured_prompt:
-            raise ValueError(
-                "One of prompt or structured_prompt is required to be non-empty."
-            )
-        if get_number_of_images(image) != 1:
-            raise ValueError("Exactly one input image is required.")
+            raise ValueError("One of prompt or structured_prompt is required to be non-empty.")
        mask_url = None
        if mask is not None:
-            mask_url = (
-                await upload_images_to_comfyapi(
-                    cls,
-                    convert_mask_to_image(mask),
-                    max_images=1,
-                    mime_type="image/png",
-                    wait_label="Uploading mask",
-                )
-            )[0]
+            mask_url = await upload_image_to_comfyapi(cls, convert_mask_to_image(mask), wait_label="Uploading mask")
        response = await sync_op(
            cls,
            ApiEndpoint(path="proxy/bria/v2/image/edit", method="POST"),
            data=BriaEditImageRequest(
                instruction=prompt if prompt else None,
                structured_instruction=structured_prompt if structured_prompt else None,
-                images=await upload_images_to_comfyapi(
-                    cls,
-                    image,
-                    max_images=1,
-                    mime_type="image/png",
-                    wait_label="Uploading image",
-                ),
+                images=[await upload_image_to_comfyapi(cls, image, wait_label="Uploading image")],
                mask=mask_url,
                negative_prompt=negative_prompt if negative_prompt else None,
                guidance_scale=guidance_scale,
                seed=seed,
                model_version=model,
                steps_num=steps,
-                prompt_content_moderation=moderation.get(
-                    "prompt_content_moderation", False
-                ),
-                visual_input_content_moderation=moderation.get(
-                    "visual_input_moderation", False
-                ),
-                visual_output_content_moderation=moderation.get(
-                    "visual_output_moderation", False
-                ),
+                prompt_content_moderation=moderation.get("prompt_content_moderation", False),
+                visual_input_content_moderation=moderation.get("visual_input_moderation", False),
+                visual_output_content_moderation=moderation.get("visual_output_moderation", False),
            ),
            response_model=BriaStatusResponse,
        )
@ -178,7 +154,7 @@ class BriaImageEditNode(IO.ComfyNode):
            cls,
            ApiEndpoint(path=f"/proxy/bria/v2/status/{response.request_id}"),
            status_extractor=lambda r: r.status,
-            response_model=BriaResponse,
+            response_model=BriaImageEditResponse,
        )
        return IO.NodeOutput(
            await download_url_to_image_tensor(response.result.image_url),
@ -186,11 +162,167 @@ class BriaImageEditNode(IO.ComfyNode):
        )


+class BriaRemoveImageBackground(IO.ComfyNode):
+
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="BriaRemoveImageBackground",
+            display_name="Bria Remove Image Background",
+            category="api node/image/Bria",
+            description="Remove the background from an image using Bria RMBG 2.0.",
+            inputs=[
+                IO.Image.Input("image"),
+                IO.DynamicCombo.Input(
+                    "moderation",
+                    options=[
+                        IO.DynamicCombo.Option("false", []),
+                        IO.DynamicCombo.Option(
+                            "true",
+                            [
+                                IO.Boolean.Input("visual_input_moderation", default=False),
+                                IO.Boolean.Input("visual_output_moderation", default=True),
+                            ],
+                        ),
+                    ],
+                    tooltip="Moderation settings",
+                ),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    display_mode=IO.NumberDisplay.number,
+                    control_after_generate=True,
+                    tooltip="Seed controls whether the node should re-run; "
+                    "results are non-deterministic regardless of seed.",
+                ),
+            ],
+            outputs=[IO.Image.Output()],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+            price_badge=IO.PriceBadge(
+                expr="""{"type":"usd","usd":0.018}""",
+            ),
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        image: Input.Image,
+        moderation: dict,
+        seed: int,
+    ) -> IO.NodeOutput:
+        response = await sync_op(
+            cls,
+            ApiEndpoint(path="/proxy/bria/v2/image/edit/remove_background", method="POST"),
+            data=BriaRemoveBackgroundRequest(
+                image=await upload_image_to_comfyapi(cls, image, wait_label="Uploading image"),
+                sync=False,
+                visual_input_content_moderation=moderation.get("visual_input_moderation", False),
+                visual_output_content_moderation=moderation.get("visual_output_moderation", False),
+                seed=seed,
+            ),
+            response_model=BriaStatusResponse,
+        )
+        response = await poll_op(
+            cls,
+            ApiEndpoint(path=f"/proxy/bria/v2/status/{response.request_id}"),
+            status_extractor=lambda r: r.status,
+            response_model=BriaRemoveBackgroundResponse,
+        )
+        return IO.NodeOutput(await download_url_to_image_tensor(response.result.image_url))
+
+
+class BriaRemoveVideoBackground(IO.ComfyNode):
+
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="BriaRemoveVideoBackground",
+            display_name="Bria Remove Video Background",
+            category="api node/video/Bria",
+            description="Remove the background from a video using Bria. ",
+            inputs=[
+                IO.Video.Input("video"),
+                IO.Combo.Input(
+                    "background_color",
+                    options=[
+                        "Black",
+                        "White",
+                        "Gray",
+                        "Red",
+                        "Green",
+                        "Blue",
+                        "Yellow",
+                        "Cyan",
+                        "Magenta",
+                        "Orange",
+                    ],
+                    tooltip="Background color for the output video.",
+                ),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    display_mode=IO.NumberDisplay.number,
+                    control_after_generate=True,
+                    tooltip="Seed controls whether the node should re-run; "
+                    "results are non-deterministic regardless of seed.",
+                ),
+            ],
+            outputs=[IO.Video.Output()],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+            price_badge=IO.PriceBadge(
+                expr="""{"type":"usd","usd":0.14,"format":{"suffix":"/second"}}""",
+            ),
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        video: Input.Video,
+        background_color: str,
+        seed: int,
+    ) -> IO.NodeOutput:
+        validate_video_duration(video, max_duration=60.0)
+        response = await sync_op(
+            cls,
+            ApiEndpoint(path="/proxy/bria/v2/video/edit/remove_background", method="POST"),
+            data=BriaRemoveVideoBackgroundRequest(
+                video=await upload_video_to_comfyapi(cls, video),
+                background_color=background_color,
+                output_container_and_codec="mp4_h264",
+                seed=seed,
+            ),
+            response_model=BriaStatusResponse,
+        )
+        response = await poll_op(
+            cls,
+            ApiEndpoint(path=f"/proxy/bria/v2/status/{response.request_id}"),
+            status_extractor=lambda r: r.status,
+            response_model=BriaRemoveVideoBackgroundResponse,
+        )
+        return IO.NodeOutput(await download_url_to_video_output(response.result.video_url))
+
+
 class BriaExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
        return [
            BriaImageEditNode,
+            BriaRemoveImageBackground,
+            BriaRemoveVideoBackground,
        ]


--- a/comfy_api_nodes/util/conversions.py
+++ b/comfy_api_nodes/util/conversions.py
@ -57,7 +57,7 @@ def tensor_to_bytesio(
    image: torch.Tensor,
    *,
    total_pixels: int | None = 2048 * 2048,
-    mime_type: str = "image/png",
+    mime_type: str | None = "image/png",
 ) -> BytesIO:
    """Converts a torch.Tensor image to a named BytesIO object.

--- a/comfy_extras/nodes_post_processing.py
+++ b/comfy_extras/nodes_post_processing.py
@ -655,7 +655,6 @@ class BatchImagesMasksLatentsNode(io.ComfyNode):
            batched = batch_masks(values)
        return io.NodeOutput(batched)

-
 class PostProcessingExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[io.ComfyNode]]:
--- a/comfy_extras/nodes_replacements.py
+++ b/comfy_extras/nodes_replacements.py
@ -1,103 +0,0 @@
-from comfy_api.latest import ComfyExtension, io, node_replace
-from server import PromptServer
-
-def _register(nr: node_replace.NodeReplace):
-    """Helper to register replacements via PromptServer."""
-    PromptServer.instance.node_replace_manager.register(nr)
-
-async def register_replacements():
-    """Register all built-in node replacements."""
-    register_replacements_longeredge()
-    register_replacements_batchimages()
-    register_replacements_upscaleimage()
-    register_replacements_controlnet()
-    register_replacements_load3d()
-    register_replacements_preview3d()
-    register_replacements_svdimg2vid()
-    register_replacements_conditioningavg()
-
-def register_replacements_longeredge():
-    # No dynamic inputs here
-    _register(node_replace.NodeReplace(
-            new_node_id="ImageScaleToMaxDimension",
-            old_node_id="ResizeImagesByLongerEdge",
-            old_widget_ids=["longer_edge"],
-            input_mapping=[
-                {"new_id": "image", "old_id": "images"},
-                {"new_id": "largest_size", "old_id": "longer_edge"},
-                {"new_id": "upscale_method", "set_value": "lanczos"},
-            ],
-            # just to test the frontend output_mapping code, does nothing really here
-            output_mapping=[{"new_idx": 0, "old_idx": 0}],
-        ))
-
-def register_replacements_batchimages():
-    # BatchImages node uses Autogrow
-    _register(node_replace.NodeReplace(
-            new_node_id="BatchImagesNode",
-            old_node_id="ImageBatch",
-            input_mapping=[
-                {"new_id": "images.image0", "old_id": "image1"},
-                {"new_id": "images.image1", "old_id": "image2"},
-            ],
-        ))
-
-def register_replacements_upscaleimage():
-    # ResizeImageMaskNode uses DynamicCombo
-    _register(node_replace.NodeReplace(
-            new_node_id="ResizeImageMaskNode",
-            old_node_id="ImageScaleBy",
-            old_widget_ids=["upscale_method", "scale_by"],
-            input_mapping=[
-                {"new_id": "input", "old_id": "image"},
-                {"new_id": "resize_type", "set_value": "scale by multiplier"},
-                {"new_id": "resize_type.multiplier", "old_id": "scale_by"},
-                {"new_id": "scale_method", "old_id": "upscale_method"},
-            ],
-        ))
-
-def register_replacements_controlnet():
-    # T2IAdapterLoader → ControlNetLoader
-    _register(node_replace.NodeReplace(
-            new_node_id="ControlNetLoader",
-            old_node_id="T2IAdapterLoader",
-            input_mapping=[
-                {"new_id": "control_net_name", "old_id": "t2i_adapter_name"},
-            ],
-        ))
-
-def register_replacements_load3d():
-    # Load3DAnimation merged into Load3D
-    _register(node_replace.NodeReplace(
-            new_node_id="Load3D",
-            old_node_id="Load3DAnimation",
-        ))
-
-def register_replacements_preview3d():
-    # Preview3DAnimation merged into Preview3D
-    _register(node_replace.NodeReplace(
-            new_node_id="Preview3D",
-            old_node_id="Preview3DAnimation",
-        ))
-
-def register_replacements_svdimg2vid():
-    # Typo fix: SDV → SVD
-    _register(node_replace.NodeReplace(
-            new_node_id="SVD_img2vid_Conditioning",
-            old_node_id="SDV_img2vid_Conditioning",
-        ))
-
-def register_replacements_conditioningavg():
-    # Typo fix: trailing space in node name
-    _register(node_replace.NodeReplace(
-            new_node_id="ConditioningAverage",
-            old_node_id="ConditioningAverage ",
-        ))
-
-class NodeReplacementsExtension(ComfyExtension):
-    async def get_node_list(self) -> list[type[io.ComfyNode]]:
-        return []
-
-async def comfy_entrypoint() -> NodeReplacementsExtension:
-    await register_replacements()
-    return NodeReplacementsExtension()
--- a/comfy_extras/nodes_train.py
+++ b/comfy_extras/nodes_train.py
@ -1035,7 +1035,7 @@ class TrainLoraNode(io.ComfyNode):
                io.Boolean.Input(
                    "offloading",
                    default=False,
-                    tooltip="Depth level for gradient checkpointing.",
+                    tooltip="Offload the Model to RAM. Requires Bypass Mode.",
                ),
                io.Combo.Input(
                    "existing_lora",
@ -1124,6 +1124,15 @@ class TrainLoraNode(io.ComfyNode):
        lora_dtype = node_helpers.string_to_torch_dtype(lora_dtype)
        mp.set_model_compute_dtype(dtype)

+        if mp.is_dynamic():
+            if not bypass_mode:
+                logging.info("Training MP is Dynamic - forcing bypass mode. Start comfy with --highvram to force weight diff mode")
+                bypass_mode = True
+            offloading = True
+        elif offloading:
+            if not bypass_mode:
+                logging.info("Training Offload selected - forcing bypass mode. Set bypass = True to remove this message")
+
        # Prepare latents and compute counts
        latents, num_images, multi_res = _prepare_latents_and_count(
            latents, dtype, bucket_mode
--- a/nodes.py
+++ b/nodes.py
@ -2435,7 +2435,6 @@ async def init_builtin_extra_nodes():
        "nodes_lora_debug.py",
        "nodes_color.py",
        "nodes_toolkit.py",
-        "nodes_replacements.py",
    ]

    import_failed = []
--- a/server.py
+++ b/server.py
@ -40,7 +40,6 @@ from app.user_manager import UserManager
 from app.model_manager import ModelFileManager
 from app.custom_node_manager import CustomNodeManager
 from app.subgraph_manager import SubgraphManager
-from app.node_replace_manager import NodeReplaceManager
 from typing import Optional, Union
 from api_server.routes.internal.internal_routes import InternalRoutes
 from protocol import BinaryEventTypes
@ -205,7 +204,6 @@ class PromptServer():
        self.model_file_manager = ModelFileManager()
        self.custom_node_manager = CustomNodeManager()
        self.subgraph_manager = SubgraphManager()
-        self.node_replace_manager = NodeReplaceManager()
        self.internal_routes = InternalRoutes(self)
        self.supports = ["custom_nodes_from_web"]
        self.prompt_queue = execution.PromptQueue(self)
@ -889,8 +887,6 @@ class PromptServer():
                if "partial_execution_targets" in json_data:
                    partial_execution_targets = json_data["partial_execution_targets"]

-                self.node_replace_manager.apply_replacements(prompt)
-
                valid = await execution.validate_prompt(prompt_id, prompt, partial_execution_targets)
                extra_data = {}
                if "extra_data" in json_data:
@ -999,7 +995,6 @@ class PromptServer():
        self.model_file_manager.add_routes(self.routes)
        self.custom_node_manager.add_routes(self.routes, self.app, nodes.LOADED_MODULE_DIRS.items())
        self.subgraph_manager.add_routes(self.routes, nodes.LOADED_MODULE_DIRS.items())
-        self.node_replace_manager.add_routes(self.routes)
        self.app.add_subapp('/internal', self.internal_routes.get_app())

        # Prefix every route with /api for easier matching for delegation.
Author	SHA1	Message	Date
bigcat88	fe8020670b	feat(api-nodes): add Bria RMBG nodes	2026-02-14 14:13:27 +02:00
krigeta	dc9822b7df	Add working Qwen 2512 ControlNet (Fun ControlNet) support (#12359 )	2026-02-13 22:23:52 -05:00
comfyanonymous	712efb466b	Add left padding to LTXAV text encoder. (#12456 )	2026-02-13 21:56:54 -05:00
comfyanonymous	726af73867	Fix some custom nodes. (#12455 )	2026-02-13 20:21:10 -05:00
comfyanonymous	831351a29e	Support generating attention masks for left padded text encoders. (#12454 )	2026-02-13 20:15:23 -05:00
comfyanonymous	e1add563f9	Use torch RMSNorm for flux models and refactor hunyuan video code. (#12432 )	2026-02-13 15:35:13 -05:00
rattus	8902907d7a	dynamic_vram: Training fixes (#12442 )	2026-02-13 15:29:37 -05:00