Use comfy kitchen apply rope in omnigen2 model.

2026-06-18 20:48:00 +08:00 · 2026-06-12 13:57:57 -07:00
19 changed files with 61 additions and 520 deletions
--- a/README.md
+++ b/README.md
@ -382,7 +382,11 @@ For AMD 7600 and maybe other RDNA3 cards: ```HSA_OVERRIDE_GFX_VERSION=11.0.0 pyt

 ### AMD ROCm Tips

-You can try setting this env variable `PYTORCH_TUNABLEOP_ENABLED=1` which might speed things up at the cost of a very slow initial run.
+You can enable experimental memory-efficient attention on recent PyTorch versions in ComfyUI on some AMD GPUs using the command below; it should already be enabled by default on RDNA3. If this improves speed for you on the latest PyTorch on your GPU, please report it so that I can enable it by default.
+
+```TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1 python main.py --use-pytorch-cross-attention```
+
+You can also try setting this env variable `PYTORCH_TUNABLEOP_ENABLED=1` which might speed things up at the cost of a very slow initial run.

 # Notes

--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@ -145,7 +145,6 @@ vram_group.add_argument("--novram", action="store_true", help="When lowvram isn'
 vram_group.add_argument("--cpu", action="store_true", help="To use the CPU for everything (slow).")

 parser.add_argument("--reserve-vram", type=float, default=None, help="Set the amount of vram in GB you want to reserve for use by your OS/other software. By default some amount is reserved depending on your OS.")
-parser.add_argument("--vram-headroom", type=float, default=0, help="Set the amount of vram in GB for DynamicVRAM to maintain as extra headroom above default. ComfyUI will try and keep this much VRAM completely free and unused, even counting VRAM from other apps.")

 parser.add_argument("--async-offload", nargs='?', const=2, type=int, default=None, metavar="NUM_STREAMS", help="Use async weight offloading. An optional argument controls the amount of offload streams. Default is 2. Enabled by default on Nvidia.")
 parser.add_argument("--disable-async-offload", action="store_true", help="Disable async weight offloading.")
--- a/comfy_api/latest/_input/video_types.py
+++ b/comfy_api/latest/_input/video_types.py
@ -27,13 +27,10 @@ class VideoInput(ABC):
        path: Union[str, IO[bytes]],
        format: VideoContainer = VideoContainer.AUTO,
        codec: VideoCodec = VideoCodec.AUTO,
-        metadata: Optional[dict] = None,
-        bit_depth: int | None = None,
+        metadata: Optional[dict] = None
    ):
        """
        Abstract method to save the video input to a file.
-
-        bit_depth selects the encoded bit depth; None keeps the video's native depth.
        """
        pass

@ -86,14 +83,6 @@ class VideoInput(ABC):
        components = self.get_components()
        return components.images.shape[2], components.images.shape[1]

-    def get_bit_depth(self) -> int:
-        """
-        Returns the bit depth of the video (e.g. 8 or 10).
-
-        Default implementation returns 8; subclasses report their real depth.
-        """
-        return 8
-
    def get_duration(self) -> float:
        """
        Returns the duration of the video in seconds.
--- a/comfy_api/latest/_input_impl/video_types.py
+++ b/comfy_api/latest/_input_impl/video_types.py
@ -52,12 +52,6 @@ def get_open_write_kwargs(
    return open_kwargs


-def video_stream_bit_depth(stream) -> int:
-    if stream is None or stream.format is None or not stream.format.components:
-        return 8
-    return max(component.bits for component in stream.format.components)
-
-
 class VideoFromFile(VideoInput):
    """
    Class representing video input from a file.
@ -103,13 +97,6 @@ class VideoFromFile(VideoInput):
                    return stream.width, stream.height
        raise ValueError(f"No video stream found in file '{self.__file}'")

-    def get_bit_depth(self) -> int:
-        if isinstance(self.__file, io.BytesIO):
-            self.__file.seek(0)  # Reset the BytesIO object to the beginning
-        with av.open(self.__file, mode="r") as container:
-            video_stream = container.streams.video[0] if len(container.streams.video) > 0 else None
-            return video_stream_bit_depth(video_stream)
-
    def get_duration(self) -> float:
        """
        Returns the duration of the video in seconds.
@ -270,7 +257,6 @@ class VideoFromFile(VideoInput):

        image_format = 'gbrpf32le'
        process_image_format = lambda a: a
-        align_graph = None
        audio = None

        streams = [video_stream]
@ -324,28 +310,7 @@ class VideoFromFile(VideoInput):

                            checked_alpha = True

-                        # Fix non-deterministic video decode when the video width is not a multiple of 32
-                        # For non-yuvj pixel formats: most H.264/H.265 video and static images (e.g. lossy WebP via LoadImage)
-                        # Pad both axes to a multiple of 32 and smear the border so the alignment padding never bleeds into the cropped edges
-                        if image_format in ('gbrpf32le', 'gbrapf32le') and frame.width % 32 != 0:
-                            if align_graph is None:
-                                pad_w = ((frame.width + 31) // 32) * 32
-                                pad_h = ((frame.height + 31) // 32) * 32
-                                g = av.filter.Graph()
-                                g_src = g.add_buffer(width=frame.width, height=frame.height,
-                                                     format=frame.format.name, time_base=video_stream.time_base)
-                                g_pad = g.add('pad', f'{pad_w}:{pad_h}:0:0')
-                                g_fill = g.add('fillborders', f'left=0:right={pad_w - frame.width}:top=0:bottom={pad_h - frame.height}:mode=smear')
-                                g_sink = g.add('buffersink')
-                                g_src.link_to(g_pad)
-                                g_pad.link_to(g_fill)
-                                g_fill.link_to(g_sink)
-                                g.configure()
-                                align_graph = (g, g_src, g_sink)
-                            align_graph[1].push(frame)
-                            img = np.ascontiguousarray(align_graph[2].pull().to_ndarray(format=image_format)[:frame.height, :frame.width])
-                        else:
-                            img = frame.to_ndarray(format=image_format)
+                        img = frame.to_ndarray(format=image_format)  # shape: (H, W, 4)
                        if frame.rotation != 0:
                            k = int(round(frame.rotation // 90))
                            img = np.rot90(img, k=k, axes=(0, 1)).copy()
@ -412,32 +377,25 @@ class VideoFromFile(VideoInput):
        format: VideoContainer = VideoContainer.AUTO,
        codec: VideoCodec = VideoCodec.AUTO,
        metadata: Optional[dict] = None,
-        bit_depth: int | None = None,
    ):
        if isinstance(self.__file, io.BytesIO):
            self.__file.seek(0)  # Reset the BytesIO object to the beginning
        with av.open(self.__file, mode='r') as container:
            container_format = container.format.name
-            video_stream = container.streams.video[0] if len(container.streams.video) > 0 else None
-            video_encoding = video_stream.codec.name if video_stream is not None else None
-            source_bit_depth = video_stream_bit_depth(video_stream)
+            video_encoding = container.streams.video[0].codec.name if len(container.streams.video) > 0 else None
            reuse_streams = True
            if format != VideoContainer.AUTO and format not in container_format.split(","):
                reuse_streams = False
            if codec != VideoCodec.AUTO and codec != video_encoding and video_encoding is not None:
                reuse_streams = False
-            if bit_depth is not None and video_encoding is not None and bit_depth != source_bit_depth:
-                reuse_streams = False
            if self.__start_time or self.__duration:
                reuse_streams = False

            if not reuse_streams:
-                if bit_depth is None:
-                    bit_depth = source_bit_depth
                components = self.get_components_internal(container)
                video = VideoFromComponents(components)
                return video.save_to(
-                    path, format=format, codec=codec, metadata=metadata, bit_depth=bit_depth,
+                    path, format=format, codec=codec, metadata=metadata
                )

            streams = container.streams
@ -493,10 +451,8 @@ class VideoFromComponents(VideoInput):
    Class representing video input from tensors.
    """

-    def __init__(self, components: VideoComponents, bit_depth: int = 8):
+    def __init__(self, components: VideoComponents):
        self.__components = components
-        # Tensor components have no inherent bit depth; this is the depth used when encoding.
-        self.__bit_depth = bit_depth

    def get_components(self) -> VideoComponents:
        return VideoComponents(
@ -505,26 +461,18 @@ class VideoFromComponents(VideoInput):
            frame_rate=self.__components.frame_rate,
        )

-    def get_bit_depth(self) -> int:
-        return self.__bit_depth
-
    def save_to(
        self,
        path: str,
        format: VideoContainer = VideoContainer.AUTO,
        codec: VideoCodec = VideoCodec.AUTO,
        metadata: Optional[dict] = None,
-        bit_depth: int | None = None,
    ):
        """Save the video to a file path or BytesIO buffer."""
        if format != VideoContainer.AUTO and format != VideoContainer.MP4:
            raise ValueError("Only MP4 format is supported for now")
        if codec != VideoCodec.AUTO and codec != VideoCodec.H264:
            raise ValueError("Only H264 codec is supported for now")
-        # None means "use the depth this video was created with" (CreateVideo's choice).
-        if bit_depth is None:
-            bit_depth = self.__bit_depth
-        is_10bit = bit_depth >= 10
        extra_kwargs = {}
        if isinstance(format, VideoContainer) and format != VideoContainer.AUTO:
            extra_kwargs["format"] = format.value
@ -540,11 +488,10 @@ class VideoFromComponents(VideoInput):

            frame_rate = Fraction(round(self.__components.frame_rate * 1000), 1000)
            # Create a video stream
-            pix_fmt = "yuv420p10le" if is_10bit else "yuv420p"
            video_stream = output.add_stream('h264', rate=frame_rate)
            video_stream.width = self.__components.images.shape[2]
            video_stream.height = self.__components.images.shape[1]
-            video_stream.pix_fmt = pix_fmt
+            video_stream.pix_fmt = 'yuv420p'

            # Create an audio stream
            audio_sample_rate = 1
@ -558,14 +505,9 @@ class VideoFromComponents(VideoInput):

            # Encode video
            for i, frame in enumerate(self.__components.images):
-                if is_10bit:
-                    # 16-bit RGB keeps float precision through the conversion to 10-bit YUV.
-                    img = (frame.float() * 65535).clamp(0, 65535).cpu().numpy().astype(np.uint16)  # shape: (H, W, 3)
-                    frame = av.VideoFrame.from_ndarray(img, format="rgb48le")
-                else:
-                    img = (frame * 255).clamp(0, 255).byte().cpu().numpy() # shape: (H, W, 3)
-                    frame = av.VideoFrame.from_ndarray(img, format='rgb24')
-                frame = frame.reformat(format=pix_fmt)
+                img = (frame * 255).clamp(0, 255).byte().cpu().numpy() # shape: (H, W, 3)
+                frame = av.VideoFrame.from_ndarray(img, format='rgb24')
+                frame = frame.reformat(format='yuv420p')  # Convert to YUV420P as required by h264
                packet = video_stream.encode(frame)
                output.mux(packet)

--- a/comfy_api_nodes/apis/kling.py
+++ b/comfy_api_nodes/apis/kling.py
@ -149,59 +149,3 @@ class MotionControlRequest(BaseModel):
    character_orientation: str = Field(...)
    mode: str = Field(..., description="'pro' or 'std'")
    model_name: str = Field(...)
-
-
-class Kling3TurboSettings(BaseModel):
-    resolution: str = Field("720p", description="'720p' or '1080p'")
-    aspect_ratio: str | None = Field(None, description="'16:9'/'9:16'/'1:1'; text-to-video only")
-    duration: int = Field(5, description="3-15 second")
-
-
-class Kling3TurboText2VideoRequest(BaseModel):
-    prompt: str = Field(..., description="<=3072 chars; may use multi-shot 'shot n, m, words; ...'")
-    settings: Kling3TurboSettings | None = Field(None)
-
-
-class Kling3TurboContent(BaseModel):
-    type: str = Field(..., description="'prompt' or 'first_frame'")
-    text: str | None = Field(None, description="for type=prompt; <=2500 chars")
-    url: str | None = Field(None, description="for type=first_frame")
-
-
-class Kling3TurboImage2VideoRequest(BaseModel):
-    contents: list[Kling3TurboContent] = Field(..., description="prompt + first_frame materials")
-    settings: Kling3TurboSettings | None = Field(None)
-
-
-class Kling3TurboCreateData(BaseModel):
-    id: str | None = Field(None, description="Task ID")
-    status: str | None = Field(None)
-    message: str | None = Field(None)
-
-
-class Kling3TurboCreateResponse(BaseModel):
-    code: int | None = Field(None)
-    message: str | None = Field(None)
-    request_id: str | None = Field(None)
-    data: Kling3TurboCreateData | None = Field(None)
-
-
-class Kling3TurboOutput(BaseModel):
-    type: str | None = Field(None, description="'video', 'image', 'audio', ...")
-    id: str | None = Field(None)
-    url: str | None = Field(None)
-    duration: str | None = Field(None)
-
-
-class Kling3TurboTaskData(BaseModel):
-    id: str | None = Field(None)
-    status: str | None = Field(None, description="submitted | processing | succeeded | failed")
-    message: str | None = Field(None)
-    outputs: list[Kling3TurboOutput] | None = Field(None)
-
-
-class Kling3TurboQueryResponse(BaseModel):
-    code: int | None = Field(None)
-    message: str | None = Field(None)
-    request_id: str | None = Field(None)
-    data: list[Kling3TurboTaskData] | None = Field(None)
--- a/comfy_api_nodes/apis/tripo.py
+++ b/comfy_api_nodes/apis/tripo.py
@ -208,10 +208,6 @@ class TripoMultiviewToModelRequest(BaseModel):
    quad: bool | None = Field(False, description="Whether to apply quad to the generated model")


-class TripoTexturePrompt(BaseModel):
-    text: str | None = Field(None, description="Text guidance for texture generation")
-
-
 class TripoTextureModelRequest(BaseModel):
    type: TripoTaskType = Field(TripoTaskType.TEXTURE_MODEL, description="Type of task")
    original_model_task_id: str = Field(..., description="The task ID of the original model")
@ -223,11 +219,6 @@ class TripoTextureModelRequest(BaseModel):
    texture_alignment: TripoTextureAlignment | None = Field(
        TripoTextureAlignment.ORIGINAL_IMAGE, description="The texture alignment method"
    )
-    texture_prompt: TripoTexturePrompt | None = Field(
-        None,
-        description="Optional guidance for texturing. Required in practice for imported models, "
-        "which carry no source image to infer texture from.",
-    )


 class TripoRefineModelRequest(BaseModel):
@ -316,17 +307,6 @@ class TripoP1MultiviewToModelRequest(TripoP1CommonRequest):
    orientation: str | None = None


-class TripoImportModelRequest(BaseModel):
-    """Request for the comfy-api composite import endpoint (/proxy/tripo/v2/openapi/import).
-
-    The model file is uploaded to ComfyUI API storage first; the backend downloads it from
-    `url`, re-uploads it to Tripo's storage and creates the import_model task server-side.
-    """
-
-    url: str = Field(..., description="ComfyUI API storage download URL of the model file")
-    format: str = Field(..., description='File format: "glb", "fbx", "obj" or "stl"')
-
-
 class TripoTaskOutput(BaseModel):
    model: str | None = Field(None, description="URL to the model")
    base_model: str | None = Field(None, description="URL to the base model")
--- a/comfy_api_nodes/nodes_kling.py
+++ b/comfy_api_nodes/nodes_kling.py
@ -60,12 +60,6 @@ from comfy_api_nodes.apis.kling import (
    OmniProImageRequest,
    OmniProReferences2VideoRequest,
    OmniProText2VideoRequest,
-    Kling3TurboSettings,
-    Kling3TurboText2VideoRequest,
-    Kling3TurboContent,
-    Kling3TurboImage2VideoRequest,
-    Kling3TurboCreateResponse,
-    Kling3TurboQueryResponse,
    TaskStatusResponse,
    TextToVideoWithAudioRequest,
 )
@ -2853,67 +2847,6 @@ class MotionControl(IO.ComfyNode):
        return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url))


-def build_turbo_shot_prompt(multi_prompt: list[MultiPromptEntry]) -> str:
-    """Render storyboard entries into the Turbo multi-shot prompt 'shot n, m, words; ...'."""
-    return "; ".join(f"shot {i}, {int(e.duration)}, {e.prompt}" for i, e in enumerate(multi_prompt, 1)) + ";"
-
-
-def _turbo_video_url(response: Kling3TurboQueryResponse) -> str:
-    """Extract the result video URL from a /tasks response (data[].outputs[] where type == 'video')."""
-    task = response.data[0] if response.data else None
-    if task and task.outputs:
-        for output in task.outputs:
-            if output.type == "video" and output.url:
-                return output.url
-    raise RuntimeError(f"Kling 3.0 Turbo task finished without a video output: {response.model_dump()}")
-
-
-async def execute_kling_turbo(
-    cls: type[IO.ComfyNode],
-    *,
-    prompt: str,
-    resolution: str,
-    aspect_ratio: str,
-    duration: int,
-    start_frame: torch.Tensor | None,
-) -> IO.NodeOutput:
-    """Create + poll a Kling 3.0 Turbo task. Image-to-video when start_frame is given, else text-to-video."""
-    if start_frame is not None:
-        validate_image_dimensions(start_frame, min_width=300, min_height=300)
-        validate_image_aspect_ratio(start_frame, (1, 2.5), (2.5, 1))
-        contents = [Kling3TurboContent(type="first_frame", url=tensor_to_base64_string(start_frame))]
-        if prompt:
-            contents.insert(0, Kling3TurboContent(type="prompt", text=prompt))
-        create = await sync_op(
-            cls,
-            ApiEndpoint(path="/proxy/kling/image-to-video/kling-3.0-turbo", method="POST"),
-            response_model=Kling3TurboCreateResponse,
-            data=Kling3TurboImage2VideoRequest(
-                contents=contents,
-                settings=Kling3TurboSettings(resolution=resolution, duration=duration),  # i2v: no aspect_ratio
-            ),
-        )
-    else:
-        create = await sync_op(
-            cls,
-            ApiEndpoint(path="/proxy/kling/text-to-video/kling-3.0-turbo", method="POST"),
-            response_model=Kling3TurboCreateResponse,
-            data=Kling3TurboText2VideoRequest(
-                prompt=prompt,
-                settings=Kling3TurboSettings(resolution=resolution, aspect_ratio=aspect_ratio, duration=duration),
-            ),
-        )
-    if not (create.data and create.data.id):
-        raise RuntimeError(f"Kling 3.0 Turbo create failed. Code: {create.code}, Message: {create.message}")
-    final_response = await poll_op(
-        cls,
-        ApiEndpoint(path="/proxy/kling/tasks", query_params={"task_ids": create.data.id}),
-        response_model=Kling3TurboQueryResponse,
-        status_extractor=lambda r: (r.data[0].status if r.data else None),
-    )
-    return IO.NodeOutput(await download_url_to_video_output(_turbo_video_url(final_response)))
-
-
 class KlingVideoNode(IO.ComfyNode):

    @classmethod
@ -2951,11 +2884,7 @@ class KlingVideoNode(IO.ComfyNode):
                    ],
                    tooltip="Generate a series of video segments with individual prompts and durations.",
                ),
-                IO.Boolean.Input(
-                    "generate_audio",
-                    default=True,
-                    tooltip="'kling-3.0-turbo' always generates native audio, so the audio toggle is ignored.",
-                ),
+                IO.Boolean.Input("generate_audio", default=True),
                IO.DynamicCombo.Input(
                    "model",
                    options=[
@ -2970,17 +2899,6 @@ class KlingVideoNode(IO.ComfyNode):
                                ),
                            ],
                        ),
-                        IO.DynamicCombo.Option(
-                            "kling-3.0-turbo",
-                            [
-                                IO.Combo.Input("resolution", options=["1080p", "720p"], default="720p"),
-                                IO.Combo.Input(
-                                    "aspect_ratio",
-                                    options=["16:9", "9:16", "1:1"],
-                                    tooltip="Ignored in image-to-video mode.",
-                                ),
-                            ],
-                        ),
                    ],
                    tooltip="Model and generation settings.",
                ),
@ -3012,7 +2930,6 @@ class KlingVideoNode(IO.ComfyNode):
            price_badge=IO.PriceBadge(
                depends_on=IO.PriceBadgeDepends(
                    widgets=[
-                        "model",
                        "model.resolution",
                        "generate_audio",
                        "multi_shot",
@ -3027,7 +2944,14 @@ class KlingVideoNode(IO.ComfyNode):
                ),
                expr="""
                (
+                  $rates := {
+                    "4k": {"off": 0.42, "on": 0.42},
+                    "1080p": {"off": 0.112, "on": 0.168},
+                    "720p": {"off": 0.084, "on": 0.126}
+                  };
                  $res := $lookup(widgets, "model.resolution");
+                  $audio := widgets.generate_audio ? "on" : "off";
+                  $rate := $lookup($lookup($rates, $res), $audio);
                  $ms := widgets.multi_shot;
                  $isSb := $ms != "disabled";
                  $n := $isSb ? $number($substring($ms, 0, 1)) : 0;
@ -3038,18 +2962,7 @@ class KlingVideoNode(IO.ComfyNode):
                  $d5 := $n >= 5 ? $lookup(widgets, "multi_shot.storyboard_5_duration") : 0;
                  $d6 := $n >= 6 ? $lookup(widgets, "multi_shot.storyboard_6_duration") : 0;
                  $dur := $isSb ? $d1 + $d2 + $d3 + $d4 + $d5 + $d6 : $lookup(widgets, "multi_shot.duration");
-                  widgets.model = "kling-3.0-turbo"
-                    ? {"type":"usd","usd": ($res = "1080p" ? 0.14 : 0.112) * $dur}
-                    : (
-                        $rates := {
-                          "4k": {"off": 0.42, "on": 0.42},
-                          "1080p": {"off": 0.112, "on": 0.168},
-                          "720p": {"off": 0.084, "on": 0.126}
-                        };
-                        $audio := widgets.generate_audio ? "on" : "off";
-                        $rate := $lookup($lookup($rates, $res), $audio);
-                        {"type":"usd","usd": $rate * $dur}
-                      )
+                  {"type":"usd","usd": $rate * $dur}
                )
                """,
            ),
@ -3102,17 +3015,6 @@ class KlingVideoNode(IO.ComfyNode):
            duration = multi_shot["duration"]
            validate_string(multi_shot["prompt"], min_length=1, max_length=2500)

-        if model["model"] == "kling-3.0-turbo":
-            turbo_prompt = build_turbo_shot_prompt(multi_prompt_list) if custom_multi_shot else multi_shot["prompt"]
-            return await execute_kling_turbo(
-                cls,
-                prompt=turbo_prompt,
-                resolution=model["resolution"],
-                aspect_ratio=model["aspect_ratio"],
-                duration=duration,
-                start_frame=start_frame,
-            )
-
        if start_frame is not None:
            validate_image_dimensions(start_frame, min_width=300, min_height=300)
            validate_image_aspect_ratio(start_frame, (1, 2.5), (2.5, 1))
--- a/comfy_api_nodes/nodes_sonilo.py
+++ b/comfy_api_nodes/nodes_sonilo.py
@ -111,10 +111,11 @@ class SoniloTextToMusic(IO.ComfyNode):
                ),
                IO.Int.Input(
                    "duration",
-                    default=30,
-                    min=1,
+                    default=0,
+                    min=0,
                    max=360,
-                    tooltip="Target duration in seconds. Maximum: 6 minutes.",
+                    tooltip="Target duration in seconds. Set to 0 to let the model "
+                    "infer the duration from the prompt. Maximum: 6 minutes.",
                ),
                IO.Int.Input(
                    "seed",
@ -149,13 +150,14 @@ class SoniloTextToMusic(IO.ComfyNode):
    async def execute(
        cls,
        prompt: str,
-        duration: int = 1,
+        duration: int = 0,
        seed: int = 0,
    ) -> IO.NodeOutput:
-        validate_string(prompt, strip_whitespace=True, min_length=1, max_length=1000)
+        validate_string(prompt, strip_whitespace=True, min_length=1)
        form = aiohttp.FormData()
        form.add_field("prompt", prompt)
-        form.add_field("duration", str(duration))
+        if duration > 0:
+            form.add_field("duration", str(duration))
        audio_bytes = await _stream_sonilo_music(
            cls,
            ApiEndpoint(path="/proxy/sonilo/t2m/generate", method="POST"),
--- a/comfy_api_nodes/nodes_tripo.py
+++ b/comfy_api_nodes/nodes_tripo.py
@ -1,6 +1,6 @@
 from typing_extensions import override

-from comfy_api.latest import IO, ComfyExtension, Input, Types
+from comfy_api.latest import IO, ComfyExtension, Input
 from comfy_api_nodes.apis.tripo import (
    TripoAnimateRetargetRequest,
    TripoAnimateRigRequest,
@ -8,7 +8,6 @@ from comfy_api_nodes.apis.tripo import (
    TripoFileEmptyReference,
    TripoFileReference,
    TripoImageToModelRequest,
-    TripoImportModelRequest,
    TripoModelVersion,
    TripoMultiviewToModelRequest,
    TripoOrientation,
@ -22,7 +21,6 @@ from comfy_api_nodes.apis.tripo import (
    TripoTaskType,
    TripoTextToModelRequest,
    TripoTextureModelRequest,
-    TripoTexturePrompt,
    TripoUrlReference,
 )
 from comfy_api_nodes.util import (
@ -30,7 +28,6 @@ from comfy_api_nodes.util import (
    download_url_to_file_3d,
    poll_op,
    sync_op,
-    upload_3d_model_to_comfyapi,
    upload_images_to_comfyapi,
 )

@ -541,14 +538,6 @@ class TripoTextureNode(IO.ComfyNode):
                    optional=True,
                    advanced=True,
                ),
-                IO.String.Input(
-                    "texture_prompt",
-                    default="",
-                    multiline=True,
-                    optional=True,
-                    tooltip="Optional text guidance for texturing. Required in practice for imported "
-                    "models (Tripo: Import Model), which carry no source image to infer colors from.",
-                ),
            ],
            outputs=[
                IO.String.Output(display_name="model_file"),  # for backward compatibility only
@ -582,7 +571,6 @@ class TripoTextureNode(IO.ComfyNode):
        texture_seed: int | None = None,
        texture_quality: str | None = None,
        texture_alignment: str | None = None,
-        texture_prompt: str = "",
    ) -> IO.NodeOutput:
        response = await sync_op(
            cls,
@ -595,7 +583,6 @@ class TripoTextureNode(IO.ComfyNode):
                texture_seed=texture_seed,
                texture_quality=texture_quality,
                texture_alignment=texture_alignment,
-                texture_prompt=TripoTexturePrompt(text=texture_prompt.strip()) if texture_prompt.strip() else None,
            ),
        )
        return await poll_until_finished(cls, response, average_duration=80)
@ -928,90 +915,6 @@ class TripoConversionNode(IO.ComfyNode):
        return await poll_until_finished(cls, response, average_duration=30)


-class TripoImportModelNode(IO.ComfyNode):
-    """Imports an external 3D model into Tripo, producing a MODEL_TASK_ID for post-processing nodes."""
-
-    SUPPORTED_FORMATS = ("glb", "fbx", "obj", "stl")
-
-    @classmethod
-    def define_schema(cls):
-        return IO.Schema(
-            node_id="TripoImportModelNode",
-            display_name="Tripo: Import Model",
-            category="partner/3d/Tripo",
-            description="Import an external 3D model (e.g. from Rodin, Hunyuan3D or a local file) into Tripo "
-            "to use it with Tripo's post-processing nodes: Texture, Rig, Convert. "
-            "GLB is recommended: textures survive import only when embedded in the file. "
-            "Note that texturing an imported model requires a texture prompt.",
-            inputs=[
-                IO.MultiType.Input(
-                    "model_3d",
-                    types=[IO.File3DGLB, IO.File3DFBX, IO.File3DOBJ, IO.File3DSTL, IO.File3DAny],
-                    tooltip="3D model to import (GLB / FBX / OBJ / STL, up to 150 MB). "
-                    "OBJ and STL files carry no embedded textures.",
-                ),
-            ],
-            outputs=[
-                IO.Custom("MODEL_TASK_ID").Output(display_name="model task_id"),
-            ],
-            hidden=[
-                IO.Hidden.auth_token_comfy_org,
-                IO.Hidden.api_key_comfy_org,
-                IO.Hidden.unique_id,
-            ],
-            is_api_node=True,
-            price_badge=IO.PriceBadge(
-                expr="""{"type":"text","text":"Free"}""",
-            ),
-        )
-
-    @classmethod
-    async def execute(cls, model_3d: Types.File3D) -> IO.NodeOutput:
-        file_format = (model_3d.format or "").lstrip(".").lower()
-        if file_format == "gltf":
-            raise ValueError(
-                "GLTF (.gltf) references external files and cannot be imported. Export a single-file GLB instead."
-            )
-        if file_format not in cls.SUPPORTED_FORMATS:
-            raise ValueError(
-                f"Unsupported 3D format '{file_format or 'unknown'}'. "
-                f"Tripo import supports: {', '.join(f.upper() for f in cls.SUPPORTED_FORMATS)}."
-            )
-        size = len(model_3d.get_bytes())
-        if size > 150 * 1024 * 1024:
-            raise ValueError(f"Model file is {size / (1024 * 1024):.1f} MB; Tripo import allows up to 150 MB.")
-
-        url = await upload_3d_model_to_comfyapi(cls, model_3d, file_format)
-        response = await sync_op(
-            cls,
-            endpoint=ApiEndpoint(path="/proxy/tripo/v2/openapi/import", method="POST"),
-            response_model=TripoTaskResponse,
-            data=TripoImportModelRequest(url=url, format=file_format),
-        )
-        if response.code != 0:
-            raise RuntimeError(f"Failed to import model: {response.error}")
-
-        task_id = response.data.task_id
-        response_poll = await poll_op(
-            cls,
-            poll_endpoint=ApiEndpoint(path=f"/proxy/tripo/v2/openapi/task/{task_id}"),
-            response_model=TripoTaskResponse,
-            failed_statuses=[
-                TripoTaskStatus.FAILED,
-                TripoTaskStatus.CANCELLED,
-                TripoTaskStatus.UNKNOWN,
-                TripoTaskStatus.BANNED,
-                TripoTaskStatus.EXPIRED,
-            ],
-            status_extractor=lambda x: x.data.status,
-            progress_extractor=lambda x: x.data.progress,
-            estimated_duration=10,
-        )
-        if response_poll.data.status != TripoTaskStatus.SUCCESS:
-            raise RuntimeError(f"Failed to import model: {response_poll}")
-        return IO.NodeOutput(task_id)
-
-
 def _p1_price_expr(*, geometry_credits: int, textured_credits: int, detailed_credits: int) -> str:
    return (
        "("
@ -1389,7 +1292,6 @@ class TripoExtension(ComfyExtension):
            TripoP1TextToModelNode,
            TripoP1ImageToModelNode,
            TripoP1MultiviewToModelNode,
-            TripoImportModelNode,
            TripoTextureNode,
            TripoRefineNode,
            TripoRigNode,
--- a/comfy_extras/nodes_rtdetr.py
+++ b/comfy_extras/nodes_rtdetr.py
@ -14,7 +14,7 @@ class RTDETR_detect(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="RTDETR_detect",
-            display_name="Run Real-Time Detection (RT-DETR)",
+            display_name="RT-DETR Detect",
            category="image/detection",
            search_aliases=["bbox", "bounding box", "object detection", "coco"],
            inputs=[
--- a/comfy_extras/nodes_sam3.py
+++ b/comfy_extras/nodes_sam3.py
@ -264,7 +264,7 @@ class SAM3_VideoTrack(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="SAM3_VideoTrack",
-            display_name="Run SAM3 Video Track",
+            display_name="SAM3 Video Track",
            category="image/detection",
            search_aliases=["sam3", "video", "track", "propagate"],
            inputs=[
--- a/comfy_extras/nodes_video.py
+++ b/comfy_extras/nodes_video.py
@ -134,17 +134,6 @@ class CreateVideo(io.ComfyNode):
                io.Image.Input("images", tooltip="The images to create a video from."),
                io.Float.Input("fps", default=30.0, min=1.0, max=120.0, step=1.0),
                io.Audio.Input("audio", optional=True, tooltip="The audio to add to the video."),
-                io.Int.Input(
-                    "bit_depth",
-                    min=8,
-                    max=10,
-                    default=8,
-                    step=2,
-                    tooltip="Bit depth of the created video. 10-bit keeps smoother gradients with less"
-                    " banding, but some players and downstream nodes may not support it.",
-                    optional=True,
-                    display_mode=io.NumberDisplay.number,
-                ),
            ],
            outputs=[
                io.Video.Output(),
@ -152,14 +141,9 @@ class CreateVideo(io.ComfyNode):
        )

    @classmethod
-    def execute(
-        cls, images: Input.Image, fps: float, audio: Optional[Input.Audio] = None, bit_depth: int = 8,
-    ) -> io.NodeOutput:
+    def execute(cls, images: Input.Image, fps: float, audio: Optional[Input.Audio] = None) -> io.NodeOutput:
        return io.NodeOutput(
-            InputImpl.VideoFromComponents(
-                Types.VideoComponents(images=images, audio=audio, frame_rate=Fraction(fps)),
-                bit_depth=bit_depth,
-            )
+            InputImpl.VideoFromComponents(Types.VideoComponents(images=images, audio=audio, frame_rate=Fraction(fps)))
        )

 class GetVideoComponents(io.ComfyNode):
@ -170,7 +154,7 @@ class GetVideoComponents(io.ComfyNode):
            search_aliases=["extract frames", "split video", "video to images", "demux"],
            display_name="Get Video Components",
            category="video",
-            description="Extracts all components from a video: frames, audio, framerate, and bit depth.",
+            description="Extracts all components from a video: frames, audio, and framerate.",
            inputs=[
                io.Video.Input("video", tooltip="The video to extract components from."),
            ],
@ -178,14 +162,13 @@ class GetVideoComponents(io.ComfyNode):
                io.Image.Output(display_name="images"),
                io.Audio.Output(display_name="audio"),
                io.Float.Output(display_name="fps"),
-                io.Int.Output(display_name="bit_depth"),
            ],
        )

    @classmethod
    def execute(cls, video: Input.Video) -> io.NodeOutput:
        components = video.get_components()
-        return io.NodeOutput(components.images, components.audio, float(components.frame_rate), video.get_bit_depth())
+        return io.NodeOutput(components.images, components.audio, float(components.frame_rate))


 class LoadVideo(io.ComfyNode):
--- a/comfyui_version.py
+++ b/comfyui_version.py
@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.25.1"
+__version__ = "0.24.0"
--- a/main.py
+++ b/main.py
@ -55,11 +55,7 @@ if __name__ == "__main__" and args.debug_hang:
 import comfy_aimdo.control

 if enables_dynamic_vram():
-    try:
-        comfy_aimdo.control.init(simple_vram_headroom=None if args.reserve_vram is None else int(args.reserve_vram * 1024 ** 3))
-    except TypeError:
-        # comfy-aimdo 0.4.9 protocol.
-        comfy_aimdo.control.init()
+    comfy_aimdo.control.init()

 if os.name == "nt":
    os.environ['MIMALLOC_PURGE_DELAY'] = '0'
@ -235,30 +231,23 @@ import comfy.model_patcher
 if args.enable_dynamic_vram or (enables_dynamic_vram() and comfy.model_management.is_nvidia() and not comfy.model_management.is_wsl()):
    if (not args.enable_dynamic_vram) and (comfy.model_management.torch_version_numeric < (2, 8)):
        logging.warning("Unsupported Pytorch detected. DynamicVRAM support requires Pytorch version 2.8 or later. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows")
+    elif comfy_aimdo.control.init_devices(d.index for d in comfy.model_management.get_all_torch_devices()):
+        if args.verbose == 'DEBUG':
+            comfy_aimdo.control.set_log_debug()
+        elif args.verbose == 'CRITICAL':
+            comfy_aimdo.control.set_log_critical()
+        elif args.verbose == 'ERROR':
+            comfy_aimdo.control.set_log_error()
+        elif args.verbose == 'WARNING':
+            comfy_aimdo.control.set_log_warning()
+        else: #INFO
+            comfy_aimdo.control.set_log_info()
+
+        comfy.model_patcher.CoreModelPatcher = comfy.model_patcher.ModelPatcherDynamic
+        comfy.memory_management.aimdo_enabled = True
+        logging.info("DynamicVRAM support detected and enabled")
    else:
-        try:
-            aimdo_initialized = comfy_aimdo.control.init_devices((d.index, int(args.vram_headroom * 1024 ** 3)) for d in comfy.model_management.get_all_torch_devices())
-        except TypeError:
-            # comfy-aimdo 0.4.9 protocol.
-            aimdo_initialized = comfy_aimdo.control.init_devices(d.index for d in comfy.model_management.get_all_torch_devices())
-
-        if aimdo_initialized:
-            if args.verbose == 'DEBUG':
-                comfy_aimdo.control.set_log_debug()
-            elif args.verbose == 'CRITICAL':
-                comfy_aimdo.control.set_log_critical()
-            elif args.verbose == 'ERROR':
-                comfy_aimdo.control.set_log_error()
-            elif args.verbose == 'WARNING':
-                comfy_aimdo.control.set_log_warning()
-            else: #INFO
-                comfy_aimdo.control.set_log_info()
-
-            comfy.model_patcher.CoreModelPatcher = comfy.model_patcher.ModelPatcherDynamic
-            comfy.memory_management.aimdo_enabled = True
-            logging.info("DynamicVRAM support detected and enabled")
-        else:
-            logging.warning("No working comfy-aimdo install detected. DynamicVRAM support disabled. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows")
+        logging.warning("No working comfy-aimdo install detected. DynamicVRAM support disabled. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows")


 def cuda_malloc_warning():
--- a/manager_requirements.txt
+++ b/manager_requirements.txt
@ -1 +1 @@
-comfyui_manager==4.2.2
+comfyui_manager==4.2.1
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.25.1"
+version = "0.24.0"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.10"
--- a/requirements.txt
+++ b/requirements.txt
@ -1,6 +1,6 @@
 comfyui-frontend-package==1.45.15
-comfyui-workflow-templates==0.10.0
-comfyui-embedded-docs==0.5.4
+comfyui-workflow-templates==0.9.98
+comfyui-embedded-docs==0.5.3
 torch
 torchsde
 torchvision
@ -23,7 +23,7 @@ SQLAlchemy>=2.0.0
 filelock
 av>=16.0.0
 comfy-kitchen==0.2.10
-comfy-aimdo==0.4.10
+comfy-aimdo==0.4.9
 requests
 simpleeval>=1.0.0
 blake3
--- a/server.py
+++ b/server.py
@ -27,7 +27,6 @@ import logging

 import mimetypes
 from comfy.cli_args import args
-from comfy.deploy_environment import get_deploy_environment
 import comfy.utils
 import comfy.model_management
 from comfy_api import feature_flags
@ -691,7 +690,6 @@ class PromptServer():
                    "python_version": sys.version,
                    "pytorch_version": comfy.model_management.torch_version,
                    "embedded_python": os.path.split(os.path.split(sys.executable)[0])[1] == "python_embeded",
-                    "deploy_environment": get_deploy_environment(),
                    "argv": sys.argv
                },
                "devices": device_entries
--- a/tests-unit/comfy_api_test/video_bit_depth_test.py
+++ b/tests-unit/comfy_api_test/video_bit_depth_test.py
@ -1,93 +0,0 @@
-import pytest
-import torch
-import av
-import numpy as np
-from fractions import Fraction
-from comfy_api.latest._input_impl.video_types import VideoFromFile, VideoFromComponents
-from comfy_api.latest._util.video_types import VideoComponents
-
-
-@pytest.fixture(scope="module")
-def gradient_components():
-    """Narrow horizontal ramp (0.25..0.30) that needs more than 8 bits to stay smooth"""
-    width, height, frames = 64, 64, 3
-    ramp = torch.linspace(0.25, 0.30, width).view(1, 1, width, 1).expand(frames, height, width, 3)
-    return VideoComponents(images=ramp.contiguous(), frame_rate=Fraction(30))
-
-
-@pytest.fixture(scope="module")
-def src8(gradient_components, tmp_path_factory):
-    """8-bit h264 mp4 (Create Video default)"""
-    path = str(tmp_path_factory.mktemp("video") / "src8.mp4")
-    VideoFromComponents(gradient_components).save_to(path)
-    return path
-
-
-@pytest.fixture(scope="module")
-def src10(gradient_components, tmp_path_factory):
-    """10-bit h264 mp4 (Create Video with bit_depth=10)"""
-    path = str(tmp_path_factory.mktemp("video") / "src10.mp4")
-    VideoFromComponents(gradient_components, bit_depth=10).save_to(path)
-    return path
-
-
-def probe(path):
-    """(codec, pix_fmt, bit_depth) of the first video stream"""
-    with av.open(path) as container:
-        stream = container.streams.video[0]
-        return (stream.codec.name, stream.format.name, max(c.bits for c in stream.format.components))
-
-
-def decoded_levels(path):
-    """Unique tonal levels in the first decoded frame (banding measure)"""
-    with av.open(path) as container:
-        frame = next(container.decode(container.streams.video[0]))
-        return len(np.unique(frame.to_ndarray(format="gbrpf32le")[..., 0]))
-
-
-def video_packet_bytes(path):
-    """Raw video packet payloads; identical to the source's only for a true remux"""
-    with av.open(path) as container:
-        return [bytes(p) for p in container.demux(container.streams.video[0]) if p.size]
-
-
-def test_create_video_bit_depth(src8, src10):
-    """Create Video's bit_depth picks the encoded depth (default 8-bit); 10-bit reduces banding"""
-    assert probe(src8) == ("h264", "yuv420p", 8)
-    assert probe(src10) == ("h264", "yuv420p10le", 10)
-    assert decoded_levels(src10) > 2 * decoded_levels(src8)
-
-
-def test_save_auto_keeps_source_depth(src8, src10, tmp_path):
-    """Save Video (no bit_depth = auto) stream-copies the source, preserving its depth byte-for-byte"""
-    for name, src in [("p8", src8), ("p10", src10)]:
-        path = str(tmp_path / f"{name}.mp4")
-        VideoFromFile(src).save_to(path)
-        assert probe(path) == probe(src)
-        assert video_packet_bytes(path) == video_packet_bytes(src)
-
-
-def test_save_explicit_depth_reencodes(src8, src10, tmp_path):
-    """An explicit bit_depth different from the source forces a re-encode to that depth"""
-    down = str(tmp_path / "down8.mp4")
-    VideoFromFile(src10).save_to(down, bit_depth=8)
-    assert probe(down) == ("h264", "yuv420p", 8)
-
-    up = str(tmp_path / "up10.mp4")
-    VideoFromFile(src8).save_to(up, bit_depth=10)
-    assert probe(up) == ("h264", "yuv420p10le", 10)
-
-
-def test_trim_keeps_source_depth(src10, tmp_path):
-    """Video Slice re-encodes (trim) but preserves the source's 10-bit depth"""
-    path = str(tmp_path / "trim.mp4")
-    VideoFromFile(src10).as_trimmed(start_time=0, duration=1 / 30, strict_duration=False).save_to(path)
-    assert probe(path) == ("h264", "yuv420p10le", 10)
-
-
-def test_get_bit_depth(gradient_components, src8, src10):
-    """get_bit_depth reports a video's depth (backs the Get Video Components output)"""
-    assert VideoFromFile(src8).get_bit_depth() == 8
-    assert VideoFromFile(src10).get_bit_depth() == 10
-    assert VideoFromComponents(gradient_components, bit_depth=10).get_bit_depth() == 10
-    assert VideoFromComponents(gradient_components).get_bit_depth() == 8