chore(openapi): sync shared API contract from cloud@447a63e

2026-06-15 11:27:16 +08:00 · 2026-06-13 01:59:15 +00:00
7 changed files with 22 additions and 200 deletions
--- a/comfy_api/latest/_input/video_types.py
+++ b/comfy_api/latest/_input/video_types.py
@ -27,13 +27,10 @@ class VideoInput(ABC):
        path: Union[str, IO[bytes]],
        format: VideoContainer = VideoContainer.AUTO,
        codec: VideoCodec = VideoCodec.AUTO,
-        metadata: Optional[dict] = None,
-        bit_depth: int | None = None,
+        metadata: Optional[dict] = None
    ):
        """
        Abstract method to save the video input to a file.
-
-        bit_depth selects the encoded bit depth; None keeps the video's native depth.
        """
        pass

@ -86,14 +83,6 @@ class VideoInput(ABC):
        components = self.get_components()
        return components.images.shape[2], components.images.shape[1]

-    def get_bit_depth(self) -> int:
-        """
-        Returns the bit depth of the video (e.g. 8 or 10).
-
-        Default implementation returns 8; subclasses report their real depth.
-        """
-        return 8
-
    def get_duration(self) -> float:
        """
        Returns the duration of the video in seconds.
--- a/comfy_api/latest/_input_impl/video_types.py
+++ b/comfy_api/latest/_input_impl/video_types.py
@ -52,12 +52,6 @@ def get_open_write_kwargs(
    return open_kwargs


-def video_stream_bit_depth(stream) -> int:
-    if stream is None or stream.format is None or not stream.format.components:
-        return 8
-    return max(component.bits for component in stream.format.components)
-
-
 class VideoFromFile(VideoInput):
    """
    Class representing video input from a file.
@ -103,13 +97,6 @@ class VideoFromFile(VideoInput):
                    return stream.width, stream.height
        raise ValueError(f"No video stream found in file '{self.__file}'")

-    def get_bit_depth(self) -> int:
-        if isinstance(self.__file, io.BytesIO):
-            self.__file.seek(0)  # Reset the BytesIO object to the beginning
-        with av.open(self.__file, mode="r") as container:
-            video_stream = container.streams.video[0] if len(container.streams.video) > 0 else None
-            return video_stream_bit_depth(video_stream)
-
    def get_duration(self) -> float:
        """
        Returns the duration of the video in seconds.
@ -270,7 +257,6 @@ class VideoFromFile(VideoInput):

        image_format = 'gbrpf32le'
        process_image_format = lambda a: a
-        align_graph = None
        audio = None

        streams = [video_stream]
@ -324,24 +310,7 @@ class VideoFromFile(VideoInput):

                            checked_alpha = True

-                        # Fix non-deterministic video decode when the video width is not a multiple of 32
-                        # For non-yuvj pixel formats (all H.264/H.265 video)
-                        if image_format in ('gbrpf32le', 'gbrapf32le') and frame.width % 32 != 0:
-                            if align_graph is None:
-                                pad_w = ((frame.width + 31) // 32) * 32
-                                g = av.filter.Graph()
-                                g_src = g.add_buffer(width=frame.width, height=frame.height,
-                                                     format=frame.format.name, time_base=video_stream.time_base)
-                                g_pad = g.add('pad', f'{pad_w}:{frame.height}:0:0')
-                                g_sink = g.add('buffersink')
-                                g_src.link_to(g_pad)
-                                g_pad.link_to(g_sink)
-                                g.configure()
-                                align_graph = (g, g_src, g_sink)
-                            align_graph[1].push(frame)
-                            img = np.ascontiguousarray(align_graph[2].pull().to_ndarray(format=image_format)[:, :frame.width])
-                        else:
-                            img = frame.to_ndarray(format=image_format)
+                        img = frame.to_ndarray(format=image_format)  # shape: (H, W, C); C=3 for the default 'gbrpf32le', 4 when an alpha format is selected
                        if frame.rotation != 0:
                            k = int(round(frame.rotation // 90))
                            img = np.rot90(img, k=k, axes=(0, 1)).copy()
@ -408,32 +377,25 @@ class VideoFromFile(VideoInput):
        format: VideoContainer = VideoContainer.AUTO,
        codec: VideoCodec = VideoCodec.AUTO,
        metadata: Optional[dict] = None,
-        bit_depth: int | None = None,
    ):
        if isinstance(self.__file, io.BytesIO):
            self.__file.seek(0)  # Reset the BytesIO object to the beginning
        with av.open(self.__file, mode='r') as container:
            container_format = container.format.name
-            video_stream = container.streams.video[0] if len(container.streams.video) > 0 else None
-            video_encoding = video_stream.codec.name if video_stream is not None else None
-            source_bit_depth = video_stream_bit_depth(video_stream)
+            video_encoding = container.streams.video[0].codec.name if len(container.streams.video) > 0 else None
            reuse_streams = True
            if format != VideoContainer.AUTO and format not in container_format.split(","):
                reuse_streams = False
            if codec != VideoCodec.AUTO and codec != video_encoding and video_encoding is not None:
                reuse_streams = False
-            if bit_depth is not None and video_encoding is not None and bit_depth != source_bit_depth:
-                reuse_streams = False
            if self.__start_time or self.__duration:
                reuse_streams = False

            if not reuse_streams:
-                if bit_depth is None:
-                    bit_depth = source_bit_depth
                components = self.get_components_internal(container)
                video = VideoFromComponents(components)
                return video.save_to(
-                    path, format=format, codec=codec, metadata=metadata, bit_depth=bit_depth,
+                    path, format=format, codec=codec, metadata=metadata
                )

            streams = container.streams
@ -489,10 +451,8 @@ class VideoFromComponents(VideoInput):
    Class representing video input from tensors.
    """

-    def __init__(self, components: VideoComponents, bit_depth: int = 8):
+    def __init__(self, components: VideoComponents):
        self.__components = components
-        # Tensor components have no inherent bit depth; this is the depth used when encoding.
-        self.__bit_depth = bit_depth

    def get_components(self) -> VideoComponents:
        return VideoComponents(
@ -501,26 +461,18 @@ class VideoFromComponents(VideoInput):
            frame_rate=self.__components.frame_rate,
        )

-    def get_bit_depth(self) -> int:
-        return self.__bit_depth
-
    def save_to(
        self,
        path: str,
        format: VideoContainer = VideoContainer.AUTO,
        codec: VideoCodec = VideoCodec.AUTO,
        metadata: Optional[dict] = None,
-        bit_depth: int | None = None,
    ):
        """Save the video to a file path or BytesIO buffer."""
        if format != VideoContainer.AUTO and format != VideoContainer.MP4:
            raise ValueError("Only MP4 format is supported for now")
        if codec != VideoCodec.AUTO and codec != VideoCodec.H264:
            raise ValueError("Only H264 codec is supported for now")
-        # None means "use the depth this video was created with" (CreateVideo's choice).
-        if bit_depth is None:
-            bit_depth = self.__bit_depth
-        is_10bit = bit_depth >= 10
        extra_kwargs = {}
        if isinstance(format, VideoContainer) and format != VideoContainer.AUTO:
            extra_kwargs["format"] = format.value
@ -536,11 +488,10 @@ class VideoFromComponents(VideoInput):

            frame_rate = Fraction(round(self.__components.frame_rate * 1000), 1000)
            # Create a video stream
-            pix_fmt = "yuv420p10le" if is_10bit else "yuv420p"
            video_stream = output.add_stream('h264', rate=frame_rate)
            video_stream.width = self.__components.images.shape[2]
            video_stream.height = self.__components.images.shape[1]
-            video_stream.pix_fmt = pix_fmt
+            video_stream.pix_fmt = 'yuv420p'

            # Create an audio stream
            audio_sample_rate = 1
@ -554,14 +505,9 @@ class VideoFromComponents(VideoInput):

            # Encode video
            for i, frame in enumerate(self.__components.images):
-                if is_10bit:
-                    # 16-bit RGB keeps float precision through the conversion to 10-bit YUV.
-                    img = (frame.float() * 65535).clamp(0, 65535).cpu().numpy().astype(np.uint16)  # shape: (H, W, 3)
-                    frame = av.VideoFrame.from_ndarray(img, format="rgb48le")
-                else:
-                    img = (frame * 255).clamp(0, 255).byte().cpu().numpy() # shape: (H, W, 3)
-                    frame = av.VideoFrame.from_ndarray(img, format='rgb24')
-                frame = frame.reformat(format=pix_fmt)
+                img = (frame * 255).clamp(0, 255).byte().cpu().numpy() # shape: (H, W, 3)
+                frame = av.VideoFrame.from_ndarray(img, format='rgb24')
+                frame = frame.reformat(format='yuv420p')  # Convert to YUV420P as required by h264
                packet = video_stream.encode(frame)
                output.mux(packet)

--- a/comfy_extras/nodes_video.py
+++ b/comfy_extras/nodes_video.py
@ -134,17 +134,6 @@ class CreateVideo(io.ComfyNode):
                io.Image.Input("images", tooltip="The images to create a video from."),
                io.Float.Input("fps", default=30.0, min=1.0, max=120.0, step=1.0),
                io.Audio.Input("audio", optional=True, tooltip="The audio to add to the video."),
-                io.Int.Input(
-                    "bit_depth",
-                    min=8,
-                    max=10,
-                    default=8,
-                    step=2,
-                    tooltip="Bit depth of the created video. 10-bit keeps smoother gradients with less"
-                    " banding, but some players and downstream nodes may not support it.",
-                    optional=True,
-                    display_mode=io.NumberDisplay.number,
-                ),
            ],
            outputs=[
                io.Video.Output(),
@ -152,14 +141,9 @@ class CreateVideo(io.ComfyNode):
        )

    @classmethod
-    def execute(
-        cls, images: Input.Image, fps: float, audio: Optional[Input.Audio] = None, bit_depth: int = 8,
-    ) -> io.NodeOutput:
+    def execute(cls, images: Input.Image, fps: float, audio: Optional[Input.Audio] = None) -> io.NodeOutput:
        return io.NodeOutput(
-            InputImpl.VideoFromComponents(
-                Types.VideoComponents(images=images, audio=audio, frame_rate=Fraction(fps)),
-                bit_depth=bit_depth,
-            )
+            InputImpl.VideoFromComponents(Types.VideoComponents(images=images, audio=audio, frame_rate=Fraction(fps)))
        )

 class GetVideoComponents(io.ComfyNode):
@ -170,7 +154,7 @@ class GetVideoComponents(io.ComfyNode):
            search_aliases=["extract frames", "split video", "video to images", "demux"],
            display_name="Get Video Components",
            category="video",
-            description="Extracts all components from a video: frames, audio, framerate, and bit depth.",
+            description="Extracts all components from a video: frames, audio, and framerate.",
            inputs=[
                io.Video.Input("video", tooltip="The video to extract components from."),
            ],
@ -178,14 +162,13 @@ class GetVideoComponents(io.ComfyNode):
                io.Image.Output(display_name="images"),
                io.Audio.Output(display_name="audio"),
                io.Float.Output(display_name="fps"),
-                io.Int.Output(display_name="bit_depth"),
            ],
        )

    @classmethod
    def execute(cls, video: Input.Video) -> io.NodeOutput:
        components = video.get_components()
-        return io.NodeOutput(components.images, components.audio, float(components.frame_rate), video.get_bit_depth())
+        return io.NodeOutput(components.images, components.audio, float(components.frame_rate))


 class LoadVideo(io.ComfyNode):
--- a/nodes.py
+++ b/nodes.py
@ -20,6 +20,8 @@ from PIL.PngImagePlugin import PngInfo
 import numpy as np
 import safetensors.torch

+sys.path.insert(0, os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy"))
+
 import comfy.diffusers_load
 import comfy.samplers
 import comfy.sample
@ -2293,9 +2295,6 @@ async def init_external_custom_nodes():
    Returns:
        None
    """
-    # TODO: remove at some point when custom nodes don't break.
-    sys.path.insert(0, os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy"))
-
    base_node_names = set(NODE_CLASS_MAPPINGS.keys())
    node_paths = folder_paths.get_folder_paths("custom_nodes")
    node_import_times = []
--- a/openapi.yaml
+++ b/openapi.yaml
@ -896,11 +896,6 @@ components:
                    additionalProperties: true
                    description: The workflow graph to execute
                    type: object
-                prompt_id:
-                    description: Optional client-supplied job id. Must be a UUID in canonical lowercase hyphenated form; it is echoed back in the response. Omitted or null means the server generates one.
-                    format: uuid
-                    nullable: true
-                    type: string
                workflow_id:
                    description: UUID identifying the cloud workflow entity to associate with this job
                    type: string
@ -1800,7 +1795,9 @@ paths:
                        application/json:
                            schema:
                                $ref: '#/components/schemas/ErrorResponse'
-                    description: Invalid request (no fields provided)
+                    description: |
+                        Invalid request — no fields provided, or `preview_id` is the zero UUID
+                        (`INVALID_PREVIEW_ID`).
                "401":
                    content:
                        application/json:
@ -1812,7 +1809,10 @@ paths:
                        application/json:
                            schema:
                                $ref: '#/components/schemas/ErrorResponse'
-                    description: Asset not found
+                    description: |
+                        Asset not found — returned both when the asset being updated does
+                        not exist and when `preview_id` does not reference an asset
+                        accessible to the caller.
                "500":
                    content:
                        application/json:
--- a/server.py
+++ b/server.py
@ -27,7 +27,6 @@ import logging

 import mimetypes
 from comfy.cli_args import args
-from comfy.deploy_environment import get_deploy_environment
 import comfy.utils
 import comfy.model_management
 from comfy_api import feature_flags
@ -691,7 +690,6 @@ class PromptServer():
                    "python_version": sys.version,
                    "pytorch_version": comfy.model_management.torch_version,
                    "embedded_python": os.path.split(os.path.split(sys.executable)[0])[1] == "python_embeded",
-                    "deploy_environment": get_deploy_environment(),
                    "argv": sys.argv
                },
                "devices": device_entries
--- a/tests-unit/comfy_api_test/video_bit_depth_test.py
+++ b/tests-unit/comfy_api_test/video_bit_depth_test.py
@ -1,93 +0,0 @@
-import pytest
-import torch
-import av
-import numpy as np
-from fractions import Fraction
-from comfy_api.latest._input_impl.video_types import VideoFromFile, VideoFromComponents
-from comfy_api.latest._util.video_types import VideoComponents
-
-
-@pytest.fixture(scope="module")
-def gradient_components():
-    """Narrow horizontal ramp (0.25..0.30) that needs more than 8 bits to stay smooth"""
-    width, height, frames = 64, 64, 3
-    ramp = torch.linspace(0.25, 0.30, width).view(1, 1, width, 1).expand(frames, height, width, 3)
-    return VideoComponents(images=ramp.contiguous(), frame_rate=Fraction(30))
-
-
-@pytest.fixture(scope="module")
-def src8(gradient_components, tmp_path_factory):
-    """8-bit h264 mp4 (Create Video default)"""
-    path = str(tmp_path_factory.mktemp("video") / "src8.mp4")
-    VideoFromComponents(gradient_components).save_to(path)
-    return path
-
-
-@pytest.fixture(scope="module")
-def src10(gradient_components, tmp_path_factory):
-    """10-bit h264 mp4 (Create Video with bit_depth=10)"""
-    path = str(tmp_path_factory.mktemp("video") / "src10.mp4")
-    VideoFromComponents(gradient_components, bit_depth=10).save_to(path)
-    return path
-
-
-def probe(path):
-    """(codec, pix_fmt, bit_depth) of the first video stream"""
-    with av.open(path) as container:
-        stream = container.streams.video[0]
-        return (stream.codec.name, stream.format.name, max(c.bits for c in stream.format.components))
-
-
-def decoded_levels(path):
-    """Unique tonal levels in the first decoded frame (banding measure)"""
-    with av.open(path) as container:
-        frame = next(container.decode(container.streams.video[0]))
-        return len(np.unique(frame.to_ndarray(format="gbrpf32le")[..., 0]))
-
-
-def video_packet_bytes(path):
-    """Raw video packet payloads; identical to the source's only for a true remux"""
-    with av.open(path) as container:
-        return [bytes(p) for p in container.demux(container.streams.video[0]) if p.size]
-
-
-def test_create_video_bit_depth(src8, src10):
-    """Create Video's bit_depth picks the encoded depth (default 8-bit); 10-bit reduces banding"""
-    assert probe(src8) == ("h264", "yuv420p", 8)
-    assert probe(src10) == ("h264", "yuv420p10le", 10)
-    assert decoded_levels(src10) > 2 * decoded_levels(src8)
-
-
-def test_save_auto_keeps_source_depth(src8, src10, tmp_path):
-    """Save Video (no bit_depth = auto) stream-copies the source, preserving its depth byte-for-byte"""
-    for name, src in [("p8", src8), ("p10", src10)]:
-        path = str(tmp_path / f"{name}.mp4")
-        VideoFromFile(src).save_to(path)
-        assert probe(path) == probe(src)
-        assert video_packet_bytes(path) == video_packet_bytes(src)
-
-
-def test_save_explicit_depth_reencodes(src8, src10, tmp_path):
-    """An explicit bit_depth different from the source forces a re-encode to that depth"""
-    down = str(tmp_path / "down8.mp4")
-    VideoFromFile(src10).save_to(down, bit_depth=8)
-    assert probe(down) == ("h264", "yuv420p", 8)
-
-    up = str(tmp_path / "up10.mp4")
-    VideoFromFile(src8).save_to(up, bit_depth=10)
-    assert probe(up) == ("h264", "yuv420p10le", 10)
-
-
-def test_trim_keeps_source_depth(src10, tmp_path):
-    """Video Slice re-encodes (trim) but preserves the source's 10-bit depth"""
-    path = str(tmp_path / "trim.mp4")
-    VideoFromFile(src10).as_trimmed(start_time=0, duration=1 / 30, strict_duration=False).save_to(path)
-    assert probe(path) == ("h264", "yuv420p10le", 10)
-
-
-def test_get_bit_depth(gradient_components, src8, src10):
-    """get_bit_depth reports a video's depth (backs the Get Video Components output)"""
-    assert VideoFromFile(src8).get_bit_depth() == 8
-    assert VideoFromFile(src10).get_bit_depth() == 10
-    assert VideoFromComponents(gradient_components, bit_depth=10).get_bit_depth() == 10
-    assert VideoFromComponents(gradient_components).get_bit_depth() == 8