fix(Video): don't crash on videos with undecodable audio streams

Signed-off-by: bigcat88 <bigcat88@icloud.com>
2026-07-05 12:06:48 +08:00 · 2026-07-03 16:30:15 +03:00
9 changed files with 47 additions and 79 deletions
--- a/.coderabbit.yaml
+++ b/.coderabbit.yaml
@ -4,12 +4,12 @@ early_access: false
 tone_instructions: "Only comment on issues introduced by this PR's changes. Do not flag pre-existing problems in moved, re-indented, or reformatted code."

 reviews:
-  profile: "assertive"
-  request_changes_workflow: true
+  profile: "chill"
+  request_changes_workflow: false
  high_level_summary: false
  poem: false
  review_status: false
-  review_details: true
+  review_details: false
  commit_status: true
  collapse_walkthrough: true
  changed_files_summary: false
@ -39,14 +39,6 @@ reviews:
    - path: "**"
      instructions: |
        IMPORTANT: Only comment on issues directly introduced by this PR's code changes.
-        Treat AGENTS.md as mandatory repository policy, not optional style guidance.
-        Flag PR changes that violate AGENTS.md even when the code is otherwise functional.
-        In particular, enforce architecture boundaries, dtype/device/memory rules,
-        interface contracts, import style, no unnecessary try/except blocks, no inline
-        imports, no outbound internet paths in core ComfyUI, and narrow scoped fixes.
-        Prefer direct findings over suggestions when a rule is violated. Only ignore
-        AGENTS.md when it clearly conflicts with a newer explicit maintainer instruction
-        in the PR.
        Do NOT flag pre-existing issues in code that was merely moved, re-indented,
        de-indented, or reformatted without logic changes. If code appears in the diff
        only due to whitespace or structural reformatting (e.g., removing a `with:` block),
@ -131,10 +123,5 @@ chat:

 knowledge_base:
  opt_out: false
-  code_guidelines:
-    enabled: true
-    filePatterns:
-      - files: "AGENTS.md"
-        applyTo: "**"
  learnings:
    scope: "auto"
--- a/AGENTS.md
+++ b/AGENTS.md
@ -171,9 +171,6 @@
 - Reuse existing model classes, blocks, ops, and helper modules when appropriate.
  Before implementing a new version of a model component, search the existing
  model code for a class or helper that already provides the behavior.
- Model detection code that inspects linear weight shapes should only use the
-  first dimension. The second dimension may be half the original size for
-  NVFP4 or other 4-bit quantized models.
 - Avoid adding `einops` usage in core inference code. Use native torch tensor
  ops such as `reshape`, `view`, `permute`, `transpose`, `flatten`, `unflatten`,
  `unsqueeze`, and `squeeze` instead.
--- a/CLAUDE.md
+++ b/CLAUDE.md
@ -1 +0,0 @@
-AGENTS.md
--- a/comfy/sd1_clip.py
+++ b/comfy/sd1_clip.py
@ -543,24 +543,18 @@ class SDTokenizer:
    def _try_get_embedding(self, embedding_name:str):
        '''
        Takes a potential embedding name and tries to retrieve it.
-        Returns a Tuple consisting of the embedding, the cleaned embedding name, and any leftover string, embedding can be None.
+        Returns a tuple consisting of the embedding and any leftover string; the embedding can be None.
        '''
        split_embed = embedding_name.split()
        embedding_name = split_embed[0]
        leftover = ' '.join(split_embed[1:])
-
-        match = re.search(r'[<\[]', embedding_name)
-        if match is not None:
-            leftover = embedding_name[match.start():] + (" " + leftover if leftover else "")
-            embedding_name = embedding_name[:match.start()]
-
        embed = load_embed(embedding_name, self.embedding_directory, self.embedding_size, self.embedding_key)
        if embed is None:
            stripped = embedding_name.strip(',')
            if len(stripped) < len(embedding_name):
                embed = load_embed(stripped, self.embedding_directory, self.embedding_size, self.embedding_key)
-                return (embed, embedding_name, "{} {}".format(embedding_name[len(stripped):], leftover))
-        return (embed, embedding_name, leftover)
+                return (embed, "{} {}".format(embedding_name[len(stripped):], leftover))
+        return (embed, leftover)

    def pad_tokens(self, tokens, amount):
        if self.pad_left:
@ -591,7 +585,7 @@ class SDTokenizer:
        tokens = []
        for weighted_segment, weight in parsed_weights:
            to_tokenize = unescape_important(weighted_segment)
-            split = re.split(r'(?<=\s){}'.format(re.escape(self.embedding_identifier)), to_tokenize)
+            split = re.split(' {0}|\n{0}'.format(self.embedding_identifier), to_tokenize)
            to_tokenize = [split[0]]
            for i in range(1, len(split)):
                to_tokenize.append("{}{}".format(self.embedding_identifier, split[i]))
@ -601,7 +595,7 @@ class SDTokenizer:
                # if we find an embedding, deal with the embedding
                if word.startswith(self.embedding_identifier) and self.embedding_directory is not None:
                    embedding_name = word[len(self.embedding_identifier):].strip('\n')
-                    embed, embedding_name, leftover = self._try_get_embedding(embedding_name)
+                    embed, leftover = self._try_get_embedding(embedding_name)
                    if embed is None:
                        logging.warning(f"warning, embedding:{embedding_name} does not exist, ignoring")
                    else:
--- a/comfy/text_encoders/llama.py
+++ b/comfy/text_encoders/llama.py
@ -937,41 +937,22 @@ class BaseGenerate:
            return torch.argmax(logits, dim=-1, keepdim=True)

        # Sampling mode
-        if len(token_history) > 0 and (repetition_penalty != 1.0 or (presence_penalty is not None and presence_penalty != 0.0)):
-            token_ids = torch.tensor(list(set(token_history)), device=logits.device)
-            token_logits = logits[:, token_ids]
-            if repetition_penalty != 1.0:
-                token_logits = torch.where(token_logits < 0, token_logits * repetition_penalty, token_logits / repetition_penalty)
-            if presence_penalty is not None and presence_penalty != 0.0:
-                token_logits = token_logits - presence_penalty
-            logits[:, token_ids] = token_logits
+        if repetition_penalty != 1.0:
+            for i in range(logits.shape[0]):
+                for token_id in set(token_history):
+                    logits[i, token_id] *= repetition_penalty if logits[i, token_id] < 0 else 1/repetition_penalty
+
+        if presence_penalty is not None and presence_penalty != 0.0:
+            for i in range(logits.shape[0]):
+                for token_id in set(token_history):
+                    logits[i, token_id] -= presence_penalty

        if temperature != 1.0:
            logits = logits / temperature

        if top_k > 0:
-            top_k = min(top_k, logits.shape[-1])
-            logits, top_indices = torch.topk(logits, top_k)
-
-            if min_p > 0.0:
-                probs_before_filter = torch.nn.functional.softmax(logits, dim=-1)
-                top_probs, _ = probs_before_filter.max(dim=-1, keepdim=True)
-                min_threshold = min_p * top_probs
-                indices_to_remove = probs_before_filter < min_threshold
-                logits[indices_to_remove] = torch.finfo(logits.dtype).min
-
-            if top_p < 1.0:
-                sorted_logits, sorted_indices = torch.sort(logits, descending=True)
-                cumulative_probs = torch.cumsum(torch.nn.functional.softmax(sorted_logits, dim=-1), dim=-1)
-                sorted_indices_to_remove = cumulative_probs > top_p
-                sorted_indices_to_remove[..., 0] = False
-                indices_to_remove = torch.zeros_like(logits, dtype=torch.bool)
-                indices_to_remove.scatter_(1, sorted_indices, sorted_indices_to_remove)
-                logits[indices_to_remove] = torch.finfo(logits.dtype).min
-
-            probs = torch.nn.functional.softmax(logits, dim=-1)
-            next_token = torch.multinomial(probs, num_samples=1, generator=generator)
-            return top_indices.gather(1, next_token)
+            indices_to_remove = logits < torch.topk(logits, top_k)[0][..., -1, None]
+            logits[indices_to_remove] = torch.finfo(logits.dtype).min

        if min_p > 0.0:
            probs_before_filter = torch.nn.functional.softmax(logits, dim=-1)
--- a/comfy_api/latest/_input_impl/video_types.py
+++ b/comfy_api/latest/_input_impl/video_types.py
@ -281,11 +281,18 @@ class VideoFromFile(VideoInput):
        video_done = False
        audio_done = True

-        if len(container.streams.audio):
-            audio_stream = container.streams.audio[-1]
+        # Use the last decodable audio stream. Streams that FFmpeg has no decoder for have no codec context,
+        # and decoding their packets crashes the process (e.g. the APAC spatial-audio track in iPhone videos).
+        audio_stream = next(
+            (s for s in reversed(container.streams.audio) if s.codec_context is not None),
+            None,
+        )
+        if audio_stream is not None:
            streams += [audio_stream]
            resampler = av.audio.resampler.AudioResampler(format='fltp')
            audio_done = False
+        elif len(container.streams.audio):
+            logging.warning("No decodable audio stream found in video; ignoring audio.")

        for packet in container.demux(*streams):
            if video_done and audio_done:
@ -457,10 +464,13 @@ class VideoFromFile(VideoInput):
                        else:
                            output_container.metadata[key] = json.dumps(value)

-                # Add streams to the new container
+                # Add streams to the new container. Streams with no codec context cannot be used as an output template.
                stream_map = {}
                for stream in streams:
                    if isinstance(stream, (av.VideoStream, av.AudioStream, SubtitleStream)):
+                        if stream.codec_context is None:
+                            logging.warning("Skipping %s stream %d with unsupported codec", stream.type, stream.index)
+                            continue
                        out_stream = output_container.add_stream_from_template(template=stream, opaque=True)
                        stream_map[stream] = out_stream

--- a/comfy_api_nodes/nodes_bytedance.py
+++ b/comfy_api_nodes/nodes_bytedance.py
@ -2611,7 +2611,7 @@ class ByteDanceSeedAudioNode(IO.ComfyNode):
        return IO.Schema(
            node_id="ByteDanceSeedAudio",
            display_name="ByteDance Seed Audio 1.0",
-            category="partner/audio/ByteDance",
+            category="api node/audio/ByteDance",
            description=(
                "Generate speech, music, sound effects and multi-speaker dialogue from a single prompt "
                "with ByteDance Seed Audio 1.0. Describe the voice(s), emotion, ambience, background music "
--- a/comfy_api_nodes/util/upload_helpers.py
+++ b/comfy_api_nodes/util/upload_helpers.py
@ -158,7 +158,14 @@ async def upload_video_to_comfyapi(

    # Convert VideoInput to BytesIO using specified container/codec
    video_bytes_io = BytesIO()
-    video.save_to(video_bytes_io, format=container, codec=codec)
+    try:
+        video.save_to(video_bytes_io, format=container, codec=codec)
+    except Exception as e:
+        raise ValueError(
+            f"Could not convert the input video to {container.value.upper()} for upload; "
+            f"the file may be corrupt or use an unsupported codec. "
+            f"Try re-exporting it as MP4 (H.264). Original error: {e}"
+        ) from e
    video_bytes_io.seek(0)

    return await upload_file_to_comfyapi(cls, video_bytes_io, filename, upload_mime_type, wait_label)
--- a/comfy_extras/nodes_color.py
+++ b/comfy_extras/nodes_color.py
@ -16,30 +16,23 @@ class ColorToRGBInt(io.ComfyNode):
            ],
            outputs=[
                io.Int.Output(display_name="rgb_int"),
-                io.Color.Output(display_name="hex"),
-                io.Float.Output(display_name="alpha"),
+                io.Color.Output(display_name="hex")
            ],
        )

    @classmethod
    def execute(cls, color: str) -> io.NodeOutput:
-        # expect format #RRGGBB or #RRGGBBAA
-        if len(color) not in (7, 9) or color[0] != "#":
-            raise ValueError("Color must be in format #RRGGBB or #RRGGBBAA")
+        # expect format #RRGGBB
+        if len(color) != 7 or color[0] != "#":
+            raise ValueError("Color must be in format #RRGGBB")
        try:
            int(color[1:], 16)
        except ValueError:
-            raise ValueError("Color must be in format #RRGGBB or #RRGGBBAA") from None
-
-        alpha = 1.0
-        if len(color) == 9:
-            alpha = int(color[7:9], 16) / 255.0
-            color = color[:7]
-
+            raise ValueError("Color must be in format #RRGGBB") from None
        r, g, b = hex_to_rgb(color)

        rgb_int = r * 256 * 256 + g * 256 + b
-        return io.NodeOutput(rgb_int, color, alpha)
+        return io.NodeOutput(rgb_int, color)


 class ColorExtension(ComfyExtension):