Compare commits

..

1 Commits

Author SHA1 Message Date
15f94d06e9 fix(Video): don't crash on videos with undecodable audio streams
Signed-off-by: bigcat88 <bigcat88@icloud.com>
2026-07-03 16:30:15 +03:00
9 changed files with 47 additions and 79 deletions

View File

@ -4,12 +4,12 @@ early_access: false
tone_instructions: "Only comment on issues introduced by this PR's changes. Do not flag pre-existing problems in moved, re-indented, or reformatted code."
reviews:
profile: "assertive"
request_changes_workflow: true
profile: "chill"
request_changes_workflow: false
high_level_summary: false
poem: false
review_status: false
review_details: true
review_details: false
commit_status: true
collapse_walkthrough: true
changed_files_summary: false
@ -39,14 +39,6 @@ reviews:
- path: "**"
instructions: |
IMPORTANT: Only comment on issues directly introduced by this PR's code changes.
Treat AGENTS.md as mandatory repository policy, not optional style guidance.
Flag PR changes that violate AGENTS.md even when the code is otherwise functional.
In particular, enforce architecture boundaries, dtype/device/memory rules,
interface contracts, import style, no unnecessary try/except blocks, no inline
imports, no outbound internet paths in core ComfyUI, and narrow scoped fixes.
Prefer direct findings over suggestions when a rule is violated. Only ignore
AGENTS.md when it clearly conflicts with a newer explicit maintainer instruction
in the PR.
Do NOT flag pre-existing issues in code that was merely moved, re-indented,
de-indented, or reformatted without logic changes. If code appears in the diff
only due to whitespace or structural reformatting (e.g., removing a `with:` block),
@ -131,10 +123,5 @@ chat:
knowledge_base:
opt_out: false
code_guidelines:
enabled: true
filePatterns:
- files: "AGENTS.md"
applyTo: "**"
learnings:
scope: "auto"

View File

@ -171,9 +171,6 @@
- Reuse existing model classes, blocks, ops, and helper modules when appropriate.
Before implementing a new version of a model component, search the existing
model code for a class or helper that already provides the behavior.
- Model detection code that inspects linear weight shapes should only use the
first dimension. The second dimension may be half the original size for
NVFP4 or other 4-bit quantized models.
- Avoid adding `einops` usage in core inference code. Use native torch tensor
ops such as `reshape`, `view`, `permute`, `transpose`, `flatten`, `unflatten`,
`unsqueeze`, and `squeeze` instead.

View File

@ -1 +0,0 @@
AGENTS.md

View File

@ -543,24 +543,18 @@ class SDTokenizer:
def _try_get_embedding(self, embedding_name:str):
'''
Takes a potential embedding name and tries to retrieve it.
Returns a tuple consisting of the embedding, the cleaned embedding name, and any leftover string; the embedding can be None.
Returns a tuple consisting of the embedding and any leftover string; the embedding can be None.
'''
split_embed = embedding_name.split()
embedding_name = split_embed[0]
leftover = ' '.join(split_embed[1:])
match = re.search(r'[<\[]', embedding_name)
if match is not None:
leftover = embedding_name[match.start():] + (" " + leftover if leftover else "")
embedding_name = embedding_name[:match.start()]
embed = load_embed(embedding_name, self.embedding_directory, self.embedding_size, self.embedding_key)
if embed is None:
stripped = embedding_name.strip(',')
if len(stripped) < len(embedding_name):
embed = load_embed(stripped, self.embedding_directory, self.embedding_size, self.embedding_key)
return (embed, embedding_name, "{} {}".format(embedding_name[len(stripped):], leftover))
return (embed, embedding_name, leftover)
return (embed, "{} {}".format(embedding_name[len(stripped):], leftover))
return (embed, leftover)
def pad_tokens(self, tokens, amount):
if self.pad_left:
@ -591,7 +585,7 @@ class SDTokenizer:
tokens = []
for weighted_segment, weight in parsed_weights:
to_tokenize = unescape_important(weighted_segment)
split = re.split(r'(?<=\s){}'.format(re.escape(self.embedding_identifier)), to_tokenize)
split = re.split(' {0}|\n{0}'.format(self.embedding_identifier), to_tokenize)
to_tokenize = [split[0]]
for i in range(1, len(split)):
to_tokenize.append("{}{}".format(self.embedding_identifier, split[i]))
@ -601,7 +595,7 @@ class SDTokenizer:
# if we find an embedding, deal with the embedding
if word.startswith(self.embedding_identifier) and self.embedding_directory is not None:
embedding_name = word[len(self.embedding_identifier):].strip('\n')
embed, embedding_name, leftover = self._try_get_embedding(embedding_name)
embed, leftover = self._try_get_embedding(embedding_name)
if embed is None:
logging.warning(f"warning, embedding:{embedding_name} does not exist, ignoring")
else:

View File

@ -937,41 +937,22 @@ class BaseGenerate:
return torch.argmax(logits, dim=-1, keepdim=True)
# Sampling mode
if len(token_history) > 0 and (repetition_penalty != 1.0 or (presence_penalty is not None and presence_penalty != 0.0)):
token_ids = torch.tensor(list(set(token_history)), device=logits.device)
token_logits = logits[:, token_ids]
if repetition_penalty != 1.0:
token_logits = torch.where(token_logits < 0, token_logits * repetition_penalty, token_logits / repetition_penalty)
if presence_penalty is not None and presence_penalty != 0.0:
token_logits = token_logits - presence_penalty
logits[:, token_ids] = token_logits
if repetition_penalty != 1.0:
for i in range(logits.shape[0]):
for token_id in set(token_history):
logits[i, token_id] *= repetition_penalty if logits[i, token_id] < 0 else 1/repetition_penalty
if presence_penalty is not None and presence_penalty != 0.0:
for i in range(logits.shape[0]):
for token_id in set(token_history):
logits[i, token_id] -= presence_penalty
if temperature != 1.0:
logits = logits / temperature
if top_k > 0:
top_k = min(top_k, logits.shape[-1])
logits, top_indices = torch.topk(logits, top_k)
if min_p > 0.0:
probs_before_filter = torch.nn.functional.softmax(logits, dim=-1)
top_probs, _ = probs_before_filter.max(dim=-1, keepdim=True)
min_threshold = min_p * top_probs
indices_to_remove = probs_before_filter < min_threshold
logits[indices_to_remove] = torch.finfo(logits.dtype).min
if top_p < 1.0:
sorted_logits, sorted_indices = torch.sort(logits, descending=True)
cumulative_probs = torch.cumsum(torch.nn.functional.softmax(sorted_logits, dim=-1), dim=-1)
sorted_indices_to_remove = cumulative_probs > top_p
sorted_indices_to_remove[..., 0] = False
indices_to_remove = torch.zeros_like(logits, dtype=torch.bool)
indices_to_remove.scatter_(1, sorted_indices, sorted_indices_to_remove)
logits[indices_to_remove] = torch.finfo(logits.dtype).min
probs = torch.nn.functional.softmax(logits, dim=-1)
next_token = torch.multinomial(probs, num_samples=1, generator=generator)
return top_indices.gather(1, next_token)
indices_to_remove = logits < torch.topk(logits, top_k)[0][..., -1, None]
logits[indices_to_remove] = torch.finfo(logits.dtype).min
if min_p > 0.0:
probs_before_filter = torch.nn.functional.softmax(logits, dim=-1)

View File

@ -281,11 +281,18 @@ class VideoFromFile(VideoInput):
video_done = False
audio_done = True
if len(container.streams.audio):
audio_stream = container.streams.audio[-1]
# Use the last decodable audio stream. Streams that FFmpeg has no decoder for have no codec context,
# and decoding their packets crashes the process (e.g. the APAC spatial-audio track in iPhone videos).
audio_stream = next(
(s for s in reversed(container.streams.audio) if s.codec_context is not None),
None,
)
if audio_stream is not None:
streams += [audio_stream]
resampler = av.audio.resampler.AudioResampler(format='fltp')
audio_done = False
elif len(container.streams.audio):
logging.warning("No decodable audio stream found in video; ignoring audio.")
for packet in container.demux(*streams):
if video_done and audio_done:
@ -457,10 +464,13 @@ class VideoFromFile(VideoInput):
else:
output_container.metadata[key] = json.dumps(value)
# Add streams to the new container
# Add streams to the new container. Streams with no codec context cannot be used as an output template.
stream_map = {}
for stream in streams:
if isinstance(stream, (av.VideoStream, av.AudioStream, SubtitleStream)):
if stream.codec_context is None:
logging.warning("Skipping %s stream %d with unsupported codec", stream.type, stream.index)
continue
out_stream = output_container.add_stream_from_template(template=stream, opaque=True)
stream_map[stream] = out_stream

View File

@ -2611,7 +2611,7 @@ class ByteDanceSeedAudioNode(IO.ComfyNode):
return IO.Schema(
node_id="ByteDanceSeedAudio",
display_name="ByteDance Seed Audio 1.0",
category="partner/audio/ByteDance",
category="api node/audio/ByteDance",
description=(
"Generate speech, music, sound effects and multi-speaker dialogue from a single prompt "
"with ByteDance Seed Audio 1.0. Describe the voice(s), emotion, ambience, background music "

View File

@ -158,7 +158,14 @@ async def upload_video_to_comfyapi(
# Convert VideoInput to BytesIO using specified container/codec
video_bytes_io = BytesIO()
video.save_to(video_bytes_io, format=container, codec=codec)
try:
video.save_to(video_bytes_io, format=container, codec=codec)
except Exception as e:
raise ValueError(
f"Could not convert the input video to {container.value.upper()} for upload; "
f"the file may be corrupt or use an unsupported codec. "
f"Try re-exporting it as MP4 (H.264). Original error: {e}"
) from e
video_bytes_io.seek(0)
return await upload_file_to_comfyapi(cls, video_bytes_io, filename, upload_mime_type, wait_label)

View File

@ -16,30 +16,23 @@ class ColorToRGBInt(io.ComfyNode):
],
outputs=[
io.Int.Output(display_name="rgb_int"),
io.Color.Output(display_name="hex"),
io.Float.Output(display_name="alpha"),
io.Color.Output(display_name="hex")
],
)
@classmethod
def execute(cls, color: str) -> io.NodeOutput:
# expect format #RRGGBB or #RRGGBBAA
if len(color) not in (7, 9) or color[0] != "#":
raise ValueError("Color must be in format #RRGGBB or #RRGGBBAA")
# expect format #RRGGBB
if len(color) != 7 or color[0] != "#":
raise ValueError("Color must be in format #RRGGBB")
try:
int(color[1:], 16)
except ValueError:
raise ValueError("Color must be in format #RRGGBB or #RRGGBBAA") from None
alpha = 1.0
if len(color) == 9:
alpha = int(color[7:9], 16) / 255.0
color = color[:7]
raise ValueError("Color must be in format #RRGGBB") from None
r, g, b = hex_to_rgb(color)
rgb_int = r * 256 * 256 + g * 256 + b
return io.NodeOutput(rgb_int, color, alpha)
return io.NodeOutput(rgb_int, color)
class ColorExtension(ComfyExtension):