Compare commits

..

15 Commits

Author SHA1 Message Date
85b64737d5 feat: Load3DAdvanced stores uploads under input/3d 2026-06-15 08:28:50 -04:00
3c0aaff725 feat: add Load3DAdvanced node 2026-06-15 08:05:55 -04:00
7d4194d984 chore: update embedded docs to v0.5.4 (#14478) 2026-06-15 16:35:36 +08:00
4388eb781a This is already auto enabled by default. (#14476) 2026-06-14 18:47:22 -07:00
e1b9366898 bump manager version to 4.2.2 (#14471) 2026-06-14 14:42:03 -04:00
5897d0c3ae [Partner Nodes] feat(Tripo3d): add new "Import 3D" node (#14466)
Signed-off-by: bigcat88 <bigcat88@icloud.com>
2026-06-14 17:19:20 +03:00
a1d95f3f82 Fix nondeterministic video decode at unaligned widths (CORE-299) (#14438) 2026-06-14 08:58:48 +08:00
64cc078069 Revert last commit. Last time I use this stupid GitHub app. 2026-06-13 12:50:31 -07:00
740d347279 Remove the comfy python path append. 2026-06-13 12:47:04 -07:00
b664349ae7 Expose deploy_environment in /system_stats (#14402) 2026-06-13 22:15:49 +08:00
fe54b5e955 Add 10-bit video support (#14452)
Create Video gets a bit_depth option (8-bit/10-bit); the selected depth is carried by the video and applied when it gets encoded. Save Video and Video Slice now keep the source bit depth instead of always quantizing to 8-bit, so 10-bit videos stay 10-bit. 10-bit uses h264 with the yuv420p10le pixel format,so there's no new codec or container.

Signed-off-by: bigcat88 <bigcat88@icloud.com>
2026-06-13 16:05:25 +03:00
7277d99d3a Use comfy kitchen apply rope in omnigen2 model. (#14442) 2026-06-13 09:38:39 +08:00
28a40fb2b2 [Partner Nodes] feat: add Runway Aleph2 node (#14306)
Signed-off-by: bigcat88 <bigcat88@icloud.com>
2026-06-12 10:17:11 -07:00
d7a552720c add --high-ram option (#14437)
Add this option for users who know they have so much ram they want
to pin everything or have a pagefile that outruns their disk speed.

The removes the RAM pressure caps completely and pins behind the
primary model load forcing all models to be permanently comitted
to RAM.
2026-06-12 07:53:33 -07:00
02656ea0bb Fix potential dtype issue with ideogram 4. (#14436) 2026-06-12 07:51:12 -07:00
19 changed files with 870 additions and 71 deletions

View File

@ -97,16 +97,3 @@ jobs:
torch_version: ${{ matrix.torch_version }}
google_credentials: ${{ secrets.GCS_SERVICE_ACCOUNT_JSON }}
comfyui_flags: ${{ matrix.flags }}
notify-failure:
needs: [test-stable, test-unix-nightly]
if: ${{ failure() && github.event_name == 'push' }}
runs-on: ubuntu-latest
steps:
- name: Notify Slack of CI failure
uses: slackapi/slack-github-action@v2.1.0
with:
webhook: ${{ secrets.CI_ALERTS_SLACK_WEBHOOK }}
webhook-type: incoming-webhook
payload: |
text: ":rotating_siren: ComfyUI CI failed on `${{ github.ref_name }}`\n*Commit:* <${{ github.server_url }}/${{ github.repository }}/commit/${{ github.sha }}|${{ github.sha }}>\n*Run:* <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|view logs>\n*Dashboard:* <https://ci.comfy.org/?branch=${{ github.ref_name }}|ci.comfy.org>"

View File

@ -382,11 +382,7 @@ For AMD 7600 and maybe other RDNA3 cards: ```HSA_OVERRIDE_GFX_VERSION=11.0.0 pyt
### AMD ROCm Tips
You can enable experimental memory efficient attention on recent pytorch in ComfyUI on some AMD GPUs using this command, it should already be enabled by default on RDNA3. If this improves speed for you on latest pytorch on your GPU please report it so that I can enable it by default.
```TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1 python main.py --use-pytorch-cross-attention```
You can also try setting this env variable `PYTORCH_TUNABLEOP_ENABLED=1` which might speed things up at the cost of a very slow initial run.
You can try setting this env variable `PYTORCH_TUNABLEOP_ENABLED=1` which might speed things up at the cost of a very slow initial run.
# Notes

View File

@ -115,6 +115,7 @@ cache_group.add_argument("--cache-ram", nargs='*', type=float, default=[], metav
cache_group.add_argument("--cache-classic", action="store_true", help="Use the old style (aggressive) caching.")
cache_group.add_argument("--cache-lru", type=int, default=0, help="Use LRU caching with a maximum of N node results cached. May use more RAM/VRAM.")
cache_group.add_argument("--cache-none", action="store_true", help="Reduced RAM/VRAM usage at the expense of executing every node for each run.")
cache_group.add_argument("--high-ram", action="store_true", help="Can improve performance slightly on high RAM or on systems where pagefile use is preferred over model loading.")
attn_group = parser.add_mutually_exclusive_group()
attn_group.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization. Ignored when xformers is used.")
@ -249,6 +250,9 @@ else:
if args.cache_ram is not None and len(args.cache_ram) > 2:
parser.error("--cache-ram accepts at most two values: active GB and inactive GB")
if args.high_ram:
args.cache_classic = True
if args.windows_standalone_build:
args.auto_launch = True

View File

@ -106,11 +106,11 @@ class Ideogram4EmbedScalar(nn.Module):
self.mlp_in = operations.Linear(dim, dim, bias=True, dtype=dtype, device=device)
self.mlp_out = operations.Linear(dim, dim, bias=True, dtype=dtype, device=device)
def forward(self, x):
def forward(self, x, dtype):
x = x.to(torch.float32)
scaled = 1e4 * (x - self.range_min) / (self.range_max - self.range_min)
emb = _sinusoidal_embedding(scaled, self.dim)
emb = emb.to(self.mlp_in.weight.dtype)
emb = emb.to(dtype)
emb = F.silu(self.mlp_in(emb))
return self.mlp_out(emb)
@ -161,7 +161,7 @@ class Ideogram4Transformer(nn.Module):
x = x * output_image_mask
h = self.input_proj(x) * output_image_mask
t_cond = self.t_embedding(t)
t_cond = self.t_embedding(t, dtype=x.dtype)
if t.dim() == 1:
t_cond = t_cond.unsqueeze(1)
adaln_input = F.silu(self.adaln_proj(t_cond))

View File

@ -8,6 +8,7 @@ import torch.nn.functional as F
from einops import rearrange, repeat
from comfy.ldm.lightricks.model import Timesteps
from comfy.ldm.flux.layers import EmbedND
from comfy.ldm.flux.math import apply_rope1
from comfy.ldm.modules.attention import optimized_attention_masked
import comfy.model_management
import comfy.ldm.common_dit
@ -17,9 +18,7 @@ def apply_rotary_emb(x, freqs_cis):
if x.shape[1] == 0:
return x
t_ = x.reshape(*x.shape[:-1], -1, 1, 2)
t_out = freqs_cis[..., 0] * t_[..., 0] + freqs_cis[..., 1] * t_[..., 1]
return t_out.reshape(*x.shape).to(dtype=x.dtype)
return apply_rope1(x, freqs_cis)
def swiglu(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:

View File

@ -643,6 +643,8 @@ def free_pins(size, evict_active=False):
return freed_total
def ensure_pin_budget(size, evict_active=False):
if args.high_ram:
return True
if args.fast_disk:
shortfall = TOTAL_PINNED_MEMORY + size - MAX_PINNED_MEMORY
else:
@ -1496,6 +1498,8 @@ if not args.disable_pinned_memory:
PINNING_ALLOWED_TYPES = set(["Tensor", "Parameter", "QuantizedTensor"])
def pinned_hostbuf_size(size):
if args.high_ram:
return max(0, int(size * 2))
return max(0, int(min(size, MAX_PINNED_MEMORY) * 2))
def discard_cuda_async_error():

View File

@ -180,7 +180,7 @@ def cast_modules_with_vbar(comfy_modules, dtype, device, bias_dtype, non_blockin
if pin is not None:
cast_maybe_lowvram_patch([pin], dest, offload_stream)
return
if signature is None:
if signature is None or args.high_ram:
comfy.pinned_memory.pin_memory(m, subset=subset, size=size)
pin = comfy.pinned_memory.get_pin(m, subset=subset)
cast_maybe_lowvram_patch(source, pin, offload_stream, xfer_dest2=dest)

View File

@ -27,10 +27,13 @@ class VideoInput(ABC):
path: Union[str, IO[bytes]],
format: VideoContainer = VideoContainer.AUTO,
codec: VideoCodec = VideoCodec.AUTO,
metadata: Optional[dict] = None
metadata: Optional[dict] = None,
bit_depth: int | None = None,
):
"""
Abstract method to save the video input to a file.
bit_depth selects the encoded bit depth; None keeps the video's native depth.
"""
pass
@ -83,6 +86,14 @@ class VideoInput(ABC):
components = self.get_components()
return components.images.shape[2], components.images.shape[1]
def get_bit_depth(self) -> int:
"""
Returns the bit depth of the video (e.g. 8 or 10).
Default implementation returns 8; subclasses report their real depth.
"""
return 8
def get_duration(self) -> float:
"""
Returns the duration of the video in seconds.

View File

@ -52,6 +52,12 @@ def get_open_write_kwargs(
return open_kwargs
def video_stream_bit_depth(stream) -> int:
if stream is None or stream.format is None or not stream.format.components:
return 8
return max(component.bits for component in stream.format.components)
class VideoFromFile(VideoInput):
"""
Class representing video input from a file.
@ -97,6 +103,13 @@ class VideoFromFile(VideoInput):
return stream.width, stream.height
raise ValueError(f"No video stream found in file '{self.__file}'")
def get_bit_depth(self) -> int:
if isinstance(self.__file, io.BytesIO):
self.__file.seek(0) # Reset the BytesIO object to the beginning
with av.open(self.__file, mode="r") as container:
video_stream = container.streams.video[0] if len(container.streams.video) > 0 else None
return video_stream_bit_depth(video_stream)
def get_duration(self) -> float:
"""
Returns the duration of the video in seconds.
@ -257,6 +270,7 @@ class VideoFromFile(VideoInput):
image_format = 'gbrpf32le'
process_image_format = lambda a: a
align_graph = None
audio = None
streams = [video_stream]
@ -310,7 +324,24 @@ class VideoFromFile(VideoInput):
checked_alpha = True
img = frame.to_ndarray(format=image_format) # shape: (H, W, 4)
# Fix non-deterministic video decode when the video width is not a multiple of 32
# For non-yuvj pixel formats (all H.264/H.265 video)
if image_format in ('gbrpf32le', 'gbrapf32le') and frame.width % 32 != 0:
if align_graph is None:
pad_w = ((frame.width + 31) // 32) * 32
g = av.filter.Graph()
g_src = g.add_buffer(width=frame.width, height=frame.height,
format=frame.format.name, time_base=video_stream.time_base)
g_pad = g.add('pad', f'{pad_w}:{frame.height}:0:0')
g_sink = g.add('buffersink')
g_src.link_to(g_pad)
g_pad.link_to(g_sink)
g.configure()
align_graph = (g, g_src, g_sink)
align_graph[1].push(frame)
img = np.ascontiguousarray(align_graph[2].pull().to_ndarray(format=image_format)[:, :frame.width])
else:
img = frame.to_ndarray(format=image_format)
if frame.rotation != 0:
k = int(round(frame.rotation // 90))
img = np.rot90(img, k=k, axes=(0, 1)).copy()
@ -377,25 +408,32 @@ class VideoFromFile(VideoInput):
format: VideoContainer = VideoContainer.AUTO,
codec: VideoCodec = VideoCodec.AUTO,
metadata: Optional[dict] = None,
bit_depth: int | None = None,
):
if isinstance(self.__file, io.BytesIO):
self.__file.seek(0) # Reset the BytesIO object to the beginning
with av.open(self.__file, mode='r') as container:
container_format = container.format.name
video_encoding = container.streams.video[0].codec.name if len(container.streams.video) > 0 else None
video_stream = container.streams.video[0] if len(container.streams.video) > 0 else None
video_encoding = video_stream.codec.name if video_stream is not None else None
source_bit_depth = video_stream_bit_depth(video_stream)
reuse_streams = True
if format != VideoContainer.AUTO and format not in container_format.split(","):
reuse_streams = False
if codec != VideoCodec.AUTO and codec != video_encoding and video_encoding is not None:
reuse_streams = False
if bit_depth is not None and video_encoding is not None and bit_depth != source_bit_depth:
reuse_streams = False
if self.__start_time or self.__duration:
reuse_streams = False
if not reuse_streams:
if bit_depth is None:
bit_depth = source_bit_depth
components = self.get_components_internal(container)
video = VideoFromComponents(components)
return video.save_to(
path, format=format, codec=codec, metadata=metadata
path, format=format, codec=codec, metadata=metadata, bit_depth=bit_depth,
)
streams = container.streams
@ -451,8 +489,10 @@ class VideoFromComponents(VideoInput):
Class representing video input from tensors.
"""
def __init__(self, components: VideoComponents):
def __init__(self, components: VideoComponents, bit_depth: int = 8):
self.__components = components
# Tensor components have no inherent bit depth; this is the depth used when encoding.
self.__bit_depth = bit_depth
def get_components(self) -> VideoComponents:
return VideoComponents(
@ -461,18 +501,26 @@ class VideoFromComponents(VideoInput):
frame_rate=self.__components.frame_rate,
)
def get_bit_depth(self) -> int:
return self.__bit_depth
def save_to(
self,
path: str,
format: VideoContainer = VideoContainer.AUTO,
codec: VideoCodec = VideoCodec.AUTO,
metadata: Optional[dict] = None,
bit_depth: int | None = None,
):
"""Save the video to a file path or BytesIO buffer."""
if format != VideoContainer.AUTO and format != VideoContainer.MP4:
raise ValueError("Only MP4 format is supported for now")
if codec != VideoCodec.AUTO and codec != VideoCodec.H264:
raise ValueError("Only H264 codec is supported for now")
# None means "use the depth this video was created with" (CreateVideo's choice).
if bit_depth is None:
bit_depth = self.__bit_depth
is_10bit = bit_depth >= 10
extra_kwargs = {}
if isinstance(format, VideoContainer) and format != VideoContainer.AUTO:
extra_kwargs["format"] = format.value
@ -488,10 +536,11 @@ class VideoFromComponents(VideoInput):
frame_rate = Fraction(round(self.__components.frame_rate * 1000), 1000)
# Create a video stream
pix_fmt = "yuv420p10le" if is_10bit else "yuv420p"
video_stream = output.add_stream('h264', rate=frame_rate)
video_stream.width = self.__components.images.shape[2]
video_stream.height = self.__components.images.shape[1]
video_stream.pix_fmt = 'yuv420p'
video_stream.pix_fmt = pix_fmt
# Create an audio stream
audio_sample_rate = 1
@ -505,9 +554,14 @@ class VideoFromComponents(VideoInput):
# Encode video
for i, frame in enumerate(self.__components.images):
img = (frame * 255).clamp(0, 255).byte().cpu().numpy() # shape: (H, W, 3)
frame = av.VideoFrame.from_ndarray(img, format='rgb24')
frame = frame.reformat(format='yuv420p') # Convert to YUV420P as required by h264
if is_10bit:
# 16-bit RGB keeps float precision through the conversion to 10-bit YUV.
img = (frame.float() * 65535).clamp(0, 65535).cpu().numpy().astype(np.uint16) # shape: (H, W, 3)
frame = av.VideoFrame.from_ndarray(img, format="rgb48le")
else:
img = (frame * 255).clamp(0, 255).byte().cpu().numpy() # shape: (H, W, 3)
frame = av.VideoFrame.from_ndarray(img, format='rgb24')
frame = frame.reformat(format=pix_fmt)
packet = video_stream.encode(frame)
output.mux(packet)

View File

@ -67,15 +67,6 @@ class RunwayImageToVideoResponse(BaseModel):
id: Optional[str] = Field(None, description='Task ID')
class RunwayTaskStatusEnum(str, Enum):
SUCCEEDED = 'SUCCEEDED'
RUNNING = 'RUNNING'
FAILED = 'FAILED'
PENDING = 'PENDING'
CANCELLED = 'CANCELLED'
THROTTLED = 'THROTTLED'
class RunwayTaskStatusResponse(BaseModel):
createdAt: datetime = Field(..., description='Task creation timestamp')
id: str = Field(..., description='Task ID')
@ -86,7 +77,7 @@ class RunwayTaskStatusResponse(BaseModel):
ge=0.0,
le=1.0,
)
status: RunwayTaskStatusEnum
status: str = Field(..., description="SUCCEEDED, RUNNING, FAILED, PENDING, CANCELLED or THROTTLED")
class Model4(str, Enum):
@ -125,3 +116,144 @@ class RunwayTextToImageRequest(BaseModel):
class RunwayTextToImageResponse(BaseModel):
id: Optional[str] = Field(None, description='Task ID')
class RunwayAleph2IO:
"""Custom socket types for chaining Aleph2 guidance images."""
KEYFRAME = "RUNWAY_ALEPH2_KEYFRAME"
PROMPT_IMAGE = "RUNWAY_ALEPH2_PROMPT_IMAGE"
# Keyframe timing modes (anchored to the INPUT video). Stored on the chain item and used to
# choose the request model below. The values match the Aleph2 keyframe union field names.
KEYFRAME_MODE_SECONDS = "seconds" # absolute time, in seconds, from the start of the input video
KEYFRAME_MODE_AT = "at" # fraction [0.0, 1.0] of the input video duration
# Prompt-image position modes (anchored to the OUTPUT video). Values match the Aleph2 position `type`.
PROMPT_IMAGE_MODE_TIMESTAMP = "timestamp" # absolute time, in seconds, from the start of the output video
PROMPT_IMAGE_MODE_POSITION = "position" # fraction [0.0, 1.0] of the output video duration
class RunwayAleph2KeyframeItem:
"""A guidance image anchored to a point of the INPUT video (one Aleph2 ``keyframe``)."""
def __init__(self, image, mode: str, value: float):
self.image = image
self.mode = mode # KEYFRAME_MODE_SECONDS | KEYFRAME_MODE_AT
self.value = value
class RunwayAleph2KeyframeChain:
"""An ordered collection of keyframes, built by chaining Runway Aleph2 Keyframe nodes."""
def __init__(self):
self.items: list[RunwayAleph2KeyframeItem] = []
def add(self, item: RunwayAleph2KeyframeItem) -> None:
self.items.append(item)
def clone(self) -> "RunwayAleph2KeyframeChain":
c = RunwayAleph2KeyframeChain()
c.items = list(self.items)
return c
class RunwayAleph2PromptImageItem:
"""A guidance image anchored to a point of the OUTPUT video (one Aleph2 ``promptImage``)."""
def __init__(self, image, mode: str, value: float):
self.image = image
self.mode = mode # PROMPT_IMAGE_MODE_TIMESTAMP | PROMPT_IMAGE_MODE_POSITION
self.value = value
class RunwayAleph2PromptImageChain:
"""An ordered collection of prompt images, built by chaining Runway Aleph2 Prompt Image nodes."""
def __init__(self):
self.items: list[RunwayAleph2PromptImageItem] = []
def add(self, item: RunwayAleph2PromptImageItem) -> None:
self.items.append(item)
def clone(self) -> "RunwayAleph2PromptImageChain":
c = RunwayAleph2PromptImageChain()
c.items = list(self.items)
return c
class RunwayAleph2KeyframeSeconds(BaseModel):
seconds: float = Field(
...,
description="Absolute timestamp in seconds from the start of the input video when this guidance image should apply.",
ge=0.0,
)
uri: str = Field(...)
class RunwayAleph2KeyframeAt(BaseModel):
at: float = Field(
...,
description="Position as a fraction [0.0, 1.0] of the input video duration.",
ge=0.0,
le=1.0,
)
uri: str = Field(...)
class RunwayAleph2TimestampPosition(BaseModel):
type: str = Field(default="timestamp")
timestampSeconds: float = Field(
...,
description="Absolute timestamp in seconds from the start of the output video.",
ge=0.0,
)
class RunwayAleph2RelativePosition(BaseModel):
type: str = Field(default="position")
positionPercentage: float = Field(
...,
description="Position as a fraction [0.0, 1.0] of the total output video duration.",
ge=0.0,
le=1.0,
)
class RunwayAleph2PromptImage(BaseModel):
position: RunwayAleph2TimestampPosition | RunwayAleph2RelativePosition
uri: str = Field(...)
class RunwayAleph2ContentModeration(BaseModel):
publicFigureThreshold: str = Field(
...,
description='When set to "low", the content moderation system is less strict about '
'recognizable public figures. One of "auto" or "low".',
)
class RunwayAleph2Request(BaseModel):
model: str = Field(default="aleph2")
promptText: str = Field(
...,
description="A non-empty string describing what should appear in the output.",
min_length=1,
max_length=1000,
)
videoUri: str = Field(...)
seed: int = Field(..., description="Random seed for generation", ge=0, le=4294967295)
contentModeration: RunwayAleph2ContentModeration = Field(...)
keyframes: list[RunwayAleph2KeyframeSeconds | RunwayAleph2KeyframeAt] | None = Field(
None,
description="Timed guidance images placed at specific points in the input video. Up to 5.",
)
promptImage: list[RunwayAleph2PromptImage] | None = Field(
None,
description="Up to 5 image keyframes for guiding the edit at specific points in the output video.",
)
class RunwayAleph2Response(BaseModel):
id: str | None = Field(None, description="Task ID")

View File

@ -208,6 +208,10 @@ class TripoMultiviewToModelRequest(BaseModel):
quad: bool | None = Field(False, description="Whether to apply quad to the generated model")
class TripoTexturePrompt(BaseModel):
text: str | None = Field(None, description="Text guidance for texture generation")
class TripoTextureModelRequest(BaseModel):
type: TripoTaskType = Field(TripoTaskType.TEXTURE_MODEL, description="Type of task")
original_model_task_id: str = Field(..., description="The task ID of the original model")
@ -219,6 +223,11 @@ class TripoTextureModelRequest(BaseModel):
texture_alignment: TripoTextureAlignment | None = Field(
TripoTextureAlignment.ORIGINAL_IMAGE, description="The texture alignment method"
)
texture_prompt: TripoTexturePrompt | None = Field(
None,
description="Optional guidance for texturing. Required in practice for imported models, "
"which carry no source image to infer texture from.",
)
class TripoRefineModelRequest(BaseModel):
@ -307,6 +316,17 @@ class TripoP1MultiviewToModelRequest(TripoP1CommonRequest):
orientation: str | None = None
class TripoImportModelRequest(BaseModel):
"""Request for the comfy-api composite import endpoint (/proxy/tripo/v2/openapi/import).
The model file is uploaded to ComfyUI API storage first; the backend downloads it from
`url`, re-uploads it to Tripo's storage and creates the import_model task server-side.
"""
url: str = Field(..., description="ComfyUI API storage download URL of the model file")
format: str = Field(..., description='File format: "glb", "fbx", "obj" or "stl"')
class TripoTaskOutput(BaseModel):
model: str | None = Field(None, description="URL to the model")
base_model: str | None = Field(None, description="URL to the base model")

View File

@ -30,13 +30,33 @@ from comfy_api_nodes.apis.runway import (
Model4,
ReferenceImage,
RunwayTextToImageAspectRatioEnum,
RunwayAleph2IO,
RunwayAleph2KeyframeChain,
RunwayAleph2KeyframeItem,
RunwayAleph2PromptImageChain,
RunwayAleph2PromptImageItem,
RunwayAleph2Request,
RunwayAleph2Response,
RunwayAleph2KeyframeSeconds,
RunwayAleph2KeyframeAt,
RunwayAleph2PromptImage,
RunwayAleph2TimestampPosition,
RunwayAleph2RelativePosition,
RunwayAleph2ContentModeration,
KEYFRAME_MODE_SECONDS,
KEYFRAME_MODE_AT,
PROMPT_IMAGE_MODE_TIMESTAMP,
PROMPT_IMAGE_MODE_POSITION,
)
from comfy_api_nodes.util import (
image_tensor_pair_to_batch,
validate_string,
validate_image_dimensions,
validate_image_aspect_ratio,
validate_video_duration,
upload_images_to_comfyapi,
upload_image_to_comfyapi,
upload_video_to_comfyapi,
download_url_to_video_output,
download_url_to_image_tensor,
ApiEndpoint,
@ -45,6 +65,7 @@ from comfy_api_nodes.util import (
)
PATH_IMAGE_TO_VIDEO = "/proxy/runway/image_to_video"
PATH_VIDEO_TO_VIDEO = "/proxy/runway/video_to_video"
PATH_TEXT_TO_IMAGE = "/proxy/runway/text_to_image"
PATH_GET_TASK_STATUS = "/proxy/runway/tasks"
@ -53,12 +74,6 @@ AVERAGE_DURATION_FLF_SECONDS = 256
AVERAGE_DURATION_T2I_SECONDS = 41
class RunwayApiError(Exception):
"""Base exception for Runway API errors."""
pass
class RunwayGen4TurboAspectRatio(str, Enum):
"""Aspect ratios supported for Image to Video API when using gen4_turbo model."""
@ -84,14 +99,6 @@ def get_video_url_from_task_status(response: TaskStatusResponse) -> str | None:
return None
def extract_progress_from_task_status(
response: TaskStatusResponse,
) -> float | None:
if hasattr(response, "progress") and response.progress is not None:
return response.progress * 100
return None
def get_image_url_from_task_status(response: TaskStatusResponse) -> str | None:
"""Returns the image URL from the task status response if it exists."""
if hasattr(response, "output") and len(response.output) > 0:
@ -102,14 +109,13 @@ def get_image_url_from_task_status(response: TaskStatusResponse) -> str | None:
async def get_response(
cls: type[IO.ComfyNode], task_id: str, estimated_duration: int | None = None
) -> TaskStatusResponse:
"""Poll the task status until it is finished then get the response."""
return await poll_op(
cls,
ApiEndpoint(path=f"{PATH_GET_TASK_STATUS}/{task_id}"),
response_model=TaskStatusResponse,
status_extractor=lambda r: r.status.value,
status_extractor=lambda r: r.status,
estimated_duration=estimated_duration,
progress_extractor=extract_progress_from_task_status,
progress_extractor=lambda r: r.progress * 100 if r.progress is not None else None,
)
@ -127,7 +133,7 @@ async def generate_video(
final_response = await get_response(cls, initial_response.id, estimated_duration)
if not final_response.output:
raise RunwayApiError("Runway task succeeded but no video data found in response.")
raise ValueError("Runway task succeeded but no video data found in response.")
video_url = get_video_url_from_task_status(final_response)
return await download_url_to_video_output(video_url)
@ -410,7 +416,7 @@ class RunwayFirstLastFrameNode(IO.ComfyNode):
mime_type="image/png",
)
if len(download_urls) != 2:
raise RunwayApiError("Failed to upload one or more images to comfy api.")
raise ValueError("Failed to upload one or more images to comfy api.")
return IO.NodeOutput(
await generate_video(
@ -514,11 +520,321 @@ class RunwayTextToImageNode(IO.ComfyNode):
estimated_duration=AVERAGE_DURATION_T2I_SECONDS,
)
if not final_response.output:
raise RunwayApiError("Runway task succeeded but no image data found in response.")
raise ValueError("Runway task succeeded but no image data found in response.")
return IO.NodeOutput(await download_url_to_image_tensor(get_image_url_from_task_status(final_response)))
_TIMING_ABSOLUTE = "Absolute time (seconds)"
_TIMING_FRACTION = "Fraction of duration (0.0-1.0)"
class RunwayAleph2KeyframeNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return IO.Schema(
node_id="RunwayAleph2KeyframeNode",
display_name="Runway Aleph2 Keyframe",
category="partner/video/Runway",
description="Anchor a guidance image to a moment of the input (source) video, so Aleph2 "
"steers the edit at that point of your footage. Connect this to the 'keyframes' input of "
"the Runway Aleph2 Video to Video node; chain several together (up to 5) via the optional "
"'keyframes' input below.",
inputs=[
IO.Image.Input(
"image",
tooltip="The guidance image to apply at the chosen moment of the input video.",
),
IO.DynamicCombo.Input(
"timing",
options=[
IO.DynamicCombo.Option(
_TIMING_ABSOLUTE,
[
IO.Float.Input(
"seconds",
default=0.0,
min=0.0,
max=30.0,
step=0.1,
display_mode=IO.NumberDisplay.number,
tooltip="Time in seconds from start of the input video where this image applies.",
),
],
),
IO.DynamicCombo.Option(
_TIMING_FRACTION,
[
IO.Float.Input(
"fraction",
default=0.0,
min=0.0,
max=1.0,
step=0.01,
display_mode=IO.NumberDisplay.number,
tooltip="Where in the input video this image applies, "
"as a fraction of its duration (0.0 = start, 1.0 = end).",
),
],
),
],
tooltip="How to place this image on the input video's timeline.",
),
IO.Custom(RunwayAleph2IO.KEYFRAME).Input(
"keyframes",
optional=True,
tooltip="Optional earlier keyframes to chain with this one.",
),
],
outputs=[IO.Custom(RunwayAleph2IO.KEYFRAME).Output(display_name="keyframes")],
)
@classmethod
def execute(
cls,
image: Input.Image,
timing: dict,
keyframes: RunwayAleph2KeyframeChain | None = None,
) -> IO.NodeOutput:
chain = keyframes.clone() if keyframes is not None else RunwayAleph2KeyframeChain()
if timing["timing"] == _TIMING_ABSOLUTE:
mode, value = KEYFRAME_MODE_SECONDS, float(timing["seconds"])
else:
mode, value = KEYFRAME_MODE_AT, float(timing["fraction"])
chain.add(RunwayAleph2KeyframeItem(image=image, mode=mode, value=value))
return IO.NodeOutput(chain)
class RunwayAleph2PromptImageNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return IO.Schema(
node_id="RunwayAleph2PromptImageNode",
display_name="Runway Aleph2 Prompt Image",
category="partner/video/Runway",
description="Anchor a guidance image to a moment of the output (result) video, to guide what "
"the edited video looks like at that point. Connect this to the 'prompt_images' input of the "
"Runway Aleph2 Video to Video node; chain several together (up to 5) via the optional "
"'prompt_images' input below.",
inputs=[
IO.Image.Input(
"image",
tooltip="The guidance image to place at the chosen moment of the output video.",
),
IO.DynamicCombo.Input(
"position",
options=[
IO.DynamicCombo.Option(
_TIMING_ABSOLUTE,
[
IO.Float.Input(
"seconds",
default=0.0,
min=0.0,
max=30.0,
step=0.1,
display_mode=IO.NumberDisplay.number,
tooltip="Time in seconds from start of the output video where this image applies.",
),
],
),
IO.DynamicCombo.Option(
_TIMING_FRACTION,
[
IO.Float.Input(
"fraction",
default=0.0,
min=0.0,
max=1.0,
step=0.01,
display_mode=IO.NumberDisplay.number,
tooltip="Where in the output video this image applies, "
"as a fraction of its duration (0.0 = start, 1.0 = end).",
),
],
),
],
tooltip="How to place this image on the output video's timeline.",
),
IO.Custom(RunwayAleph2IO.PROMPT_IMAGE).Input(
"prompt_images",
optional=True,
tooltip="Optional earlier prompt images to chain with this one.",
),
],
outputs=[IO.Custom(RunwayAleph2IO.PROMPT_IMAGE).Output(display_name="prompt_images")],
)
@classmethod
def execute(
cls,
image: Input.Image,
position: dict,
prompt_images: RunwayAleph2PromptImageChain | None = None,
) -> IO.NodeOutput:
chain = prompt_images.clone() if prompt_images is not None else RunwayAleph2PromptImageChain()
if position["position"] == _TIMING_ABSOLUTE:
mode, value = PROMPT_IMAGE_MODE_TIMESTAMP, float(position["seconds"])
else:
mode, value = PROMPT_IMAGE_MODE_POSITION, float(position["fraction"])
chain.add(RunwayAleph2PromptImageItem(image=image, mode=mode, value=value))
return IO.NodeOutput(chain)
class RunwayAleph2VideoToVideoNode(IO.ComfyNode):
@classmethod
def define_schema(cls):
return IO.Schema(
node_id="RunwayAleph2VideoToVideoNode",
display_name="Runway Aleph2 Video to Video",
category="partner/video/Runway",
description="Edit a video with a text prompt using Runway's Aleph2 model. Aleph2 transforms "
"your footage (restyle, relight, add or remove elements, change the viewpoint) while keeping "
"the original motion and timing; the output resolution matches the input video, which must be "
"2-30 seconds at 30 fps or lower. Optionally steer the edit with either keyframes (anchored to "
"the input video) or prompt images (anchored to the output video) - use one or the other, not both.",
inputs=[
IO.String.Input(
"prompt",
multiline=True,
default="",
tooltip="Describes what should appear in the output (1-1000 characters).",
),
IO.Video.Input(
"video",
tooltip="Input video to edit. Must be 2-30 seconds at 30 fps or lower.",
),
IO.Int.Input(
"seed",
default=0,
min=0,
max=4294967295,
step=1,
control_after_generate=True,
display_mode=IO.NumberDisplay.number,
tooltip="Random seed for generation",
),
IO.Combo.Input(
"public_figure_threshold",
options=["auto", "low"],
default="low",
tooltip="Content moderation for recognizable public figures.",
),
IO.Custom(RunwayAleph2IO.KEYFRAME).Input(
"keyframes",
optional=True,
tooltip="Guidance images anchored to the input video, from Aleph2 Keyframe nodes (up to 5). "
"Use keyframes or prompt images, not both.",
),
IO.Custom(RunwayAleph2IO.PROMPT_IMAGE).Input(
"prompt_images",
optional=True,
tooltip="Guidance images anchored to the output video, from Aleph2 Prompt Image nodes (up to 5). "
"Use keyframes or prompt images, not both.",
),
],
outputs=[
IO.Video.Output(),
],
hidden=[
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
price_badge=IO.PriceBadge(
expr="""{"type":"usd","usd": 0.4004, "format":{"suffix":"/second"}}""",
),
)
@classmethod
async def execute(
cls,
prompt: str,
video: Input.Video,
seed: int,
public_figure_threshold: str = "low",
keyframes: RunwayAleph2KeyframeChain | None = None,
prompt_images: RunwayAleph2PromptImageChain | None = None,
) -> IO.NodeOutput:
validate_string(prompt, min_length=1, max_length=1000)
validate_video_duration(
video,
min_duration=2.0,
max_duration=30.0,
)
try:
fps = float(video.get_frame_rate())
except Exception:
fps = None
if fps is not None and fps > 30.0 + 0.01:
raise ValueError(f"Input video frame rate ({fps:.2f} fps) exceeds Aleph2's maximum of 30 fps.")
if (keyframes and keyframes.items) and (prompt_images and prompt_images.items):
raise ValueError("Aleph2 accepts either keyframes or prompt images, not both.")
video_duration: float | None = None
try:
video_duration = video.get_duration()
except Exception:
video_duration = None
def _check_seconds(value: float, label: str) -> None:
if video_duration is not None and value > video_duration + 0.0001:
raise ValueError(f"{label} {value:.2f}s exceeds the input video duration ({video_duration:.2f}s).")
video_url = await upload_video_to_comfyapi(cls, video)
keyframe_models: list[RunwayAleph2KeyframeSeconds | RunwayAleph2KeyframeAt] = []
if keyframes is not None:
if len(keyframes.items) > 5:
raise ValueError("Aleph2 supports at most 5 keyframes.")
for item in keyframes.items:
image_url = await upload_image_to_comfyapi(cls, item.image, mime_type="image/png")
if item.mode == KEYFRAME_MODE_SECONDS:
_check_seconds(item.value, "Keyframe timestamp")
keyframe_models.append(RunwayAleph2KeyframeSeconds(seconds=item.value, uri=image_url))
else:
keyframe_models.append(RunwayAleph2KeyframeAt(at=item.value, uri=image_url))
prompt_image_models: list[RunwayAleph2PromptImage] = []
if prompt_images is not None:
if len(prompt_images.items) > 5:
raise ValueError("Aleph2 supports at most 5 prompt images.")
for item in prompt_images.items:
image_url = await upload_image_to_comfyapi(cls, item.image, mime_type="image/png")
position: RunwayAleph2TimestampPosition | RunwayAleph2RelativePosition
if item.mode == PROMPT_IMAGE_MODE_TIMESTAMP:
_check_seconds(item.value, "Prompt image timestamp")
position = RunwayAleph2TimestampPosition(timestampSeconds=item.value)
else:
position = RunwayAleph2RelativePosition(positionPercentage=item.value)
prompt_image_models.append(RunwayAleph2PromptImage(position=position, uri=image_url))
initial_response = await sync_op(
cls,
endpoint=ApiEndpoint(path=PATH_VIDEO_TO_VIDEO, method="POST"),
response_model=RunwayAleph2Response,
data=RunwayAleph2Request(
promptText=prompt,
videoUri=video_url,
seed=seed,
contentModeration=RunwayAleph2ContentModeration(publicFigureThreshold=public_figure_threshold),
keyframes=keyframe_models or None,
promptImage=prompt_image_models or None,
),
)
final_response = await get_response(cls, initial_response.id)
if not final_response.output:
raise ValueError("Runway task succeeded but no video data found in response.")
return IO.NodeOutput(await download_url_to_video_output(get_video_url_from_task_status(final_response)))
class RunwayExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@ -527,6 +843,9 @@ class RunwayExtension(ComfyExtension):
RunwayImageToVideoNodeGen3a,
RunwayImageToVideoNodeGen4,
RunwayTextToImageNode,
RunwayAleph2VideoToVideoNode,
RunwayAleph2KeyframeNode,
RunwayAleph2PromptImageNode,
]

View File

@ -1,6 +1,6 @@
from typing_extensions import override
from comfy_api.latest import IO, ComfyExtension, Input
from comfy_api.latest import IO, ComfyExtension, Input, Types
from comfy_api_nodes.apis.tripo import (
TripoAnimateRetargetRequest,
TripoAnimateRigRequest,
@ -8,6 +8,7 @@ from comfy_api_nodes.apis.tripo import (
TripoFileEmptyReference,
TripoFileReference,
TripoImageToModelRequest,
TripoImportModelRequest,
TripoModelVersion,
TripoMultiviewToModelRequest,
TripoOrientation,
@ -21,6 +22,7 @@ from comfy_api_nodes.apis.tripo import (
TripoTaskType,
TripoTextToModelRequest,
TripoTextureModelRequest,
TripoTexturePrompt,
TripoUrlReference,
)
from comfy_api_nodes.util import (
@ -28,6 +30,7 @@ from comfy_api_nodes.util import (
download_url_to_file_3d,
poll_op,
sync_op,
upload_3d_model_to_comfyapi,
upload_images_to_comfyapi,
)
@ -538,6 +541,14 @@ class TripoTextureNode(IO.ComfyNode):
optional=True,
advanced=True,
),
IO.String.Input(
"texture_prompt",
default="",
multiline=True,
optional=True,
tooltip="Optional text guidance for texturing. Required in practice for imported "
"models (Tripo: Import Model), which carry no source image to infer colors from.",
),
],
outputs=[
IO.String.Output(display_name="model_file"), # for backward compatibility only
@ -571,6 +582,7 @@ class TripoTextureNode(IO.ComfyNode):
texture_seed: int | None = None,
texture_quality: str | None = None,
texture_alignment: str | None = None,
texture_prompt: str = "",
) -> IO.NodeOutput:
response = await sync_op(
cls,
@ -583,6 +595,7 @@ class TripoTextureNode(IO.ComfyNode):
texture_seed=texture_seed,
texture_quality=texture_quality,
texture_alignment=texture_alignment,
texture_prompt=TripoTexturePrompt(text=texture_prompt.strip()) if texture_prompt.strip() else None,
),
)
return await poll_until_finished(cls, response, average_duration=80)
@ -915,6 +928,90 @@ class TripoConversionNode(IO.ComfyNode):
return await poll_until_finished(cls, response, average_duration=30)
class TripoImportModelNode(IO.ComfyNode):
"""Imports an external 3D model into Tripo, producing a MODEL_TASK_ID for post-processing nodes."""
SUPPORTED_FORMATS = ("glb", "fbx", "obj", "stl")
@classmethod
def define_schema(cls):
return IO.Schema(
node_id="TripoImportModelNode",
display_name="Tripo: Import Model",
category="partner/3d/Tripo",
description="Import an external 3D model (e.g. from Rodin, Hunyuan3D or a local file) into Tripo "
"to use it with Tripo's post-processing nodes: Texture, Rig, Convert. "
"GLB is recommended: textures survive import only when embedded in the file. "
"Note that texturing an imported model requires a texture prompt.",
inputs=[
IO.MultiType.Input(
"model_3d",
types=[IO.File3DGLB, IO.File3DFBX, IO.File3DOBJ, IO.File3DSTL, IO.File3DAny],
tooltip="3D model to import (GLB / FBX / OBJ / STL, up to 150 MB). "
"OBJ and STL files carry no embedded textures.",
),
],
outputs=[
IO.Custom("MODEL_TASK_ID").Output(display_name="model task_id"),
],
hidden=[
IO.Hidden.auth_token_comfy_org,
IO.Hidden.api_key_comfy_org,
IO.Hidden.unique_id,
],
is_api_node=True,
price_badge=IO.PriceBadge(
expr="""{"type":"text","text":"Free"}""",
),
)
@classmethod
async def execute(cls, model_3d: Types.File3D) -> IO.NodeOutput:
file_format = (model_3d.format or "").lstrip(".").lower()
if file_format == "gltf":
raise ValueError(
"GLTF (.gltf) references external files and cannot be imported. Export a single-file GLB instead."
)
if file_format not in cls.SUPPORTED_FORMATS:
raise ValueError(
f"Unsupported 3D format '{file_format or 'unknown'}'. "
f"Tripo import supports: {', '.join(f.upper() for f in cls.SUPPORTED_FORMATS)}."
)
size = len(model_3d.get_bytes())
if size > 150 * 1024 * 1024:
raise ValueError(f"Model file is {size / (1024 * 1024):.1f} MB; Tripo import allows up to 150 MB.")
url = await upload_3d_model_to_comfyapi(cls, model_3d, file_format)
response = await sync_op(
cls,
endpoint=ApiEndpoint(path="/proxy/tripo/v2/openapi/import", method="POST"),
response_model=TripoTaskResponse,
data=TripoImportModelRequest(url=url, format=file_format),
)
if response.code != 0:
raise RuntimeError(f"Failed to import model: {response.error}")
task_id = response.data.task_id
response_poll = await poll_op(
cls,
poll_endpoint=ApiEndpoint(path=f"/proxy/tripo/v2/openapi/task/{task_id}"),
response_model=TripoTaskResponse,
failed_statuses=[
TripoTaskStatus.FAILED,
TripoTaskStatus.CANCELLED,
TripoTaskStatus.UNKNOWN,
TripoTaskStatus.BANNED,
TripoTaskStatus.EXPIRED,
],
status_extractor=lambda x: x.data.status,
progress_extractor=lambda x: x.data.progress,
estimated_duration=10,
)
if response_poll.data.status != TripoTaskStatus.SUCCESS:
raise RuntimeError(f"Failed to import model: {response_poll}")
return IO.NodeOutput(task_id)
def _p1_price_expr(*, geometry_credits: int, textured_credits: int, detailed_credits: int) -> str:
return (
"("
@ -1292,6 +1389,7 @@ class TripoExtension(ComfyExtension):
TripoP1TextToModelNode,
TripoP1ImageToModelNode,
TripoP1MultiviewToModelNode,
TripoImportModelNode,
TripoTextureNode,
TripoRefineNode,
TripoRigNode,

View File

@ -317,11 +317,74 @@ class PreviewPointCloud(IO.ComfyNode):
)
MESH_EXTENSIONS = {'.gltf', '.glb', '.obj', '.fbx', '.stl'}
class Load3DAdvanced(IO.ComfyNode):
@classmethod
def define_schema(cls):
input_dir = os.path.join(folder_paths.get_input_directory(), "3d")
os.makedirs(input_dir, exist_ok=True)
input_path = Path(input_dir)
base_path = Path(folder_paths.get_input_directory())
files = [
normalize_path(str(file_path.relative_to(base_path)))
for file_path in input_path.rglob("*")
if file_path.suffix.lower() in MESH_EXTENSIONS
]
return IO.Schema(
node_id="Load3DAdvanced",
display_name="Load 3D (Advanced)",
category="3d",
search_aliases=[
"load mesh",
"load gltf",
"load glb",
"load obj",
"load fbx",
"load stl",
],
is_experimental=True,
inputs=[
IO.Combo.Input("model_file", options=["none"] + sorted(files), upload=IO.UploadType.model),
IO.Load3D.Input("viewport_state"),
IO.Int.Input("width", default=1024, min=1, max=4096, step=1),
IO.Int.Input("height", default=1024, min=1, max=4096, step=1),
],
outputs=[
IO.File3DAny.Output(display_name="model_3d"),
IO.Load3DModelInfo.Output(display_name="model_3d_info"),
IO.Load3DCamera.Output(display_name="camera_info"),
IO.Int.Output(display_name="width"),
IO.Int.Output(display_name="height"),
],
)
@classmethod
def validate_inputs(cls, model_file, **kwargs) -> bool | str:
if not model_file or model_file == "none":
return True
if not folder_paths.exists_annotated_filepath(model_file):
return f"Invalid 3D model file: {model_file}"
return True
@classmethod
def execute(cls, model_file, viewport_state, width: int, height: int, **kwargs) -> IO.NodeOutput:
file_3d = None
if model_file and model_file != "none":
file_3d = Types.File3D(folder_paths.get_annotated_filepath(model_file))
model_3d_info = viewport_state.get('model_3d_info', [])
return IO.NodeOutput(file_3d, model_3d_info, viewport_state['camera_info'], width, height)
class Load3DExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
return [
Load3D,
Load3DAdvanced,
Preview3D,
Preview3DAdvanced,
PreviewGaussianSplat,

View File

@ -134,6 +134,17 @@ class CreateVideo(io.ComfyNode):
io.Image.Input("images", tooltip="The images to create a video from."),
io.Float.Input("fps", default=30.0, min=1.0, max=120.0, step=1.0),
io.Audio.Input("audio", optional=True, tooltip="The audio to add to the video."),
io.Int.Input(
"bit_depth",
min=8,
max=10,
default=8,
step=2,
tooltip="Bit depth of the created video. 10-bit keeps smoother gradients with less"
" banding, but some players and downstream nodes may not support it.",
optional=True,
display_mode=io.NumberDisplay.number,
),
],
outputs=[
io.Video.Output(),
@ -141,9 +152,14 @@ class CreateVideo(io.ComfyNode):
)
@classmethod
def execute(cls, images: Input.Image, fps: float, audio: Optional[Input.Audio] = None) -> io.NodeOutput:
def execute(
cls, images: Input.Image, fps: float, audio: Optional[Input.Audio] = None, bit_depth: int = 8,
) -> io.NodeOutput:
return io.NodeOutput(
InputImpl.VideoFromComponents(Types.VideoComponents(images=images, audio=audio, frame_rate=Fraction(fps)))
InputImpl.VideoFromComponents(
Types.VideoComponents(images=images, audio=audio, frame_rate=Fraction(fps)),
bit_depth=bit_depth,
)
)
class GetVideoComponents(io.ComfyNode):
@ -154,7 +170,7 @@ class GetVideoComponents(io.ComfyNode):
search_aliases=["extract frames", "split video", "video to images", "demux"],
display_name="Get Video Components",
category="video",
description="Extracts all components from a video: frames, audio, and framerate.",
description="Extracts all components from a video: frames, audio, framerate, and bit depth.",
inputs=[
io.Video.Input("video", tooltip="The video to extract components from."),
],
@ -162,13 +178,14 @@ class GetVideoComponents(io.ComfyNode):
io.Image.Output(display_name="images"),
io.Audio.Output(display_name="audio"),
io.Float.Output(display_name="fps"),
io.Int.Output(display_name="bit_depth"),
],
)
@classmethod
def execute(cls, video: Input.Video) -> io.NodeOutput:
components = video.get_components()
return io.NodeOutput(components.images, components.audio, float(components.frame_rate))
return io.NodeOutput(components.images, components.audio, float(components.frame_rate), video.get_bit_depth())
class LoadVideo(io.ComfyNode):

View File

@ -1 +1 @@
comfyui_manager==4.2.1
comfyui_manager==4.2.2

View File

@ -1,6 +1,6 @@
comfyui-frontend-package==1.45.15
comfyui-workflow-templates==0.9.98
comfyui-embedded-docs==0.5.3
comfyui-embedded-docs==0.5.4
torch
torchsde
torchvision

View File

@ -27,6 +27,7 @@ import logging
import mimetypes
from comfy.cli_args import args
from comfy.deploy_environment import get_deploy_environment
import comfy.utils
import comfy.model_management
from comfy_api import feature_flags
@ -690,6 +691,7 @@ class PromptServer():
"python_version": sys.version,
"pytorch_version": comfy.model_management.torch_version,
"embedded_python": os.path.split(os.path.split(sys.executable)[0])[1] == "python_embeded",
"deploy_environment": get_deploy_environment(),
"argv": sys.argv
},
"devices": device_entries

View File

@ -0,0 +1,93 @@
import pytest
import torch
import av
import numpy as np
from fractions import Fraction
from comfy_api.latest._input_impl.video_types import VideoFromFile, VideoFromComponents
from comfy_api.latest._util.video_types import VideoComponents
@pytest.fixture(scope="module")
def gradient_components():
"""Narrow horizontal ramp (0.25..0.30) that needs more than 8 bits to stay smooth"""
width, height, frames = 64, 64, 3
ramp = torch.linspace(0.25, 0.30, width).view(1, 1, width, 1).expand(frames, height, width, 3)
return VideoComponents(images=ramp.contiguous(), frame_rate=Fraction(30))
@pytest.fixture(scope="module")
def src8(gradient_components, tmp_path_factory):
"""8-bit h264 mp4 (Create Video default)"""
path = str(tmp_path_factory.mktemp("video") / "src8.mp4")
VideoFromComponents(gradient_components).save_to(path)
return path
@pytest.fixture(scope="module")
def src10(gradient_components, tmp_path_factory):
"""10-bit h264 mp4 (Create Video with bit_depth=10)"""
path = str(tmp_path_factory.mktemp("video") / "src10.mp4")
VideoFromComponents(gradient_components, bit_depth=10).save_to(path)
return path
def probe(path):
"""(codec, pix_fmt, bit_depth) of the first video stream"""
with av.open(path) as container:
stream = container.streams.video[0]
return (stream.codec.name, stream.format.name, max(c.bits for c in stream.format.components))
def decoded_levels(path):
"""Unique tonal levels in the first decoded frame (banding measure)"""
with av.open(path) as container:
frame = next(container.decode(container.streams.video[0]))
return len(np.unique(frame.to_ndarray(format="gbrpf32le")[..., 0]))
def video_packet_bytes(path):
"""Raw video packet payloads; identical to the source's only for a true remux"""
with av.open(path) as container:
return [bytes(p) for p in container.demux(container.streams.video[0]) if p.size]
def test_create_video_bit_depth(src8, src10):
"""Create Video's bit_depth picks the encoded depth (default 8-bit); 10-bit reduces banding"""
assert probe(src8) == ("h264", "yuv420p", 8)
assert probe(src10) == ("h264", "yuv420p10le", 10)
assert decoded_levels(src10) > 2 * decoded_levels(src8)
def test_save_auto_keeps_source_depth(src8, src10, tmp_path):
"""Save Video (no bit_depth = auto) stream-copies the source, preserving its depth byte-for-byte"""
for name, src in [("p8", src8), ("p10", src10)]:
path = str(tmp_path / f"{name}.mp4")
VideoFromFile(src).save_to(path)
assert probe(path) == probe(src)
assert video_packet_bytes(path) == video_packet_bytes(src)
def test_save_explicit_depth_reencodes(src8, src10, tmp_path):
"""An explicit bit_depth different from the source forces a re-encode to that depth"""
down = str(tmp_path / "down8.mp4")
VideoFromFile(src10).save_to(down, bit_depth=8)
assert probe(down) == ("h264", "yuv420p", 8)
up = str(tmp_path / "up10.mp4")
VideoFromFile(src8).save_to(up, bit_depth=10)
assert probe(up) == ("h264", "yuv420p10le", 10)
def test_trim_keeps_source_depth(src10, tmp_path):
"""Video Slice re-encodes (trim) but preserves the source's 10-bit depth"""
path = str(tmp_path / "trim.mp4")
VideoFromFile(src10).as_trimmed(start_time=0, duration=1 / 30, strict_duration=False).save_to(path)
assert probe(path) == ("h264", "yuv420p10le", 10)
def test_get_bit_depth(gradient_components, src8, src10):
"""get_bit_depth reports a video's depth (backs the Get Video Components output)"""
assert VideoFromFile(src8).get_bit_depth() == 8
assert VideoFromFile(src10).get_bit_depth() == 10
assert VideoFromComponents(gradient_components, bit_depth=10).get_bit_depth() == 10
assert VideoFromComponents(gradient_components).get_bit_depth() == 8