Compare commits

..

5 Commits

8 changed files with 510 additions and 29 deletions

View File

@ -207,8 +207,9 @@ class PidNet(PixDiT_T2I):
f"Flux1/SD3 = 16 channels, Flux2 = 128 channels."
)
B = x.shape[0]
Hs = x.shape[2] // self.patch_size
Ws = x.shape[3] // self.patch_size
# Match the backbone's pad_to_patch_size (round up) so the LQ grid lines up with the patch stream.
Hs = -(-x.shape[2] // self.patch_size)
Ws = -(-x.shape[3] // self.patch_size)
degrade_sigma = degrade_sigma.to(device=x.device, dtype=torch.float32).reshape(-1)
if degrade_sigma.numel() == 1 and B > 1:

View File

@ -1428,6 +1428,23 @@ class PiD(PixelDiTT2I):
out["degrade_sigma"] = comfy.conds.CONDRegular(degrade_sigma)
return out
def resize_cond_for_context_window(self, cond_key, cond_value, window, x_in, device, retain_index_list=[]):
    """Restrict the ``lq_latent`` conditioning to the part covered by a context window.

    Every other key, and any ``lq_latent`` value whose ``cond`` attribute is not a
    tensor, is forwarded unchanged to the parent implementation.

    Returns:
        A copy of ``cond_value`` (via ``_copy_with``) holding the selected slices of
        the low-quality latent moved to ``device``; ``None`` when ``window.dim`` is
        not a valid axis of that tensor or when no window index maps inside it.
    """
    handles_lq = cond_key == "lq_latent" and isinstance(getattr(cond_value, "cond", None), torch.Tensor)
    if not handles_lq:
        return super().resize_cond_for_context_window(
            cond_key, cond_value, window, x_in, device, retain_index_list=retain_index_list
        )

    lq_tensor = cond_value.cond
    axis = window.dim
    if axis >= lq_tensor.ndim:
        return None

    projector = self.diffusion_model.lq_proj
    # Window indices address x; dividing by this factor maps them onto lq positions.
    stride = projector.sr_scale * projector.latent_spatial_down_factor
    axis_len = lq_tensor.size(axis)

    # Several x indices collapse onto the same lq index, so collect them in a set
    # and drop anything that falls outside the tensor.
    selected = set()
    for x_index in window.index_list:
        lq_index = x_index // stride
        if 0 <= lq_index < axis_len:
            selected.add(lq_index)
    if not selected:
        return None

    # Full slices on the leading axes, then an index list on the windowed axis.
    selector = (slice(None),) * axis + (sorted(selected),)
    return cond_value._copy_with(lq_tensor[selector].to(device))
class WAN21(BaseModel):
def __init__(self, model_config, model_type=ModelType.FLOW, image_to_video=False, device=None):

View File

@ -770,6 +770,23 @@ class Load3DCamera(ComfyTypeIO):
Type = CameraInfo
# IO type registered as "LOAD3D_MODEL_INFO"; its value type (Type) is a list of
# ModelTransform dictionaries.
@comfytype(io_type="LOAD3D_MODEL_INFO")
class Load3DModelInfo(ComfyTypeIO):
# Typed schema of one list entry.
# NOTE(review): the field names look like a serialized scene-object transform
# (identity, position/rotation/quaternion/scale, up vector, visibility, matrix) —
# confirm the exact payload against whatever produces 'model_info'.
class ModelTransform(TypedDict):
uuid: str
name: str
type: str
position: dict[str, float | int]
# Unlike the other vector-like fields, rotation values may also be strings.
rotation: dict[str, float | int | str]
quaternion: dict[str, float | int]
scale: dict[str, float | int]
up: dict[str, float | int]
visible: bool
matrix: list[float]
Type = list[ModelTransform]
@comfytype(io_type="LOAD_3D")
class Load3D(ComfyTypeIO):
"""3D models are stored as a dictionary."""
@ -779,6 +796,7 @@ class Load3D(ComfyTypeIO):
normal: str
camera_info: Load3DCamera.CameraInfo
recording: NotRequired[str]
model_info: NotRequired[list[Load3DModelInfo.ModelTransform]]
Type = Model3DDict
@ -2291,6 +2309,7 @@ __all__ = [
"FlowControl",
"Accumulation",
"Load3DCamera",
"Load3DModelInfo",
"Load3D",
"Load3DAnimation",
"Photomaker",

View File

@ -0,0 +1,32 @@
from pydantic import BaseModel, Field
class CreateSwitchXRequest(BaseModel):
    # Request body for creating a SwitchX generation.

    # Required: no default, so validation fails when any of these is missing.
    generation_type: str
    source_uri: str
    alpha_mode: str

    # Optional: each falls back to the default shown.
    prompt: str | None = Field(default=None, max_length=2000)
    reference_image_uri: str | None = None
    alpha_uri: str | None = None
    max_resolution: int = 1080
    callback_url: str | None = None
    idempotency_key: str | None = Field(default=None, min_length=1, max_length=256)
class SwitchXOutputUrls(BaseModel):
    # Output URLs attached to a SwitchX status response; every entry may be absent.
    render: str | None = None
    source: str | None = None
    alpha: str | None = None
class SwitchXStatusResponse(BaseModel):
    # Status payload of a SwitchX generation.

    # Always present.
    id: str
    status: str

    # May be omitted by the service; all default to None.
    progress: int | None = None
    generation_type: str | None = None
    alpha_mode: str | None = None
    output: SwitchXOutputUrls | None = None
    error: str | None = None

    # Timestamps are kept as plain strings; no parsing happens in this model.
    created_at: str | None = None
    modified_at: str | None = None
    completed_at: str | None = None

View File

@ -0,0 +1,404 @@
from fractions import Fraction
from typing_extensions import override
from comfy_api.latest import IO, ComfyExtension, Input, InputImpl, Types
from comfy_api_nodes.apis.beeble import (
CreateSwitchXRequest,
SwitchXStatusResponse,
)
from comfy_api_nodes.util import (
ApiEndpoint,
bytesio_to_image_tensor,
convert_mask_to_image,
download_url_as_bytesio,
download_url_to_image_tensor,
download_url_to_video_output,
downscale_image_tensor,
downscale_video_to_max_pixels,
poll_op,
sync_op,
upload_image_to_comfyapi,
upload_video_to_comfyapi,
validate_string,
validate_video_frame_count,
)
# Pixel budget (~2.77 MP) used when downscaling or uploading sources, masks and
# reference images.
_MAX_PIXELS = 2_770_000
# Largest source-video frame count accepted (passed to validate_video_frame_count).
_MAX_FRAMES = 240
# Maximum prompt length passed as max_length to validate_string.
_MAX_PROMPT_LEN = 2000
def _validate_inputs(prompt: str | None, reference_image: Input.Image | None) -> str | None:
    """Ensure the request carries a prompt, a reference image, or both.

    Args:
        prompt: Raw prompt text; ``None`` or blank counts as "no prompt".
        reference_image: Optional reference image; only its presence is checked.

    Returns:
        The whitespace-stripped prompt, or ``None`` when it is empty.

    Raises:
        ValueError: If the prompt is blank and no reference image was supplied.
    """
    stripped = "" if not prompt else prompt.strip()
    if stripped:
        # Already stripped above, so the helper only has to enforce the length cap.
        validate_string(stripped, strip_whitespace=False, max_length=_MAX_PROMPT_LEN)
        return stripped
    if reference_image is None:
        raise ValueError("At least one of 'prompt' or 'reference_image' must be provided.")
    return None
async def _upload_mask_as_image(
    cls: type[IO.ComfyNode],
    mask: Input.Image,
    *,
    wait_label: str,
) -> str:
    """Upload the first frame of a MASK as a PNG image.

    A 2-D mask (H, W) gets a leading batch axis first; for batched input only the
    first entry is used. Returns whatever ``upload_image_to_comfyapi`` returns,
    which callers use as the alpha URI.
    """
    batched = mask.unsqueeze(0) if mask.dim() == 2 else mask
    first_frame = convert_mask_to_image(batched[:1])
    uploaded = await upload_image_to_comfyapi(
        cls,
        first_frame,
        mime_type="image/png",
        wait_label=wait_label,
        total_pixels=_MAX_PIXELS,
    )
    return uploaded
async def _upload_mask_batch_as_video(
    cls: type[IO.ComfyNode],
    mask: Input.Image,
    *,
    frame_rate: Fraction,
    source_frame_count: int,
    wait_label: str,
) -> str:
    """Turn a MASK batch (N, H, W) into a video at ``frame_rate`` and upload it.

    The mask frames are converted to images and always passed through
    ``downscale_image_tensor`` with the shared pixel budget, the same budget the
    callers apply to the source, so matte and source are intended to end up with
    matching dimensions. The original author describes the uploaded file as a
    grayscale H.264 MP4; the actual encoding is done by the helpers, not here.

    Raises:
        ValueError: If the number of mask frames differs from ``source_frame_count``.
    """
    frames = mask.unsqueeze(0) if mask.dim() == 2 else mask
    mask_frame_count = frames.shape[0]
    if mask_frame_count != source_frame_count:
        raise ValueError(
            f"Custom alpha video frame count ({mask_frame_count}) does not match the "
            f"source video frame count ({source_frame_count}). The Beeble API requires "
            "one mask per source frame."
        )

    matte_images = convert_mask_to_image(frames)
    matte_images = downscale_image_tensor(matte_images, _MAX_PIXELS)
    components = Types.VideoComponents(images=matte_images, audio=None, frame_rate=frame_rate)
    matte_video = InputImpl.VideoFromComponents(components)
    return await upload_video_to_comfyapi(cls, matte_video, wait_label=wait_label)
def _alpha_mode_input(*, video: bool) -> IO.DynamicCombo.Input:
    """Create the ``alpha_mode`` DynamicCombo input.

    'auto' and 'fill' expose no extra inputs; 'select' adds an ``alpha_keyframe``
    mask and 'custom' adds an ``alpha_mask`` mask. Only the tooltips of those two
    masks depend on ``video``.
    """
    if video:
        keyframe_tip = "First-frame keyframe mask. Beeble propagates this across the video."
        matte_tip = (
            "Per-frame grayscale mask covering the entire video. "
            "Must have the same frame count as the source. "
            "Connect a MASK output from SAM3_TrackToMask or similar."
        )
    else:
        keyframe_tip = "Grayscale keyframe mask."
        matte_tip = "Grayscale mask to apply."

    overview = (
        "Controls how SwitchX decides what to keep vs. regenerate. "
        "'auto' isolates the main subject automatically. "
        "'fill' regenerates the entire frame while preserving geometry. "
        "'select' propagates a first-frame keyframe across the clip. "
        "'custom' uses a per-frame alpha matte you provide."
    )
    mode_options = [
        IO.DynamicCombo.Option("auto", []),
        IO.DynamicCombo.Option("fill", []),
        IO.DynamicCombo.Option("select", [IO.Mask.Input("alpha_keyframe", tooltip=keyframe_tip)]),
        IO.DynamicCombo.Option("custom", [IO.Mask.Input("alpha_mask", tooltip=matte_tip)]),
    ]
    return IO.DynamicCombo.Input("alpha_mode", tooltip=overview, options=mode_options)
def _common_inputs(*, source: IO.Input, video: bool) -> list[IO.Input]:
    """Return the input list shared by the SwitchX nodes.

    The list is, in order: ``source``, prompt, optional reference image, alpha
    mode, max resolution and seed. ``video`` is only forwarded to the alpha-mode
    builder.
    """
    prompt_input = IO.String.Input(
        "prompt",
        multiline=True,
        default="",
        tooltip=(
            "Text description of the desired output (max 2000 chars). "
            "At least one of 'prompt' or 'reference_image' is required."
        ),
    )
    reference_input = IO.Image.Input(
        "reference_image",
        optional=True,
        tooltip=(
            "Reference image whose look (background, lighting, costume) the result "
            "should adopt. At least one of 'reference_image' or 'prompt' is required."
        ),
    )
    alpha_input = _alpha_mode_input(video=video)
    resolution_input = IO.Combo.Input(
        "max_resolution",
        options=["1080p", "720p"],
        default="1080p",
        tooltip="Maximum output resolution.",
    )
    seed_input = IO.Int.Input(
        "seed",
        default=0,
        min=0,
        max=2147483647,
        control_after_generate=True,
        tooltip=(
            "Seed controls whether the node should re-run; "
            "results are non-deterministic regardless of seed."
        ),
    )
    return [source, prompt_input, reference_input, alpha_input, resolution_input, seed_input]
async def _submit_and_poll(
    cls: type[IO.ComfyNode],
    request: CreateSwitchXRequest,
) -> SwitchXStatusResponse:
    """Create a SwitchX generation, then poll its status endpoint.

    The id from the creation response selects the status endpoint. The value
    returned is whatever ``poll_op`` returns for that endpoint.
    """
    create_endpoint = ApiEndpoint(path="/proxy/beeble/v1/switchx/generations", method="POST")
    created = await sync_op(
        cls,
        create_endpoint,
        response_model=SwitchXStatusResponse,
        data=request,
    )

    def _status_of(reply):
        return reply.status

    def _progress_of(reply):
        return reply.progress

    status_endpoint = ApiEndpoint(path=f"/proxy/beeble/v1/switchx/generations/{created.id}")
    return await poll_op(
        cls,
        status_endpoint,
        response_model=SwitchXStatusResponse,
        status_extractor=_status_of,
        progress_extractor=_progress_of,
    )
def _require_output_url(response: SwitchXStatusResponse, name: str) -> str:
    """Return the output URL stored under ``name`` on ``response.output``.

    Raises:
        RuntimeError: If the response has no output section or that URL is ``None``.
    """
    url = None if response.output is None else getattr(response.output, name)
    if url is None:
        raise RuntimeError(f"Beeble job {response.id} completed without a {name!r} output URL.")
    return url
def _alpha_url(response: SwitchXStatusResponse, mode: str) -> str | None:
    """Return the alpha-matte URL, or ``None`` when there is nothing to download.

    In 'fill' mode the URL on the response is deliberately ignored. Per the
    original author's note, Beeble writes no alpha asset for that mode but still
    returns a signed URL, and fetching it fails with a 403 (S3 AccessDenied). For
    the other modes ('auto', 'custom', 'select') the URL from the response is
    returned as-is, which may itself be ``None``.
    """
    if mode != "fill" and response.output is not None:
        return response.output.alpha
    return None
class BeebleSwitchXVideoEdit(IO.ComfyNode):
    """API node that sends a video to Beeble SwitchX and returns the render plus alpha matte."""

    @classmethod
    def define_schema(cls) -> IO.Schema:
        """Describe the node: inputs, the two video outputs, hidden fields and price badge."""
        summary = (
            "Edit a video with Beeble SwitchX. Switches anything in the scene (background, "
            "lighting, costume) while preserving the original subject's pixels and motion. "
            "Provide a reference image and/or text prompt to describe the new look. "
            "Max 240 frames, max ~2.77MP per frame."
        )
        node_inputs = _common_inputs(source=IO.Video.Input("video"), video=True)
        node_outputs = [
            IO.Video.Output(display_name="video"),
            IO.Video.Output(
                display_name="alpha",
                tooltip="The alpha matte Beeble used. Empty for 'fill' mode, which has no separate matte.",
            ),
        ]
        hidden_fields = [
            IO.Hidden.auth_token_comfy_org,
            IO.Hidden.api_key_comfy_org,
            IO.Hidden.unique_id,
        ]
        badge = IO.PriceBadge(
            depends_on=IO.PriceBadgeDepends(widgets=["max_resolution"]),
            # Spelled out line by line so the expression text is independent of
            # this file's indentation.
            expr=(
                "\n"
                "(\n"
                '$rate := widgets.max_resolution = "1080p" ? 0.429 : 0.143;\n'
                '{"type":"usd","usd": $rate, "format":{"suffix":"/30 frames"}}\n'
                ")\n"
            ),
        )
        return IO.Schema(
            node_id="BeebleSwitchXVideoEdit",
            display_name="Beeble SwitchX Video Edit",
            category="api node/video/Beeble",
            description=summary,
            inputs=node_inputs,
            outputs=node_outputs,
            hidden=hidden_fields,
            is_api_node=True,
            price_badge=badge,
        )

    @classmethod
    async def execute(
        cls,
        video: Input.Video,
        prompt: str,
        alpha_mode: dict,
        max_resolution: str,
        seed: int,
        reference_image: Input.Image | None = None,
    ) -> IO.NodeOutput:
        """Validate inputs, upload the assets, run the generation and download the results.

        ``alpha_mode`` is the DynamicCombo payload: key ``"alpha_mode"`` holds the
        selected mode, and the mode-specific mask is stored under
        ``"alpha_keyframe"`` ('select') or ``"alpha_mask"`` ('custom'). ``seed`` is
        not read here. The second output is ``None`` when no alpha URL is available
        (always the case for 'fill').
        """
        cleaned_prompt = _validate_inputs(prompt, reference_image)
        validate_video_frame_count(video, max_frame_count=_MAX_FRAMES)
        video = downscale_video_to_max_pixels(video, _MAX_PIXELS)

        mode = alpha_mode["alpha_mode"]
        alpha_uri: str | None = None
        if mode == "custom":
            # Frame rate and count are read from the already-downscaled source video.
            alpha_uri = await _upload_mask_batch_as_video(
                cls,
                alpha_mode["alpha_mask"],
                frame_rate=video.get_frame_rate(),
                source_frame_count=video.get_frame_count(),
                wait_label="Uploading alpha video",
            )
        elif mode == "select":
            alpha_uri = await _upload_mask_as_image(
                cls, alpha_mode["alpha_keyframe"], wait_label="Uploading keyframe"
            )

        source_uri = await upload_video_to_comfyapi(cls, video, wait_label="Uploading source")

        reference_uri: str | None = None
        if reference_image is not None:
            reference_uri = await upload_image_to_comfyapi(
                cls,
                reference_image,
                mime_type="image/png",
                wait_label="Uploading reference",
                total_pixels=_MAX_PIXELS,
            )

        resolution_cap = 1080 if max_resolution == "1080p" else 720
        request = CreateSwitchXRequest(
            generation_type="video",
            source_uri=source_uri,
            alpha_mode=mode,
            prompt=cleaned_prompt,
            reference_image_uri=reference_uri,
            alpha_uri=alpha_uri,
            max_resolution=resolution_cap,
        )
        response = await _submit_and_poll(cls, request)

        render = await download_url_to_video_output(_require_output_url(response, "render"))
        matte_url = _alpha_url(response, mode)
        alpha = None
        if matte_url is not None:
            alpha = await download_url_to_video_output(matte_url)
        return IO.NodeOutput(render, alpha)
class BeebleSwitchXImageEdit(IO.ComfyNode):
    """API node that sends a single image to Beeble SwitchX and returns the render plus alpha mask."""

    @classmethod
    def define_schema(cls) -> IO.Schema:
        """Describe the node: inputs, image and mask outputs, hidden fields and price badge."""
        summary = (
            "Edit a single image with Beeble SwitchX. Switches anything in the scene "
            "(background, lighting, costume) while preserving the original subject's pixels. "
            "Provide a reference image and/or text prompt to describe the new look. "
            "Max ~2.77MP."
        )
        node_inputs = _common_inputs(source=IO.Image.Input("image"), video=False)
        node_outputs = [
            IO.Image.Output(display_name="image"),
            IO.Mask.Output(
                display_name="alpha",
                tooltip="The alpha matte Beeble used. Empty for 'fill' mode, which has no separate matte.",
            ),
        ]
        hidden_fields = [
            IO.Hidden.auth_token_comfy_org,
            IO.Hidden.api_key_comfy_org,
            IO.Hidden.unique_id,
        ]
        badge = IO.PriceBadge(
            depends_on=IO.PriceBadgeDepends(widgets=["max_resolution"]),
            # Spelled out line by line so the expression text is independent of
            # this file's indentation.
            expr=(
                "\n"
                "(\n"
                '$rate := widgets.max_resolution = "1080p" ? 0.429 : 0.143;\n'
                '{"type":"usd","usd": $rate}\n'
                ")\n"
            ),
        )
        return IO.Schema(
            node_id="BeebleSwitchXImageEdit",
            display_name="Beeble SwitchX Image Edit",
            category="api node/image/Beeble",
            description=summary,
            inputs=node_inputs,
            outputs=node_outputs,
            hidden=hidden_fields,
            is_api_node=True,
            price_badge=badge,
        )

    @classmethod
    async def execute(
        cls,
        image: Input.Image,
        prompt: str,
        alpha_mode: dict,
        max_resolution: str,
        seed: int,
        reference_image: Input.Image | None = None,
    ) -> IO.NodeOutput:
        """Validate inputs, upload the assets, run the generation and download the results.

        ``alpha_mode`` is the DynamicCombo payload: key ``"alpha_mode"`` holds the
        selected mode, and the mode-specific mask is stored under
        ``"alpha_keyframe"`` ('select') or ``"alpha_mask"`` ('custom'); both are
        uploaded as a single PNG. ``seed`` is not read here. The second output is
        ``None`` when no alpha URL is available (always the case for 'fill').
        """
        cleaned_prompt = _validate_inputs(prompt, reference_image)
        image = downscale_image_tensor(image, _MAX_PIXELS)

        mode = alpha_mode["alpha_mode"]
        alpha_uri: str | None = None
        if mode == "custom":
            alpha_uri = await _upload_mask_as_image(cls, alpha_mode["alpha_mask"], wait_label="Uploading alpha")
        elif mode == "select":
            alpha_uri = await _upload_mask_as_image(
                cls, alpha_mode["alpha_keyframe"], wait_label="Uploading keyframe"
            )

        # The image was downscaled above, so no pixel budget is passed to the uploader.
        source_uri = await upload_image_to_comfyapi(
            cls,
            image,
            mime_type="image/png",
            wait_label="Uploading source",
            total_pixels=None,
        )

        reference_uri: str | None = None
        if reference_image is not None:
            reference_uri = await upload_image_to_comfyapi(
                cls,
                reference_image,
                mime_type="image/png",
                wait_label="Uploading reference",
                total_pixels=_MAX_PIXELS,
            )

        resolution_cap = 1080 if max_resolution == "1080p" else 720
        request = CreateSwitchXRequest(
            generation_type="image",
            source_uri=source_uri,
            alpha_mode=mode,
            prompt=cleaned_prompt,
            reference_image_uri=reference_uri,
            alpha_uri=alpha_uri,
            max_resolution=resolution_cap,
        )
        response = await _submit_and_poll(cls, request)

        render = await download_url_to_image_tensor(_require_output_url(response, "render"))
        matte_url = _alpha_url(response, mode)
        alpha_mask = None
        if matte_url is not None:
            matte_bytes = await download_url_as_bytesio(matte_url)
            matte_image = bytesio_to_image_tensor(matte_bytes, mode="L")
            # Drop the trailing axis only when the decoded tensor is 4-D.
            alpha_mask = matte_image.squeeze(-1) if matte_image.dim() == 4 else matte_image
        return IO.NodeOutput(render, alpha_mask)
class BeebleExtension(ComfyExtension):
    """Extension exposing the Beeble SwitchX video and image edit nodes."""

    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
        """Return the node classes this extension provides, video node first."""
        node_classes: list[type[IO.ComfyNode]] = [BeebleSwitchXVideoEdit, BeebleSwitchXImageEdit]
        return node_classes
async def comfy_entrypoint() -> BeebleExtension:
    """Create and return a new ``BeebleExtension`` instance."""
    extension = BeebleExtension()
    return extension

View File

@ -86,7 +86,7 @@ class _PollUIState:
_RETRY_STATUS = {408, 500, 502, 503, 504} # status 429 is handled separately
COMPLETED_STATUSES = ["succeeded", "succeed", "success", "completed", "finished", "done", "complete"]
FAILED_STATUSES = ["cancelled", "canceled", "canceling", "fail", "failed", "error"]
QUEUED_STATUSES = ["created", "queued", "queueing", "submitted", "initializing", "wait"]
QUEUED_STATUSES = ["created", "queued", "queueing", "submitted", "initializing", "wait", "in_queue"]
async def sync_op(

View File

@ -47,6 +47,7 @@ class Load3D(IO.ComfyNode):
IO.Load3DCamera.Output(display_name="camera_info"),
IO.Video.Output(display_name="recording_video"),
IO.File3DAny.Output(display_name="model_3d"),
IO.Load3DModelInfo.Output(display_name="model_info"),
],
)
@ -69,7 +70,8 @@ class Load3D(IO.ComfyNode):
video = InputImpl.VideoFromFile(recording_video_path)
file_3d = Types.File3D(folder_paths.get_annotated_filepath(model_file))
return IO.NodeOutput(output_image, output_mask, model_file, normal_image, image['camera_info'], video, file_3d)
model_info = image.get('model_info', [])
return IO.NodeOutput(output_image, output_mask, model_file, normal_image, image['camera_info'], video, file_3d, model_info)
process = execute # TODO: remove

View File

@ -1,32 +1,32 @@
from comfy import model_management
from comfy_api.latest import ComfyExtension, IO
from typing_extensions import override
import math
class LTXVLatentUpsampler:
class LTXVLatentUpsampler(IO.ComfyNode):
"""
Upsamples a video latent by a factor of 2.
"""
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"samples": ("LATENT",),
"upscale_model": ("LATENT_UPSCALE_MODEL",),
"vae": ("VAE",),
}
}
def define_schema(cls):
return IO.Schema(
node_id="LTXVLatentUpsampler",
category="latent/video",
is_experimental=True,
inputs=[
IO.Latent.Input("samples"),
IO.LatentUpscaleModel.Input("upscale_model"),
IO.Vae.Input("vae"),
],
outputs=[
IO.Latent.Output(),
],
)
RETURN_TYPES = ("LATENT",)
FUNCTION = "upsample_latent"
CATEGORY = "latent/video"
EXPERIMENTAL = True
def upsample_latent(
self,
samples: dict,
upscale_model,
vae,
) -> tuple:
@classmethod
def execute(cls, samples, upscale_model, vae) -> IO.NodeOutput:
"""
Upsample the input latent using the provided model.
@ -34,7 +34,6 @@ class LTXVLatentUpsampler:
samples (dict): Input latent samples
upscale_model (LatentUpsampler): Loaded upscale model
vae: VAE model for normalization
auto_tiling (bool): Whether to automatically tile the input for processing
Returns:
tuple: Tuple containing the upsampled latent
@ -67,9 +66,16 @@ class LTXVLatentUpsampler:
return_dict = samples.copy()
return_dict["samples"] = upsampled_latents
return_dict.pop("noise_mask", None)
return (return_dict,)
return IO.NodeOutput(return_dict)
upsample_latent = execute # TODO: remove
NODE_CLASS_MAPPINGS = {
"LTXVLatentUpsampler": LTXVLatentUpsampler,
}
class LTXVLatentUpsamplerExtension(ComfyExtension):
    """Extension exposing the ``LTXVLatentUpsampler`` node."""

    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
        """Return the single node class this extension provides."""
        node_classes: list[type[IO.ComfyNode]] = [LTXVLatentUpsampler]
        return node_classes
async def comfy_entrypoint() -> LTXVLatentUpsamplerExtension:
    """Create and return a new ``LTXVLatentUpsamplerExtension`` instance."""
    extension = LTXVLatentUpsamplerExtension()
    return extension