Compare commits

..

11 Commits

Author SHA1 Message Date
df6c3158d4 chore(openapi): sync shared API contract from cloud@7321193 2026-06-03 20:52:54 +00:00
bb84c75283 chore(openapi): sync shared API contract from cloud@7c470f0 (#14174) 2026-06-03 13:20:30 -07:00
f49bdb6557 ComfyUI v0.24.0 2026-06-03 12:42:13 -04:00
8e3045a90b Memory usage factor for ideogram 4 on non dynamic vram. (#14264) 2026-06-03 12:19:18 -04:00
f0619af659 chore: update workflow templates to v0.9.94 (#14263) 2026-06-03 09:10:26 -07:00
f69225df24 Mark DualModelGuider as experimental (#14262) 2026-06-03 08:55:18 -07:00
24f9a020ce Support Ideogram4 (#14259) 2026-06-03 08:41:44 -07:00
c7a22e1b4e [Partner Nodes] feat: add Ideogram V4 node (#14261)
Signed-off-by: bigcat88 <bigcat88@icloud.com>
2026-06-03 18:13:20 +03:00
bd7da053ae comfy-aimdo: 0.4.8 (#14244)
Aimdo 0.4.8 fixes a crash in multi-gpu due to contention on the
singleton bounce buffer.
2026-06-02 18:57:16 -07:00
d4c7ebff9c Remove old useless no comfy kitchen fallback. (#14245)
* Remove old fallback used when no comfy kitchen.

* Remove unused logging import
2026-06-02 17:52:41 -07:00
dc10c0133e PiD: Add SDXL and QwenImage (#14240) 2026-06-02 12:40:49 -07:00
23 changed files with 5682 additions and 11797 deletions

View File

@ -519,14 +519,18 @@ async def update_asset_route(request: web.Request) -> web.Response:
@_require_assets_feature_enabled
async def delete_asset_route(request: web.Request) -> web.Response:
reference_id = str(uuid.UUID(request.match_info["id"]))
delete_content_param = request.query.get("delete_content")
delete_content = (
False
if delete_content_param is None
else delete_content_param.lower() not in {"0", "false", "no"}
)
try:
# Deleting an asset is a soft delete of the reference; the underlying
# content is preserved (it may be shared with other references).
deleted = delete_asset_reference(
reference_id=reference_id,
owner_id=USER_MANAGER.get_request_user_id(request),
delete_content_if_orphan=False,
delete_content_if_orphan=delete_content,
)
except Exception:
logging.exception(

View File

@ -149,16 +149,6 @@ def delete_asset_reference(
owner_id: str,
delete_content_if_orphan: bool = True,
) -> bool:
"""Delete an asset reference.
With ``delete_content_if_orphan=False`` (a soft delete), the reference is
hidden and the underlying content is preserved. With ``True``, the content
is also removed once it becomes orphaned.
Note: the public DELETE /api/assets/{id} endpoint always soft-deletes
(passes ``False``); the orphan-reclamation path is intentionally
internal-only, retained for a future GC/admin caller.
"""
with create_session() as session:
if not delete_content_if_orphan:
# Soft delete: mark the reference as deleted but keep everything

View File

@ -4,7 +4,7 @@ from torch import Tensor
from comfy.ldm.modules.attention import optimized_attention
import comfy.model_management
import logging
import comfy.quant_ops
def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None, transformer_options={}) -> Tensor:
@ -44,21 +44,15 @@ def _apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor):
return apply_rope1(xq, freqs_cis), apply_rope1(xk, freqs_cis)
try:
import comfy.quant_ops
q_apply_rope = comfy.quant_ops.ck.apply_rope
q_apply_rope1 = comfy.quant_ops.ck.apply_rope1
def apply_rope(xq, xk, freqs_cis):
if comfy.model_management.in_training:
return _apply_rope(xq, xk, freqs_cis)
else:
return apply_rope1(xq, freqs_cis), apply_rope1(xk, freqs_cis)
def apply_rope1(x, freqs_cis):
if comfy.model_management.in_training:
return _apply_rope1(x, freqs_cis)
else:
return q_apply_rope1(x, freqs_cis)
except:
logging.warning("No comfy kitchen, using old apply_rope functions.")
apply_rope = _apply_rope
apply_rope1 = _apply_rope1
def apply_rope(xq, xk, freqs_cis):
    """Apply rotary position embeddings to a query/key pair.

    Uses the local ``_apply_rope`` while ``comfy.model_management.in_training``
    is truthy, and ``comfy.quant_ops.ck.apply_rope`` otherwise.
    """
    rope_fn = _apply_rope if comfy.model_management.in_training else comfy.quant_ops.ck.apply_rope
    return rope_fn(xq, xk, freqs_cis)
def apply_rope1(x, freqs_cis):
    """Apply rotary position embeddings to a single tensor.

    Uses the local ``_apply_rope1`` while ``comfy.model_management.in_training``
    is truthy, and ``comfy.quant_ops.ck.apply_rope1`` otherwise.
    """
    rope_fn = _apply_rope1 if comfy.model_management.in_training else comfy.quant_ops.ck.apply_rope1
    return rope_fn(x, freqs_cis)

View File

@ -0,0 +1,297 @@
"""
The Ideogram 4 transformer is a NextDiT/Lumina2-family single-stream model that
consumes Qwen3-VL hidden-state features (concatenated from 13 layers -> 53248 dims) and
packs ``[text tokens, image tokens]`` into one sequence with block-diagonal segment attention and 3D interleaved MRoPE.
"""
from __future__ import annotations
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import comfy.patcher_extension
from comfy.ldm.lumina.model import FeedForward
from comfy.ldm.modules.attention import optimized_attention_masked
from comfy.text_encoders.llama import apply_rope, precompute_freqs_cis
# Per-token role indicators
SEQUENCE_PADDING_INDICATOR = -1
OUTPUT_IMAGE_INDICATOR = 2
LLM_TOKEN_INDICATOR = 3
# Image grid coordinates are offset so they never collide with text positions
IMAGE_POSITION_OFFSET = 65536
class Ideogram4Attention(nn.Module):
    """Self-attention with a fused QKV projection, per-head RMSNorm on Q/K and RoPE.

    ``operations`` supplies the ``Linear`` and ``RMSNorm`` layer factories.
    """

    def __init__(self, hidden_size, num_heads, eps=1e-5, dtype=None, device=None, operations=None):
        super().__init__()
        factory = {"dtype": dtype, "device": device}
        self.num_heads = num_heads
        self.head_dim = hidden_size // num_heads
        self.hidden_size = hidden_size
        # One bias-free projection produces Q, K and V together.
        self.qkv = operations.Linear(hidden_size, hidden_size * 3, bias=False, **factory)
        self.norm_q = operations.RMSNorm(self.head_dim, eps=eps, elementwise_affine=True, **factory)
        self.norm_k = operations.RMSNorm(self.head_dim, eps=eps, elementwise_affine=True, **factory)
        self.o = operations.Linear(hidden_size, hidden_size, bias=False, **factory)

    def forward(self, x, attn_mask, freqs_cis, transformer_options={}):
        """Attend over ``x`` (batch, seq, hidden) and return the output projection."""
        bsz, n_tokens, _ = x.shape
        fused = self.qkv(x).view(bsz, n_tokens, 3, self.num_heads, self.head_dim)
        q, k, v = fused.unbind(dim=2)

        # Normalise Q/K over head_dim, then move heads ahead of the sequence
        # axis: (B, heads, L, head_dim).
        q = self.norm_q(q).transpose(1, 2)
        k = self.norm_k(k).transpose(1, 2)
        v = v.transpose(1, 2)

        q, k = apply_rope(q, k, freqs_cis)
        attended = optimized_attention_masked(
            q, k, v, self.num_heads, attn_mask,
            skip_reshape=True, transformer_options=transformer_options,
        )
        return self.o(attended)
class Ideogram4TransformerBlock(nn.Module):
    """Attention + feed-forward block modulated by an adaLN conditioning vector.

    Each sub-layer is normalised on the way in (``*_norm1``) and on the way out
    (``*_norm2``). The conditioning supplies a scale for the input and a
    tanh-squashed gate for the residual update.
    """

    def __init__(self, hidden_size, intermediate_size, num_heads, norm_eps, adaln_dim, dtype=None, device=None, operations=None):
        super().__init__()
        factory = {"dtype": dtype, "device": device}
        self.attention = Ideogram4Attention(hidden_size, num_heads, eps=1e-5, operations=operations, **factory)
        self.feed_forward = FeedForward(
            dim=hidden_size,
            hidden_dim=intermediate_size,
            multiple_of=1,
            ffn_dim_multiplier=None,
            operation_settings={"operations": operations, "dtype": dtype, "device": device},
        )
        self.attention_norm1 = operations.RMSNorm(hidden_size, eps=norm_eps, elementwise_affine=True, **factory)
        self.ffn_norm1 = operations.RMSNorm(hidden_size, eps=norm_eps, elementwise_affine=True, **factory)
        self.attention_norm2 = operations.RMSNorm(hidden_size, eps=norm_eps, elementwise_affine=True, **factory)
        self.ffn_norm2 = operations.RMSNorm(hidden_size, eps=norm_eps, elementwise_affine=True, **factory)
        # Produces [scale_msa, gate_msa, scale_mlp, gate_mlp], each hidden_size wide.
        self.adaln_modulation = operations.Linear(adaln_dim, 4 * hidden_size, bias=True, **factory)

    def forward(self, x, attn_mask, freqs_cis, adaln_input, transformer_options={}):
        """Run the gated attention and feed-forward residual updates on ``x``."""
        scale_msa, gate_msa, scale_mlp, gate_mlp = self.adaln_modulation(adaln_input).chunk(4, dim=-1)

        attn_in = self.attention_norm1(x) * (1.0 + scale_msa)
        attn_out = self.attention(attn_in, attn_mask, freqs_cis, transformer_options=transformer_options)
        x = x + torch.tanh(gate_msa) * self.attention_norm2(attn_out)

        mlp_in = self.ffn_norm1(x) * (1.0 + scale_mlp)
        x = x + torch.tanh(gate_mlp) * self.ffn_norm2(self.feed_forward(mlp_in))
        return x
def _sinusoidal_embedding(t, dim, scale=1e4):
t = t.to(torch.float32)
half = dim // 2
freq = math.log(scale) / (half - 1)
freq = torch.exp(torch.arange(half, dtype=torch.float32, device=t.device) * -freq)
emb = t.unsqueeze(-1) * freq
emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=-1)
if dim % 2 == 1:
emb = F.pad(emb, (0, 1))
return emb
class Ideogram4EmbedScalar(nn.Module):
    """Embed a scalar as a sinusoidal feature vector followed by a two-layer MLP.

    The input is rescaled from ``input_range`` onto ``[0, 1e4]`` before the
    sinusoidal embedding is taken.
    """

    def __init__(self, dim, input_range=(0.0, 1.0), dtype=None, device=None, operations=None):
        super().__init__()
        self.dim = dim
        self.range_min, self.range_max = input_range
        self.mlp_in = operations.Linear(dim, dim, bias=True, dtype=dtype, device=device)
        self.mlp_out = operations.Linear(dim, dim, bias=True, dtype=dtype, device=device)

    def forward(self, x):
        """Return the ``dim``-wide embedding of ``x``."""
        span = self.range_max - self.range_min
        scaled = 1e4 * (x.to(torch.float32) - self.range_min) / span
        # The sinusoid is computed in float32, then cast to the MLP weight dtype.
        features = _sinusoidal_embedding(scaled, self.dim).to(self.mlp_in.weight.dtype)
        return self.mlp_out(F.silu(self.mlp_in(features)))
class Ideogram4FinalLayer(nn.Module):
    """Output head: non-affine LayerNorm, adaLN scale, then a linear projection."""

    def __init__(self, hidden_size, out_channels, adaln_dim, dtype=None, device=None, operations=None):
        super().__init__()
        factory = {"dtype": dtype, "device": device}
        self.norm_final = operations.LayerNorm(hidden_size, eps=1e-6, elementwise_affine=False, **factory)
        self.linear = operations.Linear(hidden_size, out_channels, bias=True, **factory)
        self.adaln_modulation = operations.Linear(adaln_dim, hidden_size, bias=True, **factory)

    def forward(self, x, c):
        """Project ``x`` to ``out_channels`` after scaling by ``1 + modulation(silu(c))``."""
        modulation = self.adaln_modulation(F.silu(c))
        normed = self.norm_final(x)
        return self.linear(normed * (1.0 + modulation))
class Ideogram4Transformer(nn.Module):
    """A single Ideogram 4 backbone operating on a packed token sequence."""

    def __init__(self, emb_dim, num_layers, num_heads, intermediate_size, adaln_dim,
                 in_channels, llm_features_dim, rope_theta, mrope_section, norm_eps,
                 dtype=None, device=None, operations=None):
        super().__init__()
        factory = {"dtype": dtype, "device": device}
        self.head_dim = emb_dim // num_heads
        self.rope_theta = rope_theta
        self.mrope_section = tuple(mrope_section)

        self.input_proj = operations.Linear(in_channels, emb_dim, bias=True, **factory)
        self.llm_cond_norm = operations.RMSNorm(llm_features_dim, eps=1e-6, elementwise_affine=True, **factory)
        self.llm_cond_proj = operations.Linear(llm_features_dim, emb_dim, bias=True, **factory)
        self.t_embedding = Ideogram4EmbedScalar(emb_dim, input_range=(0.0, 1.0), operations=operations, **factory)
        self.adaln_proj = operations.Linear(emb_dim, adaln_dim, bias=True, **factory)
        # Two-entry table: index 1 for output-image tokens, index 0 for everything else.
        self.embed_image_indicator = operations.Embedding(2, emb_dim, **factory)

        blocks = [
            Ideogram4TransformerBlock(emb_dim, intermediate_size, num_heads, norm_eps, adaln_dim,
                                      operations=operations, **factory)
            for _ in range(num_layers)
        ]
        self.layers = nn.ModuleList(blocks)
        self.final_layer = Ideogram4FinalLayer(emb_dim, in_channels, adaln_dim, operations=operations, **factory)

    def _backbone(self, llm_features, x, t, position_ids, attn_mask, indicator, transformer_options={}):
        """Run the packed sequence through every block and the final layer.

        ``indicator`` holds the per-token role. ``llm_features`` may be ``None``
        for an image-only sequence. A boolean ``attn_mask`` (True = attend) is
        converted to an additive mask before use.
        """
        indicator = indicator.to(torch.long)
        is_image = indicator == OUTPUT_IMAGE_INDICATOR
        image_mask = is_image.to(x.dtype).unsqueeze(-1)

        # Mask before and after the projection: input_proj has a bias, which
        # would otherwise leak onto the non-image rows.
        h = self.input_proj(x * image_mask) * image_mask

        t_cond = self.t_embedding(t)
        if t.dim() == 1:
            t_cond = t_cond.unsqueeze(1)
        adaln_input = F.silu(self.adaln_proj(t_cond))

        # h is zero on the text rows (content lives only on image rows), so the
        # add below writes the text features in place.
        if llm_features is not None:
            n_text = llm_features.shape[1]
            text_mask = (indicator[:, :n_text] == LLM_TOKEN_INDICATOR).to(x.dtype).unsqueeze(-1)
            text_h = self.llm_cond_proj(self.llm_cond_norm(llm_features * text_mask)) * text_mask
            h[:, :n_text] = h[:, :n_text] + text_h

        h = h + self.embed_image_indicator(is_image.to(torch.long))

        # Qwen3-VL interleaved MRoPE; position_ids (B, L, 3) -> (3, L) (same across batch).
        freqs_cis = precompute_freqs_cis(
            self.head_dim,
            position_ids[0].transpose(0, 1),
            self.rope_theta,
            rope_dims=self.mrope_section,
            interleaved_mrope=True,
            device=position_ids.device,
        )

        if attn_mask is not None and attn_mask.dtype == torch.bool:
            # Boolean -> additive: 0 where attending, most-negative finite value elsewhere.
            blocked = ~attn_mask
            attn_mask = torch.zeros_like(attn_mask, dtype=h.dtype).masked_fill_(blocked, -torch.finfo(h.dtype).max)

        for block in self.layers:
            h = block(h, attn_mask, freqs_cis, adaln_input, transformer_options=transformer_options)
        return self.final_layer(h, adaln_input)
class Ideogram4Transformer2DModel(Ideogram4Transformer):
    """Ideogram 4 single-stream DiT.

    Runs a packed ``[text, image]`` sequence when text context is supplied, or
    an image-only sequence when ``context is None``.
    """

    def __init__(self, image_model=None, in_channels=128, num_layers=34, num_attention_heads=18, attention_head_dim=256, intermediate_size=12288,
                 adaln_dim=512, llm_features_dim=53248, rope_theta=5000000, mrope_section=(24, 20, 20), norm_eps=1e-5,
                 dtype=None, device=None, operations=None, **kwargs):
        super().__init__(
            emb_dim=num_attention_heads * attention_head_dim,
            num_layers=num_layers,
            num_heads=num_attention_heads,
            intermediate_size=intermediate_size,
            adaln_dim=adaln_dim,
            in_channels=in_channels,
            llm_features_dim=llm_features_dim,
            rope_theta=rope_theta,
            mrope_section=mrope_section,
            norm_eps=norm_eps,
            dtype=dtype,
            device=device,
            operations=operations,
        )
        self.dtype = dtype
        self.in_channels = in_channels
        self.out_channels = in_channels
        # 128-dim token = patch (2x2) * ae_channels (32).
        self.patch_size = 2
        self.ae_channels = in_channels // (self.patch_size * self.patch_size)

    def _img_to_tokens(self, x):
        """Flatten a (B, C, gh, gw) latent into (B, gh * gw, C) tokens."""
        B, C, gh, gw = x.shape
        p = self.patch_size
        # Split channels into (c, pi, pj) and move the grid axes to the front.
        patches = x.view(B, self.ae_channels, p, p, gh, gw).permute(0, 4, 5, 2, 3, 1)  # (B, gh, gw, pi, pj, c)
        return patches.reshape(B, gh * gw, C)

    def _tokens_to_img(self, tokens, gh, gw):
        """Inverse of ``_img_to_tokens``: (B, gh * gw, C) -> (B, C, gh, gw)."""
        B, C = tokens.shape[0], tokens.shape[-1]
        p = self.patch_size
        grid = tokens.reshape(B, gh, gw, p, p, self.ae_channels).permute(0, 5, 3, 4, 1, 2)  # (B, c, pi, pj, gh, gw)
        return grid.reshape(B, C, gh, gw)

    def _image_position_ids(self, gh, gw, device):
        """Return (gh * gw, 3) position ids ``[t, row, col]``, all shifted by the image offset."""
        rows = torch.arange(gh, device=device).view(-1, 1).expand(gh, gw).reshape(-1)
        cols = torch.arange(gw, device=device).view(1, -1).expand(gh, gw).reshape(-1)
        frames = torch.zeros_like(rows)
        return torch.stack([frames, rows, cols], dim=1) + IMAGE_POSITION_OFFSET  # (L_img, 3)

    def _run_conditional(self, x_chunk, context_chunk, attn_mask_chunk, t_chunk, gh, gw, transformer_options):
        """Run the backbone on ``[text, image]`` and return the image part as a latent."""
        device = x_chunk.device
        B = x_chunk.shape[0]
        img_tokens = self._img_to_tokens(x_chunk).to(self.dtype)
        n_img = img_tokens.shape[1]
        n_text = context_chunk.shape[1]
        total = n_text + n_img

        # Text rows stay zero here; the backbone adds the projected text features.
        packed = torch.zeros(B, total, img_tokens.shape[-1], dtype=img_tokens.dtype, device=device)
        packed[:, n_text:] = img_tokens

        # Text tokens use their index on all three axes; image tokens use the offset grid.
        text_pos = torch.arange(n_text, device=device).view(-1, 1).expand(n_text, 3)
        position_ids = torch.cat([text_pos, self._image_position_ids(gh, gw, device)], dim=0)
        position_ids = position_ids.unsqueeze(0).expand(B, total, 3)

        indicator = torch.full((B, total), LLM_TOKEN_INDICATOR, dtype=torch.long, device=device)
        indicator[:, n_text:] = OUTPUT_IMAGE_INDICATOR

        attn_mask = None
        if attn_mask_chunk is not None:
            is_pad = attn_mask_chunk == 0
            segment_ids = torch.ones(B, total, dtype=torch.long, device=device)
            segment_ids[:, :n_text][is_pad] = SEQUENCE_PADDING_INDICATOR
            indicator[:, :n_text][is_pad] = 0
            # Block-diagonal mask from segment ids: (B, 1, L, L), True = attend.
            attn_mask = (segment_ids.unsqueeze(2) == segment_ids.unsqueeze(1)).unsqueeze(1)

        out = self._backbone(context_chunk, packed, t_chunk, position_ids, attn_mask, indicator,
                             transformer_options=transformer_options)
        return self._tokens_to_img(out[:, n_text:], gh, gw)

    def _run_image_only(self, x_chunk, t_chunk, gh, gw, transformer_options):
        """Run the backbone on image tokens alone (no text, no mask)."""
        device = x_chunk.device
        B = x_chunk.shape[0]
        img_tokens = self._img_to_tokens(x_chunk).to(self.dtype)
        n_img = img_tokens.shape[1]
        position_ids = self._image_position_ids(gh, gw, device).unsqueeze(0).expand(B, n_img, 3)
        indicator = torch.full((B, n_img), OUTPUT_IMAGE_INDICATOR, dtype=torch.long, device=device)
        # Image-only sequence is a single segment -> no mask, full attention, no LLM context.
        out = self._backbone(None, img_tokens, t_chunk, position_ids, None, indicator, transformer_options=transformer_options)
        return self._tokens_to_img(out, gh, gw)

    def forward(self, x, timesteps, context=None, attention_mask=None, transformer_options={}, **kwargs):
        """Execute ``_forward`` through any registered DIFFUSION_MODEL wrappers."""
        wrappers = comfy.patcher_extension.get_all_wrappers(
            comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options)
        executor = comfy.patcher_extension.WrapperExecutor.new_class_executor(self._forward, self, wrappers)
        return executor.execute(x, timesteps, context, attention_mask, transformer_options, **kwargs)

    def _forward(self, x, timesteps, context=None, attention_mask=None, transformer_options={}, **kwargs):
        """Feed ``1 - timesteps`` to the backbone and return the negated prediction."""
        _bs, _c, gh, gw = x.shape
        flipped_t = 1.0 - timesteps
        if context is None:
            # unconditional pass
            prediction = self._run_image_only(x, flipped_t, gh, gw, transformer_options)
        else:
            prediction = self._run_conditional(x, context, attention_mask, flipped_t, gh, gw, transformer_options)
        return -prediction

View File

@ -55,6 +55,7 @@ import comfy.ldm.pixeldit.pid
import comfy.ldm.ace.model
import comfy.ldm.omnigen.omnigen2
import comfy.ldm.qwen_image.model
import comfy.ldm.ideogram4.model
import comfy.ldm.kandinsky5.model
import comfy.ldm.anima.model
import comfy.ldm.ace.ace_step15
@ -2018,6 +2019,21 @@ class QwenImage(BaseModel):
out['ref_latents'] = list([1, 16, sum(map(lambda a: math.prod(a.size()), ref_latents)) // 16])
return out
class Ideogram4(BaseModel):
    """Model wrapper that instantiates the Ideogram 4 transformer as its diffusion model."""

    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(
            model_config,
            model_type,
            device=device,
            unet_model=comfy.ldm.ideogram4.model.Ideogram4Transformer2DModel,
        )

    def extra_conds(self, **kwargs):
        """Add the text attention mask and cross-attention features to the base conds.

        The mask is forwarded only when its sum differs from its element count,
        i.e. when it is not all ones.
        """
        out = super().extra_conds(**kwargs)

        mask = kwargs.get("attention_mask")
        if mask is not None and torch.numel(mask) != mask.sum():
            out['attention_mask'] = comfy.conds.CONDRegular(mask)

        text_features = kwargs.get("cross_attn")
        if text_features is not None:
            out['c_crossattn'] = comfy.conds.CONDRegular(text_features)
        return out
class HunyuanImage21(BaseModel):
def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.hunyuan_video.model.HunyuanVideo)

View File

@ -815,6 +815,13 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
dit_config["default_ref_method"] = "negative_index"
return dit_config
if '{}embed_image_indicator.weight'.format(key_prefix) in state_dict_keys: # Ideogram 4
dit_config = {}
dit_config["image_model"] = "ideogram4"
dit_config["in_channels"] = state_dict['{}input_proj.weight'.format(key_prefix)].shape[1]
dit_config["num_layers"] = count_blocks(state_dict_keys, '{}layers.'.format(key_prefix) + '{}.')
return dit_config
if '{}visual_transformer_blocks.0.cross_attention.key_norm.weight'.format(key_prefix) in state_dict_keys: # Kandinsky 5
dit_config = {}
model_dim = state_dict['{}visual_embeddings.in_layer.bias'.format(key_prefix)].shape[0]

View File

@ -58,6 +58,7 @@ import comfy.text_encoders.omnigen2
import comfy.text_encoders.qwen_image
import comfy.text_encoders.hunyuan_image
import comfy.text_encoders.z_image
import comfy.text_encoders.ideogram4
import comfy.text_encoders.ovis
import comfy.text_encoders.kandinsky5
import comfy.text_encoders.jina_clip_2
@ -1298,6 +1299,7 @@ class CLIPType(Enum):
COGVIDEOX = 27
LENS = 28
PIXELDIT = 29
IDEOGRAM4 = 30
@ -1596,8 +1598,12 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
clip_target.clip = comfy.text_encoders.ovis.te(**llama_detect(clip_data))
clip_target.tokenizer = comfy.text_encoders.ovis.OvisTokenizer
elif te_model == TEModel.QWEN3_8B:
clip_target.clip = comfy.text_encoders.flux.klein_te(**llama_detect(clip_data), model_type="qwen3_8b")
clip_target.tokenizer = comfy.text_encoders.flux.KleinTokenizer8B
if clip_type == CLIPType.IDEOGRAM4:
clip_target.clip = comfy.text_encoders.ideogram4.te(**llama_detect(clip_data))
clip_target.tokenizer = comfy.text_encoders.ideogram4.Ideogram4Tokenizer
else:
clip_target.clip = comfy.text_encoders.flux.klein_te(**llama_detect(clip_data), model_type="qwen3_8b")
clip_target.tokenizer = comfy.text_encoders.flux.KleinTokenizer8B
elif te_model == TEModel.JINA_CLIP_2:
clip_target.clip = comfy.text_encoders.jina_clip_2.JinaClip2TextModelWrapper
clip_target.tokenizer = comfy.text_encoders.jina_clip_2.JinaClip2TokenizerWrapper

View File

@ -24,6 +24,7 @@ import comfy.text_encoders.qwen_image
import comfy.text_encoders.hunyuan_image
import comfy.text_encoders.kandinsky5
import comfy.text_encoders.z_image
import comfy.text_encoders.ideogram4
import comfy.text_encoders.anima
import comfy.text_encoders.ace15
import comfy.text_encoders.longcat_image
@ -1746,6 +1747,44 @@ class Omnigen2(supported_models_base.BASE):
hunyuan_detect = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, "{}qwen25_3b.transformer.".format(pref))
return supported_models_base.ClipTarget(comfy.text_encoders.omnigen2.Omnigen2Tokenizer, comfy.text_encoders.omnigen2.te(**hunyuan_detect))
class Ideogram4(supported_models_base.BASE):
    """Supported-model entry for Ideogram 4 checkpoints."""

    # Matched against the detected unet config.
    unet_config = {
        "image_model": "ideogram4",
    }

    sampling_settings = {
        "multiplier": 1.0,
        "shift": 1.0,
    }

    memory_usage_factor = 11.6

    # Architecture settings supplied in addition to the detected config.
    unet_extra_config = {
        "num_attention_heads": 18,
        "attention_head_dim": 256,
        "intermediate_size": 12288,
        "adaln_dim": 512,
        "llm_features_dim": 53248,
        "rope_theta": 5000000,
        "mrope_section": [24, 20, 20],
        "norm_eps": 1e-5,
    }

    latent_format = latent_formats.Flux2

    supported_inference_dtypes = [torch.bfloat16, torch.float32]

    vae_key_prefix = ["vae."]
    text_encoder_key_prefix = ["text_encoders."]

    def get_model(self, state_dict, prefix="", device=None):
        """Build the ``model_base.Ideogram4`` wrapper for this config."""
        return model_base.Ideogram4(self, device=device)

    def clip_target(self, state_dict={}):
        """Return the tokenizer/text-encoder pair, configured from the checkpoint's Qwen3-VL weights."""
        te_prefix = "{}qwen3vl_8b.transformer.".format(self.text_encoder_key_prefix[0])
        te_kwargs = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, te_prefix)
        return supported_models_base.ClipTarget(
            comfy.text_encoders.ideogram4.Ideogram4Tokenizer,
            comfy.text_encoders.ideogram4.te(**te_kwargs),
        )
class QwenImage(supported_models_base.BASE):
unet_config = {
"image_model": "qwen_image",
@ -2233,6 +2272,7 @@ models = [
ACEStep15,
Omnigen2,
QwenImage,
Ideogram4,
Flux2,
Lens,
Kandinsky5Image,

View File

@ -0,0 +1,77 @@
"""Ideogram 4 text encoder: Qwen3-VL-8B language model, 13-layer tap.
Ideogram 4 conditions on the concatenation of hidden states from 13 layers of
Qwen3-VL (layers 0,3,...,33,35), giving a 4096*13 = 53248-dim feature per token.
"""
import os
from transformers import Qwen2Tokenizer
import comfy.text_encoders.llama
from comfy import sd1_clip
# The reference implementation taps the outputs of layers (0,3,...,33,35); comfy captures layer inputs, so each index is offset by +1.
IDEOGRAM4_TAP_LAYERS = [1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 36]
class Qwen3VLTokenizer(sd1_clip.SDTokenizer):
    """Tokenizer loaded from the ``qwen25_tokenizer`` directory next to this file.

    It adds no start/end tokens and does not pad to a maximum length.
    """

    def __init__(self, embedding_directory=None, tokenizer_data={}):
        module_dir = os.path.dirname(os.path.realpath(__file__))
        super().__init__(
            os.path.join(module_dir, "qwen25_tokenizer"),
            pad_with_end=False,
            embedding_directory=embedding_directory,
            embedding_size=4096,
            embedding_key='qwen3vl_8b',
            tokenizer_class=Qwen2Tokenizer,
            has_start_token=False,
            has_end_token=False,
            pad_to_max_length=False,
            max_length=99999999,
            min_length=1,
            pad_token=151643,
            tokenizer_data=tokenizer_data,
        )
class Ideogram4Tokenizer(sd1_clip.SD1Tokenizer):
    """Tokenizer wrapper that places the prompt inside a chat template before tokenizing."""

    def __init__(self, embedding_directory=None, tokenizer_data={}):
        super().__init__(
            embedding_directory=embedding_directory,
            tokenizer_data=tokenizer_data,
            name="qwen3vl_8b",
            tokenizer=Qwen3VLTokenizer,
        )
        # Default template: a single user turn followed by the assistant header.
        self.llama_template = "<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n"

    def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, **kwargs):
        """Format ``text`` with the template and tokenize it with prompt weights disabled.

        ``llama_template`` overrides the default template when given.
        """
        template = self.llama_template if llama_template is None else llama_template
        return super().tokenize_with_weights(
            template.format(text),
            return_word_ids=return_word_ids,
            disable_weights=True,
            **kwargs,
        )
# Qwen3-VL-8B uses rope_theta = 5e6 (vs plain Qwen3-8B's 1e6).
# final_norm/lm_head are off because Ideogram only reads the raw tapped hidden states.
QWEN3VL_8B_CONFIG = {"rope_theta": 5000000.0, "final_norm": False, "lm_head": False}
class Qwen3VL8BModel(sd1_clip.SDClipModel):
    """Qwen3-8B text model configured with the Qwen3-VL overrides and the Ideogram 4 tap layers.

    The ``layer`` and ``layer_idx`` arguments are accepted but not used: the
    tapped layers are always ``IDEOGRAM4_TAP_LAYERS``.
    """

    def __init__(self, device="cpu", layer="hidden", layer_idx=None, dtype=None, attention_mask=True, model_options={}):
        super().__init__(
            device=device,
            layer=IDEOGRAM4_TAP_LAYERS,
            layer_idx=None,
            # Pass a copy so the module-level config dict is never shared.
            textmodel_json_config=dict(QWEN3VL_8B_CONFIG),
            dtype=dtype,
            special_tokens={"pad": 151643},
            layer_norm_hidden_state=False,
            model_class=comfy.text_encoders.llama.Qwen3_8B,
            enable_attention_masks=attention_mask,
            return_attention_masks=attention_mask,
            model_options=model_options,
        )
class Ideogram4TEModel(sd1_clip.SD1ClipModel):
    """Text encoder that concatenates the tapped hidden states into one feature per token."""

    def __init__(self, device="cpu", dtype=None, model_options={}):
        super().__init__(
            device=device,
            dtype=dtype,
            name="qwen3vl_8b",
            clip_model=Qwen3VL8BModel,
            model_options=model_options,
        )

    def encode_token_weights(self, token_weight_pairs):
        """Encode, then fold the tap axis into the feature axis.

        The parent output is unpacked as (B, n_taps, seq, hidden), stacked in
        ascending layer order, and returned as (B, seq, hidden * n_taps).
        """
        stacked, pooled, extra = super().encode_token_weights(token_weight_pairs)
        batch, n_taps, seq_len, hidden = stacked.shape
        # (B, taps, seq, H) -> (B, seq, H, taps), so taps vary fastest after the reshape.
        merged = stacked.permute(0, 2, 3, 1).reshape(batch, seq_len, hidden * n_taps)
        return merged, pooled, extra
def te(dtype_llama=None, llama_quantization_metadata=None):
    """Return an ``Ideogram4TEModel`` subclass with the detected settings applied.

    ``dtype_llama``, when given, replaces the dtype passed at construction.
    ``llama_quantization_metadata``, when given, is stored under
    ``"quantization_metadata"`` in a copy of ``model_options``.
    """
    class Ideogram4TEModel_(Ideogram4TEModel):
        def __init__(self, device="cpu", dtype=None, model_options={}):
            if llama_quantization_metadata is not None:
                # Copy first so the caller's dict is left untouched.
                model_options = model_options.copy()
                model_options["quantization_metadata"] = llama_quantization_metadata
            effective_dtype = dtype if dtype_llama is None else dtype_llama
            super().__init__(device=device, dtype=effective_dtype, model_options=model_options)

    return Ideogram4TEModel_

View File

@ -290,3 +290,19 @@ class IdeogramV3Request(BaseModel):
None,
description='Optional masks for character reference images. When provided, must match the number of character_reference_images. Each mask should be a grayscale image of the same dimensions as the corresponding character reference image. The images should be in JPEG, PNG or WebP format.'
)
class IdeogramV4Request(BaseModel):
    """Request body for Ideogram V4 image generation; every field is optional."""

    text_prompt: str | None = Field(
        default=None,
        description=(
            "Natural-language prompt; Magic Prompt is applied automatically. "
            "Supply exactly one of text_prompt or json_prompt."
        ),
    )
    json_prompt: dict[str, Any] | None = Field(
        default=None,
        description=(
            "Structured V4 prompt object consumed directly (disables Magic Prompt). "
            "Supply exactly one of text_prompt or json_prompt."
        ),
    )
    resolution: str | None = Field(
        default=None,
        description="Output resolution in WIDTHxHEIGHT (e.g. '2048x2048').",
    )
    rendering_speed: str | None = Field(
        default=None,
        description="Rendering speed: 'TURBO', 'DEFAULT', or 'QUALITY'.",
    )
    enable_copyright_detection: bool | None = Field(
        default=None,
        description="Opt into post-generation copyright detection.",
    )

View File

@ -10,6 +10,7 @@ from comfy_api_nodes.apis.ideogram import (
ImageRequest,
IdeogramV3Request,
IdeogramV3EditRequest,
IdeogramV4Request,
)
from comfy_api_nodes.util import (
ApiEndpoint,
@ -17,6 +18,7 @@ from comfy_api_nodes.util import (
download_url_as_bytesio,
resize_mask_to_image,
sync_op,
validate_string,
)
V1_V1_RES_MAP = {
@ -798,6 +800,119 @@ class IdeogramV3(IO.ComfyNode):
return IO.NodeOutput(await download_and_process_images(image_urls))
class IdeogramV4(IO.ComfyNode):
    """API node that generates images with Ideogram 4.0 from a text prompt."""

    @classmethod
    def define_schema(cls):
        """Describe the node's inputs, outputs, hidden values and price badge."""
        resolution_options = [
            "Auto",
            "2048x2048 (1:1)",
            "1440x2880 (1:2)",
            "2880x1440 (2:1)",
            "1664x2496 (2:3)",
            "2496x1664 (3:2)",
            "1792x2240 (4:5)",
            "2240x1792 (5:4)",
            "1440x2560 (9:16)",
            "2560x1440 (16:9)",
            "1600x2560 (5:8)",
            "2560x1600 (8:5)",
            "1728x2304 (3:4)",
            "2304x1728 (4:3)",
            "1296x3168 (9:22)",
            "3168x1296 (22:9)",
            "1152x2944 (9:23)",
            "2944x1152 (23:9)",
            "1248x3328 (3:8)",
            "3328x1248 (8:3)",
            "1280x3072 (5:12)",
            "3072x1280 (12:5)",
        ]
        return IO.Schema(
            node_id="IdeogramV4",
            display_name="Ideogram V4",
            category="partner/image/Ideogram",
            description="Generates images using the Ideogram 4.0 model from a text prompt.",
            inputs=[
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="Text prompt for the image generation.",
                ),
                IO.Combo.Input(
                    "resolution",
                    options=resolution_options,
                    default="Auto",
                ),
                IO.Combo.Input(
                    "rendering_speed",
                    options=["DEFAULT", "TURBO", "QUALITY"],
                    default="DEFAULT",
                    tooltip="Controls the trade-off between generation speed and quality.",
                ),
                IO.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    control_after_generate=True,
                    display_mode=IO.NumberDisplay.number,
                ),
            ],
            outputs=[
                IO.Image.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
                depends_on=IO.PriceBadgeDepends(widgets=["rendering_speed"]),
                expr="""
(
$speed := widgets.rendering_speed;
$price :=
$contains($speed,"turbo") ? 0.0429 :
$contains($speed,"quality") ? 0.143 :
0.0858;
{"type":"usd","usd": $price}
)
""",
            ),
        )

    @classmethod
    async def execute(
        cls,
        prompt: str,
        resolution: str,
        rendering_speed: str,
        seed: int,
    ):
        """Send the prompt to the Ideogram V4 endpoint and return the downloaded images.

        ``seed`` is accepted but is not included in the request.

        Raises:
            Exception: if the response has no data or none of the entries has a URL.
        """
        validate_string(prompt, strip_whitespace=True, min_length=1)

        endpoint = ApiEndpoint(path="/proxy/ideogram/ideogram-v4/generate", method="POST")
        # Combo labels look like "2048x2048 (1:1)"; only the WIDTHxHEIGHT part is sent.
        api_resolution = None if resolution == "Auto" else resolution.split(" ")[0]
        request = IdeogramV4Request(
            text_prompt=prompt,
            resolution=api_resolution,
            rendering_speed=rendering_speed,
        )
        response = await sync_op(
            cls,
            endpoint,
            response_model=IdeogramGenerateResponse,
            data=request,
            max_retries=1,
        )

        if not response.data:
            raise Exception("No images were generated in the response")

        image_urls = [entry.url for entry in response.data if entry.url]
        if not image_urls:
            raise Exception("No image URLs were generated in the response")

        return IO.NodeOutput(await download_and_process_images(image_urls))
class IdeogramExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@ -805,6 +920,7 @@ class IdeogramExtension(ComfyExtension):
IdeogramV1,
IdeogramV2,
IdeogramV3,
IdeogramV4,
]

View File

@ -1,5 +1,7 @@
import math
import comfy.samplers
import comfy.sampler_helpers
import comfy.patcher_extension
import comfy.sample
from comfy.k_diffusion import sampling as k_diffusion_sampling
from comfy.k_diffusion import sa_solver
@ -894,6 +896,84 @@ class DualCFGGuider(io.ComfyNode):
get_guider = execute
class Guider_DualModel(comfy.samplers.CFGGuider):
    """CFG guider that runs the two passes on two different models.

    Runs the positive (cond) pass on the main model and the negative (uncond)
    pass on a separate model. When ``cfg`` is close to 1.0 the second model is
    never prepared and only the positive pass runs.
    """

    def __init__(self, model_patcher, uncond_model_patcher):
        super().__init__(model_patcher)
        self.uncond_model_patcher = uncond_model_patcher
        # Per-run state, reset at the start of every outer_sample call.
        self.uncond_inner = None
        self.uncond_loaded = []
        self._uncond_neg = None
        self._uncond_conds = None

    def outer_sample(self, noise, latent_image, sampler, sigmas, denoise_mask=None, callback=None, disable_pbar=False, seed=None, latent_shapes=None):
        """Prepare the uncond model, delegate to the parent sampler, then clean up."""
        self.uncond_inner = None
        self.uncond_loaded = []
        self._uncond_neg = None
        self._uncond_conds = None
        # skip at cfg 1.0
        if not math.isclose(self.cfg, 1.0):
            uc = {"negative": [c.copy() for c in self.conds["negative"]]}
            self.uncond_inner, uc, self.uncond_loaded = comfy.sampler_helpers.prepare_sampling(
                self.uncond_model_patcher, noise.shape, uc, self.uncond_model_patcher.model_options)
            self._uncond_neg = uc["negative"]
        try:
            # pre_run sits inside the try so a failure here still reaches the
            # cleanup below for whatever prepare_sampling already loaded.
            if self.uncond_inner is not None:
                self.uncond_model_patcher.pre_run()
            return super().outer_sample(noise, latent_image, sampler, sigmas, denoise_mask, callback, disable_pbar, seed, latent_shapes=latent_shapes)
        finally:
            if self.uncond_inner is not None:
                self.uncond_model_patcher.cleanup()
                comfy.sampler_helpers.cleanup_models({"negative": self._uncond_neg}, self.uncond_loaded)
            self.uncond_inner = None

    def inner_sample(self, noise, latent_image, device, sampler, sigmas, denoise_mask, callback, disable_pbar, seed, latent_shapes=None):
        """Process the negative conds against the uncond model, then run the parent sampler."""
        if self.uncond_inner is not None:
            li = latent_image
            # A non-empty latent is passed through the uncond model's own process_latent_in.
            if li is not None and torch.count_nonzero(li) > 0:
                li = self.uncond_inner.process_latent_in(li)
            self._uncond_conds = comfy.samplers.process_conds(
                self.uncond_inner, noise, {"negative": self._uncond_neg}, device, li, denoise_mask, seed, latent_shapes=latent_shapes)["negative"]
        return super().inner_sample(noise, latent_image, device, sampler, sigmas, denoise_mask, callback, disable_pbar, seed, latent_shapes=latent_shapes)

    def predict_noise(self, x, timestep, model_options={}, seed=None):
        """Return the CFG-combined prediction, or the cond prediction alone without an uncond model."""
        positive = self.conds.get("positive", None)
        cond = comfy.samplers.calc_cond_batch(self.inner_model, [positive], x, timestep, model_options)[0]
        if self.uncond_inner is None:  # cfg == 1 or no negative -> single model, cond only
            return cond

        uncond_model_options = model_options
        if "multigpu_clones" in model_options:  # TODO: support multigpu instead of just running uncond on a single GPU
            uncond_model_options = {k: v for k, v in model_options.items() if k != "multigpu_clones"}
        uncond = comfy.samplers.calc_cond_batch(self.uncond_inner, [self._uncond_conds], x, timestep, uncond_model_options)[0]
        return comfy.samplers.cfg_function(self.inner_model, cond, uncond, self.cfg, x, timestep,
                                           model_options=model_options, cond=positive, uncond=self._uncond_conds)
class DualModelGuider(io.ComfyNode):
    """Node that builds a CFG guider whose negative pass may run on a second model."""

    @classmethod
    def define_schema(cls):
        """Describe the node: two model inputs, conditionings, cfg and a guider output."""
        node_inputs = [
            io.Model.Input("model", tooltip="Model used for the positive (conditional) pass."),
            io.Model.Input("model_negative", optional=True, tooltip="Model used for the negative (unconditional) pass. Use the same model for ordinary CFG."),
            io.Conditioning.Input("positive"),
            io.Float.Input("cfg", default=4.0, min=0.0, max=100.0, step=0.1, round=0.01),
            io.Conditioning.Input("negative", optional=True, tooltip="Negative conditioning run on the negative model. Leave unconnected for a text-free (image-only) unconditional pass."),
        ]
        return io.Schema(
            node_id="DualModelGuider",
            display_name="Dual Model CFG Guider",
            category="model/sampling/guiders",
            is_experimental=True,
            inputs=node_inputs,
            outputs=[io.Guider.Output()],
        )

    @classmethod
    def execute(cls, model, positive, cfg, model_negative=None, negative=None) -> io.NodeOutput:
        """Build the guider, set its conds and cfg, and wrap it in a NodeOutput.

        Without ``model_negative`` a plain ``comfy.samplers.CFGGuider`` on
        ``model`` is used; otherwise a ``Guider_DualModel`` is created.
        """
        if negative is None:
            # null cond -> no cross_attn -> model runs image-only
            negative = [[None, {}]]

        if model_negative is None:
            guider = comfy.samplers.CFGGuider(model)
        else:
            guider = Guider_DualModel(model, model_negative)

        guider.set_conds(positive, negative)
        guider.set_cfg(cfg)
        return io.NodeOutput(guider)

    get_guider = execute
class DisableNoise(io.ComfyNode):
@classmethod
def define_schema(cls):
@ -1054,11 +1134,53 @@ class ManualSigmas(io.ComfyNode):
sigmas = torch.FloatTensor(sigmas)
return io.NodeOutput(sigmas)
class CFGOverride(io.ComfyNode):
    """Node that patches a model so cfg is replaced by a fixed value on part of the schedule."""

    @classmethod
    def define_schema(cls) -> io.Schema:
        """Describe the node: a model plus cfg / start / end percent inputs, model output."""
        node_inputs = [
            io.Model.Input("model"),
            io.Float.Input("cfg", default=1.0, min=0.0, max=100.0, step=0.1, round=0.01),
            io.Float.Input("start_percent", default=0.0, min=0.0, max=1.0, step=0.001),
            io.Float.Input("end_percent", default=1.0, min=0.0, max=1.0, step=0.001),
        ]
        return io.Schema(
            node_id="CFGOverride",
            display_name="CFG Override",
            description="Override cfg to a fixed value over a [start, end] percent slice of the steps. "
                        "With multiple overrides, the one nearest the sampler wins on overlap.",
            category="sampling/custom_sampling",
            inputs=node_inputs,
            outputs=[io.Model.Output()],
        )

    @classmethod
    def execute(cls, model, cfg, start_percent, end_percent) -> io.NodeOutput:
        """Return a clone of ``model`` carrying a PREDICT_NOISE wrapper that
        swaps the guider's cfg while the current sigma is inside the window.
        """
        model_sampling = model.get_model_object("model_sampling")
        # percent->sigma decreasing, so hi >= lo
        sigma_hi = model_sampling.percent_to_sigma(start_percent)
        sigma_lo = model_sampling.percent_to_sigma(end_percent)

        def predict_noise_wrapper(executor, *args, **kwargs):
            # args = (x, timestep, model_options, seed); first timestep entry is the sigma
            current_sigma = float(args[1].flatten()[0])
            if sigma_lo <= current_sigma <= sigma_hi:
                target = executor.class_obj  # guider.cfg feeds cond_scale
                previous_cfg = target.cfg
                target.cfg = cfg
                try:
                    return executor(*args, **kwargs)
                finally:
                    # restore for other steps/overrides
                    target.cfg = previous_cfg
            # Outside the window: run the wrapped call untouched.
            return executor(*args, **kwargs)

        patched = model.clone()
        patched.add_wrapper(comfy.patcher_extension.WrappersMP.PREDICT_NOISE, predict_noise_wrapper)
        return io.NodeOutput(patched)
class CustomSamplersExtension(ComfyExtension):
@override
async def get_node_list(self) -> list[type[io.ComfyNode]]:
return [
SamplerCustom,
CFGOverride,
BasicScheduler,
KarrasScheduler,
ExponentialScheduler,
@ -1087,6 +1209,7 @@ class CustomSamplersExtension(ComfyExtension):
SamplingPercentToSigma,
CFGGuider,
DualCFGGuider,
DualModelGuider,
BasicGuider,
RandomNoise,
DisableNoise,

View File

@ -0,0 +1,64 @@
"""Ideogram 4 sampling helper
"""
import math
import torch
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io
_LOGSNR_MIN = -15.0
_LOGSNR_MAX = 18.0


def _logit_normal_schedule(u, mean, std):
    """Map quantiles ``u`` in [0, 1] to clamped reference times (float64 tensor).

    Reference time (0=noise..1=clean) is obtained through the probit/ndtri
    quantile followed by a logistic squash. The result is clamped to the
    range given by ``1 / (1 + exp(0.5 * logsnr))`` at ``_LOGSNR_MAX`` (lower
    bound) and ``_LOGSNR_MIN`` (upper bound).
    """
    quantiles = torch.special.ndtri(torch.as_tensor(u, dtype=torch.float64))
    ref_time = 1.0 - torch.special.expit(mean + std * quantiles)
    lower = 1.0 / (1.0 + math.exp(0.5 * _LOGSNR_MAX))
    upper = 1.0 / (1.0 + math.exp(0.5 * _LOGSNR_MIN))
    return torch.clamp(ref_time, min=lower, max=upper)


def ideogram4_sigmas(num_steps, width, height, mu, std):
    """Descending sigmas (len num_steps+1) for the reference schedule.

    mu + the resolution term form the logSNR shift; std is the spread.
    The resolution term is ``0.5 * log(width * height / 512**2)``.
    Returns a float32 tensor whose last entry is exactly 0.
    """
    area_ratio = (width * height) / (512 * 512)
    shift = mu + 0.5 * math.log(area_ratio)
    grid = torch.linspace(0.0, 1.0, num_steps + 1, dtype=torch.float64)
    schedule = torch.flip(1.0 - _logit_normal_schedule(grid, shift, std), dims=(0,))
    # clamp leaves ~6e-4; force full denoise
    schedule[-1] = 0.0
    return schedule.float()
class Ideogram4Scheduler(io.ComfyNode):
    """Node that outputs the sigma schedule computed by ``ideogram4_sigmas``."""

    @classmethod
    def define_schema(cls) -> io.Schema:
        """Describe the node: steps, image size, mu and std inputs; sigmas output."""
        node_inputs = [
            io.Int.Input("steps", default=20, min=1, max=200),
            io.Int.Input("width", default=1024, min=256, max=8192, step=16),
            io.Int.Input("height", default=1024, min=256, max=8192, step=16),
            io.Float.Input("mu", default=0.0, min=-10.0, max=10.0, step=0.05),
            io.Float.Input("std", default=1.75, min=0.1, max=5.0, step=0.05),
        ]
        return io.Schema(
            node_id="Ideogram4Scheduler",
            display_name="Ideogram 4 Scheduler",
            category="sampling/custom_sampling/schedulers",
            inputs=node_inputs,
            outputs=[io.Sigmas.Output()],
        )

    @classmethod
    def execute(cls, steps, width, height, mu, std) -> io.NodeOutput:
        """Compute the schedule for the given inputs and wrap it in a NodeOutput."""
        sigmas = ideogram4_sigmas(steps, width, height, mu, std)
        return io.NodeOutput(sigmas)
class Ideogram4Extension(ComfyExtension):
    """Extension whose node list contains only ``Ideogram4Scheduler``."""

    @override
    async def get_node_list(self) -> list[type[io.ComfyNode]]:
        """Return the node classes provided by this extension."""
        node_classes = [Ideogram4Scheduler]
        return node_classes
async def comfy_entrypoint() -> Ideogram4Extension:
    """Return a new ``Ideogram4Extension`` instance."""
    extension = Ideogram4Extension()
    return extension

View File

@ -21,8 +21,8 @@ class PiDConditioning(io.ComfyNode):
inputs=[
io.Conditioning.Input("positive"),
io.Latent.Input("latent", tooltip="latent (from VAEEncode or a KSampler)."),
io.Combo.Input("latent_format", options=["flux", "sd3"], default="flux",
tooltip="Flux1 and Flux2 latents auto-detected from channel dim, sd3 has to be selected manually."),
io.Combo.Input("latent_format", options=["flux", "sd3", "sdxl", "qwenimage"], default="flux",
tooltip="Flux1 (16-ch) and Flux2 (128-ch) latents are auto-detected from channel dim under 'flux'. For SD3 (16-ch), SDXL (4-ch), or QwenImage (16-ch), select manually."),
io.Float.Input(
"degrade_sigma", default=0.0, min=0.0, max=1.0, step=0.01,
tooltip="0 = clean latent. Increase to denoise corrupted latent outputs.",
@ -36,9 +36,17 @@ class PiDConditioning(io.ComfyNode):
samples = latent["samples"]
if latent_format == "flux":
fmt_cls = comfy.latent_formats.Flux2 if samples.shape[1] == 128 else comfy.latent_formats.Flux
else:
elif latent_format == "sd3":
fmt_cls = comfy.latent_formats.SD3
elif latent_format == "sdxl":
fmt_cls = comfy.latent_formats.SDXL
elif latent_format == "qwenimage":
fmt_cls = comfy.latent_formats.Wan21
else:
raise ValueError(f"Unknown latent_format: {latent_format}")
lq_latent = fmt_cls().process_in(samples)
if lq_latent.ndim == 5:
lq_latent = lq_latent[:, :, 0]
sigma_t = torch.tensor([float(degrade_sigma)], dtype=torch.float32)
return io.NodeOutput(node_helpers.conditioning_set_values(
positive, {"lq_latent": lq_latent, "degrade_sigma": sigma_t},

View File

@ -1,3 +1,3 @@
# This file is automatically generated by the build process when version is
# updated in pyproject.toml.
__version__ = "0.23.0"
__version__ = "0.24.0"

View File

@ -969,7 +969,7 @@ class CLIPLoader:
@classmethod
def INPUT_TYPES(s):
return {"required": { "clip_name": (folder_paths.get_filename_list("text_encoders"), ),
"type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2", "wan", "hidream", "chroma", "ace", "omnigen2", "qwen_image", "hunyuan_image", "flux2", "ovis", "longcat_image", "cogvideox", "lens", "pixeldit"], ),
"type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2", "wan", "hidream", "chroma", "ace", "omnigen2", "qwen_image", "hunyuan_image", "flux2", "ovis", "longcat_image", "cogvideox", "lens", "pixeldit", "ideogram4"], ),
},
"optional": {
"device": (["default", "cpu"], {"advanced": True}),
@ -2362,6 +2362,7 @@ async def init_builtin_extra_nodes():
"nodes_model_downscale.py",
"nodes_images.py",
"nodes_video_model.py",
"nodes_ideogram4.py",
"nodes_train.py",
"nodes_dataset.py",
"nodes_sag.py",

16586
openapi.yaml

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
[project]
name = "ComfyUI"
version = "0.23.0"
version = "0.24.0"
readme = "README.md"
license = { file = "LICENSE" }
requires-python = ">=3.10"

View File

@ -1,5 +1,5 @@
comfyui-frontend-package==1.44.19
comfyui-workflow-templates==0.9.92
comfyui-workflow-templates==0.9.94
comfyui-embedded-docs==0.5.2
torch
torchsde
@ -23,7 +23,7 @@ SQLAlchemy>=2.0.0
filelock
av>=16.0.0
comfy-kitchen==0.2.10
comfy-aimdo==0.4.7
comfy-aimdo==0.4.8
requests
simpleeval>=1.0.0
blake3

View File

@ -6,7 +6,6 @@ import subprocess
import sys
import tempfile
import time
import uuid
from pathlib import Path
from typing import Callable, Iterator, Optional
@ -189,17 +188,9 @@ def _post_multipart_asset(
@pytest.fixture
def make_asset_bytes() -> Callable[[str, int], bytes]:
# Salt content per test so it never collides with assets left over from
# earlier tests. Delete is now always a soft delete (content is preserved),
# so the suite can no longer rely on hard-deleting content for isolation.
# Deterministic within a test: the same (name, size) yields the same bytes.
salt = uuid.uuid4().bytes
def _make(name: str, size: int = 8192) -> bytes:
seed = sum(ord(c) for c in name) % 251
body = bytearray((i * 31 + seed) % 256 for i in range(size))
body[: len(salt)] = salt[:size]
return bytes(body)
return bytes((i * 31 + seed) % 256 for i in range(size))
return _make
@ -221,7 +212,7 @@ def asset_factory(http: requests.Session, api_base: str):
for aid in created:
with contextlib.suppress(Exception):
http.delete(f"{api_base}/api/assets/{aid}", timeout=30)
http.delete(f"{api_base}/api/assets/{aid}?delete_content=true", timeout=30)
@pytest.fixture
@ -236,11 +227,7 @@ def seeded_asset(request: pytest.FixtureRequest, http: requests.Session, api_bas
if tags is None:
tags = ["models", "checkpoints", "unit-tests", "alpha"]
meta = {"purpose": "test", "epoch": 1, "flags": ["x", "y"], "nullable": None}
# Unique content per test so the seed always creates a fresh asset (201).
# Delete is now always a soft delete, so content from a prior test survives
# and would otherwise dedup this upload into an existing asset (200).
content = uuid.uuid4().bytes + b"A" * (4096 - 16)
files = {"file": (name, content, "application/octet-stream")}
files = {"file": (name, b"A" * 4096, "application/octet-stream")}
form_data = {
"tags": json.dumps(tags),
"name": name,
@ -273,4 +260,4 @@ def autoclean_unit_test_assets(http: requests.Session, api_base: str):
break
for aid in ids:
with contextlib.suppress(Exception):
http.delete(f"{api_base}/api/assets/{aid}", timeout=30)
http.delete(f"{api_base}/api/assets/{aid}?delete_content=true", timeout=30)

View File

@ -45,8 +45,8 @@ def test_get_and_delete_asset(http: requests.Session, api_base: str, seeded_asse
assert "user_metadata" in detail
assert "filename" in detail["user_metadata"]
# Soft delete — the reference is hidden, content is preserved
rd = http.delete(f"{api_base}/api/assets/{aid}", timeout=120)
# DELETE (hard delete to also remove underlying asset and file)
rd = http.delete(f"{api_base}/api/assets/{aid}?delete_content=true", timeout=120)
assert rd.status_code == 204
# GET again -> 404
@ -60,7 +60,7 @@ def test_soft_delete_hides_from_get(http: requests.Session, api_base: str, seede
aid = seeded_asset["id"]
asset_hash = seeded_asset["asset_hash"]
# Soft delete — the reference is hidden, content is preserved
# Soft-delete (default, no delete_content param)
rd = http.delete(f"{api_base}/api/assets/{aid}", timeout=120)
assert rd.status_code == 204
@ -81,10 +81,11 @@ def test_soft_delete_hides_from_get(http: requests.Session, api_base: str, seede
ids = [a["id"] for a in rl.json().get("assets", [])]
assert aid not in ids
# The reference is already soft-deleted; content is preserved.
# Clean up: hard-delete the soft-deleted reference and orphaned asset
http.delete(f"{api_base}/api/assets/{aid}?delete_content=true", timeout=120)
def test_soft_delete_preserves_asset_identity_across_references(
def test_delete_upon_reference_count(
http: requests.Session, api_base: str, seeded_asset: dict
):
# Create a second reference to the same asset via from-hash
@ -118,20 +119,16 @@ def test_soft_delete_preserves_asset_identity_across_references(
rh2 = http.head(f"{api_base}/api/assets/hash/{src_hash}", timeout=120)
assert rh2.status_code == 200 # asset identity preserved (soft delete)
# Re-associate via from-hash: it must reuse the same preserved content
# (created_new False AND the same hash), proving the soft deletes did not
# destroy the underlying asset. Then soft-delete again -> still preserved.
# Re-associate via from-hash, then hard-delete -> orphan content removed
r3 = http.post(f"{api_base}/api/assets/from-hash", json=payload, timeout=120)
assert r3.status_code == 201, r3.json()
assert r3.json()["created_new"] is False
assert r3.json()["asset_hash"] == src_hash # reused the surviving content
aid3 = r3.json()["id"]
rd3 = http.delete(f"{api_base}/api/assets/{aid3}", timeout=120)
rd3 = http.delete(f"{api_base}/api/assets/{aid3}?delete_content=true", timeout=120)
assert rd3.status_code == 204
rh3 = http.head(f"{api_base}/api/assets/hash/{src_hash}", timeout=120)
assert rh3.status_code == 200 # content preserved (soft delete)
assert rh3.status_code == 404 # orphan content removed
def test_update_asset_fields(http: requests.Session, api_base: str, seeded_asset: dict):
@ -252,7 +249,7 @@ def test_concurrent_delete_same_asset_info_single_204(
# Hit the same endpoint N times in parallel.
n_tests = 4
url = f"{api_base}/api/assets/{aid}"
url = f"{api_base}/api/assets/{aid}?delete_content=false"
def _do_delete(delete_url):
with requests.Session() as s:

View File

@ -117,7 +117,7 @@ def test_download_missing_file_returns_404(
assert body["error"]["code"] == "FILE_NOT_FOUND"
finally:
# We created asset without the "unit-tests" tag(see `autoclean_unit_test_assets`), we need to clear it manually.
dr = http.delete(f"{api_base}/api/assets/{aid}", timeout=120)
dr = http.delete(f"{api_base}/api/assets/{aid}?delete_content=true", timeout=120)
dr.content

View File

@ -69,8 +69,8 @@ def test_tags_empty_usage(http: requests.Session, api_base: str, asset_factory,
used_names = [t["name"] for t in body2["tags"]]
assert custom_tag in used_names
# Delete the asset reference so the tag usage drops to zero
rd = http.delete(f"{api_base}/api/assets/{_asset['id']}", timeout=120)
# Hard-delete the asset so the tag usage drops to zero
rd = http.delete(f"{api_base}/api/assets/{_asset['id']}?delete_content=true", timeout=120)
assert rd.status_code == 204
# Now the custom tag must not be returned when include_zero=false