mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-05-22 01:00:07 +08:00
Compare commits
15 Commits
v0.22.0
...
matt/asset
| Author | SHA1 | Date | |
|---|---|---|---|
| dc6190e8ba | |||
| 2d21956ac7 | |||
| 396bfe4056 | |||
| 95fdc6cf91 | |||
| 5aa5ccc9e0 | |||
| 4d6a058bf1 | |||
| 00940fb24e | |||
| 7ff001d7c8 | |||
| 19ba85bb2e | |||
| 3ffc49aa0e | |||
| 36f9a6fdef | |||
| a0d1238829 | |||
| 1688a5e262 | |||
| 7ab346fc7b | |||
| 5b7288d700 |
@ -1,2 +1,5 @@
|
||||
# Admins
|
||||
* @comfyanonymous @kosinkadink @guill @alexisrolland @rattus128 @kijai
|
||||
|
||||
/CODEOWNERS @comfyanonymous
|
||||
/.ci/ @comfyanonymous
|
||||
/.github/ @comfyanonymous
|
||||
|
||||
@ -401,12 +401,16 @@ async def upload_asset(request: web.Request) -> web.Response:
|
||||
)
|
||||
|
||||
if spec.tags and spec.tags[0] == "models":
|
||||
# tag[1] may be the standalone category ("checkpoints") or the
|
||||
# slash-joined shape ("checkpoints/flux/...") that
|
||||
# `get_name_and_tags_from_asset_path` and cloud both emit. Match
|
||||
# `resolve_destination_from_tags` by extracting the first segment.
|
||||
category = spec.tags[1].split("/", 1)[0] if len(spec.tags) >= 2 else ""
|
||||
if (
|
||||
len(spec.tags) < 2
|
||||
or spec.tags[1] not in folder_paths.folder_names_and_paths
|
||||
or category not in folder_paths.folder_names_and_paths
|
||||
):
|
||||
delete_temp_file_if_exists(parsed.tmp_path)
|
||||
category = spec.tags[1] if len(spec.tags) >= 2 else ""
|
||||
return _build_error_response(
|
||||
400, "INVALID_BODY", f"unknown models category '{category}'"
|
||||
)
|
||||
|
||||
@ -327,7 +327,12 @@ def list_references_page(
|
||||
select(AssetReferenceTag.asset_reference_id, Tag.name)
|
||||
.join(Tag, Tag.name == AssetReferenceTag.tag_name)
|
||||
.where(AssetReferenceTag.asset_reference_id.in_(id_list))
|
||||
.order_by(AssetReferenceTag.tag_name.asc())
|
||||
# Preserve insertion order so the structural first tag (the root
|
||||
# category like "models") stays in position 0 and the path-derived
|
||||
# sub-path tag stays in position 1, matching cloud's behavior.
|
||||
# tag_name is a deterministic tiebreaker when multiple tags share
|
||||
# an added_at (same-batch insert via set_reference_tags).
|
||||
.order_by(AssetReferenceTag.added_at.asc(), AssetReferenceTag.tag_name.asc())
|
||||
)
|
||||
for ref_id, tag_name in rows.all():
|
||||
tag_map[ref_id].append(tag_name)
|
||||
@ -355,7 +360,8 @@ def fetch_reference_asset_and_tags(
|
||||
build_visible_owner_clause(owner_id),
|
||||
)
|
||||
.options(noload(AssetReference.tags))
|
||||
.order_by(Tag.name.asc())
|
||||
# See list_references_page for the rationale behind ordering by added_at.
|
||||
.order_by(AssetReferenceTag.added_at.asc(), Tag.name.asc())
|
||||
)
|
||||
|
||||
rows = session.execute(stmt).all()
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Iterable, Sequence
|
||||
|
||||
import sqlalchemy as sa
|
||||
@ -20,7 +21,12 @@ from app.assets.database.queries.common import (
|
||||
build_visible_owner_clause,
|
||||
iter_row_chunks,
|
||||
)
|
||||
from app.assets.helpers import escape_sql_like_string, get_utc_now, normalize_tags
|
||||
from app.assets.helpers import (
|
||||
escape_sql_like_string,
|
||||
expand_bucket_prefixes,
|
||||
get_utc_now,
|
||||
normalize_tags,
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@ -44,6 +50,26 @@ class SetTagsResult:
|
||||
total: list[str]
|
||||
|
||||
|
||||
def _next_added_at_base(session: Session, reference_id: str) -> datetime:
|
||||
"""Return a timestamp strictly greater than any existing
|
||||
`added_at` for this reference. On platforms where the wall clock
|
||||
has insufficient resolution between back-to-back commits (notably
|
||||
Windows), two write batches on the same reference can otherwise
|
||||
share a microsecond — the `ORDER BY added_at, tag_name` retrieval
|
||||
then falls back to the alphabetic tiebreaker and user-tier tags
|
||||
sort ahead of path-tier tags they were meant to follow.
|
||||
"""
|
||||
existing_max = session.execute(
|
||||
sa.select(sa.func.max(AssetReferenceTag.added_at)).where(
|
||||
AssetReferenceTag.asset_reference_id == reference_id
|
||||
)
|
||||
).scalar()
|
||||
now = get_utc_now()
|
||||
if existing_max is None:
|
||||
return now
|
||||
return max(existing_max + timedelta(microseconds=1), now)
|
||||
|
||||
|
||||
def validate_tags_exist(session: Session, tags: list[str]) -> None:
|
||||
"""Raise ValueError if any of the given tag names do not exist."""
|
||||
existing_tag_names = set(
|
||||
@ -77,7 +103,13 @@ def get_reference_tags(session: Session, reference_id: str) -> list[str]:
|
||||
session.execute(
|
||||
select(AssetReferenceTag.tag_name)
|
||||
.where(AssetReferenceTag.asset_reference_id == reference_id)
|
||||
.order_by(AssetReferenceTag.tag_name.asc())
|
||||
# Match the response-path ordering used by
|
||||
# list_references_page / fetch_reference_asset_and_tags so
|
||||
# upload responses and subsequent GETs agree on tag order.
|
||||
.order_by(
|
||||
AssetReferenceTag.added_at.asc(),
|
||||
AssetReferenceTag.tag_name.asc(),
|
||||
)
|
||||
)
|
||||
).all()
|
||||
]
|
||||
@ -89,7 +121,7 @@ def set_reference_tags(
|
||||
tags: Sequence[str],
|
||||
origin: str = "manual",
|
||||
) -> SetTagsResult:
|
||||
desired = normalize_tags(tags)
|
||||
desired = expand_bucket_prefixes(normalize_tags(tags))
|
||||
|
||||
current = set(get_reference_tags(session, reference_id))
|
||||
|
||||
@ -98,15 +130,22 @@ def set_reference_tags(
|
||||
|
||||
if to_add:
|
||||
ensure_tags_exist(session, to_add, tag_type="user")
|
||||
# Stagger added_at by microsecond per tag so the retrieval ORDER BY
|
||||
# added_at preserves input order. Per-tag get_utc_now() calls can
|
||||
# collide at microsecond resolution on fast machines, dropping the
|
||||
# query to the tag_name alphabetical tiebreaker — same fix as in
|
||||
# batch_insert_seed_assets. Read max(existing) so this batch sorts
|
||||
# strictly after any prior batch on the same reference.
|
||||
base_ts = _next_added_at_base(session, reference_id)
|
||||
session.add_all(
|
||||
[
|
||||
AssetReferenceTag(
|
||||
asset_reference_id=reference_id,
|
||||
tag_name=t,
|
||||
origin=origin,
|
||||
added_at=get_utc_now(),
|
||||
added_at=base_ts + timedelta(microseconds=i),
|
||||
)
|
||||
for t in to_add
|
||||
for i, t in enumerate(to_add)
|
||||
]
|
||||
)
|
||||
session.flush()
|
||||
@ -136,7 +175,7 @@ def add_tags_to_reference(
|
||||
if not ref:
|
||||
raise ValueError(f"AssetReference {reference_id} not found")
|
||||
|
||||
norm = normalize_tags(tags)
|
||||
norm = expand_bucket_prefixes(normalize_tags(tags))
|
||||
if not norm:
|
||||
total = get_reference_tags(session, reference_id=reference_id)
|
||||
return AddTagsResult(added=[], already_present=[], total_tags=total)
|
||||
@ -146,10 +185,17 @@ def add_tags_to_reference(
|
||||
|
||||
current = set(get_reference_tags(session, reference_id))
|
||||
|
||||
# Preserve the caller's insertion order rather than alphabetizing —
|
||||
# the retrieval ORDER BY added_at + microsecond stagger only meaningfully
|
||||
# preserves insertion order if "the order we insert in" actually matches
|
||||
# the caller's intent.
|
||||
want = set(norm)
|
||||
to_add = sorted(want - current)
|
||||
to_add = [t for t in norm if t not in current]
|
||||
|
||||
if to_add:
|
||||
# See set_reference_tags for the rationale behind the per-tag stagger
|
||||
# and the max(existing) seed.
|
||||
base_ts = _next_added_at_base(session, reference_id)
|
||||
with session.begin_nested() as nested:
|
||||
try:
|
||||
session.add_all(
|
||||
@ -158,9 +204,9 @@ def add_tags_to_reference(
|
||||
asset_reference_id=reference_id,
|
||||
tag_name=t,
|
||||
origin=origin,
|
||||
added_at=get_utc_now(),
|
||||
added_at=base_ts + timedelta(microseconds=i),
|
||||
)
|
||||
for t in to_add
|
||||
for i, t in enumerate(to_add)
|
||||
]
|
||||
)
|
||||
session.flush()
|
||||
|
||||
@ -47,6 +47,50 @@ def normalize_tags(tags: list[str] | None) -> list[str]:
|
||||
return list(dict.fromkeys(t.strip().lower() for t in (tags or []) if (t or "").strip()))
|
||||
|
||||
|
||||
def _known_bucket_prefixes() -> set[str]:
|
||||
"""Lowercased model-category names eligible for standalone-prefix
|
||||
expansion. Tags whose first slash segment matches one of these get
|
||||
the bucket inserted as a separate token, so FE filters like
|
||||
``include_tags=models,checkpoints`` keep matching even when the
|
||||
asset lives in a nested subfolder (`models/checkpoints/flux/foo`).
|
||||
|
||||
Bare user labels with slashes whose first segment is not a registered
|
||||
bucket (e.g. ``my-org/team-a``) pass through unchanged.
|
||||
"""
|
||||
try:
|
||||
import folder_paths
|
||||
|
||||
return {
|
||||
name.lower()
|
||||
for name in folder_paths.folder_names_and_paths.keys()
|
||||
if name != "custom_nodes"
|
||||
}
|
||||
except Exception:
|
||||
return set()
|
||||
|
||||
|
||||
def expand_bucket_prefixes(tags: list[str]) -> list[str]:
|
||||
"""Insert standalone bucket tokens after any slash-joined tag whose
|
||||
first segment is a registered model category. Preserves caller order
|
||||
and is idempotent (existing bucket tokens are not duplicated).
|
||||
"""
|
||||
if not tags:
|
||||
return list(tags)
|
||||
buckets = _known_bucket_prefixes()
|
||||
if not buckets:
|
||||
return list(tags)
|
||||
seen = set(tags)
|
||||
result: list[str] = []
|
||||
for t in tags:
|
||||
result.append(t)
|
||||
if "/" in t:
|
||||
prefix = t.split("/", 1)[0]
|
||||
if prefix.lower() in buckets and prefix not in seen:
|
||||
result.append(prefix)
|
||||
seen.add(prefix)
|
||||
return result
|
||||
|
||||
|
||||
def validate_blake3_hash(s: str) -> str:
|
||||
"""Validate and normalize a blake3 hash string.
|
||||
|
||||
|
||||
@ -3,7 +3,7 @@ from __future__ import annotations
|
||||
import os
|
||||
import uuid
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
from typing import TYPE_CHECKING, Any, TypedDict
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
@ -13,13 +13,14 @@ from app.assets.database.queries import (
|
||||
bulk_insert_references_ignore_conflicts,
|
||||
bulk_insert_tags_and_meta,
|
||||
delete_assets_by_ids,
|
||||
ensure_tags_exist,
|
||||
get_existing_asset_ids,
|
||||
get_reference_ids_by_ids,
|
||||
get_references_by_paths_and_asset_ids,
|
||||
get_unreferenced_unhashed_asset_ids,
|
||||
restore_references_by_paths,
|
||||
)
|
||||
from app.assets.helpers import get_utc_now
|
||||
from app.assets.helpers import expand_bucket_prefixes, get_utc_now
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.assets.services.metadata_extract import ExtractedMetadata
|
||||
@ -233,13 +234,20 @@ def batch_insert_seed_assets(
|
||||
if ref_id not in inserted_ref_ids:
|
||||
continue
|
||||
|
||||
for tag in ref_data["tags"]:
|
||||
# Stagger added_at by microsecond per tag within a reference so
|
||||
# the retrieval ORDER BY added_at preserves the input list order
|
||||
# (the path-derived root category stays at position 0). Without
|
||||
# this, every tag in a bulk-insert batch shares current_time and
|
||||
# the tag_name tiebreaker sorts them alphabetically — putting the
|
||||
# subpath tag ahead of "models" since "c"/"d"/"l" < "m".
|
||||
ref_tags = expand_bucket_prefixes(ref_data["tags"])
|
||||
for tag_idx, tag in enumerate(ref_tags):
|
||||
tag_rows.append(
|
||||
{
|
||||
"asset_reference_id": ref_id,
|
||||
"tag_name": tag,
|
||||
"origin": "automatic",
|
||||
"added_at": current_time,
|
||||
"added_at": current_time + timedelta(microseconds=tag_idx),
|
||||
}
|
||||
)
|
||||
|
||||
@ -261,6 +269,16 @@ def batch_insert_seed_assets(
|
||||
}
|
||||
)
|
||||
|
||||
if tag_rows:
|
||||
# Bucket-prefix expansion may have introduced tags the caller did
|
||||
# not register via the upstream tag_pool (e.g. `checkpoints` for a
|
||||
# nested `checkpoints/flux/foo` path). Pre-register the full set so
|
||||
# the AssetReferenceTag.tag_name FK is satisfied; the underlying
|
||||
# insert is ON CONFLICT DO NOTHING so re-registration is idempotent.
|
||||
ensure_tags_exist(
|
||||
session, {row["tag_name"] for row in tag_rows}, tag_type="user"
|
||||
)
|
||||
|
||||
bulk_insert_tags_and_meta(session, tag_rows=tag_rows, meta_rows=metadata_rows)
|
||||
|
||||
return BulkInsertResult(
|
||||
|
||||
@ -3,7 +3,6 @@ from pathlib import Path
|
||||
from typing import Literal
|
||||
|
||||
import folder_paths
|
||||
from app.assets.helpers import normalize_tags
|
||||
|
||||
|
||||
_NON_MODEL_FOLDER_NAMES = frozenset({"custom_nodes"})
|
||||
@ -27,27 +26,51 @@ def get_comfy_models_folders() -> list[tuple[str, list[str]]]:
|
||||
|
||||
|
||||
def resolve_destination_from_tags(tags: list[str]) -> tuple[str, list[str]]:
|
||||
"""Validates and maps tags -> (base_dir, subdirs_for_fs)"""
|
||||
"""Validates and maps tags -> (base_dir, subdirs_for_fs).
|
||||
|
||||
Accepts both the legacy one-tag-per-directory shape
|
||||
(``["models", "diffusers", "Kolors", "text_encoder"]``) and the
|
||||
slash-joined shape emitted by :func:`get_name_and_tags_from_asset_path`
|
||||
(``["models", "diffusers/Kolors/text_encoder"]``). Hybrid shapes that
|
||||
mix the two within a single call (e.g.
|
||||
``["models", "diffusers", "Kolors/text_encoder"]``) are also
|
||||
accepted: each entry after ``tags[0]`` is split on ``/`` and
|
||||
concatenated, so the two shapes — and any mix of them — resolve to
|
||||
the same destination. The same safety checks are applied to each
|
||||
component after expansion.
|
||||
"""
|
||||
if not tags:
|
||||
raise ValueError("tags must not be empty")
|
||||
root = tags[0].lower()
|
||||
|
||||
# Expand any slash-joined entries into individual path components so
|
||||
# the rest of the function can treat both tag shapes uniformly. Each
|
||||
# component is also stripped, so " a / b " behaves like ["a", "b"].
|
||||
expanded: list[str] = []
|
||||
for t in tags[1:]:
|
||||
for part in str(t).split("/"):
|
||||
part = part.strip()
|
||||
if part:
|
||||
expanded.append(part)
|
||||
|
||||
if root == "models":
|
||||
if len(tags) < 2:
|
||||
if not expanded:
|
||||
raise ValueError("at least two tags required for model asset")
|
||||
category = expanded[0]
|
||||
try:
|
||||
bases = folder_paths.folder_names_and_paths[tags[1]][0]
|
||||
bases = folder_paths.folder_names_and_paths[category][0]
|
||||
except KeyError:
|
||||
raise ValueError(f"unknown model category '{tags[1]}'")
|
||||
raise ValueError(f"unknown model category '{category}'")
|
||||
if not bases:
|
||||
raise ValueError(f"no base path configured for category '{tags[1]}'")
|
||||
raise ValueError(f"no base path configured for category '{category}'")
|
||||
base_dir = os.path.abspath(bases[0])
|
||||
raw_subdirs = tags[2:]
|
||||
raw_subdirs = expanded[1:]
|
||||
elif root == "input":
|
||||
base_dir = os.path.abspath(folder_paths.get_input_directory())
|
||||
raw_subdirs = tags[1:]
|
||||
raw_subdirs = expanded
|
||||
elif root == "output":
|
||||
base_dir = os.path.abspath(folder_paths.get_output_directory())
|
||||
raw_subdirs = tags[1:]
|
||||
raw_subdirs = expanded
|
||||
else:
|
||||
raise ValueError(f"unknown root tag '{tags[0]}'; expected 'models', 'input', or 'output'")
|
||||
_sep_chars = frozenset(("/", "\\", os.sep))
|
||||
@ -160,7 +183,21 @@ def get_name_and_tags_from_asset_path(file_path: str) -> tuple[str, list[str]]:
|
||||
"""Return (name, tags) derived from a filesystem path.
|
||||
|
||||
- name: base filename with extension
|
||||
- tags: [root_category] + parent folder names in order
|
||||
- tags: [root_category] for paths with no parent subdirectories,
|
||||
[root_category, slash_joined_subpath] otherwise. The parent subpath
|
||||
(everything between the root category and the filename) is collapsed
|
||||
into a single tag rather than emitted as one tag per directory, so
|
||||
consumers can use ``tags[1]`` as a stable category identifier that
|
||||
survives nested directory layouts (e.g. diffusers components).
|
||||
|
||||
The subpath is lowercased to match the canonicalization applied by
|
||||
:func:`ensure_tags_exist`; without that, the
|
||||
``asset_reference_tags.tag_name`` FK to the lowercased ``tags.name``
|
||||
would fail for any path containing uppercase letters. The root
|
||||
category is lowercase by construction in
|
||||
:func:`get_asset_category_and_relative_path`, so no separate cast
|
||||
is applied here. Consumers that need to look up providers keyed on
|
||||
original-case paths should normalize their lookup key to lowercase.
|
||||
|
||||
Raises:
|
||||
ValueError: path does not belong to any known root.
|
||||
@ -170,4 +207,7 @@ def get_name_and_tags_from_asset_path(file_path: str) -> tuple[str, list[str]]:
|
||||
parent_parts = [
|
||||
part for part in p.parent.parts if part not in (".", "..", p.anchor)
|
||||
]
|
||||
return p.name, list(dict.fromkeys(normalize_tags([root_category, *parent_parts])))
|
||||
tags = [root_category]
|
||||
if parent_parts:
|
||||
tags.append("/".join(parent_parts).lower())
|
||||
return p.name, list(dict.fromkeys(t.strip() for t in tags if t.strip()))
|
||||
|
||||
@ -110,13 +110,11 @@ parser.add_argument("--preview-method", type=LatentPreviewMethod, default=Latent
|
||||
|
||||
parser.add_argument("--preview-size", type=int, default=512, help="Sets the maximum preview size for sampler nodes.")
|
||||
|
||||
CACHE_RAM_AUTO_GB = -1.0
|
||||
|
||||
cache_group = parser.add_mutually_exclusive_group()
|
||||
cache_group.add_argument("--cache-ram", nargs='*', type=float, default=[], metavar="GB", help="Use RAM pressure caching with the specified headroom thresholds. This is the default caching mode. The first value sets the active-cache threshold; the optional second value sets the inactive-cache/pin threshold. Defaults when no values are provided: active 25%% of system RAM (min 4GB, max 32GB), inactive 75%% of system RAM (min 12GB, max 96GB).")
|
||||
cache_group.add_argument("--cache-classic", action="store_true", help="Use the old style (aggressive) caching.")
|
||||
cache_group.add_argument("--cache-lru", type=int, default=0, help="Use LRU caching with a maximum of N node results cached. May use more RAM/VRAM.")
|
||||
cache_group.add_argument("--cache-none", action="store_true", help="Reduced RAM/VRAM usage at the expense of executing every node for each run.")
|
||||
cache_group.add_argument("--cache-ram", nargs='?', const=CACHE_RAM_AUTO_GB, type=float, default=0, help="Use RAM pressure caching with the specified headroom threshold. If available RAM drops below the threshold the cache removes large items to free RAM. Default (when no value is provided): 25%% of system RAM (min 4GB, max 32GB).")
|
||||
|
||||
attn_group = parser.add_mutually_exclusive_group()
|
||||
attn_group.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization. Ignored when xformers is used.")
|
||||
@ -245,6 +243,9 @@ if comfy.options.args_parsing:
|
||||
else:
|
||||
args = parser.parse_args([])
|
||||
|
||||
if args.cache_ram is not None and len(args.cache_ram) > 2:
|
||||
parser.error("--cache-ram accepts at most two values: active GB and inactive GB")
|
||||
|
||||
if args.windows_standalone_build:
|
||||
args.auto_launch = True
|
||||
|
||||
|
||||
@ -484,16 +484,23 @@ def calculate_weight(patches, weight, key, intermediate_dtype=torch.float32, ori
|
||||
|
||||
return weight
|
||||
|
||||
def prefetch_prepared_value(value, allocate_buffer, stream):
|
||||
def prefetch_prepared_value(value, counter, destination, stream, copy):
|
||||
if isinstance(value, torch.Tensor):
|
||||
dest = allocate_buffer(comfy.memory_management.vram_aligned_size(value))
|
||||
comfy.model_management.cast_to_gathered([value], dest, non_blocking=True, stream=stream)
|
||||
size = comfy.memory_management.vram_aligned_size(value)
|
||||
offset = counter[0]
|
||||
counter[0] += size
|
||||
if destination is None:
|
||||
return value
|
||||
|
||||
dest = destination[offset:offset + size]
|
||||
if copy:
|
||||
comfy.model_management.cast_to_gathered([value], dest, non_blocking=True, stream=stream)
|
||||
return comfy.memory_management.interpret_gathered_like([value], dest)[0]
|
||||
elif isinstance(value, weight_adapter.WeightAdapterBase):
|
||||
return type(value)(value.loaded_keys, prefetch_prepared_value(value.weights, allocate_buffer, stream))
|
||||
return type(value)(value.loaded_keys, prefetch_prepared_value(value.weights, counter, destination, stream, copy))
|
||||
elif isinstance(value, tuple):
|
||||
return tuple(prefetch_prepared_value(item, allocate_buffer, stream) for item in value)
|
||||
return tuple(prefetch_prepared_value(item, counter, destination, stream, copy) for item in value)
|
||||
elif isinstance(value, list):
|
||||
return [prefetch_prepared_value(item, allocate_buffer, stream) for item in value]
|
||||
return [prefetch_prepared_value(item, counter, destination, stream, copy) for item in value]
|
||||
|
||||
return value
|
||||
|
||||
@ -15,7 +15,7 @@ class TensorFileSlice(NamedTuple):
|
||||
size: int
|
||||
|
||||
|
||||
def read_tensor_file_slice_into(tensor, destination):
|
||||
def read_tensor_file_slice_into(tensor, destination, stream=None, destination2=None):
|
||||
|
||||
if isinstance(tensor, QuantizedTensor):
|
||||
if not isinstance(destination, QuantizedTensor):
|
||||
@ -23,12 +23,17 @@ def read_tensor_file_slice_into(tensor, destination):
|
||||
if tensor._layout_cls != destination._layout_cls:
|
||||
return False
|
||||
|
||||
if not read_tensor_file_slice_into(tensor._qdata, destination._qdata):
|
||||
if not read_tensor_file_slice_into(tensor._qdata, destination._qdata, stream=stream,
|
||||
destination2=(destination2._qdata if destination2 is not None else None)):
|
||||
return False
|
||||
|
||||
dst_orig_dtype = destination._params.orig_dtype
|
||||
destination._params.copy_from(tensor._params, non_blocking=False)
|
||||
destination._params = dataclasses.replace(destination._params, orig_dtype=dst_orig_dtype)
|
||||
if destination2 is not None:
|
||||
dst_orig_dtype = destination2._params.orig_dtype
|
||||
destination2._params.copy_from(destination._params, non_blocking=True)
|
||||
destination2._params = dataclasses.replace(destination2._params, orig_dtype=dst_orig_dtype)
|
||||
return True
|
||||
|
||||
info = getattr(tensor.untyped_storage(), "_comfy_tensor_file_slice", None)
|
||||
@ -48,6 +53,17 @@ def read_tensor_file_slice_into(tensor, destination):
|
||||
if info.size == 0:
|
||||
return True
|
||||
|
||||
hostbuf = getattr(destination.untyped_storage(), "_comfy_hostbuf", None)
|
||||
if hostbuf is not None:
|
||||
stream_ptr = getattr(stream, "cuda_stream", 0) if stream is not None else 0
|
||||
device_ptr = destination2.data_ptr() if destination2 is not None else 0
|
||||
hostbuf.read_file_slice(file_obj, info.offset, info.size,
|
||||
offset=destination.data_ptr() - hostbuf.get_raw_address(),
|
||||
stream=stream_ptr,
|
||||
device_ptr=device_ptr,
|
||||
device=None if destination2 is None else destination2.device.index)
|
||||
return True
|
||||
|
||||
buf_type = ctypes.c_ubyte * info.size
|
||||
view = memoryview(buf_type.from_address(destination.data_ptr()))
|
||||
|
||||
@ -151,7 +167,7 @@ def set_ram_cache_release_state(callback, headroom):
|
||||
extra_ram_release_callback = callback
|
||||
RAM_CACHE_HEADROOM = max(0, int(headroom))
|
||||
|
||||
def extra_ram_release(target):
|
||||
def extra_ram_release(target, free_active=False):
|
||||
if extra_ram_release_callback is None:
|
||||
return 0
|
||||
return extra_ram_release_callback(target)
|
||||
return extra_ram_release_callback(target, free_active=free_active)
|
||||
|
||||
@ -31,6 +31,7 @@ from contextlib import nullcontext
|
||||
import comfy.memory_management
|
||||
import comfy.utils
|
||||
import comfy.quant_ops
|
||||
import comfy_aimdo.host_buffer
|
||||
import comfy_aimdo.vram_buffer
|
||||
|
||||
class VRAMState(Enum):
|
||||
@ -495,6 +496,14 @@ except:
|
||||
|
||||
current_loaded_models = []
|
||||
|
||||
DIRTY_MMAPS = set()
|
||||
|
||||
PIN_PRESSURE_HYSTERESIS = 256 * 1024 * 1024
|
||||
|
||||
#Freeing registerables on pressure does imply a GPU sync, so go big on
|
||||
#the hysteresis so each expensive sync gives us back a good chunk.
|
||||
REGISTERABLE_PIN_HYSTERESIS = 2048 * 1024 * 1024
|
||||
|
||||
def module_size(module):
|
||||
module_mem = 0
|
||||
sd = module.state_dict()
|
||||
@ -503,27 +512,46 @@ def module_size(module):
|
||||
module_mem += t.nbytes
|
||||
return module_mem
|
||||
|
||||
def module_mmap_residency(module, free=False):
|
||||
mmap_touched_mem = 0
|
||||
module_mem = 0
|
||||
bounced_mmaps = set()
|
||||
sd = module.state_dict()
|
||||
for k in sd:
|
||||
t = sd[k]
|
||||
module_mem += t.nbytes
|
||||
storage = t._qdata.untyped_storage() if isinstance(t, comfy.quant_ops.QuantizedTensor) else t.untyped_storage()
|
||||
if not getattr(storage, "_comfy_tensor_mmap_touched", False):
|
||||
continue
|
||||
mmap_touched_mem += t.nbytes
|
||||
if not free:
|
||||
continue
|
||||
storage._comfy_tensor_mmap_touched = False
|
||||
mmap_obj = storage._comfy_tensor_mmap_refs[0]
|
||||
if mmap_obj in bounced_mmaps:
|
||||
continue
|
||||
mmap_obj.bounce()
|
||||
bounced_mmaps.add(mmap_obj)
|
||||
return mmap_touched_mem, module_mem
|
||||
def mark_mmap_dirty(storage):
|
||||
mmap_refs = getattr(storage, "_comfy_tensor_mmap_refs", None)
|
||||
if mmap_refs is not None:
|
||||
DIRTY_MMAPS.add(mmap_refs[0])
|
||||
|
||||
def free_pins(size, evict_active=False):
|
||||
freed_total = 0
|
||||
for loaded_model in reversed(current_loaded_models):
|
||||
if size <= 0:
|
||||
return freed_total
|
||||
model = loaded_model.model
|
||||
if model is not None and model.is_dynamic() and (evict_active or not model.model.dynamic_pins[model.load_device]["active"]):
|
||||
freed = model.partially_unload_ram(size)
|
||||
freed_total += freed
|
||||
size -= freed
|
||||
return freed_total
|
||||
|
||||
def ensure_pin_budget(size, evict_active=False):
|
||||
shortfall = size + comfy.memory_management.RAM_CACHE_HEADROOM / 2 - psutil.virtual_memory().available
|
||||
if shortfall <= 0:
|
||||
return True
|
||||
|
||||
to_free = shortfall + PIN_PRESSURE_HYSTERESIS
|
||||
return free_pins(to_free, evict_active=evict_active) >= shortfall
|
||||
|
||||
def ensure_pin_registerable(size, evict_active=False):
|
||||
shortfall = TOTAL_PINNED_MEMORY + size - MAX_PINNED_MEMORY
|
||||
if MAX_PINNED_MEMORY <= 0:
|
||||
return False
|
||||
if shortfall <= 0:
|
||||
return True
|
||||
|
||||
shortfall += REGISTERABLE_PIN_HYSTERESIS
|
||||
for loaded_model in reversed(current_loaded_models):
|
||||
model = loaded_model.model
|
||||
if model is not None and model.is_dynamic() and (evict_active or not model.model.dynamic_pins[model.load_device]["active"]):
|
||||
shortfall -= model.unregister_inactive_pins(shortfall)
|
||||
if shortfall <= 0:
|
||||
return True
|
||||
return shortfall <= REGISTERABLE_PIN_HYSTERESIS
|
||||
|
||||
class LoadedModel:
|
||||
def __init__(self, model):
|
||||
@ -553,9 +581,6 @@ class LoadedModel:
|
||||
def model_memory(self):
|
||||
return self.model.model_size()
|
||||
|
||||
def model_mmap_residency(self, free=False):
|
||||
return self.model.model_mmap_residency(free=free)
|
||||
|
||||
def model_loaded_memory(self):
|
||||
return self.model.loaded_size()
|
||||
|
||||
@ -635,15 +660,9 @@ WINDOWS = any(platform.win32_ver())
|
||||
|
||||
EXTRA_RESERVED_VRAM = 400 * 1024 * 1024
|
||||
if WINDOWS:
|
||||
import comfy.windows
|
||||
EXTRA_RESERVED_VRAM = 600 * 1024 * 1024 #Windows is higher because of the shared vram issue
|
||||
if total_vram > (15 * 1024): # more extra reserved vram on 16GB+ cards
|
||||
EXTRA_RESERVED_VRAM += 100 * 1024 * 1024
|
||||
def get_free_ram():
|
||||
return comfy.windows.get_free_ram()
|
||||
else:
|
||||
def get_free_ram():
|
||||
return psutil.virtual_memory().available
|
||||
|
||||
if args.reserve_vram is not None:
|
||||
EXTRA_RESERVED_VRAM = args.reserve_vram * 1024 * 1024 * 1024
|
||||
@ -657,7 +676,6 @@ def minimum_inference_memory():
|
||||
|
||||
def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, pins_required=0, ram_required=0):
|
||||
cleanup_models_gc()
|
||||
comfy.memory_management.extra_ram_release(max(pins_required, ram_required))
|
||||
unloaded_model = []
|
||||
can_unload = []
|
||||
unloaded_models = []
|
||||
@ -673,11 +691,9 @@ def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, pins
|
||||
for x in can_unload_sorted:
|
||||
i = x[-1]
|
||||
memory_to_free = 1e32
|
||||
pins_to_free = 1e32
|
||||
if not DISABLE_SMART_MEMORY or device is None:
|
||||
if current_loaded_models[i].model.is_dynamic() and (not DISABLE_SMART_MEMORY or device is None):
|
||||
memory_to_free = 0 if device is None else memory_required - get_free_memory(device)
|
||||
pins_to_free = pins_required - get_free_ram()
|
||||
if current_loaded_models[i].model.is_dynamic() and for_dynamic:
|
||||
if for_dynamic:
|
||||
#don't actually unload dynamic models for the sake of other dynamic models
|
||||
#as that works on-demand.
|
||||
memory_required -= current_loaded_models[i].model.loaded_size()
|
||||
@ -685,18 +701,6 @@ def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, pins
|
||||
if memory_to_free > 0 and current_loaded_models[i].model_unload(memory_to_free):
|
||||
logging.debug(f"Unloading {current_loaded_models[i].model.model.__class__.__name__}")
|
||||
unloaded_model.append(i)
|
||||
if pins_to_free > 0:
|
||||
logging.debug(f"PIN Unloading {current_loaded_models[i].model.model.__class__.__name__}")
|
||||
current_loaded_models[i].model.partially_unload_ram(pins_to_free)
|
||||
|
||||
for x in can_unload_sorted:
|
||||
i = x[-1]
|
||||
ram_to_free = ram_required - psutil.virtual_memory().available
|
||||
if ram_to_free <= 0 and i not in unloaded_model:
|
||||
continue
|
||||
resident_memory, _ = current_loaded_models[i].model_mmap_residency(free=True)
|
||||
if resident_memory > 0:
|
||||
logging.debug(f"RAM Unloading {current_loaded_models[i].model.model.__class__.__name__}")
|
||||
|
||||
for i in sorted(unloaded_model, reverse=True):
|
||||
unloaded_models.append(current_loaded_models.pop(i))
|
||||
@ -762,29 +766,16 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu
|
||||
model_to_unload.model.detach(unpatch_all=False)
|
||||
model_to_unload.model_finalizer.detach()
|
||||
|
||||
|
||||
total_memory_required = {}
|
||||
total_pins_required = {}
|
||||
total_ram_required = {}
|
||||
for loaded_model in models_to_load:
|
||||
device = loaded_model.device
|
||||
total_memory_required[device] = total_memory_required.get(device, 0) + loaded_model.model_memory_required(device)
|
||||
resident_memory, model_memory = loaded_model.model_mmap_residency()
|
||||
pinned_memory = loaded_model.model.pinned_memory_size()
|
||||
#FIXME: This can over-free the pins as it budgets to pin the entire model. We should
|
||||
#make this JIT to keep as much pinned as possible.
|
||||
pins_required = model_memory - pinned_memory
|
||||
ram_required = model_memory - resident_memory
|
||||
total_pins_required[device] = total_pins_required.get(device, 0) + pins_required
|
||||
total_ram_required[device] = total_ram_required.get(device, 0) + ram_required
|
||||
|
||||
for device in total_memory_required:
|
||||
if device != torch.device("cpu"):
|
||||
free_memory(total_memory_required[device] * 1.1 + extra_mem,
|
||||
device,
|
||||
for_dynamic=free_for_dynamic,
|
||||
pins_required=total_pins_required[device],
|
||||
ram_required=total_ram_required[device])
|
||||
for_dynamic=free_for_dynamic)
|
||||
|
||||
for device in total_memory_required:
|
||||
if device != torch.device("cpu"):
|
||||
@ -1180,6 +1171,7 @@ STREAM_CAST_BUFFERS = {}
|
||||
LARGEST_CASTED_WEIGHT = (None, 0)
|
||||
STREAM_AIMDO_CAST_BUFFERS = {}
|
||||
LARGEST_AIMDO_CASTED_WEIGHT = (None, 0)
|
||||
STREAM_PIN_BUFFERS = {}
|
||||
|
||||
DEFAULT_AIMDO_CAST_BUFFER_RESERVATION_SIZE = 16 * 1024 ** 3
|
||||
|
||||
@ -1220,21 +1212,66 @@ def get_aimdo_cast_buffer(offload_stream, device):
|
||||
if cast_buffer is None:
|
||||
cast_buffer = comfy_aimdo.vram_buffer.VRAMBuffer(DEFAULT_AIMDO_CAST_BUFFER_RESERVATION_SIZE, device.index)
|
||||
STREAM_AIMDO_CAST_BUFFERS[offload_stream] = cast_buffer
|
||||
|
||||
return cast_buffer
|
||||
|
||||
def get_pin_buffer(offload_stream):
|
||||
pin_buffer = STREAM_PIN_BUFFERS.get(offload_stream, None)
|
||||
if pin_buffer is None:
|
||||
pin_buffer = comfy_aimdo.host_buffer.HostBuffer(0, 0, pinned_hostbuf_size(8 * 1024**3))
|
||||
STREAM_PIN_BUFFERS[offload_stream] = pin_buffer
|
||||
elif offload_stream is not None:
|
||||
event = getattr(pin_buffer, "_comfy_event", None)
|
||||
if event is not None:
|
||||
event.synchronize()
|
||||
delattr(pin_buffer, "_comfy_event")
|
||||
return pin_buffer
|
||||
|
||||
def resize_pin_buffer(pin_buffer, size):
|
||||
global TOTAL_PINNED_MEMORY
|
||||
old_size = pin_buffer.size
|
||||
if size <= old_size:
|
||||
return True
|
||||
growth = size - old_size
|
||||
comfy.memory_management.extra_ram_release(comfy.memory_management.RAM_CACHE_HEADROOM)
|
||||
ensure_pin_budget(growth, evict_active=True)
|
||||
ensure_pin_registerable(growth, evict_active=True)
|
||||
try:
|
||||
pin_buffer.extend(size=size, reallocate=True)
|
||||
except RuntimeError:
|
||||
return False
|
||||
TOTAL_PINNED_MEMORY += pin_buffer.size - old_size
|
||||
return True
|
||||
|
||||
def reset_cast_buffers():
|
||||
global TOTAL_PINNED_MEMORY
|
||||
global LARGEST_CASTED_WEIGHT
|
||||
global LARGEST_AIMDO_CASTED_WEIGHT
|
||||
|
||||
LARGEST_CASTED_WEIGHT = (None, 0)
|
||||
LARGEST_AIMDO_CASTED_WEIGHT = (None, 0)
|
||||
for offload_stream in set(STREAM_CAST_BUFFERS) | set(STREAM_AIMDO_CAST_BUFFERS):
|
||||
for offload_stream in set(STREAM_CAST_BUFFERS) | set(STREAM_AIMDO_CAST_BUFFERS) | set(STREAM_PIN_BUFFERS):
|
||||
if offload_stream is not None:
|
||||
offload_stream.synchronize()
|
||||
synchronize()
|
||||
|
||||
for mmap_obj in DIRTY_MMAPS:
|
||||
mmap_obj.bounce()
|
||||
DIRTY_MMAPS.clear()
|
||||
|
||||
for pin_buffer in STREAM_PIN_BUFFERS.values():
|
||||
TOTAL_PINNED_MEMORY -= pin_buffer.size
|
||||
TOTAL_PINNED_MEMORY = max(0, TOTAL_PINNED_MEMORY)
|
||||
|
||||
for loaded_model in current_loaded_models:
|
||||
model = loaded_model.model
|
||||
if model is not None and model.is_dynamic():
|
||||
model.model.dynamic_pins[model.load_device]["active"] = False
|
||||
model.partially_unload_ram(1e30, subsets=[ "patches" ])
|
||||
model.model.dynamic_pins[model.load_device]["patches"] = (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024, pinned_hostbuf_size(model.model_size())), [], [-1], [0])
|
||||
|
||||
STREAM_CAST_BUFFERS.clear()
|
||||
STREAM_AIMDO_CAST_BUFFERS.clear()
|
||||
STREAM_PIN_BUFFERS.clear()
|
||||
soft_empty_cache()
|
||||
|
||||
def get_offload_stream(device):
|
||||
@ -1280,7 +1317,7 @@ def sync_stream(device, stream):
|
||||
current_stream(device).wait_stream(stream)
|
||||
|
||||
|
||||
def cast_to_gathered(tensors, r, non_blocking=False, stream=None):
|
||||
def cast_to_gathered(tensors, r, non_blocking=False, stream=None, r2=None):
|
||||
wf_context = nullcontext()
|
||||
if stream is not None:
|
||||
wf_context = stream
|
||||
@ -1288,17 +1325,20 @@ def cast_to_gathered(tensors, r, non_blocking=False, stream=None):
|
||||
wf_context = wf_context.as_context(stream)
|
||||
|
||||
dest_views = comfy.memory_management.interpret_gathered_like(tensors, r)
|
||||
dest2_views = comfy.memory_management.interpret_gathered_like(tensors, r2) if r2 is not None else None
|
||||
with wf_context:
|
||||
for tensor in tensors:
|
||||
dest_view = dest_views.pop(0)
|
||||
dest2_view = dest2_views.pop(0) if dest2_views is not None else None
|
||||
if tensor is None:
|
||||
continue
|
||||
if comfy.memory_management.read_tensor_file_slice_into(tensor, dest_view):
|
||||
if comfy.memory_management.read_tensor_file_slice_into(tensor, dest_view, stream=stream, destination2=dest2_view):
|
||||
continue
|
||||
storage = tensor._qdata.untyped_storage() if isinstance(tensor, comfy.quant_ops.QuantizedTensor) else tensor.untyped_storage()
|
||||
if hasattr(storage, "_comfy_tensor_mmap_touched"):
|
||||
storage._comfy_tensor_mmap_touched = True
|
||||
mark_mmap_dirty(storage)
|
||||
dest_view.copy_(tensor, non_blocking=non_blocking)
|
||||
if dest2_view is not None:
|
||||
dest2_view.copy_(dest_view, non_blocking=non_blocking)
|
||||
|
||||
|
||||
def cast_to(weight, dtype=None, device=None, non_blocking=False, copy=False, stream=None, r=None):
|
||||
@ -1339,14 +1379,18 @@ TOTAL_PINNED_MEMORY = 0
|
||||
MAX_PINNED_MEMORY = -1
|
||||
if not args.disable_pinned_memory:
|
||||
if is_nvidia() or is_amd():
|
||||
ram = get_total_memory(torch.device("cpu"))
|
||||
if WINDOWS:
|
||||
MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.40 # Windows limit is apparently 50%
|
||||
MAX_PINNED_MEMORY = ram * 0.40 # Windows limit is apparently 50%
|
||||
else:
|
||||
MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.90
|
||||
MAX_PINNED_MEMORY = ram * 0.90
|
||||
logging.info("Enabled pinned memory {}".format(MAX_PINNED_MEMORY // (1024 * 1024)))
|
||||
|
||||
PINNING_ALLOWED_TYPES = set(["Tensor", "Parameter", "QuantizedTensor"])
|
||||
|
||||
def pinned_hostbuf_size(size):
|
||||
return max(0, int(min(size, MAX_PINNED_MEMORY) * 2))
|
||||
|
||||
def discard_cuda_async_error():
|
||||
try:
|
||||
a = torch.tensor([1], dtype=torch.uint8, device=get_torch_device())
|
||||
@ -1378,8 +1422,8 @@ def pin_memory(tensor):
|
||||
return False
|
||||
|
||||
size = tensor.nbytes
|
||||
if (TOTAL_PINNED_MEMORY + size) > MAX_PINNED_MEMORY:
|
||||
return False
|
||||
comfy.memory_management.extra_ram_release(comfy.memory_management.RAM_CACHE_HEADROOM)
|
||||
ensure_pin_registerable(size)
|
||||
|
||||
ptr = tensor.data_ptr()
|
||||
if ptr == 0:
|
||||
@ -1416,7 +1460,8 @@ def unpin_memory(tensor):
|
||||
return False
|
||||
|
||||
if torch.cuda.cudart().cudaHostUnregister(ptr) == 0:
|
||||
TOTAL_PINNED_MEMORY -= PINNED_MEMORY.pop(ptr)
|
||||
size = PINNED_MEMORY.pop(ptr)
|
||||
TOTAL_PINNED_MEMORY -= size
|
||||
return True
|
||||
else:
|
||||
logging.warning("Unpin error.")
|
||||
|
||||
@ -35,6 +35,7 @@ import comfy.model_management
|
||||
import comfy.ops
|
||||
import comfy.patcher_extension
|
||||
import comfy.utils
|
||||
import comfy_aimdo.host_buffer
|
||||
from comfy.comfy_types import UnetWrapperFunction
|
||||
from comfy.quant_ops import QuantizedTensor
|
||||
from comfy.patcher_extension import CallbacksMP, PatcherInjection, WrappersMP
|
||||
@ -117,6 +118,8 @@ def string_to_seed(data):
|
||||
return comfy.utils.string_to_seed(data)
|
||||
|
||||
class LowVramPatch:
|
||||
is_lowvram_patch = True
|
||||
|
||||
def __init__(self, key, patches, convert_func=None, set_func=None):
|
||||
self.key = key
|
||||
self.patches = patches
|
||||
@ -124,11 +127,21 @@ class LowVramPatch:
|
||||
self.set_func = set_func
|
||||
self.prepared_patches = None
|
||||
|
||||
def prepare(self, allocate_buffer, stream):
|
||||
self.prepared_patches = [
|
||||
(patch[0], comfy.lora.prefetch_prepared_value(patch[1], allocate_buffer, stream), patch[2], patch[3], patch[4])
|
||||
def memory_required(self):
|
||||
counter = [0]
|
||||
for patch in self.patches[self.key]:
|
||||
comfy.lora.prefetch_prepared_value(patch[1], counter, None, None, False)
|
||||
return counter[0]
|
||||
|
||||
def prepare(self, destination, stream, copy=True, commit=True):
|
||||
counter = [0]
|
||||
prepared_patches = [
|
||||
(patch[0], comfy.lora.prefetch_prepared_value(patch[1], counter, destination, stream, copy), patch[2], patch[3], patch[4])
|
||||
for patch in self.patches[self.key]
|
||||
]
|
||||
if commit:
|
||||
self.prepared_patches = prepared_patches
|
||||
return prepared_patches
|
||||
|
||||
def clear_prepared(self):
|
||||
self.prepared_patches = None
|
||||
@ -341,9 +354,6 @@ class ModelPatcher:
|
||||
self.size = comfy.model_management.module_size(self.model)
|
||||
return self.size
|
||||
|
||||
def model_mmap_residency(self, free=False):
|
||||
return comfy.model_management.module_mmap_residency(self.model, free=free)
|
||||
|
||||
def loaded_size(self):
|
||||
return self.model.model_loaded_weight_memory
|
||||
|
||||
@ -1118,8 +1128,12 @@ class ModelPatcher:
|
||||
# Pinned memory pressure tracking is only implemented for DynamicVram loading
|
||||
return 0
|
||||
|
||||
def loaded_ram_size(self):
|
||||
# Loaded RAM pressure tracking is only implemented for DynamicVram loading
|
||||
return 0
|
||||
|
||||
def partially_unload_ram(self, ram_to_unload):
|
||||
pass
|
||||
return 0
|
||||
|
||||
def detach(self, unpatch_all=True):
|
||||
self.eject_model()
|
||||
@ -1550,6 +1564,16 @@ class ModelPatcherDynamic(ModelPatcher):
|
||||
super().__init__(model, load_device, offload_device, size, weight_inplace_update)
|
||||
if not hasattr(self.model, "dynamic_vbars"):
|
||||
self.model.dynamic_vbars = {}
|
||||
if not hasattr(self.model, "dynamic_pins"):
|
||||
self.model.dynamic_pins = {}
|
||||
if self.load_device not in self.model.dynamic_pins:
|
||||
self.model.dynamic_pins[self.load_device] = {
|
||||
"weights": (comfy_aimdo.host_buffer.HostBuffer(0, 0, 0), [], [-1], [0]),
|
||||
"patches": (comfy_aimdo.host_buffer.HostBuffer(0, 0, 0), [], [-1], [0]),
|
||||
"hostbufs_initialized": False,
|
||||
"failed": False,
|
||||
"active": False,
|
||||
}
|
||||
self.non_dynamic_delegate_model = None
|
||||
assert load_device is not None
|
||||
|
||||
@ -1611,6 +1635,14 @@ class ModelPatcherDynamic(ModelPatcher):
|
||||
self.unpatch_hooks()
|
||||
|
||||
vbar = self._vbar_get(create=True)
|
||||
pin_state = self.model.dynamic_pins[self.load_device]
|
||||
if not pin_state["hostbufs_initialized"]:
|
||||
hostbuf_size = comfy.model_management.pinned_hostbuf_size(self.model_size())
|
||||
pin_state["weights"] = (comfy_aimdo.host_buffer.HostBuffer(0, 64 * 1024 * 1024, hostbuf_size), [], [-1], [0])
|
||||
pin_state["patches"] = (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024, hostbuf_size), [], [-1], [0])
|
||||
pin_state["hostbufs_initialized"] = True
|
||||
pin_state["failed"] = False
|
||||
pin_state["active"] = True
|
||||
if vbar is not None:
|
||||
vbar.prioritize()
|
||||
|
||||
@ -1636,7 +1668,9 @@ class ModelPatcherDynamic(ModelPatcher):
|
||||
if key in self.patches:
|
||||
if comfy.lora.calculate_shape(self.patches[key], weight, key) != weight.shape:
|
||||
return (True, 0)
|
||||
setattr(m, param_key + "_lowvram_function", LowVramPatch(key, self.patches))
|
||||
lowvram_patch = LowVramPatch(key, self.patches)
|
||||
lowvram_patch._pin_state = pin_state
|
||||
setattr(m, param_key + "_lowvram_function", lowvram_patch)
|
||||
num_patches += 1
|
||||
else:
|
||||
setattr(m, param_key + "_lowvram_function", None)
|
||||
@ -1653,6 +1687,9 @@ class ModelPatcherDynamic(ModelPatcher):
|
||||
|
||||
def force_load_param(self, param_key, device_to):
|
||||
key = key_param_name_to_key(n, param_key)
|
||||
weight, _, _ = get_key_weight(self.model, key)
|
||||
if weight is None:
|
||||
return
|
||||
if key in self.backup:
|
||||
comfy.utils.set_attr_param(self.model, key, self.backup[key].weight)
|
||||
self.patch_weight_to_device(key, device_to=device_to, force_cast=True)
|
||||
@ -1662,17 +1699,23 @@ class ModelPatcherDynamic(ModelPatcher):
|
||||
|
||||
if hasattr(m, "comfy_cast_weights"):
|
||||
m.comfy_cast_weights = True
|
||||
m.pin_failed = False
|
||||
m.seed_key = n
|
||||
m._pin_state = pin_state
|
||||
set_dirty(m, dirty)
|
||||
|
||||
force_load, v_weight_size = setup_param(self, m, n, "weight")
|
||||
force_load_bias, v_weight_bias = setup_param(self, m, n, "bias")
|
||||
force_load = force_load or force_load_bias
|
||||
v_weight_size += v_weight_bias
|
||||
#Models that mix tiny and giant weights can causing lopsided stream buffer
|
||||
#rotations and stall. force the tinys over.
|
||||
if module_mem > 16 * 1024:
|
||||
force_load, v_weight_size = setup_param(self, m, n, "weight")
|
||||
force_load_bias, v_weight_bias = setup_param(self, m, n, "bias")
|
||||
force_load = force_load or force_load_bias
|
||||
v_weight_size += v_weight_bias
|
||||
if force_load:
|
||||
logging.info(f"Module {n} has resizing Lora - force loading")
|
||||
else:
|
||||
force_load=True
|
||||
|
||||
if force_load:
|
||||
logging.info(f"Module {n} has resizing Lora - force loading")
|
||||
force_load_param(self, "weight", device_to)
|
||||
force_load_param(self, "bias", device_to)
|
||||
else:
|
||||
@ -1740,23 +1783,58 @@ class ModelPatcherDynamic(ModelPatcher):
|
||||
|
||||
return freed
|
||||
|
||||
def pinned_memory_size(self):
|
||||
total = 0
|
||||
loading = self._load_list(for_dynamic=True)
|
||||
for x in loading:
|
||||
_, _, _, _, m, _ = x
|
||||
pin = comfy.pinned_memory.get_pin(m)
|
||||
if pin is not None:
|
||||
total += pin.numel() * pin.element_size()
|
||||
return total
|
||||
def loaded_ram_size(self):
|
||||
return (self.model.dynamic_pins[self.load_device]["weights"][0].size +
|
||||
self.model.dynamic_pins[self.load_device]["patches"][0].size)
|
||||
|
||||
def partially_unload_ram(self, ram_to_unload):
|
||||
loading = self._load_list(for_dynamic=True, default_device=self.offload_device)
|
||||
for x in loading:
|
||||
*_, m, _ = x
|
||||
ram_to_unload -= comfy.pinned_memory.unpin_memory(m)
|
||||
if ram_to_unload <= 0:
|
||||
return
|
||||
def pinned_memory_size(self):
|
||||
return (self.model.dynamic_pins[self.load_device]["weights"][3][0] +
|
||||
self.model.dynamic_pins[self.load_device]["patches"][3][0])
|
||||
|
||||
def unregister_inactive_pins(self, ram_to_unload, subsets=[ "weights", "patches" ]):
|
||||
freed = 0
|
||||
pin_state = self.model.dynamic_pins[self.load_device]
|
||||
for subset in subsets:
|
||||
hostbuf, stack, stack_split, pinned_size = pin_state[subset]
|
||||
split = stack_split[0]
|
||||
while split >= 0:
|
||||
module, offset = stack[split]
|
||||
split -= 1
|
||||
stack_split[0] = split
|
||||
if not module._pin_registered:
|
||||
continue
|
||||
size = module._pin.numel() * module._pin.element_size()
|
||||
if torch.cuda.cudart().cudaHostUnregister(module._pin.data_ptr()) != 0:
|
||||
comfy.model_management.discard_cuda_async_error()
|
||||
continue
|
||||
module._pin_registered = False
|
||||
comfy.model_management.TOTAL_PINNED_MEMORY = max(0, comfy.model_management.TOTAL_PINNED_MEMORY - size)
|
||||
pinned_size[0] = max(0, pinned_size[0] - size)
|
||||
freed += size
|
||||
ram_to_unload -= size
|
||||
if ram_to_unload <= 0:
|
||||
return freed
|
||||
return freed
|
||||
|
||||
def partially_unload_ram(self, ram_to_unload, subsets=[ "weights", "patches" ]):
|
||||
freed = 0
|
||||
pin_state = self.model.dynamic_pins[self.load_device]
|
||||
for subset in subsets:
|
||||
hostbuf, stack, stack_split, pinned_size = pin_state[subset]
|
||||
while len(stack) > 0:
|
||||
module, offset = stack.pop()
|
||||
size = module._pin.numel() * module._pin.element_size()
|
||||
del module._pin
|
||||
hostbuf.truncate(offset, do_unregister=module._pin_registered)
|
||||
stack_split[0] = min(stack_split[0], len(stack) - 1)
|
||||
if module._pin_registered:
|
||||
comfy.model_management.TOTAL_PINNED_MEMORY = max(0, comfy.model_management.TOTAL_PINNED_MEMORY - size)
|
||||
pinned_size[0] = max(0, pinned_size[0] - size)
|
||||
freed += size
|
||||
ram_to_unload -= size
|
||||
if ram_to_unload <= 0:
|
||||
return freed
|
||||
return freed
|
||||
|
||||
def patch_model(self, device_to=None, lowvram_model_memory=0, load_weights=True, force_patch_weights=False):
|
||||
#This isn't used by the core at all and can only be to load a model out of
|
||||
|
||||
88
comfy/ops.py
88
comfy/ops.py
@ -75,6 +75,8 @@ except:
|
||||
|
||||
cast_to = comfy.model_management.cast_to #TODO: remove once no more references
|
||||
|
||||
STREAM_PIN_BUFFER_HEADROOM = 8 * 1024 * 1024
|
||||
|
||||
def cast_to_input(weight, input, non_blocking=False, copy=True):
|
||||
return comfy.model_management.cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy)
|
||||
|
||||
@ -91,6 +93,9 @@ def cast_modules_with_vbar(comfy_modules, dtype, device, bias_dtype, non_blockin
|
||||
offload_stream = None
|
||||
cast_buffer = None
|
||||
cast_buffer_offset = 0
|
||||
stream_pin_hostbuf = None
|
||||
stream_pin_offset = 0
|
||||
stream_pin_queue = []
|
||||
|
||||
def ensure_offload_stream(module, required_size, check_largest):
|
||||
nonlocal offload_stream
|
||||
@ -124,6 +129,22 @@ def cast_modules_with_vbar(comfy_modules, dtype, device, bias_dtype, non_blockin
|
||||
cast_buffer_offset += buffer_size
|
||||
return buffer
|
||||
|
||||
def get_stream_pin_buffer_offset(buffer_size):
|
||||
nonlocal stream_pin_hostbuf
|
||||
nonlocal stream_pin_offset
|
||||
|
||||
if buffer_size == 0 or offload_stream is None:
|
||||
return None
|
||||
|
||||
if stream_pin_hostbuf is None:
|
||||
stream_pin_hostbuf = comfy.model_management.get_pin_buffer(offload_stream)
|
||||
if stream_pin_hostbuf is None:
|
||||
return None
|
||||
|
||||
offset = stream_pin_offset
|
||||
stream_pin_offset += buffer_size
|
||||
return offset
|
||||
|
||||
for s in comfy_modules:
|
||||
signature = comfy_aimdo.model_vbar.vbar_fault(s._v)
|
||||
resident = comfy_aimdo.model_vbar.vbar_signature_compare(signature, s._v_signature)
|
||||
@ -162,23 +183,47 @@ def cast_modules_with_vbar(comfy_modules, dtype, device, bias_dtype, non_blockin
|
||||
if xfer_dest is None:
|
||||
xfer_dest = get_cast_buffer(dest_size)
|
||||
|
||||
if signature is None and pin is None:
|
||||
comfy.pinned_memory.pin_memory(s)
|
||||
pin = comfy.pinned_memory.get_pin(s)
|
||||
else:
|
||||
pin = None
|
||||
def cast_maybe_lowvram_patch(xfer_source, xfer_dest, stream):
|
||||
if xfer_source is not None:
|
||||
if getattr(xfer_source, "is_lowvram_patch", False):
|
||||
xfer_source.prepare(xfer_dest, stream, copy=True, commit=False)
|
||||
else:
|
||||
comfy.model_management.cast_to_gathered(xfer_source, xfer_dest, non_blocking=non_blocking, stream=stream)
|
||||
|
||||
if pin is not None:
|
||||
comfy.model_management.cast_to_gathered(xfer_source, pin)
|
||||
xfer_source = [ pin ]
|
||||
#send it over
|
||||
comfy.model_management.cast_to_gathered(xfer_source, xfer_dest, non_blocking=non_blocking, stream=offload_stream)
|
||||
def handle_pin(m, pin, source, dest, subset="weights", size=None):
|
||||
if pin is not None:
|
||||
cast_maybe_lowvram_patch([pin], dest, offload_stream)
|
||||
return
|
||||
if signature is None:
|
||||
comfy.pinned_memory.pin_memory(m, subset=subset, size=size)
|
||||
pin = comfy.pinned_memory.get_pin(m, subset=subset)
|
||||
if pin is not None:
|
||||
if isinstance(source, list):
|
||||
comfy.model_management.cast_to_gathered(source, pin, non_blocking=non_blocking, stream=offload_stream, r2=dest)
|
||||
else:
|
||||
cast_maybe_lowvram_patch(source, pin, None)
|
||||
cast_maybe_lowvram_patch([ pin ], dest, offload_stream)
|
||||
return
|
||||
if pin is None:
|
||||
pin_offset = get_stream_pin_buffer_offset(size)
|
||||
if pin_offset is not None:
|
||||
stream_pin_queue.append((source, pin_offset, size, dest))
|
||||
return
|
||||
cast_maybe_lowvram_patch(source, dest, offload_stream)
|
||||
|
||||
handle_pin(s, pin, xfer_source, xfer_dest, size=dest_size)
|
||||
|
||||
for param_key in ("weight", "bias"):
|
||||
lowvram_fn = getattr(s, param_key + "_lowvram_function", None)
|
||||
if lowvram_fn is not None:
|
||||
lowvram_source = getattr(s, param_key + "_lowvram_function", None)
|
||||
if lowvram_source is not None:
|
||||
ensure_offload_stream(s, cast_buffer_offset, False)
|
||||
lowvram_fn.prepare(lambda size: get_cast_buffer(size), offload_stream)
|
||||
lowvram_size = lowvram_source.memory_required()
|
||||
lowvram_dest = get_cast_buffer(lowvram_size)
|
||||
lowvram_source.prepare(lowvram_dest, None, copy=False, commit=True)
|
||||
|
||||
pin = comfy.pinned_memory.get_pin(lowvram_source, subset="patches")
|
||||
handle_pin(lowvram_source, pin, lowvram_source, lowvram_dest, subset="patches", size=lowvram_size)
|
||||
|
||||
|
||||
prefetch["xfer_dest"] = xfer_dest
|
||||
prefetch["cast_dest"] = cast_dest
|
||||
@ -186,6 +231,23 @@ def cast_modules_with_vbar(comfy_modules, dtype, device, bias_dtype, non_blockin
|
||||
prefetch["needs_cast"] = needs_cast
|
||||
s._prefetch = prefetch
|
||||
|
||||
if stream_pin_offset > 0:
|
||||
if stream_pin_hostbuf.size < stream_pin_offset:
|
||||
if not comfy.model_management.resize_pin_buffer(stream_pin_hostbuf, stream_pin_offset + STREAM_PIN_BUFFER_HEADROOM):
|
||||
for xfer_source, _, _, xfer_dest in stream_pin_queue:
|
||||
cast_maybe_lowvram_patch(xfer_source, xfer_dest, offload_stream)
|
||||
return offload_stream
|
||||
stream_pin_tensor = comfy_aimdo.torch.hostbuf_to_tensor(stream_pin_hostbuf)
|
||||
stream_pin_tensor.untyped_storage()._comfy_hostbuf = stream_pin_hostbuf
|
||||
for xfer_source, pin_offset, pin_size, xfer_dest in stream_pin_queue:
|
||||
pin = stream_pin_tensor[pin_offset:pin_offset + pin_size]
|
||||
if isinstance(xfer_source, list):
|
||||
comfy.model_management.cast_to_gathered(xfer_source, pin, non_blocking=non_blocking, stream=offload_stream, r2=xfer_dest)
|
||||
else:
|
||||
cast_maybe_lowvram_patch(xfer_source, pin, None)
|
||||
comfy.model_management.cast_to_gathered([ pin ], xfer_dest, non_blocking=non_blocking, stream=offload_stream)
|
||||
stream_pin_hostbuf._comfy_event = offload_stream.record_event()
|
||||
|
||||
return offload_stream
|
||||
|
||||
|
||||
|
||||
@ -2,42 +2,62 @@ import comfy.model_management
|
||||
import comfy.memory_management
|
||||
import comfy_aimdo.host_buffer
|
||||
import comfy_aimdo.torch
|
||||
import torch
|
||||
|
||||
from comfy.cli_args import args
|
||||
|
||||
def get_pin(module):
|
||||
return getattr(module, "_pin", None)
|
||||
def get_pin(module, subset="weights"):
|
||||
pin = getattr(module, "_pin", None)
|
||||
if pin is None or module._pin_registered or args.disable_pinned_memory:
|
||||
return pin
|
||||
|
||||
def pin_memory(module):
|
||||
if module.pin_failed or args.disable_pinned_memory or get_pin(module) is not None:
|
||||
_, _, stack_split, pinned_size = module._pin_state[subset]
|
||||
size = pin.nbytes
|
||||
comfy.model_management.ensure_pin_registerable(size)
|
||||
|
||||
if torch.cuda.cudart().cudaHostRegister(pin.data_ptr(), size, 1) != 0:
|
||||
comfy.model_management.discard_cuda_async_error()
|
||||
return pin
|
||||
|
||||
module._pin_registered = True
|
||||
stack_split[0] = max(stack_split[0], module._pin_stack_index)
|
||||
comfy.model_management.TOTAL_PINNED_MEMORY += size
|
||||
pinned_size[0] += size
|
||||
return pin
|
||||
|
||||
def pin_memory(module, subset="weights", size=None):
|
||||
pin_state = module._pin_state
|
||||
if args.disable_pinned_memory:
|
||||
return
|
||||
|
||||
size = comfy.memory_management.vram_aligned_size([ module.weight, module.bias ])
|
||||
pin = get_pin(module, subset)
|
||||
if pin is not None or pin_state["failed"]:
|
||||
return
|
||||
|
||||
if comfy.model_management.MAX_PINNED_MEMORY <= 0 or (comfy.model_management.TOTAL_PINNED_MEMORY + size) > comfy.model_management.MAX_PINNED_MEMORY:
|
||||
module.pin_failed = True
|
||||
hostbuf, stack, stack_split, pinned_size = pin_state[subset]
|
||||
if size is None:
|
||||
size = comfy.memory_management.vram_aligned_size([ module.weight, module.bias ])
|
||||
offset = hostbuf.size
|
||||
registerable_size = size + max(0, hostbuf.size - pinned_size[0])
|
||||
|
||||
comfy.memory_management.extra_ram_release(comfy.memory_management.RAM_CACHE_HEADROOM)
|
||||
if (not comfy.model_management.ensure_pin_budget(size) or
|
||||
not comfy.model_management.ensure_pin_registerable(registerable_size)):
|
||||
pin_state["failed"] = True
|
||||
return False
|
||||
|
||||
try:
|
||||
hostbuf = comfy_aimdo.host_buffer.HostBuffer(size)
|
||||
hostbuf.extend(size=size)
|
||||
except RuntimeError:
|
||||
module.pin_failed = True
|
||||
pin_state["failed"] = True
|
||||
return False
|
||||
|
||||
module._pin = comfy_aimdo.torch.hostbuf_to_tensor(hostbuf)
|
||||
module._pin_hostbuf = hostbuf
|
||||
module._pin = comfy_aimdo.torch.hostbuf_to_tensor(hostbuf)[offset:offset + size]
|
||||
module._pin.untyped_storage()._comfy_hostbuf = hostbuf
|
||||
stack.append((module, offset))
|
||||
module._pin_registered = True
|
||||
module._pin_stack_index = len(stack) - 1
|
||||
stack_split[0] = max(stack_split[0], module._pin_stack_index)
|
||||
comfy.model_management.TOTAL_PINNED_MEMORY += size
|
||||
pinned_size[0] += size
|
||||
return True
|
||||
|
||||
def unpin_memory(module):
|
||||
if get_pin(module) is None:
|
||||
return 0
|
||||
size = module._pin.numel() * module._pin.element_size()
|
||||
|
||||
comfy.model_management.TOTAL_PINNED_MEMORY -= size
|
||||
if comfy.model_management.TOTAL_PINNED_MEMORY < 0:
|
||||
comfy.model_management.TOTAL_PINNED_MEMORY = 0
|
||||
|
||||
del module._pin
|
||||
del module._pin_hostbuf
|
||||
return size
|
||||
|
||||
@ -113,7 +113,6 @@ def load_safetensors(ckpt):
|
||||
"_comfy_tensor_file_slice",
|
||||
comfy.memory_management.TensorFileSlice(f, threading.get_ident(), data_base_offset + start, end - start))
|
||||
setattr(storage, "_comfy_tensor_mmap_refs", (model_mmap, mv))
|
||||
setattr(storage, "_comfy_tensor_mmap_touched", False)
|
||||
sd[name] = tensor
|
||||
|
||||
return sd, header.get("__metadata__", {}),
|
||||
@ -1451,4 +1450,3 @@ def deepcopy_list_dict(obj, memo=None):
|
||||
|
||||
memo[obj_id] = res
|
||||
return res
|
||||
|
||||
|
||||
@ -1,52 +0,0 @@
|
||||
import ctypes
|
||||
import logging
|
||||
import psutil
|
||||
from ctypes import wintypes
|
||||
|
||||
import comfy_aimdo.control
|
||||
|
||||
psapi = ctypes.WinDLL("psapi")
|
||||
kernel32 = ctypes.WinDLL("kernel32")
|
||||
|
||||
class PERFORMANCE_INFORMATION(ctypes.Structure):
|
||||
_fields_ = [
|
||||
("cb", wintypes.DWORD),
|
||||
("CommitTotal", ctypes.c_size_t),
|
||||
("CommitLimit", ctypes.c_size_t),
|
||||
("CommitPeak", ctypes.c_size_t),
|
||||
("PhysicalTotal", ctypes.c_size_t),
|
||||
("PhysicalAvailable", ctypes.c_size_t),
|
||||
("SystemCache", ctypes.c_size_t),
|
||||
("KernelTotal", ctypes.c_size_t),
|
||||
("KernelPaged", ctypes.c_size_t),
|
||||
("KernelNonpaged", ctypes.c_size_t),
|
||||
("PageSize", ctypes.c_size_t),
|
||||
("HandleCount", wintypes.DWORD),
|
||||
("ProcessCount", wintypes.DWORD),
|
||||
("ThreadCount", wintypes.DWORD),
|
||||
]
|
||||
|
||||
def get_free_ram():
|
||||
#Windows is way too conservative and chalks recently used uncommitted model RAM
|
||||
#as "in-use". So, calculate free RAM for the sake of general use as the greater of:
|
||||
#
|
||||
#1: What psutil says
|
||||
#2: Total Memory - (Committed Memory - VRAM in use)
|
||||
#
|
||||
#We have to subtract VRAM in use from the comitted memory as WDDM creates a naked
|
||||
#commit charge for all VRAM used just incase it wants to page it all out. This just
|
||||
#isn't realistic so "overcommit" on our calculations by just subtracting it off.
|
||||
|
||||
pi = PERFORMANCE_INFORMATION()
|
||||
pi.cb = ctypes.sizeof(pi)
|
||||
|
||||
if not psapi.GetPerformanceInfo(ctypes.byref(pi), pi.cb):
|
||||
logging.warning("WARNING: Failed to query windows performance info. RAM usage may be sub optimal")
|
||||
return psutil.virtual_memory().available
|
||||
|
||||
committed = pi.CommitTotal * pi.PageSize
|
||||
total = pi.PhysicalTotal * pi.PageSize
|
||||
|
||||
return max(psutil.virtual_memory().available,
|
||||
total - (committed - comfy_aimdo.control.get_total_vram_usage()))
|
||||
|
||||
111
comfy_extras/mediapipe/face_geometry.py
Normal file
111
comfy_extras/mediapipe/face_geometry.py
Normal file
@ -0,0 +1,111 @@
|
||||
"""Pure-numpy port of MediaPipe's face_geometry (FACE_LANDMARK_PIPELINE mode)
|
||||
+ weighted Procrustes solver. Computes the 4x4 facial transformation matrix.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
|
||||
|
||||
def _solve_weighted_orthogonal_problem(src: np.ndarray, tgt: np.ndarray, weights: np.ndarray) -> np.ndarray:
|
||||
"""Weighted orthogonal Procrustes (similarity). Returns 4x4 M with
|
||||
`target ≈ M @ homogeneous(source)` in the weighted LS sense. fp64 for
|
||||
SVD stability. Port of procrustes_solver.cc."""
|
||||
sqrt_w = np.sqrt(weights.astype(np.float64))
|
||||
w_total = float((sqrt_w ** 2).sum())
|
||||
ws = src.astype(np.float64) * sqrt_w
|
||||
wt = tgt.astype(np.float64) * sqrt_w
|
||||
|
||||
c_w = (ws @ sqrt_w) / w_total
|
||||
centered = ws - np.outer(c_w, sqrt_w)
|
||||
U, _S, Vt = np.linalg.svd(wt @ centered.T, full_matrices=True)
|
||||
# Disallow reflection: flip the least-significant axis when det(U)·det(V)<0.
|
||||
post, pre = U.copy(), Vt.T.copy()
|
||||
if np.linalg.det(post) * np.linalg.det(pre) < 0:
|
||||
post[:, 2] *= -1.0
|
||||
R = post @ pre.T
|
||||
|
||||
denom = float((centered * ws).sum())
|
||||
if denom < 1e-12:
|
||||
raise ValueError("Procrustes denominator collapsed (degenerate source).")
|
||||
scale = float((R @ centered * wt).sum()) / denom
|
||||
translation = ((wt - scale * (R @ ws)) @ sqrt_w) / w_total
|
||||
|
||||
M = np.eye(4, dtype=np.float64)
|
||||
M[:3, :3] = scale * R
|
||||
M[:3, 3] = translation
|
||||
return M
|
||||
|
||||
|
||||
def _estimate_scale(canonical: np.ndarray, runtime: np.ndarray, weights: np.ndarray) -> float:
|
||||
"""scale = ‖first column of M[:3]‖ per geometry_pipeline.cc::EstimateScale."""
|
||||
return float(np.linalg.norm(_solve_weighted_orthogonal_problem(canonical, runtime, weights)[:3, 0]))
|
||||
|
||||
|
||||
def solve_facial_transformation_matrix(
|
||||
landmarks_normalized: np.ndarray,
|
||||
canonical_vertices: np.ndarray,
|
||||
procrustes_indices: np.ndarray,
|
||||
procrustes_weights: np.ndarray,
|
||||
image_width: int,
|
||||
image_height: int,
|
||||
# face_geometry_calculator_options.pbtxt defaults
|
||||
vertical_fov_degrees: float = 63.0,
|
||||
near: float = 1.0,
|
||||
) -> np.ndarray:
|
||||
"""4x4 facial transformation matrix via two-pass scale recovery
|
||||
`landmarks_normalized` is (N, 3) in MediaPipe normalized convention: x, y
|
||||
in [0,1] with TOP-LEFT origin, z in width-scaled units.
|
||||
"""
|
||||
|
||||
h_near = 2.0 * near * math.tan(0.5 * math.radians(vertical_fov_degrees))
|
||||
w_near = image_width * h_near / image_height
|
||||
|
||||
sub = procrustes_indices.astype(np.int64)
|
||||
screen = landmarks_normalized[sub].T.astype(np.float64).copy()
|
||||
canon = canonical_vertices[sub].T.astype(np.float64).copy()
|
||||
weights = procrustes_weights.astype(np.float64)
|
||||
|
||||
# ProjectXY (TOP_LEFT y-flip, then scale all 3 axes; z uses x-scale).
|
||||
screen[1] = 1.0 - screen[1]
|
||||
screen[0] = screen[0] * w_near - 0.5 * w_near
|
||||
screen[1] = screen[1] * h_near - 0.5 * h_near
|
||||
screen[2] = screen[2] * w_near
|
||||
depth_offset = float(screen[2].mean())
|
||||
|
||||
def _unproject(s: np.ndarray, scale: float) -> np.ndarray:
|
||||
s = s.copy()
|
||||
s[2] = (s[2] - depth_offset + near) / scale
|
||||
s[0] *= s[2] / near
|
||||
s[1] *= s[2] / near
|
||||
s[2] *= -1.0
|
||||
return s
|
||||
|
||||
first = screen.copy()
|
||||
first[2] *= -1.0
|
||||
s1 = _estimate_scale(canon, first, weights) # 1st pass: Procrustes on projected XY
|
||||
s2 = _estimate_scale(canon, _unproject(screen, s1), weights) # 2nd pass: rescale z by s1, un-project XY
|
||||
return _solve_weighted_orthogonal_problem(canon, _unproject(screen, s1 * s2), weights).astype(np.float32)
|
||||
|
||||
|
||||
def transformation_matrix_from_detection(face_dict: dict, image_width: int, image_height: int, canonical_data: dict) -> np.ndarray:
|
||||
"""Adapt a FaceLandmarker face dict to MP's normalized convention and solve.
|
||||
FaceMesh emits (x, y, z) in 192-canonical units; MP's geometry expects
|
||||
z_norm = z_canonical * scale_x / image_width"""
|
||||
|
||||
lmks_xy, lmks_3d = face_dict["landmarks_xy"], face_dict["landmarks_3d"]
|
||||
aug = np.concatenate([lmks_3d[:, :2].astype(np.float64), np.ones((lmks_xy.shape[0], 1))], axis=1)
|
||||
M, *_ = np.linalg.lstsq(aug, lmks_xy.astype(np.float64), rcond=None)
|
||||
scale_x = float(np.linalg.norm(M[0]))
|
||||
z_scale = scale_x / image_width if scale_x > 1e-6 else 1.0 / image_width
|
||||
|
||||
normalized = np.empty((lmks_xy.shape[0], 3), dtype=np.float32)
|
||||
normalized[:, 0] = lmks_xy[:, 0] / image_width
|
||||
normalized[:, 1] = lmks_xy[:, 1] / image_height
|
||||
normalized[:, 2] = lmks_3d[:, 2] * z_scale
|
||||
return solve_facial_transformation_matrix(
|
||||
normalized, canonical_data["canonical_vertices"],
|
||||
canonical_data["procrustes_indices"], canonical_data["procrustes_weights"],
|
||||
image_width=image_width, image_height=image_height,
|
||||
)
|
||||
682
comfy_extras/mediapipe/face_landmarker.py
Normal file
682
comfy_extras/mediapipe/face_landmarker.py
Normal file
@ -0,0 +1,682 @@
|
||||
"""Pure-PyTorch port of MediaPipe's face_landmarker_v2_with_blendshapes.task:
|
||||
BlazeFace detector → FaceMesh v2 → ARKit-52 blendshapes."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from functools import lru_cache
|
||||
from typing import List, Tuple
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from scipy.special import expit
|
||||
from torch import Tensor, nn
|
||||
|
||||
|
||||
# Values below must stay verbatim with the published face_landmarker_v2 graph
|
||||
|
||||
# face_blendshapes_graph.cc::kLandmarksSubsetIdxs
|
||||
_BS_INPUT_INDICES: Tuple[int, ...] = (
|
||||
0, 1, 4, 5, 6, 7, 8, 10, 13, 14, 17, 21, 33, 37, 39, 40, 46, 52, 53, 54,
|
||||
55, 58, 61, 63, 65, 66, 67, 70, 78, 80, 81, 82, 84, 87, 88, 91, 93, 95,
|
||||
103, 105, 107, 109, 127, 132, 133, 136, 144, 145, 146, 148, 149, 150, 152,
|
||||
153, 154, 155, 157, 158, 159, 160, 161, 162, 163, 168, 172, 173, 176, 178,
|
||||
181, 185, 191, 195, 197, 234, 246, 249, 251, 263, 267, 269, 270, 276, 282,
|
||||
283, 284, 285, 288, 291, 293, 295, 296, 297, 300, 308, 310, 311, 312, 314,
|
||||
317, 318, 321, 323, 324, 332, 334, 336, 338, 356, 361, 362, 365, 373, 374,
|
||||
375, 377, 378, 379, 380, 381, 382, 384, 385, 386, 387, 388, 389, 390, 397,
|
||||
398, 400, 402, 405, 409, 415, 454, 466, 468, 469, 470, 471, 472, 473, 474,
|
||||
475, 476, 477,
|
||||
)
|
||||
|
||||
# face_blendshapes_graph.cc::kCategoryNames
|
||||
BLENDSHAPE_NAMES: Tuple[str, ...] = (
|
||||
"_neutral", "browDownLeft", "browDownRight", "browInnerUp", "browOuterUpLeft",
|
||||
"browOuterUpRight", "cheekPuff", "cheekSquintLeft", "cheekSquintRight",
|
||||
"eyeBlinkLeft", "eyeBlinkRight", "eyeLookDownLeft", "eyeLookDownRight",
|
||||
"eyeLookInLeft", "eyeLookInRight", "eyeLookOutLeft", "eyeLookOutRight",
|
||||
"eyeLookUpLeft", "eyeLookUpRight", "eyeSquintLeft", "eyeSquintRight",
|
||||
"eyeWideLeft", "eyeWideRight", "jawForward", "jawLeft", "jawOpen",
|
||||
"jawRight", "mouthClose", "mouthDimpleLeft", "mouthDimpleRight",
|
||||
"mouthFrownLeft", "mouthFrownRight", "mouthFunnel", "mouthLeft",
|
||||
"mouthLowerDownLeft", "mouthLowerDownRight", "mouthPressLeft",
|
||||
"mouthPressRight", "mouthPucker", "mouthRight", "mouthRollLower",
|
||||
"mouthRollUpper", "mouthShrugLower", "mouthShrugUpper", "mouthSmileLeft",
|
||||
"mouthSmileRight", "mouthStretchLeft", "mouthStretchRight",
|
||||
"mouthUpperUpLeft", "mouthUpperUpRight", "noseSneerLeft", "noseSneerRight",
|
||||
)
|
||||
|
||||
# face_detection.pbtxt — short-range BlazeFace.
|
||||
_BF_NUM_LAYERS = 4
|
||||
_BF_INPUT_SIZE = 128
|
||||
_BF_STRIDES = (8, 16, 16, 16)
|
||||
_BF_ANCHOR_OFFSET_X = 0.5
|
||||
_BF_ANCHOR_OFFSET_Y = 0.5
|
||||
_BF_ASPECT_RATIOS = (1.0,)
|
||||
_BF_INTERP_SCALE_AR = 1.0
|
||||
_BF_BOX_SCALE = 128.0
|
||||
_BF_KP_OFFSET = 4
|
||||
_BF_SCORE_CLIP = 100.0
|
||||
_BF_MIN_SCORE = 0.5
|
||||
|
||||
# face_detection_full_range.pbtxt — 48x48 grid at stride 4, 1 anchor/cell.
|
||||
_BF_FR_INPUT_SIZE = 192
|
||||
_BF_FR_GRID = 48
|
||||
_BF_FR_NUM_ANCHORS = _BF_FR_GRID * _BF_FR_GRID
|
||||
_BF_FR_BOX_SCALE = 192.0
|
||||
_BF_FR_SCORE_CLIP = 100.0
|
||||
|
||||
_FM_INPUT_SIZE = 192
|
||||
|
||||
# Face ROI: 1.5xbbox rect warped anisotropically into 192x192.
|
||||
_FACE_LEFT_EYE_KP = 0
|
||||
_FACE_RIGHT_EYE_KP = 1
|
||||
_FACE_ROI_SCALE_X = 1.5
|
||||
_FACE_ROI_SCALE_Y = 1.5
|
||||
_FACE_ROI_TARGET_ANGLE = 0.0
|
||||
|
||||
|
||||
def _tf_same_pad(x: Tensor, kernel: int, stride: int) -> Tensor:
|
||||
"""TF SAME pad (asymmetric on stride-2; PyTorch's symmetric pad undershoots by 1 px)."""
|
||||
H, W = x.shape[-2], x.shape[-1]
|
||||
pad_h = max(((H + stride - 1) // stride - 1) * stride + kernel - H, 0)
|
||||
pad_w = max(((W + stride - 1) // stride - 1) * stride + kernel - W, 0)
|
||||
if pad_h == 0 and pad_w == 0:
|
||||
return x
|
||||
return F.pad(x, (pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2))
|
||||
|
||||
|
||||
# BlazeFace short-range: stem 5x5/s2 → 16 BlazeBlocks → parallel heads at
|
||||
# 16²x88 (2 anchors/cell) and 8²x96 (6/cell) = 896 anchors. (in, out, stride):
|
||||
_BLAZEFACE_BLOCKS = [
|
||||
(24, 24, 1), (24, 28, 1), (28, 32, 2), (32, 36, 1),
|
||||
(36, 42, 1), (42, 48, 2), (48, 56, 1), (56, 64, 1),
|
||||
(64, 72, 1), (72, 80, 1), (80, 88, 1), (88, 96, 2),
|
||||
(96, 96, 1), (96, 96, 1), (96, 96, 1), (96, 96, 1),
|
||||
]
|
||||
|
||||
|
||||
class BlazeFaceBlock(nn.Module):
|
||||
"""DW 3x3 + PW + residual. Residual max-pools on stride>1, channel-pads on out_ch>in_ch."""
|
||||
|
||||
def __init__(self, in_ch: int, out_ch: int, stride: int, device=None, dtype=None, operations=None):
|
||||
super().__init__()
|
||||
ops = operations if operations is not None else nn
|
||||
self.in_ch, self.out_ch, self.stride = in_ch, out_ch, stride
|
||||
self.depthwise = ops.Conv2d(in_ch, in_ch, 3, stride=stride, padding=0, groups=in_ch, bias=True, device=device, dtype=dtype)
|
||||
self.pointwise = ops.Conv2d(in_ch, out_ch, 1, padding=0, bias=True, device=device, dtype=dtype)
|
||||
|
||||
def forward(self, x: Tensor) -> Tensor:
|
||||
residual = F.max_pool2d(x, 2, 2) if self.stride > 1 else x
|
||||
if self.out_ch > self.in_ch:
|
||||
residual = F.pad(residual, (0, 0, 0, 0, 0, self.out_ch - self.in_ch))
|
||||
x = _tf_same_pad(x, 3, self.stride) if self.stride > 1 else F.pad(x, (1, 1, 1, 1))
|
||||
return F.relu(self.pointwise(self.depthwise(x)) + residual)
|
||||
|
||||
|
||||
class BlazeFace(nn.Module):
|
||||
"""Short-range BlazeFace: (B, 3, 128, 128) in [-1, 1] → 896 anchors x 17."""
|
||||
|
||||
def __init__(self, device=None, dtype=None, operations=None):
|
||||
super().__init__()
|
||||
ops = operations if operations is not None else nn
|
||||
kw = dict(device=device, dtype=dtype)
|
||||
self.stem = ops.Conv2d(3, 24, 5, stride=2, padding=0, bias=True, **kw)
|
||||
self.blocks = nn.ModuleList(BlazeFaceBlock(i, o, s, device=device, dtype=dtype, operations=operations)
|
||||
for (i, o, s) in _BLAZEFACE_BLOCKS)
|
||||
# 16²x2 + 8²x6 = 512 + 384 = 896 anchors.
|
||||
self.cls_16 = ops.Conv2d(88, 2, 1, padding=0, bias=True, **kw)
|
||||
self.cls_8 = ops.Conv2d(96, 6, 1, padding=0, bias=True, **kw)
|
||||
self.reg_16 = ops.Conv2d(88, 32, 1, padding=0, bias=True, **kw)
|
||||
self.reg_8 = ops.Conv2d(96, 96, 1, padding=0, bias=True, **kw)
|
||||
|
||||
def forward(self, image_chw_normalized: Tensor) -> tuple[Tensor, Tensor]:
|
||||
x = F.relu(self.stem(_tf_same_pad(image_chw_normalized, 5, 2)))
|
||||
# 16x16 tap is block-10 output (before the 88→96 stride-2 in block 11).
|
||||
for i in range(11):
|
||||
x = self.blocks[i](x)
|
||||
feat_16 = x
|
||||
for i in range(11, 16):
|
||||
x = self.blocks[i](x)
|
||||
feat_8 = x
|
||||
|
||||
def flat(t, a, k): # NHWC flatten → (B, H*W*A, K)
|
||||
B, _, H, W = t.shape
|
||||
return t.permute(0, 2, 3, 1).reshape(B, H * W * a, k)
|
||||
|
||||
cls = torch.cat([flat(self.cls_16(feat_16), 2, 1), flat(self.cls_8(feat_8), 6, 1)], dim=1)
|
||||
reg = torch.cat([flat(self.reg_16(feat_16), 2, 16), flat(self.reg_8(feat_8), 6, 16)], dim=1)
|
||||
return reg, cls
|
||||
|
||||
|
||||
# BlazeFace full-range (face_detection_full_range_sparse.tflite): MobileNetV2-ish
|
||||
# backbone + top-down FPN, 192² input → 2304 anchors at the 48x48 grid.
|
||||
class FRBlock(nn.Module):
|
||||
"""Double inverted residual: DW → PW(mid) → DW → PW(out) [+ residual].
|
||||
|
||||
Per source tflite: dw* have no fused activation, pw1 is always ReLU, pw2
|
||||
is ReLU only when no residual (else ReLU fuses into the ADD).
|
||||
"""
|
||||
|
||||
def __init__(self, in_ch: int, mid_ch: int, out_ch: int, stride: int, device=None, dtype=None, operations=None):
|
||||
super().__init__()
|
||||
ops = operations if operations is not None else nn
|
||||
kw = dict(device=device, dtype=dtype)
|
||||
self.has_residual = (in_ch == out_ch and stride == 1)
|
||||
self.dw1 = ops.Conv2d(in_ch, in_ch, 3, stride=stride, padding=0, groups=in_ch, bias=True, **kw)
|
||||
self.pw1 = ops.Conv2d(in_ch, mid_ch, 1, padding=0, bias=True, **kw)
|
||||
self.dw2 = ops.Conv2d(mid_ch, mid_ch, 3, stride=1, padding=0, groups=mid_ch, bias=True, **kw)
|
||||
self.pw2 = ops.Conv2d(mid_ch, out_ch, 1, padding=0, bias=True, **kw)
|
||||
|
||||
def forward(self, x: Tensor) -> Tensor:
|
||||
residual = x if self.has_residual else None
|
||||
x = F.relu(self.pw1(self.dw1(F.pad(x, (1, 1, 1, 1)))))
|
||||
x = self.pw2(self.dw2(F.pad(x, (1, 1, 1, 1))))
|
||||
return F.relu(x + residual) if residual is not None else F.relu(x)
|
||||
|
||||
|
||||
# (in_ch, mid_ch, out_ch, stride). Stages downsample 96²x32 → 48²x64 → 24²x128
|
||||
# → 12²x192 → 6²x384. Lateral taps at indices 4, 7, 10 (see _FR_LATERAL_*).
|
||||
_FR_BACKBONE_BLOCKS = [
|
||||
(32, 8, 32, 1), (32, 8, 32, 1), # 96²x32
|
||||
(32, 16, 64, 2), (64, 16, 64, 1), (64, 16, 64, 1), # 48²x64 — tap[0]
|
||||
(64, 32, 128, 2), (128, 32, 128, 1), (128, 32, 128, 1), # 24²x128 — tap[1]
|
||||
(128, 48, 192, 2), (192, 48, 192, 1), (192, 48, 192, 1), # 12²x192 — tap[2]
|
||||
(192, 96, 384, 2), (384, 96, 384, 1), (384, 96, 384, 1), (384, 96, 384, 1), # 6²x384
|
||||
]
|
||||
_FR_LATERAL_TAP_INDICES = (4, 7, 10)
|
||||
_FR_LATERAL_CHANNELS = ((64, 48), (128, 64), (192, 96)) # (in, out) per side-conv
|
||||
|
||||
# Decoder blocks per FPN level (after upsample-and-merge with the lateral).
|
||||
_FR_DECODER_BLOCKS = [
|
||||
[(96, 48, 96, 1), (96, 48, 96, 1)], # 12²x96
|
||||
[(64, 32, 64, 1), (64, 32, 64, 1)], # 24²x64
|
||||
[(48, 24, 48, 1)], # 48²x48 — feeds the heads
|
||||
]
|
||||
|
||||
|
||||
def _dcr_depth_to_space(t: Tensor, r: int, c_out: int) -> Tensor:
|
||||
"""TF DEPTH_TO_SPACE in DCR layout (input channels = (i, j, c_out)).
|
||||
pixel_shuffle uses CRD which permutes output channels for c_out > 1."""
|
||||
B_, _, H_, W_ = t.shape
|
||||
t = t.reshape(B_, r, r, c_out, H_, W_)
|
||||
t = t.permute(0, 3, 4, 1, 5, 2).contiguous()
|
||||
return t.reshape(B_, c_out, H_ * r, W_ * r)
|
||||
|
||||
|
||||
class BlazeFaceFullRange(nn.Module):
|
||||
"""Full-range face detector: (B, 3, 192, 192) in [-1, 1] → 2304 anchors x 17 values."""
|
||||
|
||||
def __init__(self, device=None, dtype=None, operations=None):
|
||||
super().__init__()
|
||||
ops = operations if operations is not None else nn
|
||||
kw = dict(device=device, dtype=dtype)
|
||||
mk_block = lambda i, m, o, s: FRBlock(i, m, o, s, device=device, dtype=dtype, operations=operations)
|
||||
self.stem = ops.Conv2d(3, 32, 3, stride=2, padding=0, bias=True, **kw)
|
||||
self.backbone = nn.ModuleList(mk_block(i, m, o, s) for (i, m, o, s) in _FR_BACKBONE_BLOCKS)
|
||||
self.lateral_convs = nn.ModuleList(ops.Conv2d(i, o, 1, padding=0, bias=True, **kw) for (i, o) in _FR_LATERAL_CHANNELS)
|
||||
self.top_conv = ops.Conv2d(384, 96, 1, padding=0, bias=True, **kw)
|
||||
self.decoder_levels = nn.ModuleList(
|
||||
nn.ModuleList(mk_block(i, m, o, s) for (i, m, o, s) in lvl) for lvl in _FR_DECODER_BLOCKS
|
||||
)
|
||||
# 96→64 before 12→24, 64→48 before 24→48.
|
||||
self.decoder_reduce_convs = nn.ModuleList([
|
||||
ops.Conv2d(96, 64, 1, padding=0, bias=True, **kw),
|
||||
ops.Conv2d(64, 48, 1, padding=0, bias=True, **kw),
|
||||
])
|
||||
# Heads mix 2x2-cell info via DW-stride-2 + depth_to_space block_size=2.
|
||||
self.cls_conv = ops.Conv2d(48, 4, 1, padding=0, bias=True, **kw)
|
||||
self.cls_dw = ops.Conv2d(4, 4, 3, stride=2, padding=0, groups=4, bias=True, **kw)
|
||||
self.reg_conv = ops.Conv2d(48, 64, 1, padding=0, bias=True, **kw)
|
||||
self.reg_dw = ops.Conv2d(64, 64, 3, stride=2, padding=0, groups=64, bias=True, **kw)
|
||||
|
||||
def forward(self, image_chw_normalized: Tensor) -> tuple[Tensor, Tensor]:
|
||||
# Symmetric pad-1 throughout (full-range tflite uses explicit TF PAD, not SAME).
|
||||
x = F.relu(self.stem(F.pad(image_chw_normalized, (1, 1, 1, 1))))
|
||||
tap_set = set(_FR_LATERAL_TAP_INDICES)
|
||||
laterals: list[Tensor] = []
|
||||
for i, blk in enumerate(self.backbone):
|
||||
x = blk(x)
|
||||
if i in tap_set:
|
||||
laterals.append(x)
|
||||
|
||||
# top_conv / lateral_convs / decoder_reduce_convs all have fused ReLU in the tflite.
|
||||
p = F.relu(self.top_conv(x))
|
||||
laterals_rev = list(reversed(laterals))
|
||||
lateral_convs_rev = list(reversed(self.lateral_convs))
|
||||
for level in range(len(self.decoder_levels)):
|
||||
lateral = laterals_rev[level]
|
||||
p = F.interpolate(p, size=lateral.shape[-2:], mode="bilinear", align_corners=False)
|
||||
p = p + F.relu(lateral_convs_rev[level](lateral))
|
||||
for blk in self.decoder_levels[level]:
|
||||
p = blk(p)
|
||||
if level < len(self.decoder_reduce_convs):
|
||||
p = F.relu(self.decoder_reduce_convs[level](p))
|
||||
|
||||
c = self.cls_dw(F.pad(self.cls_conv(p), (1, 1, 1, 1)))
|
||||
c = _dcr_depth_to_space(c, r=2, c_out=1)
|
||||
r = self.reg_dw(F.pad(self.reg_conv(p), (1, 1, 1, 1)))
|
||||
r = _dcr_depth_to_space(r, r=2, c_out=16)
|
||||
B = c.shape[0]
|
||||
cls_out = c.permute(0, 2, 3, 1).reshape(B, _BF_FR_NUM_ANCHORS, 1)
|
||||
reg_out = r.permute(0, 2, 3, 1).reshape(B, _BF_FR_NUM_ANCHORS, 16)
|
||||
return reg_out, cls_out
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def _blazeface_full_range_anchors() -> np.ndarray:
|
||||
"""2304 anchors over 48x48; anchor_w=anchor_h=1 (fixed_anchor_size)."""
|
||||
feat = _BF_FR_GRID
|
||||
yy, xx = np.meshgrid(np.arange(feat, dtype=np.float32), np.arange(feat, dtype=np.float32), indexing="ij")
|
||||
cx, cy, ones = (xx + 0.5) / feat, (yy + 0.5) / feat, np.ones_like(xx)
|
||||
return np.stack([cx, cy, ones, ones], axis=-1).reshape(_BF_FR_NUM_ANCHORS, 4)
|
||||
|
||||
|
||||
def _decode_blazeface_full_range(regressors: np.ndarray, classificators: np.ndarray,
|
||||
score_thresh: float = _BF_MIN_SCORE) -> np.ndarray:
|
||||
"""Same decode as short-range with 2304-anchor grid and box_scale=192."""
|
||||
scores = expit(np.clip(classificators[:, 0], -_BF_FR_SCORE_CLIP, _BF_FR_SCORE_CLIP))
|
||||
keep = scores >= score_thresh
|
||||
if not keep.any():
|
||||
return np.empty((0, 17), dtype=np.float32)
|
||||
r = regressors[keep] / _BF_FR_BOX_SCALE
|
||||
a = _blazeface_full_range_anchors()[keep]
|
||||
cxs, cys, aws, ahs = a[:, 0:1], a[:, 1:2], a[:, 2:3], a[:, 3:4]
|
||||
xc, yc = r[:, 0:1] * aws + cxs, r[:, 1:2] * ahs + cys
|
||||
w, h = r[:, 2:3] * aws, r[:, 3:4] * ahs
|
||||
out = np.empty((r.shape[0], 17), dtype=np.float32)
|
||||
out[:, 0:1], out[:, 1:2], out[:, 2:3], out[:, 3:4] = xc - w / 2, yc - h / 2, xc + w / 2, yc + h / 2
|
||||
out[:, 4:16:2] = r[:, _BF_KP_OFFSET::2] * aws + cxs
|
||||
out[:, 5:16:2] = r[:, _BF_KP_OFFSET + 1::2] * ahs + cys
|
||||
out[:, 16] = scores[keep]
|
||||
return out
|
||||
|
||||
|
||||
# FaceMesh (face_landmarks_detector.tflite): PReLU variant of BlazeBlock,
|
||||
# 17 blocks, heads for 478x3 landmarks + presence.
|
||||
_FACEMESH_BLOCKS = [ # (in_ch, out_ch, stride)
|
||||
(16, 16, 1), (16, 16, 1), (16, 32, 2), (32, 32, 1), (32, 32, 1), (32, 64, 2),
|
||||
(64, 64, 1), (64, 64, 1), (64, 128, 2), (128, 128, 1), (128, 128, 1), (128, 128, 2),
|
||||
(128, 128, 1), (128, 128, 1), (128, 128, 2), (128, 128, 1), (128, 128, 1),
|
||||
]
|
||||
|
||||
|
||||
class FaceMeshBlock(nn.Module):
|
||||
"""PReLU BlazeBlock: PReLU between DW and PW, and after the residual add."""
|
||||
|
||||
def __init__(self, in_ch: int, out_ch: int, stride: int, device=None, dtype=None, operations=None):
|
||||
super().__init__()
|
||||
ops = operations if operations is not None else nn
|
||||
kw = dict(device=device, dtype=dtype)
|
||||
self.in_ch, self.out_ch, self.stride = in_ch, out_ch, stride
|
||||
self.depthwise = ops.Conv2d(in_ch, in_ch, 3, stride=stride, padding=0, groups=in_ch, bias=True, **kw)
|
||||
self.prelu_dwise = nn.PReLU(num_parameters=in_ch, **kw)
|
||||
self.pointwise = ops.Conv2d(in_ch, out_ch, 1, padding=0, bias=True, **kw)
|
||||
self.prelu_out = nn.PReLU(num_parameters=out_ch, **kw)
|
||||
|
||||
def forward(self, x: Tensor) -> Tensor:
|
||||
residual = F.max_pool2d(x, 2, 2) if self.stride > 1 else x
|
||||
if self.out_ch > self.in_ch:
|
||||
residual = F.pad(residual, (0, 0, 0, 0, 0, self.out_ch - self.in_ch))
|
||||
x = _tf_same_pad(x, 3, self.stride) if self.stride > 1 else F.pad(x, (1, 1, 1, 1))
|
||||
return self.prelu_out(self.pointwise(self.prelu_dwise(self.depthwise(x))) + residual)
|
||||
|
||||
|
||||
class FaceMesh(nn.Module):
|
||||
NUM_LANDMARKS = 478
|
||||
|
||||
def __init__(self, device=None, dtype=None, operations=None):
|
||||
super().__init__()
|
||||
ops = operations if operations is not None else nn
|
||||
kw = dict(device=device, dtype=dtype)
|
||||
self.stem = ops.Conv2d(3, 16, 3, stride=2, padding=0, bias=True, **kw)
|
||||
self.prelu_stem = nn.PReLU(num_parameters=16, **kw)
|
||||
self.blocks = nn.ModuleList(FaceMeshBlock(i, o, s, device=device, dtype=dtype, operations=operations)
|
||||
for (i, o, s) in _FACEMESH_BLOCKS)
|
||||
self.head_reduce = ops.Conv2d(128, 8, 1, padding=0, bias=True, **kw)
|
||||
self.prelu_head_reduce = nn.PReLU(num_parameters=8, **kw)
|
||||
self.head_block = FaceMeshBlock(8, 8, 1, device=device, dtype=dtype, operations=operations)
|
||||
self.head_presence = ops.Conv2d(8, 1, 3, padding=0, bias=True, **kw)
|
||||
self.head_landmarks = ops.Conv2d(8, self.NUM_LANDMARKS * 3, 3, padding=0, bias=True, **kw)
|
||||
|
||||
def forward(self, face_chw_normalized: Tensor) -> tuple[Tensor, Tensor]:
|
||||
"""(B, 3, 192, 192) in [0, 1] → ((B, 478, 3) landmarks in 192-canonical, (B,) presence)."""
|
||||
x = self.prelu_stem(self.stem(_tf_same_pad(face_chw_normalized, 3, 2)))
|
||||
for blk in self.blocks:
|
||||
x = blk(x)
|
||||
x = self.prelu_head_reduce(self.head_reduce(x))
|
||||
x = self.head_block(x)
|
||||
B = x.shape[0]
|
||||
presence = self.head_presence(x).reshape(B)
|
||||
lmks = self.head_landmarks(x).reshape(B, self.NUM_LANDMARKS, 3)
|
||||
return lmks, presence
|
||||
|
||||
|
||||
# FaceBlendshapes (MLP-Mixer "GhumMarkerPoserMlpMixerGeneral"):
|
||||
# 146x2 → token-reduce 146→96 → embed 2→64 → +cls token → 4x mixer → cls→52.
|
||||
_BS_NUM_INPUT_LANDMARKS = 146
|
||||
_BS_NUM_TOKENS_REDUCED = 96
|
||||
_BS_NUM_TOKENS = 97 # +1 cls
|
||||
_BS_TOKEN_DIM = 64
|
||||
_BS_TOKEN_MIX_HIDDEN = 384
|
||||
_BS_CHANNEL_MIX_HIDDEN = 256
|
||||
_BS_NUM_BLENDSHAPES = 52
|
||||
_BS_LN_EPS = 1e-6
|
||||
|
||||
|
||||
class MlpMixerBlock(nn.Module):
|
||||
"""MLP-Mixer block: token-mixing MLP (over tokens) → channel-mixing MLP (over dim).
|
||||
Both pre-LN, both residual. LN has no beta (bias=False) to match MP."""
|
||||
|
||||
def __init__(self, num_tokens: int, token_dim: int, token_hidden: int, channel_hidden: int,
|
||||
device=None, dtype=None, operations=None):
|
||||
super().__init__()
|
||||
ops = operations if operations is not None else nn
|
||||
kw = dict(device=device, dtype=dtype)
|
||||
# bias=False → no LN beta (matches MP).
|
||||
self.ln1 = ops.LayerNorm(token_dim, eps=_BS_LN_EPS, bias=False, **kw)
|
||||
self.ln2 = ops.LayerNorm(token_dim, eps=_BS_LN_EPS, bias=False, **kw)
|
||||
self.token_mlp1 = ops.Linear(num_tokens, token_hidden, bias=True, **kw)
|
||||
self.token_mlp2 = ops.Linear(token_hidden, num_tokens, bias=True, **kw)
|
||||
self.channel_mlp1 = ops.Linear(token_dim, channel_hidden, bias=True, **kw)
|
||||
self.channel_mlp2 = ops.Linear(channel_hidden, token_dim, bias=True, **kw)
|
||||
|
||||
def forward(self, x: Tensor) -> Tensor:
|
||||
y = self.ln1(x).transpose(1, 2)
|
||||
x = x + self.token_mlp2(F.relu(self.token_mlp1(y))).transpose(1, 2)
|
||||
return x + self.channel_mlp2(F.relu(self.channel_mlp1(self.ln2(x))))
|
||||
|
||||
|
||||
class FaceBlendshapes(nn.Module):
|
||||
def __init__(self, device=None, dtype=None, operations=None):
|
||||
super().__init__()
|
||||
ops = operations if operations is not None else nn
|
||||
kw = dict(device=device, dtype=dtype)
|
||||
self.token_reduce = ops.Linear(_BS_NUM_INPUT_LANDMARKS, _BS_NUM_TOKENS_REDUCED, bias=True, **kw)
|
||||
self.token_embed = ops.Linear(2, _BS_TOKEN_DIM, bias=True, **kw)
|
||||
self.cls_token = nn.Parameter(torch.zeros(1, 1, _BS_TOKEN_DIM, **kw))
|
||||
self.blocks = nn.ModuleList(
|
||||
MlpMixerBlock(_BS_NUM_TOKENS, _BS_TOKEN_DIM, _BS_TOKEN_MIX_HIDDEN, _BS_CHANNEL_MIX_HIDDEN,
|
||||
device=device, dtype=dtype, operations=operations) for _ in range(4)
|
||||
)
|
||||
self.head = ops.Linear(_BS_TOKEN_DIM, _BS_NUM_BLENDSHAPES, bias=True, **kw)
|
||||
|
||||
@staticmethod
|
||||
def _input_normalize(landmarks_2d: Tensor) -> Tensor:
|
||||
# Centroid-subtract → L2 scale → x0.5. The 0.5 is baked into training.
|
||||
centroid = landmarks_2d.mean(dim=1, keepdim=True)
|
||||
x = landmarks_2d - centroid
|
||||
mag = torch.sqrt((x * x).sum(dim=-1, keepdim=True))
|
||||
scale = mag.mean(dim=1, keepdim=True)
|
||||
return (x / scale.clamp(min=1e-12)) * 0.5
|
||||
|
||||
def forward(self, landmarks_2d: Tensor) -> Tensor:
|
||||
"""(B, 146, 2) → (B, 52) in [0, 1]. Input units don't matter (centroid + L2 normalize)."""
|
||||
x = self._input_normalize(landmarks_2d)
|
||||
x = self.token_reduce(x.transpose(1, 2)).transpose(1, 2)
|
||||
x = self.token_embed(x)
|
||||
cls = self.cls_token.expand(x.shape[0], -1, -1)
|
||||
x = torch.cat([cls, x], dim=1)
|
||||
for blk in self.blocks:
|
||||
x = blk(x)
|
||||
return torch.sigmoid(self.head(x[:, 0]))
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def _blazeface_anchors() -> np.ndarray:
|
||||
"""896 anchors per SsdAnchorsCalculator (fixed_anchor_size → anchor_w=anchor_h=1)."""
|
||||
per_ar = len(_BF_ASPECT_RATIOS) + (1 if _BF_INTERP_SCALE_AR > 0 else 0)
|
||||
layer_anchors: List[np.ndarray] = []
|
||||
layer = 0
|
||||
while layer < _BF_NUM_LAYERS:
|
||||
stride = _BF_STRIDES[layer]
|
||||
last = layer
|
||||
while last < _BF_NUM_LAYERS and _BF_STRIDES[last] == stride:
|
||||
last += 1
|
||||
per_cell = per_ar * (last - layer)
|
||||
feat = (_BF_INPUT_SIZE + stride - 1) // stride
|
||||
yy, xx = np.meshgrid(np.arange(feat, dtype=np.float32), np.arange(feat, dtype=np.float32), indexing="ij")
|
||||
cx, cy, ones = (xx + _BF_ANCHOR_OFFSET_X) / feat, (yy + _BF_ANCHOR_OFFSET_Y) / feat, np.ones_like(xx)
|
||||
cell = np.stack([cx, cy, ones, ones], axis=-1).reshape(-1, 4)
|
||||
layer_anchors.append(np.repeat(cell, per_cell, axis=0))
|
||||
layer = last
|
||||
out = np.concatenate(layer_anchors, axis=0)
|
||||
assert out.shape == (896, 4), out.shape
|
||||
return out
|
||||
|
||||
|
||||
def _decode_blazeface(regressors: np.ndarray, classificators: np.ndarray,
|
||||
score_thresh: float = _BF_MIN_SCORE) -> np.ndarray:
|
||||
"""Decode (regs (896,16), cls (896,1)) → (N, 17) = [xyxy, kp0x..kp5y, score] in [0, 1]."""
|
||||
scores = expit(np.clip(classificators[:, 0], -_BF_SCORE_CLIP, _BF_SCORE_CLIP))
|
||||
keep = scores >= score_thresh
|
||||
if not keep.any():
|
||||
return np.empty((0, 17), dtype=np.float32)
|
||||
r = regressors[keep] / _BF_BOX_SCALE
|
||||
a = _blazeface_anchors()[keep] # (N, 4) cx, cy, 1, 1
|
||||
cxs, cys, aws, ahs = a[:, 0:1], a[:, 1:2], a[:, 2:3], a[:, 3:4]
|
||||
xc, yc = r[:, 0:1] * aws + cxs, r[:, 1:2] * ahs + cys
|
||||
w, h = r[:, 2:3] * aws, r[:, 3:4] * ahs
|
||||
out = np.empty((r.shape[0], 17), dtype=np.float32)
|
||||
out[:, 0:1], out[:, 1:2], out[:, 2:3], out[:, 3:4] = xc - w / 2, yc - h / 2, xc + w / 2, yc + h / 2
|
||||
out[:, 4:16:2] = r[:, _BF_KP_OFFSET::2] * aws + cxs
|
||||
out[:, 5:16:2] = r[:, _BF_KP_OFFSET + 1::2] * ahs + cys
|
||||
out[:, 16] = scores[keep]
|
||||
return out
|
||||
|
||||
|
||||
def _weighted_nms(detections: np.ndarray, iou_thresh: float = 0.5) -> np.ndarray:
|
||||
"""MP weighted NMS — kept boxes are score-weighted averages of overlapping detections."""
|
||||
if detections.shape[0] == 0:
|
||||
return detections
|
||||
dets = detections[np.argsort(-detections[:, 16])]
|
||||
N = dets.shape[0]
|
||||
areas = np.clip(dets[:, 2] - dets[:, 0], 0, None) * np.clip(dets[:, 3] - dets[:, 1], 0, None)
|
||||
kept: List[np.ndarray] = []
|
||||
used = np.zeros(N, dtype=bool)
|
||||
for i in range(N):
|
||||
if used[i]:
|
||||
continue
|
||||
ax1, ay1, ax2, ay2 = dets[i, 0:4]
|
||||
merge_idx = [i]
|
||||
for j in range(i + 1, N):
|
||||
if used[j]:
|
||||
continue
|
||||
bx1, by1, bx2, by2 = dets[j, 0:4]
|
||||
iw = max(0.0, min(ax2, bx2) - max(ax1, bx1))
|
||||
ih = max(0.0, min(ay2, by2) - max(ay1, by1))
|
||||
inter = iw * ih
|
||||
union = areas[i] + areas[j] - inter
|
||||
if union > 0 and inter / union > iou_thresh: # strict > matches MP
|
||||
merge_idx.append(j)
|
||||
used[j] = True
|
||||
used[i] = True
|
||||
cluster = dets[merge_idx]
|
||||
ws = cluster[:, 16:17]
|
||||
ws_sum = ws.sum()
|
||||
merged = np.copy(cluster[0])
|
||||
if ws_sum > 0:
|
||||
merged[:16] = (cluster[:, :16] * ws).sum(axis=0) / ws_sum
|
||||
kept.append(merged)
|
||||
return np.stack(kept, axis=0) if kept else np.empty((0, 17), dtype=np.float32)
|
||||
|
||||
|
||||
def _detection_to_face_rect(detection: np.ndarray, image_w: int, image_h: int) -> Tuple[float, float, float, float, float]:
|
||||
"""Detection (normalized) → rotated 1.5xbbox ROI in image pixels (anisotropic)."""
|
||||
xmin, ymin, xmax, ymax = detection[0:4]
|
||||
lx = detection[4 + _FACE_LEFT_EYE_KP * 2 + 0] * image_w
|
||||
ly = detection[4 + _FACE_LEFT_EYE_KP * 2 + 1] * image_h
|
||||
rx = detection[4 + _FACE_RIGHT_EYE_KP * 2 + 0] * image_w
|
||||
ry = detection[4 + _FACE_RIGHT_EYE_KP * 2 + 1] * image_h
|
||||
# Image-y-down convention: angle = target - atan2(-dy, dx).
|
||||
angle = _FACE_ROI_TARGET_ANGLE - math.atan2(ly - ry, rx - lx)
|
||||
return (float((xmin + xmax) * 0.5 * image_w),
|
||||
float((ymin + ymax) * 0.5 * image_h),
|
||||
float((xmax - xmin) * image_w * _FACE_ROI_SCALE_X),
|
||||
float((ymax - ymin) * image_h * _FACE_ROI_SCALE_Y),
|
||||
float(angle))
|
||||
|
||||
|
||||
def _sample_warp(image_chw: Tensor, src_x: Tensor, src_y: Tensor, padding_mode: str) -> Tensor:
|
||||
"""Bilinear-sample image_chw at corner-aligned (src_x, src_y)."""
|
||||
H, W = int(image_chw.shape[-2]), int(image_chw.shape[-1])
|
||||
grid = torch.stack([(2.0 * src_x + 1.0) / W - 1.0,
|
||||
(2.0 * src_y + 1.0) / H - 1.0], dim=-1).unsqueeze(0)
|
||||
return F.grid_sample(image_chw.unsqueeze(0), grid, mode="bilinear",
|
||||
align_corners=False, padding_mode=padding_mode).squeeze(0)
|
||||
|
||||
|
||||
def _warp_face_crop(image_chw: Tensor, cx: float, cy: float, width: float, height: float,
|
||||
angle: float, output_size: int = _FM_INPUT_SIZE) -> Tensor:
|
||||
"""Rotated rect → output_size² with BORDER_REPLICATE. image_chw must be in [0, 1]."""
|
||||
s_x, s_y = width / output_size, height / output_size
|
||||
cos_a, sin_a = math.cos(angle), math.sin(angle)
|
||||
arange = torch.arange(output_size, dtype=image_chw.dtype, device=image_chw.device) - output_size * 0.5
|
||||
v_grid, u_grid = torch.meshgrid(arange, arange, indexing="ij")
|
||||
src_x = cx + u_grid * s_x * cos_a - v_grid * s_y * sin_a
|
||||
src_y = cy + u_grid * s_x * sin_a + v_grid * s_y * cos_a
|
||||
return _sample_warp(image_chw, src_x, src_y, "border")
|
||||
|
||||
|
||||
def _blazeface_input_warp(image_chw_raw: Tensor, target: int = _BF_INPUT_SIZE) -> Tuple[Tensor, float, float, float]:
|
||||
"""Centered max(W,H) square → target² with BORDER_ZERO + [-1, 1] norm.
|
||||
|
||||
Sub-pixel grid_sample matters; integer-pad-then-resize drifts the bbox ~5%.
|
||||
Returns (warped, sub_rect_cx, sub_rect_cy, sub_rect_size) — the triplet maps
|
||||
tensor-normalized [0,1] detections back to image pixels.
|
||||
"""
|
||||
H, W = int(image_chw_raw.shape[1]), int(image_chw_raw.shape[2])
|
||||
sub_rect_size = float(max(W, H))
|
||||
sub_rect_cx, sub_rect_cy = W * 0.5, H * 0.5
|
||||
s = sub_rect_size / target
|
||||
arange = torch.arange(target, dtype=image_chw_raw.dtype, device=image_chw_raw.device) - target * 0.5
|
||||
v_grid, u_grid = torch.meshgrid(arange, arange, indexing="ij")
|
||||
out = _sample_warp(image_chw_raw, sub_rect_cx + u_grid * s, sub_rect_cy + v_grid * s, "zeros")
|
||||
return (out / 127.5) - 1.0, sub_rect_cx, sub_rect_cy, sub_rect_size
|
||||
|
||||
|
||||
class FaceLandmarker(nn.Module):
|
||||
"""BlazeFace → FaceMesh v2 → blendshapes. `detector_variant` selects 'short'
|
||||
(128², ≤2m) or 'full' (192² FPN, ≤5m). State dict uses inner-module prefixes
|
||||
`detector.*` / `mesh.*` / `blendshapes.*`; the outer FaceLandmarkerModel
|
||||
wrapper rewrites `detector_{variant}.*` keys to `detector.*` before loading.
|
||||
"""
|
||||
|
||||
def __init__(self, device=None, dtype=None, operations=None, detector_variant: str = "short"):
|
||||
super().__init__()
|
||||
det_cls = {"short": BlazeFace, "full": BlazeFaceFullRange}.get(detector_variant)
|
||||
|
||||
self.detector_variant = detector_variant
|
||||
self.detector = det_cls(device=device, dtype=dtype, operations=operations)
|
||||
self.mesh = FaceMesh(device=device, dtype=dtype, operations=operations)
|
||||
self.blendshapes = FaceBlendshapes(device=device, dtype=dtype, operations=operations)
|
||||
self.register_buffer("_bs_idx", torch.tensor(_BS_INPUT_INDICES, dtype=torch.long), persistent=False)
|
||||
|
||||
def run_detector_batch(self, images_rgb_uint8: List[np.ndarray],
|
||||
score_thresh: float = _BF_MIN_SCORE,
|
||||
iou_thresh: float = 0.5):
|
||||
"""Batched detector pass. Returns (img_raws, sub_rects, sizes, per_frame_decoded)
|
||||
where per_frame_decoded[b] is (N, 17) in tensor-normalized [0,1] coords."""
|
||||
if not images_rgb_uint8:
|
||||
return [], [], [], []
|
||||
device, dtype = self.detector.stem.weight.device, self.detector.stem.weight.dtype
|
||||
det_input_size, decode_fn = ((_BF_FR_INPUT_SIZE, _decode_blazeface_full_range)
|
||||
if self.detector_variant == "full"
|
||||
else (_BF_INPUT_SIZE, _decode_blazeface))
|
||||
|
||||
# Same-size frames: stack once and transfer once. Variable size falls back
|
||||
# to per-image (only triggers for SAM3DBody's head crops).
|
||||
sizes = [tuple(img.shape[:2]) for img in images_rgb_uint8]
|
||||
if len(set(sizes)) == 1:
|
||||
batch_chw = torch.from_numpy(np.stack(images_rgb_uint8, axis=0)).to(device, dtype).movedim(-1, -3).contiguous()
|
||||
img_raws = [batch_chw[bi] for bi in range(batch_chw.shape[0])]
|
||||
else:
|
||||
img_raws = [torch.from_numpy(img).to(device, dtype).movedim(-1, -3).contiguous() for img in images_rgb_uint8]
|
||||
|
||||
warps = [_blazeface_input_warp(img_raw, det_input_size) for img_raw in img_raws]
|
||||
det_crops = [w[0] for w in warps]
|
||||
sub_rects = [(w[1], w[2], w[3]) for w in warps]
|
||||
|
||||
regs_b, cls_b = self.detector(torch.stack(det_crops, dim=0))
|
||||
regs_np, cls_np = regs_b.float().cpu().numpy(), cls_b.float().cpu().numpy()
|
||||
per_frame = []
|
||||
for b in range(len(images_rgb_uint8)):
|
||||
decoded = decode_fn(regs_np[b], cls_np[b], score_thresh=score_thresh)
|
||||
per_frame.append(_weighted_nms(decoded, iou_thresh=iou_thresh) if decoded.shape[0] > 0 else decoded)
|
||||
return img_raws, sub_rects, sizes, per_frame
|
||||
|
||||
def detect_batch(self, images_rgb_uint8: List[np.ndarray], num_faces: int = 1,
|
||||
score_thresh: float = _BF_MIN_SCORE) -> List[List[dict]]:
|
||||
"""Full pipeline batched across `images_rgb_uint8`. Returns one face-dict
|
||||
list per image (empty if nothing detected). Face dict:
|
||||
bbox_xyxy (4,) image pixels, blendshapes {52} ∈ [0,1],
|
||||
landmarks_xy (478, 2) image pixels, landmarks_3d (478, 3) in
|
||||
192-canonical (pre-transformation) units, presence float (raw logit).
|
||||
"""
|
||||
img_raws, sub_rects, sizes, per_frame_dets = self.run_detector_batch(
|
||||
images_rgb_uint8, score_thresh=score_thresh,
|
||||
)
|
||||
# tensor-normalized → image-normalized [0,1] for _detection_to_face_rect.
|
||||
for b, decoded in enumerate(per_frame_dets):
|
||||
if decoded.shape[0] == 0:
|
||||
continue
|
||||
cx, cy, size = sub_rects[b]
|
||||
H, W = sizes[b]
|
||||
sx0, sy0 = cx - size * 0.5, cy - size * 0.5
|
||||
decoded[:, 0:16:2] = (sx0 + size * decoded[:, 0:16:2]) / W
|
||||
decoded[:, 1:16:2] = (sy0 + size * decoded[:, 1:16:2]) / H
|
||||
if num_faces > 0:
|
||||
per_frame_dets[b] = decoded[: int(num_faces)]
|
||||
|
||||
# Collect every detected face across all frames into one mesh input.
|
||||
face_params: List[Tuple[int, float, float, float, float, float, float]] = []
|
||||
mesh_crops: List[Tensor] = []
|
||||
for b, dets in enumerate(per_frame_dets):
|
||||
if dets.shape[0] == 0:
|
||||
continue
|
||||
H, W = sizes[b]
|
||||
img_for_mesh = img_raws[b] / 255.0
|
||||
for det in dets:
|
||||
cx, cy, w, h, angle = _detection_to_face_rect(det, W, H)
|
||||
mesh_crops.append(_warp_face_crop(img_for_mesh, cx, cy, w, h, angle, _FM_INPUT_SIZE))
|
||||
face_params.append((b, float(det[16]), cx, cy, w, h, angle))
|
||||
|
||||
results: List[List[dict]] = [[] for _ in range(len(images_rgb_uint8))]
|
||||
if not mesh_crops:
|
||||
return results
|
||||
|
||||
lmks_canon_b, presence_b = self.mesh(torch.stack(mesh_crops, dim=0))
|
||||
bs_out_b = self.blendshapes(lmks_canon_b[:, self._bs_idx, :2])
|
||||
|
||||
# Batched canonical→image affine
|
||||
params_t = torch.tensor(
|
||||
[(cx, cy, w, h, math.cos(a), math.sin(a)) for (_b, _s, cx, cy, w, h, a) in face_params],
|
||||
device=lmks_canon_b.device, dtype=lmks_canon_b.dtype,
|
||||
)
|
||||
cxs, cys, ws, hs, cos_a, sin_a = params_t.unbind(dim=1)
|
||||
inv = 1.0 / _FM_INPUT_SIZE
|
||||
u = lmks_canon_b[..., 0] - _FM_INPUT_SIZE * 0.5
|
||||
v = lmks_canon_b[..., 1] - _FM_INPUT_SIZE * 0.5
|
||||
lmks_xy_t = torch.stack([
|
||||
cxs[:, None] + u * (ws * inv * cos_a)[:, None] - v * (hs * inv * sin_a)[:, None],
|
||||
cys[:, None] + u * (ws * inv * sin_a)[:, None] + v * (hs * inv * cos_a)[:, None],
|
||||
], dim=-1)
|
||||
|
||||
lmks_xy_np = lmks_xy_t.float().cpu().numpy()
|
||||
lmks_canon_np = lmks_canon_b.float().cpu().numpy()
|
||||
presence_np = presence_b.float().cpu().numpy()
|
||||
bs_np = bs_out_b.float().cpu().numpy()
|
||||
|
||||
for i, (b, score, *_) in enumerate(face_params):
|
||||
lmks_xy = lmks_xy_np[i]
|
||||
mn, mx = lmks_xy.min(0), lmks_xy.max(0)
|
||||
results[b].append({
|
||||
"bbox_xyxy": np.array([mn[0], mn[1], mx[0], mx[1]], dtype=np.float32),
|
||||
"blendshapes": dict(zip(BLENDSHAPE_NAMES, bs_np[i].tolist())),
|
||||
"landmarks_xy": lmks_xy,
|
||||
"landmarks_3d": lmks_canon_np[i],
|
||||
"presence": float(presence_np[i]),
|
||||
"score": score,
|
||||
})
|
||||
return results
|
||||
502
comfy_extras/nodes_mediapipe.py
Normal file
502
comfy_extras/nodes_mediapipe.py
Normal file
@ -0,0 +1,502 @@
|
||||
"""ComfyUI nodes for the pure-PyTorch MediaPipe Face Landmarker port.
|
||||
|
||||
Custom IO types:
|
||||
FACE_LANDMARKER — FaceLandmarkerModel wrapper (ModelPatcher inside)
|
||||
FACE_LANDMARKS — {"frames": List[List[face_dict]], "image_size": (H, W),
|
||||
"connection_sets": dict[str, frozenset[(int, int)]]}
|
||||
face_dict: bbox_xyxy, blendshapes, landmarks_xy,
|
||||
landmarks_3d, presence, score, transformation_matrix
|
||||
|
||||
MediaPipeFaceLandmarker also emits the core BOUNDING_BOX type — pair with DrawBBoxes.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import Image, ImageColor, ImageDraw
|
||||
from tqdm.auto import tqdm
|
||||
from typing_extensions import override
|
||||
|
||||
import comfy.model_management
|
||||
import comfy.model_patcher
|
||||
import comfy.utils
|
||||
import folder_paths
|
||||
from comfy_api.latest import ComfyExtension, io
|
||||
|
||||
from comfy_extras.mediapipe.face_landmarker import FaceLandmarker
|
||||
from comfy_extras.mediapipe.face_geometry import transformation_matrix_from_detection
|
||||
|
||||
|
||||
FaceLandmarkerType = io.Custom("FACE_LANDMARKER")
|
||||
FaceLandmarksType = io.Custom("FACE_LANDMARKS")
|
||||
|
||||
_CANONICAL_KEYS = ("canonical_vertices", "procrustes_indices", "procrustes_weights")
|
||||
_CONTOUR_PARTS = ("face_oval", "left_eye", "right_eye", "left_eyebrow", "right_eyebrow", "lips")
|
||||
|
||||
|
||||
class FaceLandmarkerModel:
|
||||
"""Loaded FaceLandmarker variants + ModelPatcher per variant.
|
||||
|
||||
Safetensors layout: `detector_short.*` / `detector_full.*` plus shared
|
||||
`mesh.*`, `blendshapes.*`, `canonical_*`, and `topology.*`.
|
||||
PReLU forces plain-nn / fp32 (manual_cast strands buffers across devices).
|
||||
"""
|
||||
|
||||
def __init__(self, state_dict: dict):
|
||||
self.load_device = comfy.model_management.text_encoder_device()
|
||||
offload_device = comfy.model_management.text_encoder_offload_device()
|
||||
self.dtype = torch.float32
|
||||
|
||||
# FACEMESH_* connection sets, embedded as int32 (N, 2) under topology.*.
|
||||
base: dict[str, frozenset] = {}
|
||||
for k in [k for k in state_dict if k.startswith("topology.")]:
|
||||
base[k[len("topology."):]] = frozenset(map(tuple, state_dict.pop(k).tolist()))
|
||||
base["contours"] = frozenset().union(*(base[p] for p in _CONTOUR_PARTS))
|
||||
base["all"] = base["contours"] | base["irises"] | base["nose"]
|
||||
|
||||
self.connection_sets: dict[str, frozenset] = base
|
||||
self.canonical_data: dict[str, np.ndarray] = {k: state_dict.pop(k).numpy() for k in _CANONICAL_KEYS}
|
||||
|
||||
shared = {k: v for k, v in state_dict.items() if k.startswith(("mesh.", "blendshapes."))}
|
||||
|
||||
self.models: dict[str, FaceLandmarker] = {}
|
||||
self.patchers: dict[str, comfy.model_patcher.ModelPatcher] = {}
|
||||
for variant in ("short", "full"):
|
||||
prefix = f"detector_{variant}."
|
||||
sub = dict(shared)
|
||||
sub.update({f"detector.{k[len(prefix):]}": v for k, v in state_dict.items() if k.startswith(prefix)})
|
||||
fl = FaceLandmarker(device=offload_device, dtype=self.dtype, operations=None, detector_variant=variant).eval()
|
||||
fl.load_state_dict(sub, strict=False)
|
||||
|
||||
self.models[variant] = fl
|
||||
self.patchers[variant] = comfy.model_patcher.CoreModelPatcher(
|
||||
fl, load_device=self.load_device, offload_device=offload_device,
|
||||
size=comfy.model_management.module_size(fl),
|
||||
)
|
||||
|
||||
def detect_batch(self, images, num_faces: int, score_thresh: float, variant: str):
|
||||
comfy.model_management.load_model_gpu(self.patchers[variant])
|
||||
return self.models[variant].detect_batch(images, num_faces=num_faces, score_thresh=score_thresh)
|
||||
|
||||
|
||||
def _image_to_uint8(image: torch.Tensor) -> np.ndarray:
|
||||
return image[..., :3].mul(255.0).add_(0.5).clamp_(0, 255).to(torch.uint8).cpu().numpy()
|
||||
|
||||
|
||||
def _parse_color(color: str) -> tuple[int, int, int]:
|
||||
try:
|
||||
return ImageColor.getrgb(color)[:3]
|
||||
except ValueError:
|
||||
return (0, 255, 0)
|
||||
|
||||
|
||||
def _copy_face(face: dict) -> dict:
|
||||
"""Shallow copy of a face_dict with array-fields cloned so callers can mutate."""
|
||||
return {
|
||||
"bbox_xyxy": face["bbox_xyxy"].copy(),
|
||||
"blendshapes": dict(face["blendshapes"]),
|
||||
"landmarks_xy": face["landmarks_xy"].copy(),
|
||||
"landmarks_3d": face["landmarks_3d"].copy(),
|
||||
"presence": face["presence"],
|
||||
"score": face["score"],
|
||||
}
|
||||
|
||||
|
||||
def _lerp_face(a: dict, b: dict, t: float) -> dict:
|
||||
return {
|
||||
"bbox_xyxy": (1 - t) * a["bbox_xyxy"] + t * b["bbox_xyxy"],
|
||||
"blendshapes": {k: (1 - t) * a["blendshapes"][k] + t * b["blendshapes"][k] for k in a["blendshapes"]},
|
||||
"landmarks_xy": (1 - t) * a["landmarks_xy"] + t * b["landmarks_xy"],
|
||||
"landmarks_3d": (1 - t) * a["landmarks_3d"] + t * b["landmarks_3d"],
|
||||
"presence": (1 - t) * a["presence"] + t * b["presence"],
|
||||
"score": (1 - t) * a["score"] + t * b["score"],
|
||||
}
|
||||
|
||||
|
||||
def _match_faces(a: list[dict], b: list[dict]) -> list[tuple[int, int]]:
|
||||
"""Greedy nearest-neighbour pairing of faces between two frames by bbox
|
||||
centre distance. Unmatched (when counts differ) are dropped."""
|
||||
if not a or not b:
|
||||
return []
|
||||
centers_a = np.array([(0.5 * (f["bbox_xyxy"][0] + f["bbox_xyxy"][2]),
|
||||
0.5 * (f["bbox_xyxy"][1] + f["bbox_xyxy"][3])) for f in a])
|
||||
centers_b = np.array([(0.5 * (f["bbox_xyxy"][0] + f["bbox_xyxy"][2]),
|
||||
0.5 * (f["bbox_xyxy"][1] + f["bbox_xyxy"][3])) for f in b])
|
||||
dists = np.linalg.norm(centers_a[:, None] - centers_b[None], axis=-1)
|
||||
pairs: list[tuple[int, int]] = []
|
||||
used_a: set[int] = set()
|
||||
used_b: set[int] = set()
|
||||
candidates = sorted((dists[ia, ib], ia, ib) for ia in range(len(a)) for ib in range(len(b)))
|
||||
for _, ia, ib in candidates:
|
||||
if ia in used_a or ib in used_b:
|
||||
continue
|
||||
pairs.append((ia, ib))
|
||||
used_a.add(ia)
|
||||
used_b.add(ib)
|
||||
return pairs
|
||||
|
||||
|
||||
def _fill_missing_frames(frames: list[list[dict]], mode: str) -> None:
|
||||
"""In-place fill empty frame slots from neighbouring detections. Multi-face
|
||||
aware: pairs faces across bracketing frames by greedy bbox-centre NN.
|
||||
When counts differ, unmatched faces are dropped from the synthesised frame."""
|
||||
if mode == "empty":
|
||||
return
|
||||
valid = [i for i, fr in enumerate(frames) if fr]
|
||||
if not valid:
|
||||
return # nothing to fill from
|
||||
if mode == "previous":
|
||||
last: list[dict] = []
|
||||
for i, fr in enumerate(frames):
|
||||
if fr:
|
||||
last = fr
|
||||
elif last:
|
||||
frames[i] = [_copy_face(f) for f in last]
|
||||
return
|
||||
# interpolate: lerp between bracketing valid frames; clamp at ends.
|
||||
for i in range(len(frames)):
|
||||
if frames[i]:
|
||||
continue
|
||||
prev_i = max((v for v in valid if v < i), default=None)
|
||||
next_i = min((v for v in valid if v > i), default=None)
|
||||
if prev_i is None:
|
||||
frames[i] = [_copy_face(f) for f in frames[next_i]]
|
||||
elif next_i is None:
|
||||
frames[i] = [_copy_face(f) for f in frames[prev_i]]
|
||||
else:
|
||||
t = (i - prev_i) / (next_i - prev_i)
|
||||
pairs = _match_faces(frames[prev_i], frames[next_i])
|
||||
frames[i] = [_lerp_face(frames[prev_i][a], frames[next_i][b], t) for a, b in pairs]
|
||||
|
||||
|
||||
def _ordered_rings(edges: frozenset[tuple[int, int]]) -> list[list[int]]:
|
||||
"""Walk an unordered edge set into one or more closed-loop vertex rings
|
||||
(handles multi-loop sets like FACEMESH_LIPS: outer + inner)."""
|
||||
adj: dict[int, set[int]] = {}
|
||||
for a, b in edges:
|
||||
adj.setdefault(a, set()).add(b)
|
||||
adj.setdefault(b, set()).add(a)
|
||||
visited: set[int] = set()
|
||||
rings: list[list[int]] = []
|
||||
for start in adj:
|
||||
if start in visited:
|
||||
continue
|
||||
ring = [start]
|
||||
visited.add(start)
|
||||
prev, cur = -1, start
|
||||
while True:
|
||||
nxt = next((v for v in adj[cur] if v != prev), None)
|
||||
if nxt is None or nxt == start:
|
||||
break
|
||||
ring.append(nxt)
|
||||
visited.add(nxt)
|
||||
prev, cur = cur, nxt
|
||||
rings.append(ring)
|
||||
return rings
|
||||
|
||||
|
||||
class LoadMediaPipeFaceLandmarker(io.ComfyNode):
|
||||
"""Load MediaPipe Face Landmarker v2 weights. Contains both detector variants
|
||||
(short / full), shared mesh, blendshapes, and canonical geometry."""
|
||||
|
||||
@classmethod
|
||||
def define_schema(cls):
|
||||
return io.Schema(
|
||||
node_id="LoadMediaPipeFaceLandmarker",
|
||||
display_name="Load MediaPipe Face Landmarker",
|
||||
category="loaders",
|
||||
inputs=[
|
||||
io.Combo.Input("model_name", options=folder_paths.get_filename_list("mediapipe"),
|
||||
tooltip="Face Landmarker safetensors from models/mediapipe/."),
|
||||
],
|
||||
outputs=[FaceLandmarkerType.Output()],
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def execute(cls, model_name) -> io.NodeOutput:
|
||||
sd = comfy.utils.load_torch_file(folder_paths.get_full_path_or_raise("mediapipe", model_name), safe_load=True)
|
||||
wrapper = FaceLandmarkerModel(sd)
|
||||
return io.NodeOutput(wrapper)
|
||||
|
||||
|
||||
# Per-frame fallback modes for detection failures in a batch.
|
||||
_FALLBACK_MODES = ("empty", "previous", "interpolate")
|
||||
|
||||
|
||||
class MediaPipeFaceLandmarker(io.ComfyNode):
|
||||
"""BlazeFace → FaceMesh v2 → ARKit-52 blendshapes, batched across the
|
||||
input. Also emits a BOUNDING_BOX list (landmark-extent bbox per face) —
|
||||
pair with DrawBBoxes for detector-only viz or MediaPipeFaceMeshVisualize
|
||||
for the mesh overlay."""
|
||||
|
||||
@classmethod
|
||||
def define_schema(cls):
|
||||
return io.Schema(
|
||||
node_id="MediaPipeFaceLandmarker",
|
||||
display_name="MediaPipe Face Landmarker",
|
||||
category="image/detection",
|
||||
inputs=[
|
||||
FaceLandmarkerType.Input("face_landmarker"),
|
||||
io.Image.Input("image"),
|
||||
io.Combo.Input("detector_variant", options=["short", "full", "both"], default="short",
|
||||
tooltip="Face detector range. 'short' is tuned for close-up faces "
|
||||
"(within ~2 m of the camera); 'full' covers farther / smaller "
|
||||
"faces (up to ~5 m) but is slower. 'both' runs both detectors and "
|
||||
"keeps whichever found more faces per frame (~2× detection cost)."),
|
||||
io.Int.Input("num_faces", default=1, min=0, max=16, step=1,
|
||||
tooltip="Maximum faces to return per frame. 0 = no cap (return all detected)."),
|
||||
io.Float.Input("min_confidence", default=0.5, min=0.0, max=1.0, step=0.01, advanced=True,
|
||||
tooltip="BlazeFace score threshold. Lower to catch small/occluded faces."),
|
||||
io.Combo.Input("missing_frame_fallback", options=list(_FALLBACK_MODES), default="empty", advanced=True,
|
||||
tooltip="Per-frame behaviour when detection fails in a batch. "
|
||||
"'empty' leaves the frame faceless. 'previous' copies the most recent successful "
|
||||
"detection. 'interpolate' lerps landmarks/bbox/blendshapes between bracketing "
|
||||
"successful frames. Multi-face: pairs faces across frames by greedy bbox-centre NN."),
|
||||
],
|
||||
outputs=[
|
||||
FaceLandmarksType.Output(display_name="face_landmarks"),
|
||||
io.BoundingBox.Output("bboxes"),
|
||||
],
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def execute(cls, face_landmarker, image, detector_variant, num_faces, min_confidence,
|
||||
missing_frame_fallback) -> io.NodeOutput:
|
||||
canonical = face_landmarker.canonical_data
|
||||
img_np = _image_to_uint8(image)
|
||||
B, H, W = img_np.shape[:3]
|
||||
chunk = 16
|
||||
is_both = detector_variant == "both"
|
||||
total_work = 2 * B if is_both else B
|
||||
pbar = comfy.utils.ProgressBar(total_work)
|
||||
|
||||
def _run(variant: str) -> list[list[dict]]:
|
||||
res: list[list[dict]] = []
|
||||
with tqdm(total=B, desc=f"MediaPipe Face Landmarker ({variant})") as tq:
|
||||
for i in range(0, B, chunk):
|
||||
end = min(i + chunk, B)
|
||||
res.extend(face_landmarker.detect_batch(
|
||||
[img_np[bi] for bi in range(i, end)],
|
||||
num_faces=int(num_faces),
|
||||
score_thresh=float(min_confidence),
|
||||
variant=variant,
|
||||
))
|
||||
pbar.update_absolute(min(pbar.current + (end - i), total_work))
|
||||
tq.update(end - i)
|
||||
return res
|
||||
|
||||
if is_both:
|
||||
short_res = _run("short")
|
||||
full_res = _run("full")
|
||||
# Per-frame keep whichever found more faces (tie → short).
|
||||
frames: list[list[dict]] = [
|
||||
short_res[bi] if len(short_res[bi]) >= len(full_res[bi]) else full_res[bi]
|
||||
for bi in range(B)
|
||||
]
|
||||
else:
|
||||
frames = _run(detector_variant)
|
||||
_fill_missing_frames(frames, missing_frame_fallback)
|
||||
bboxes = []
|
||||
for per_frame in frames:
|
||||
per_bb = []
|
||||
for f in per_frame:
|
||||
f["transformation_matrix"] = transformation_matrix_from_detection(f, W, H, canonical)
|
||||
x1, y1, x2, y2 = (float(v) for v in f["bbox_xyxy"])
|
||||
per_bb.append({"x": x1, "y": y1, "width": x2 - x1, "height": y2 - y1, "label": "face", "score": float(f["score"])})
|
||||
bboxes.append(per_bb)
|
||||
return io.NodeOutput({"frames": frames, "image_size": (H, W),
|
||||
"connection_sets": face_landmarker.connection_sets}, bboxes)
|
||||
|
||||
|
||||
# Topology keys unioned by the 'all' connections preset (contour parts + irises + nose).
|
||||
_ALL_CONNECTION_PARTS: tuple[str, ...] = (*_CONTOUR_PARTS, "irises", "nose")
|
||||
_CUSTOM_FEATURES: tuple[tuple[str, bool], ...] = (
|
||||
("face_oval", True),
|
||||
("lips", True),
|
||||
("left_eye", True),
|
||||
("right_eye", True),
|
||||
("left_eyebrow", True),
|
||||
("right_eyebrow", True),
|
||||
("irises", True),
|
||||
("nose", True),
|
||||
("tesselation", False),
|
||||
)
|
||||
|
||||
|
||||
class MediaPipeFaceMeshVisualize(io.ComfyNode):
|
||||
"""Draw a FACEMESH_* subset over an image. Topology travels with the
|
||||
FACE_LANDMARKS payload (set at detection time)."""
|
||||
|
||||
@classmethod
|
||||
def define_schema(cls):
|
||||
return io.Schema(
|
||||
node_id="MediaPipeFaceMeshVisualize",
|
||||
display_name="MediaPipe Face Mesh Visualize",
|
||||
category="image/detection",
|
||||
inputs=[
|
||||
FaceLandmarksType.Input("face_landmarks"),
|
||||
io.Image.Input("image", optional=True, tooltip="If not connected, a black canvas will be used."),
|
||||
io.DynamicCombo.Input(
|
||||
"connections",
|
||||
tooltip="'all' = oval+eyes+brows+lips+irises+nose. 'fill' = solid face_oval polygon (silhouette mask). 'custom' = toggle each feature individually (including 'tesselation', the full 2547-edge wireframe).",
|
||||
options=[
|
||||
io.DynamicCombo.Option("all", []),
|
||||
io.DynamicCombo.Option("fill", []),
|
||||
io.DynamicCombo.Option("custom", [
|
||||
io.Boolean.Input(feat, default=default,
|
||||
tooltip=f"Draw the '{feat}' connection set.")
|
||||
for feat, default in _CUSTOM_FEATURES
|
||||
]),
|
||||
],
|
||||
),
|
||||
io.Color.Input("color", default="#00ff00"),
|
||||
io.Int.Input("thickness", default=1, min=0, max=8, step=1,
|
||||
tooltip="Edge line thickness in pixels. 0 disables edge drawing."),
|
||||
io.Int.Input("point_size", default=2, min=0, max=16, step=1,
|
||||
tooltip="Landmark dot radius in pixels. 0 disables point drawing."),
|
||||
],
|
||||
outputs=[io.Image.Output()],
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def execute(cls, face_landmarks, connections, color, thickness, point_size, image=None) -> io.NodeOutput:
|
||||
sets = face_landmarks["connection_sets"]
|
||||
sel = connections["connections"]
|
||||
fill_rings: list[list[int]] | None = None
|
||||
if sel == "fill":
|
||||
fill_rings = _ordered_rings(sets["face_oval"])
|
||||
edges = frozenset()
|
||||
elif sel == "custom":
|
||||
parts = [feat for feat, _ in _CUSTOM_FEATURES if connections.get(feat, False)]
|
||||
edges = frozenset().union(*(sets[p] for p in parts))
|
||||
else: # "all"
|
||||
edges = frozenset().union(*(sets[p] for p in _ALL_CONNECTION_PARTS))
|
||||
rgb, thick, psize = _parse_color(color), int(thickness), int(point_size)
|
||||
frames = face_landmarks["frames"]
|
||||
if image is None:
|
||||
H, W = face_landmarks["image_size"]
|
||||
img_np = np.zeros((len(frames), H, W, 3), dtype=np.uint8)
|
||||
else:
|
||||
img_np = _image_to_uint8(image)
|
||||
B = img_np.shape[0]
|
||||
n_frames = len(frames)
|
||||
pbar = comfy.utils.ProgressBar(B)
|
||||
out = np.empty_like(img_np)
|
||||
for bi in range(B):
|
||||
faces = frames[bi] if bi < n_frames else []
|
||||
out[bi] = _draw_mesh(img_np[bi], faces, edges, rgb, thick, psize, fill_rings)
|
||||
pbar.update_absolute(bi + 1)
|
||||
return io.NodeOutput(torch.from_numpy(out).to(
|
||||
device=comfy.model_management.intermediate_device(),
|
||||
dtype=comfy.model_management.intermediate_dtype(),
|
||||
).div_(255.0))
|
||||
|
||||
|
||||
def _draw_mesh(image_rgb: np.ndarray, faces: list, edges,
|
||||
rgb: tuple[int, int, int], thickness: int,
|
||||
point_size: int, fill_rings: list[list[int]] | None = None) -> np.ndarray:
|
||||
draw_edges = thickness > 0 and edges
|
||||
if not faces or (fill_rings is None and not draw_edges and point_size <= 0):
|
||||
return image_rgb.copy()
|
||||
pil = Image.fromarray(image_rgb)
|
||||
draw = ImageDraw.Draw(pil)
|
||||
r = point_size * 0.5
|
||||
if fill_rings is not None:
|
||||
for f in faces:
|
||||
lmks = f["landmarks_xy"]
|
||||
for ring in fill_rings:
|
||||
draw.polygon([(float(lmks[i, 0]), float(lmks[i, 1])) for i in ring], fill=rgb)
|
||||
return np.asarray(pil)
|
||||
for f in faces:
|
||||
lmks = f["landmarks_xy"]
|
||||
n = lmks.shape[0]
|
||||
if draw_edges:
|
||||
for a, b in edges:
|
||||
if a < n and b < n:
|
||||
draw.line([(float(lmks[a, 0]), float(lmks[a, 1])),
|
||||
(float(lmks[b, 0]), float(lmks[b, 1]))], fill=rgb, width=thickness)
|
||||
if point_size == 1:
|
||||
draw.point(lmks.flatten().tolist(), fill=rgb)
|
||||
elif point_size > 1:
|
||||
for x, y in lmks:
|
||||
draw.ellipse((float(x) - r, float(y) - r, float(x) + r, float(y) + r), fill=rgb)
|
||||
return np.asarray(pil)
|
||||
|
||||
|
||||
# Mask region presets — closed-loop topologies only.
|
||||
_MASK_REGIONS: tuple[str, ...] = ("face_oval", "lips", "left_eye", "right_eye", "irises")
|
||||
_MASK_CUSTOM_FEATURES: tuple[tuple[str, bool], ...] = (
|
||||
("face_oval", True),
|
||||
("lips", False),
|
||||
("left_eye", False),
|
||||
("right_eye", False),
|
||||
("irises", False),
|
||||
)
|
||||
|
||||
|
||||
class MediaPipeFaceMask(io.ComfyNode):
|
||||
"""Binary mask from face landmarks, filled polygon per face. One mask per
|
||||
frame in the batch; faces in the same frame composite (union)."""
|
||||
|
||||
@classmethod
|
||||
def define_schema(cls):
|
||||
return io.Schema(
|
||||
node_id="MediaPipeFaceMask",
|
||||
display_name="MediaPipe Face Mask",
|
||||
category="image/detection",
|
||||
inputs=[
|
||||
FaceLandmarksType.Input("face_landmarks"),
|
||||
io.DynamicCombo.Input(
|
||||
"regions",
|
||||
tooltip="'all' = union of face_oval+lips+eyes+irises (which collapses to face_oval since it encloses the rest). 'custom' = toggle each region individually for combos like lips+eyes.",
|
||||
options=[
|
||||
io.DynamicCombo.Option("all", []),
|
||||
io.DynamicCombo.Option("custom", [
|
||||
io.Boolean.Input(reg, default=default,
|
||||
tooltip=f"Include the '{reg}' region in the mask.")
|
||||
for reg, default in _MASK_CUSTOM_FEATURES
|
||||
]),
|
||||
],
|
||||
),
|
||||
],
|
||||
outputs=[io.Mask.Output()],
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def execute(cls, face_landmarks, regions) -> io.NodeOutput:
|
||||
sets = face_landmarks["connection_sets"]
|
||||
sel = regions["regions"]
|
||||
if sel == "custom":
|
||||
picked = [reg for reg, _ in _MASK_CUSTOM_FEATURES if regions.get(reg, False)]
|
||||
else:
|
||||
picked = list(_MASK_REGIONS)
|
||||
rings = [r for reg in picked for r in _ordered_rings(sets[reg])]
|
||||
frames = face_landmarks["frames"]
|
||||
H, W = face_landmarks["image_size"]
|
||||
masks = np.zeros((len(frames), H, W), dtype=np.uint8)
|
||||
pbar = comfy.utils.ProgressBar(len(frames))
|
||||
for bi, per_frame in enumerate(frames):
|
||||
if per_frame:
|
||||
pil = Image.new("L", (W, H), 0)
|
||||
draw = ImageDraw.Draw(pil)
|
||||
for f in per_frame:
|
||||
lmks = f["landmarks_xy"]
|
||||
for ring in rings:
|
||||
draw.polygon([(float(lmks[i, 0]), float(lmks[i, 1])) for i in ring], fill=255)
|
||||
masks[bi] = np.asarray(pil)
|
||||
pbar.update_absolute(bi + 1)
|
||||
return io.NodeOutput(torch.from_numpy(masks).to(
|
||||
device=comfy.model_management.intermediate_device(),
|
||||
dtype=comfy.model_management.intermediate_dtype(),
|
||||
).div_(255.0))
|
||||
|
||||
|
||||
class MediaPipeFaceExtension(ComfyExtension):
|
||||
@override
|
||||
async def get_node_list(self) -> list[type[io.ComfyNode]]:
|
||||
return [LoadMediaPipeFaceLandmarker, MediaPipeFaceLandmarker, MediaPipeFaceMeshVisualize, MediaPipeFaceMask]
|
||||
|
||||
|
||||
async def comfy_entrypoint() -> MediaPipeFaceExtension:
|
||||
return MediaPipeFaceExtension()
|
||||
12
execution.py
12
execution.py
@ -2,6 +2,7 @@ import copy
|
||||
import heapq
|
||||
import inspect
|
||||
import logging
|
||||
import psutil
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
@ -727,6 +728,7 @@ class PromptExecutor:
|
||||
|
||||
self._notify_prompt_lifecycle("start", prompt_id)
|
||||
ram_headroom = int(self.cache_args["ram"] * (1024 ** 3))
|
||||
ram_inactive_headroom = int(self.cache_args["ram_inactive"] * (1024 ** 3))
|
||||
ram_release_callback = self.caches.outputs.ram_release if self.cache_type == CacheType.RAM_PRESSURE else None
|
||||
comfy.memory_management.set_ram_cache_release_state(ram_release_callback, ram_headroom)
|
||||
|
||||
@ -780,8 +782,14 @@ class PromptExecutor:
|
||||
execution_list.complete_node_execution()
|
||||
|
||||
if self.cache_type == CacheType.RAM_PRESSURE:
|
||||
comfy.model_management.free_memory(0, None, pins_required=ram_headroom, ram_required=ram_headroom)
|
||||
ram_release_callback(ram_headroom, free_active=True)
|
||||
ram_release_callback(ram_inactive_headroom)
|
||||
ram_shortfall = ram_headroom - psutil.virtual_memory().available
|
||||
freed = comfy.model_management.free_pins(ram_shortfall + 512 * (1024 ** 2))
|
||||
if freed < ram_shortfall:
|
||||
if freed > 64 * (1024 ** 2):
|
||||
# AIMDO MEM_DECOMMIT can outrun psutil.available catching up.
|
||||
time.sleep(0.05)
|
||||
ram_release_callback(ram_headroom, free_active=True)
|
||||
else:
|
||||
# Only execute when the while-loop ends without break
|
||||
# Send cached UI for intermediate output nodes that weren't executed
|
||||
|
||||
@ -60,6 +60,8 @@ folder_names_and_paths["geometry_estimation"] = ([os.path.join(models_dir, "geom
|
||||
|
||||
folder_names_and_paths["optical_flow"] = ([os.path.join(models_dir, "optical_flow")], supported_pt_extensions)
|
||||
|
||||
folder_names_and_paths["mediapipe"] = ([os.path.join(models_dir, "mediapipe")], supported_pt_extensions)
|
||||
|
||||
output_directory = os.path.join(base_path, "output")
|
||||
temp_directory = os.path.join(base_path, "temp")
|
||||
input_directory = os.path.join(base_path, "input")
|
||||
|
||||
20
main.py
20
main.py
@ -283,19 +283,25 @@ def _collect_output_absolute_paths(history_result: dict) -> list[str]:
|
||||
|
||||
def prompt_worker(q, server_instance):
|
||||
current_time: float = 0.0
|
||||
cache_ram = args.cache_ram
|
||||
if cache_ram < 0:
|
||||
cache_ram = 0
|
||||
cache_ram_inactive = 0
|
||||
if not args.cache_classic and not args.cache_none and args.cache_lru <= 0:
|
||||
cache_ram = min(32.0, max(4.0, comfy.model_management.total_ram * 0.25 / 1024.0))
|
||||
cache_ram_inactive = min(96.0, max(12.0, comfy.model_management.total_ram * 0.75 / 1024.0))
|
||||
if len(args.cache_ram) > 0:
|
||||
cache_ram = args.cache_ram[0]
|
||||
if len(args.cache_ram) > 1:
|
||||
cache_ram_inactive = args.cache_ram[1]
|
||||
|
||||
cache_type = execution.CacheType.CLASSIC
|
||||
if args.cache_lru > 0:
|
||||
cache_type = execution.CacheType.RAM_PRESSURE
|
||||
if args.cache_classic:
|
||||
cache_type = execution.CacheType.CLASSIC
|
||||
elif args.cache_lru > 0:
|
||||
cache_type = execution.CacheType.LRU
|
||||
elif cache_ram > 0:
|
||||
cache_type = execution.CacheType.RAM_PRESSURE
|
||||
elif args.cache_none:
|
||||
cache_type = execution.CacheType.NONE
|
||||
|
||||
e = execution.PromptExecutor(server_instance, cache_type=cache_type, cache_args={ "lru" : args.cache_lru, "ram" : cache_ram } )
|
||||
e = execution.PromptExecutor(server_instance, cache_type=cache_type, cache_args={ "lru" : args.cache_lru, "ram" : cache_ram, "ram_inactive" : cache_ram_inactive } )
|
||||
last_gc_collect = 0
|
||||
need_gc = False
|
||||
gc_collect_interval = 10.0
|
||||
|
||||
0
models/mediapipe/put_mediapipe_models_here
Normal file
0
models/mediapipe/put_mediapipe_models_here
Normal file
1
nodes.py
1
nodes.py
@ -2444,6 +2444,7 @@ async def init_builtin_extra_nodes():
|
||||
"nodes_hidream_o1.py",
|
||||
"nodes_save_3d.py",
|
||||
"nodes_moge.py",
|
||||
"nodes_mediapipe.py",
|
||||
]
|
||||
|
||||
import_failed = []
|
||||
|
||||
@ -23,7 +23,7 @@ SQLAlchemy>=2.0.0
|
||||
filelock
|
||||
av>=14.2.0
|
||||
comfy-kitchen>=0.2.8
|
||||
comfy-aimdo==0.3.0
|
||||
comfy-aimdo==0.4.3
|
||||
requests
|
||||
simpleeval>=1.0.0
|
||||
blake3
|
||||
|
||||
@ -21,6 +21,7 @@ from app.assets.database.queries import (
|
||||
get_reference_ids_by_ids,
|
||||
ensure_tags_exist,
|
||||
add_tags_to_reference,
|
||||
set_reference_tags,
|
||||
)
|
||||
from app.assets.helpers import get_utc_now
|
||||
|
||||
@ -159,6 +160,153 @@ class TestListReferencesPage:
|
||||
assert refs[0].name == "large"
|
||||
|
||||
|
||||
class TestTagRetrievalOrder:
|
||||
"""End-to-end check: tags written through the public write paths come
|
||||
back from the public read paths in insertion order rather than the
|
||||
composite-PK alphabetical order SQLite would otherwise impose.
|
||||
|
||||
Each test deliberately picks tag names that would sort differently
|
||||
under alphabetical vs insertion order, so an alphabetical regression
|
||||
fails loudly.
|
||||
"""
|
||||
|
||||
def _make_ref(self, session: Session) -> AssetReference:
|
||||
asset = _make_asset(session, "h1")
|
||||
return _make_reference(session, asset, name="x.bin")
|
||||
|
||||
def test_set_reference_tags_preserves_input_order_in_list(self, session: Session):
|
||||
ref = self._make_ref(session)
|
||||
# "checkpoints" < "models" alphabetically; if added_at stagger
|
||||
# works, list_references_page returns insertion order.
|
||||
set_reference_tags(session, reference_id=ref.id, tags=["models", "checkpoints"])
|
||||
session.commit()
|
||||
|
||||
_, tag_map, _ = list_references_page(session)
|
||||
assert tag_map[ref.id] == ["models", "checkpoints"]
|
||||
|
||||
def test_set_reference_tags_preserves_input_order_in_fetch(self, session: Session):
|
||||
ref = self._make_ref(session)
|
||||
# Subpath tag sorts before "models" alphabetically.
|
||||
set_reference_tags(
|
||||
session,
|
||||
reference_id=ref.id,
|
||||
tags=["models", "diffusers/kolors/text_encoder"],
|
||||
)
|
||||
session.commit()
|
||||
|
||||
result = fetch_reference_asset_and_tags(session, ref.id)
|
||||
assert result is not None
|
||||
_, _, tags = result
|
||||
# Bucket-prefix expansion appends the standalone `diffusers` token
|
||||
# at path-tier (microsecond stagger) so FE set-membership filters
|
||||
# match nested category paths.
|
||||
assert tags == ["models", "diffusers/kolors/text_encoder", "diffusers"]
|
||||
|
||||
def test_add_tags_to_reference_lands_after_path_tags(self, session: Session):
|
||||
ref = self._make_ref(session)
|
||||
set_reference_tags(session, reference_id=ref.id, tags=["models", "checkpoints"])
|
||||
session.commit()
|
||||
|
||||
# "aaa-..." sorts before both path tags alphabetically. If added_at
|
||||
# stagger is missing, alphabetic tiebreak would hoist it to tags[0].
|
||||
add_tags_to_reference(
|
||||
session, reference_id=ref.id, tags=["aaa-user-tag"], origin="manual"
|
||||
)
|
||||
session.commit()
|
||||
|
||||
_, tag_map, _ = list_references_page(session)
|
||||
assert tag_map[ref.id] == ["models", "checkpoints", "aaa-user-tag"]
|
||||
|
||||
def test_multi_tag_batch_lands_after_path_tags(self, session: Session):
|
||||
ref = self._make_ref(session)
|
||||
set_reference_tags(session, reference_id=ref.id, tags=["models", "checkpoints"])
|
||||
session.commit()
|
||||
|
||||
# Three user tags inserted in non-alphabetical input order. Per-tag
|
||||
# microsecond stagger should preserve at least the "user batch is
|
||||
# after path tags" property; within the user batch insertion order
|
||||
# is also preserved.
|
||||
add_tags_to_reference(
|
||||
session,
|
||||
reference_id=ref.id,
|
||||
tags=["zzz-z", "favorite", "experiment-q4"],
|
||||
origin="manual",
|
||||
)
|
||||
session.commit()
|
||||
|
||||
_, tag_map, _ = list_references_page(session)
|
||||
tags = tag_map[ref.id]
|
||||
assert tags[0:2] == ["models", "checkpoints"]
|
||||
assert set(tags[2:]) == {"zzz-z", "favorite", "experiment-q4"}
|
||||
|
||||
def test_user_batch_lands_after_path_batch_under_clock_collision(
|
||||
self, session: Session, monkeypatch: pytest.MonkeyPatch
|
||||
):
|
||||
"""Windows-specific race: when two back-to-back commits share the
|
||||
same datetime.now() microsecond, the path-tier and user-tier
|
||||
added_at values used to collide and alphabetic tiebreak would
|
||||
hoist user tags ahead of path tags. The fix reads
|
||||
max(existing_added_at) for the reference and seeds the next batch
|
||||
past it, deterministically restoring insertion order.
|
||||
|
||||
This test simulates the collision by pinning get_utc_now() so the
|
||||
platform-dependent race becomes a platform-independent failure.
|
||||
"""
|
||||
ref = self._make_ref(session)
|
||||
|
||||
from datetime import datetime
|
||||
from app.assets.database import queries as queries_pkg
|
||||
from app.assets.database.queries import tags as tags_module
|
||||
|
||||
frozen = datetime(2026, 1, 1, 0, 0, 0)
|
||||
monkeypatch.setattr(tags_module, "get_utc_now", lambda: frozen)
|
||||
monkeypatch.setattr(queries_pkg, "get_utc_now", lambda: frozen, raising=False)
|
||||
|
||||
set_reference_tags(session, reference_id=ref.id, tags=["models", "checkpoints"])
|
||||
session.commit()
|
||||
|
||||
# Same frozen timestamp — without the max(existing) seed, the
|
||||
# user batch would share added_at with the path batch and
|
||||
# `aaa-user-tag` would sort to position 0 via the alphabetic
|
||||
# tiebreaker.
|
||||
add_tags_to_reference(
|
||||
session, reference_id=ref.id, tags=["aaa-user-tag"], origin="manual"
|
||||
)
|
||||
session.commit()
|
||||
|
||||
_, tag_map, _ = list_references_page(session)
|
||||
assert tag_map[ref.id] == ["models", "checkpoints", "aaa-user-tag"]
|
||||
|
||||
def test_remove_then_add_does_not_disrupt_path_tag_positions(
|
||||
self, session: Session
|
||||
):
|
||||
ref = self._make_ref(session)
|
||||
set_reference_tags(
|
||||
session,
|
||||
reference_id=ref.id,
|
||||
tags=["models", "loras/my/custom/path"],
|
||||
)
|
||||
session.commit()
|
||||
add_tags_to_reference(session, reference_id=ref.id, tags=["temp-tag"])
|
||||
session.commit()
|
||||
from app.assets.database.queries import remove_tags_from_reference
|
||||
|
||||
remove_tags_from_reference(session, reference_id=ref.id, tags=["temp-tag"])
|
||||
session.commit()
|
||||
add_tags_to_reference(session, reference_id=ref.id, tags=["second-tag"])
|
||||
session.commit()
|
||||
|
||||
_, tag_map, _ = list_references_page(session)
|
||||
# `loras` is expanded from the nested category path; user-added
|
||||
# tags trail behind it via the microsecond stagger.
|
||||
assert tag_map[ref.id] == [
|
||||
"models",
|
||||
"loras/my/custom/path",
|
||||
"loras",
|
||||
"second-tag",
|
||||
]
|
||||
|
||||
|
||||
class TestFetchReferenceAssetAndTags:
|
||||
def test_returns_none_for_nonexistent(self, session: Session):
|
||||
result = fetch_reference_asset_and_tags(session, "nonexistent")
|
||||
|
||||
@ -160,6 +160,120 @@ class TestAddTagsToReference:
|
||||
add_tags_to_reference(session, reference_id="nonexistent", tags=["x"])
|
||||
|
||||
|
||||
class TestBucketPrefixExpansion:
|
||||
"""The standalone bucket token must appear in the asset's tag set for
|
||||
nested category paths so FE filters like
|
||||
`include_tags=models,checkpoints` continue to match.
|
||||
"""
|
||||
|
||||
def test_set_reference_tags_inserts_bucket_for_nested_path(
|
||||
self, session: Session
|
||||
):
|
||||
asset = _make_asset(session, "hash-nested")
|
||||
ref = _make_reference(session, asset)
|
||||
|
||||
result = set_reference_tags(
|
||||
session,
|
||||
reference_id=ref.id,
|
||||
tags=["models", "checkpoints/flux"],
|
||||
)
|
||||
session.commit()
|
||||
|
||||
assert set(result.total) == {"models", "checkpoints/flux", "checkpoints"}
|
||||
stored = get_reference_tags(session, reference_id=ref.id)
|
||||
# tag[1] keeps the slash-joined positional contract; the standalone
|
||||
# bucket lands after it via path-tier microsecond stagger so user
|
||||
# tags remain at the tail.
|
||||
assert stored[:3] == ["models", "checkpoints/flux", "checkpoints"]
|
||||
|
||||
def test_set_reference_tags_idempotent_on_replay(self, session: Session):
|
||||
asset = _make_asset(session, "hash-replay")
|
||||
ref = _make_reference(session, asset)
|
||||
|
||||
set_reference_tags(
|
||||
session,
|
||||
reference_id=ref.id,
|
||||
tags=["models", "checkpoints/flux"],
|
||||
)
|
||||
# Replay with the same caller-supplied set; expansion is already
|
||||
# baked in, so nothing should be added or removed.
|
||||
result = set_reference_tags(
|
||||
session,
|
||||
reference_id=ref.id,
|
||||
tags=["models", "checkpoints/flux"],
|
||||
)
|
||||
session.commit()
|
||||
|
||||
assert result.added == []
|
||||
assert result.removed == []
|
||||
assert set(result.total) == {"models", "checkpoints/flux", "checkpoints"}
|
||||
|
||||
def test_add_tags_to_reference_expands_bucket(self, session: Session):
|
||||
asset = _make_asset(session, "hash-add")
|
||||
ref = _make_reference(session, asset)
|
||||
|
||||
result = add_tags_to_reference(
|
||||
session,
|
||||
reference_id=ref.id,
|
||||
tags=["loras/style/v2"],
|
||||
)
|
||||
session.commit()
|
||||
|
||||
assert set(result.added) == {"loras/style/v2", "loras"}
|
||||
stored = get_reference_tags(session, reference_id=ref.id)
|
||||
assert "loras" in stored
|
||||
assert "loras/style/v2" in stored
|
||||
|
||||
def test_add_tags_does_not_duplicate_existing_bucket(self, session: Session):
|
||||
asset = _make_asset(session, "hash-dedupe")
|
||||
ref = _make_reference(session, asset)
|
||||
|
||||
add_tags_to_reference(
|
||||
session, reference_id=ref.id, tags=["models", "checkpoints"]
|
||||
)
|
||||
result = add_tags_to_reference(
|
||||
session, reference_id=ref.id, tags=["checkpoints/flux"]
|
||||
)
|
||||
session.commit()
|
||||
|
||||
# `checkpoints` was already there from the first add; only the
|
||||
# slash-joined token is genuinely new.
|
||||
assert result.added == ["checkpoints/flux"]
|
||||
assert "checkpoints" in result.already_present
|
||||
|
||||
def test_flat_category_is_unaffected(self, session: Session):
|
||||
asset = _make_asset(session, "hash-flat")
|
||||
ref = _make_reference(session, asset)
|
||||
|
||||
result = set_reference_tags(
|
||||
session,
|
||||
reference_id=ref.id,
|
||||
tags=["models", "checkpoints"],
|
||||
)
|
||||
session.commit()
|
||||
|
||||
assert set(result.total) == {"models", "checkpoints"}
|
||||
assert get_reference_tags(session, reference_id=ref.id) == [
|
||||
"models",
|
||||
"checkpoints",
|
||||
]
|
||||
|
||||
def test_unknown_prefix_passes_through(self, session: Session):
|
||||
asset = _make_asset(session, "hash-user")
|
||||
ref = _make_reference(session, asset)
|
||||
|
||||
# `my-org` isn't a registered bucket — the slash-joined user tag
|
||||
# should not trigger bucket expansion.
|
||||
result = set_reference_tags(
|
||||
session,
|
||||
reference_id=ref.id,
|
||||
tags=["my-org/team-a"],
|
||||
)
|
||||
session.commit()
|
||||
|
||||
assert result.total == ["my-org/team-a"]
|
||||
|
||||
|
||||
class TestRemoveTagsFromReference:
|
||||
def test_removes_tags(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
|
||||
@ -4,7 +4,7 @@ from pathlib import Path
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.assets.database.models import Asset, AssetReference
|
||||
from app.assets.database.models import Asset, AssetReference, AssetReferenceTag
|
||||
from app.assets.services.bulk_ingest import SeedAssetSpec, batch_insert_seed_assets
|
||||
|
||||
|
||||
@ -102,6 +102,82 @@ class TestBatchInsertSeedAssets:
|
||||
assert asset.mime_type == expected_mime, f"Expected {expected_mime} for {filename}, got {asset.mime_type}"
|
||||
|
||||
|
||||
class TestBucketPrefixExpansionOnIngest:
|
||||
"""Path-scanning ingest must persist the standalone bucket token for
|
||||
nested category paths so the FE set-membership filter
|
||||
(`include_tags=models,checkpoints`) matches assets organized into
|
||||
subfolders (`models/checkpoints/flux/foo.safetensors`).
|
||||
"""
|
||||
|
||||
def test_nested_path_inserts_standalone_bucket(
|
||||
self, session: Session, temp_dir: Path
|
||||
):
|
||||
file_path = temp_dir / "flux.safetensors"
|
||||
file_path.write_bytes(b"content")
|
||||
|
||||
specs: list[SeedAssetSpec] = [
|
||||
{
|
||||
"abs_path": str(file_path),
|
||||
"size_bytes": 7,
|
||||
"mtime_ns": 1234567890000000000,
|
||||
"info_name": "flux",
|
||||
# Shape emitted by get_name_and_tags_from_asset_path for a
|
||||
# nested model path.
|
||||
"tags": ["models", "checkpoints/flux"],
|
||||
"fname": "flux.safetensors",
|
||||
"metadata": None,
|
||||
"hash": None,
|
||||
"mime_type": "application/safetensors",
|
||||
}
|
||||
]
|
||||
|
||||
result = batch_insert_seed_assets(session, specs=specs, owner_id="")
|
||||
|
||||
assert result.inserted_refs == 1
|
||||
ref = session.query(AssetReference).filter_by(name="flux").one()
|
||||
stored = [
|
||||
row.tag_name
|
||||
for row in session.query(AssetReferenceTag)
|
||||
.filter_by(asset_reference_id=ref.id)
|
||||
.order_by(AssetReferenceTag.added_at.asc())
|
||||
.all()
|
||||
]
|
||||
assert stored == ["models", "checkpoints/flux", "checkpoints"]
|
||||
|
||||
def test_flat_path_remains_two_tags(
|
||||
self, session: Session, temp_dir: Path
|
||||
):
|
||||
file_path = temp_dir / "vanilla.safetensors"
|
||||
file_path.write_bytes(b"content")
|
||||
|
||||
specs: list[SeedAssetSpec] = [
|
||||
{
|
||||
"abs_path": str(file_path),
|
||||
"size_bytes": 7,
|
||||
"mtime_ns": 1234567890000000000,
|
||||
"info_name": "vanilla",
|
||||
"tags": ["models", "checkpoints"],
|
||||
"fname": "vanilla.safetensors",
|
||||
"metadata": None,
|
||||
"hash": None,
|
||||
"mime_type": "application/safetensors",
|
||||
}
|
||||
]
|
||||
|
||||
batch_insert_seed_assets(session, specs=specs, owner_id="")
|
||||
|
||||
ref = session.query(AssetReference).filter_by(name="vanilla").one()
|
||||
stored = {
|
||||
row.tag_name
|
||||
for row in session.query(AssetReferenceTag)
|
||||
.filter_by(asset_reference_id=ref.id)
|
||||
.all()
|
||||
}
|
||||
# Dedupe means flat layouts don't pick up a redundant `checkpoints`
|
||||
# row — tag[1] already serves both positional and set-membership.
|
||||
assert stored == {"models", "checkpoints"}
|
||||
|
||||
|
||||
class TestMetadataExtraction:
|
||||
def test_extracts_mime_type_for_model_files(self, temp_dir: Path):
|
||||
"""Verify metadata extraction returns correct mime_type for model files."""
|
||||
|
||||
@ -6,7 +6,11 @@ from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from app.assets.services.path_utils import get_asset_category_and_relative_path
|
||||
from app.assets.services.path_utils import (
|
||||
get_asset_category_and_relative_path,
|
||||
get_name_and_tags_from_asset_path,
|
||||
resolve_destination_from_tags,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@ -38,6 +42,50 @@ def fake_dirs():
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def fake_dirs_multi_bucket():
|
||||
"""Variant fixture with multiple model buckets (checkpoints + diffusers + loras)."""
|
||||
with tempfile.TemporaryDirectory() as root:
|
||||
root_path = Path(root)
|
||||
input_dir = root_path / "input"
|
||||
output_dir = root_path / "output"
|
||||
temp_dir = root_path / "temp"
|
||||
checkpoints_dir = root_path / "models" / "checkpoints"
|
||||
diffusers_dir = root_path / "models" / "diffusers"
|
||||
loras_dir = root_path / "models" / "loras"
|
||||
for d in (
|
||||
input_dir,
|
||||
output_dir,
|
||||
temp_dir,
|
||||
checkpoints_dir,
|
||||
diffusers_dir,
|
||||
loras_dir,
|
||||
):
|
||||
d.mkdir(parents=True)
|
||||
|
||||
with patch("app.assets.services.path_utils.folder_paths") as mock_fp:
|
||||
mock_fp.get_input_directory.return_value = str(input_dir)
|
||||
mock_fp.get_output_directory.return_value = str(output_dir)
|
||||
mock_fp.get_temp_directory.return_value = str(temp_dir)
|
||||
|
||||
with patch(
|
||||
"app.assets.services.path_utils.get_comfy_models_folders",
|
||||
return_value=[
|
||||
("checkpoints", [str(checkpoints_dir)]),
|
||||
("diffusers", [str(diffusers_dir)]),
|
||||
("loras", [str(loras_dir)]),
|
||||
],
|
||||
):
|
||||
yield {
|
||||
"input": input_dir,
|
||||
"output": output_dir,
|
||||
"temp": temp_dir,
|
||||
"checkpoints": checkpoints_dir,
|
||||
"diffusers": diffusers_dir,
|
||||
"loras": loras_dir,
|
||||
}
|
||||
|
||||
|
||||
class TestGetAssetCategoryAndRelativePath:
|
||||
def test_input_file(self, fake_dirs):
|
||||
f = fake_dirs["input"] / "photo.png"
|
||||
@ -79,3 +127,161 @@ class TestGetAssetCategoryAndRelativePath:
|
||||
def test_unknown_path_raises(self, fake_dirs):
|
||||
with pytest.raises(ValueError, match="not within"):
|
||||
get_asset_category_and_relative_path("/some/random/path.png")
|
||||
|
||||
|
||||
class TestGetNameAndTagsFromAssetPath:
|
||||
"""tags collapse the parent subpath into a single slash-joined tag.
|
||||
|
||||
Consumers should be able to read ``tags[1]`` as a stable category
|
||||
identifier regardless of how deep the file lives in the bucket.
|
||||
"""
|
||||
|
||||
def test_flat_input(self, fake_dirs_multi_bucket):
|
||||
f = fake_dirs_multi_bucket["input"] / "photo.png"
|
||||
f.touch()
|
||||
name, tags = get_name_and_tags_from_asset_path(str(f))
|
||||
assert name == "photo.png"
|
||||
assert tags == ["input"]
|
||||
|
||||
def test_flat_output(self, fake_dirs_multi_bucket):
|
||||
f = fake_dirs_multi_bucket["output"] / "result_00001.png"
|
||||
f.touch()
|
||||
name, tags = get_name_and_tags_from_asset_path(str(f))
|
||||
assert name == "result_00001.png"
|
||||
assert tags == ["output"]
|
||||
|
||||
def test_flat_models_checkpoint(self, fake_dirs_multi_bucket):
|
||||
f = fake_dirs_multi_bucket["checkpoints"] / "flux.safetensors"
|
||||
f.touch()
|
||||
name, tags = get_name_and_tags_from_asset_path(str(f))
|
||||
assert name == "flux.safetensors"
|
||||
assert tags == ["models", "checkpoints"]
|
||||
|
||||
def test_diffusers_nested_subpath_slash_joined(self, fake_dirs_multi_bucket):
|
||||
"""Diffusers components live in nested directories — the full subpath
|
||||
must collapse into one tag so consumers can look up the model category
|
||||
via tags[1] regardless of nesting depth.
|
||||
|
||||
The subpath is lowercased to match the canonicalization
|
||||
:func:`ensure_tags_exist` applies on the write side; without that,
|
||||
the asset_reference_tags.tag_name FK to tags.name would fail for
|
||||
any path containing uppercase letters.
|
||||
"""
|
||||
nested = (
|
||||
fake_dirs_multi_bucket["diffusers"]
|
||||
/ "Kolors"
|
||||
/ "text_encoder"
|
||||
)
|
||||
nested.mkdir(parents=True)
|
||||
f = nested / "model.safetensors"
|
||||
f.touch()
|
||||
name, tags = get_name_and_tags_from_asset_path(str(f))
|
||||
assert name == "model.safetensors"
|
||||
assert tags == ["models", "diffusers/kolors/text_encoder"]
|
||||
|
||||
def test_deep_lora_user_subpath_slash_joined(self, fake_dirs_multi_bucket):
|
||||
"""User-created subdirectories under a model bucket also collapse to a
|
||||
single tag rather than one tag per directory."""
|
||||
nested = (
|
||||
fake_dirs_multi_bucket["loras"]
|
||||
/ "my"
|
||||
/ "custom"
|
||||
/ "path"
|
||||
)
|
||||
nested.mkdir(parents=True)
|
||||
f = nested / "v0001.safetensors"
|
||||
f.touch()
|
||||
name, tags = get_name_and_tags_from_asset_path(str(f))
|
||||
assert name == "v0001.safetensors"
|
||||
assert tags == ["models", "loras/my/custom/path"]
|
||||
|
||||
|
||||
class TestResolveDestinationFromTags:
|
||||
"""resolve_destination_from_tags must accept both the legacy
|
||||
one-tag-per-directory shape and the new slash-joined shape so that an
|
||||
upload using the tags it just read back from /api/assets round-trips
|
||||
to the right on-disk destination.
|
||||
"""
|
||||
|
||||
@pytest.fixture
|
||||
def resolve_dirs(self):
|
||||
with tempfile.TemporaryDirectory() as root:
|
||||
root_path = Path(root)
|
||||
input_dir = root_path / "input"
|
||||
output_dir = root_path / "output"
|
||||
checkpoints_dir = root_path / "models" / "checkpoints"
|
||||
diffusers_dir = root_path / "models" / "diffusers"
|
||||
loras_dir = root_path / "models" / "loras"
|
||||
for d in (input_dir, output_dir, checkpoints_dir, diffusers_dir, loras_dir):
|
||||
d.mkdir(parents=True)
|
||||
with patch("app.assets.services.path_utils.folder_paths") as mock_fp:
|
||||
mock_fp.get_input_directory.return_value = str(input_dir)
|
||||
mock_fp.get_output_directory.return_value = str(output_dir)
|
||||
mock_fp.folder_names_and_paths = {
|
||||
"checkpoints": ([str(checkpoints_dir)], None),
|
||||
"diffusers": ([str(diffusers_dir)], None),
|
||||
"loras": ([str(loras_dir)], None),
|
||||
}
|
||||
yield {
|
||||
"input": input_dir,
|
||||
"output": output_dir,
|
||||
"checkpoints": checkpoints_dir,
|
||||
"diffusers": diffusers_dir,
|
||||
"loras": loras_dir,
|
||||
}
|
||||
|
||||
def test_models_flat_category(self, resolve_dirs):
|
||||
base, subdirs = resolve_destination_from_tags(["models", "checkpoints"])
|
||||
assert base == str(resolve_dirs["checkpoints"])
|
||||
assert subdirs == []
|
||||
|
||||
def test_models_slash_joined_new_shape(self, resolve_dirs):
|
||||
# The shape get_name_and_tags_from_asset_path now emits.
|
||||
base, subdirs = resolve_destination_from_tags(
|
||||
["models", "diffusers/kolors/text_encoder"]
|
||||
)
|
||||
assert base == str(resolve_dirs["diffusers"])
|
||||
assert subdirs == ["kolors", "text_encoder"]
|
||||
|
||||
def test_models_legacy_one_tag_per_dir(self, resolve_dirs):
|
||||
# The legacy shape must still resolve identically.
|
||||
base, subdirs = resolve_destination_from_tags(
|
||||
["models", "diffusers", "kolors", "text_encoder"]
|
||||
)
|
||||
assert base == str(resolve_dirs["diffusers"])
|
||||
assert subdirs == ["kolors", "text_encoder"]
|
||||
|
||||
def test_models_loras_slash_joined(self, resolve_dirs):
|
||||
base, subdirs = resolve_destination_from_tags(
|
||||
["models", "loras/my/custom/path"]
|
||||
)
|
||||
assert base == str(resolve_dirs["loras"])
|
||||
assert subdirs == ["my", "custom", "path"]
|
||||
|
||||
def test_input_no_subdir(self, resolve_dirs):
|
||||
base, subdirs = resolve_destination_from_tags(["input"])
|
||||
assert base == str(resolve_dirs["input"])
|
||||
assert subdirs == []
|
||||
|
||||
def test_input_slash_joined_subdir(self, resolve_dirs):
|
||||
base, subdirs = resolve_destination_from_tags(["input", "portraits/2026"])
|
||||
assert base == str(resolve_dirs["input"])
|
||||
assert subdirs == ["portraits", "2026"]
|
||||
|
||||
def test_output_slash_joined_subdir(self, resolve_dirs):
|
||||
base, subdirs = resolve_destination_from_tags(["output", "runs/abc"])
|
||||
assert base == str(resolve_dirs["output"])
|
||||
assert subdirs == ["runs", "abc"]
|
||||
|
||||
def test_unknown_category_rejected(self, resolve_dirs):
|
||||
with pytest.raises(ValueError, match="unknown model category"):
|
||||
resolve_destination_from_tags(["models", "not_a_real_category"])
|
||||
|
||||
def test_unknown_category_via_slash_joined(self, resolve_dirs):
|
||||
# First segment of a slash-joined tag must still match a registered category.
|
||||
with pytest.raises(ValueError, match="unknown model category 'bogus'"):
|
||||
resolve_destination_from_tags(["models", "bogus/sub/path"])
|
||||
|
||||
def test_traversal_in_subdir_rejected(self, resolve_dirs):
|
||||
with pytest.raises(ValueError, match="invalid path component"):
|
||||
resolve_destination_from_tags(["models", "checkpoints/..", "evil"])
|
||||
|
||||
@ -32,7 +32,7 @@ def test_seed_asset_removed_when_file_is_deleted(
|
||||
# Verify it is visible via API and carries no hash (seed)
|
||||
r1 = http.get(
|
||||
api_base + "/api/assets",
|
||||
params={"include_tags": "unit-tests,syncseed", "name_contains": name},
|
||||
params={"include_tags": "unit-tests/syncseed", "name_contains": name},
|
||||
timeout=120,
|
||||
)
|
||||
body1 = r1.json()
|
||||
@ -52,7 +52,7 @@ def test_seed_asset_removed_when_file_is_deleted(
|
||||
# It should disappear (AssetInfo and seed Asset gone)
|
||||
r2 = http.get(
|
||||
api_base + "/api/assets",
|
||||
params={"include_tags": "unit-tests,syncseed", "name_contains": name},
|
||||
params={"include_tags": "unit-tests/syncseed", "name_contains": name},
|
||||
timeout=120,
|
||||
)
|
||||
body2 = r2.json()
|
||||
@ -332,7 +332,7 @@ def test_fastpass_removes_stale_state_row_no_missing(
|
||||
|
||||
rl = http.get(
|
||||
api_base + "/api/assets",
|
||||
params={"include_tags": f"unit-tests,{scope}"},
|
||||
params={"include_tags": f"unit-tests/{scope}"},
|
||||
timeout=120,
|
||||
)
|
||||
bl = rl.json()
|
||||
|
||||
@ -280,9 +280,15 @@ def test_metadata_filename_is_set_for_seed_asset_without_hash(
|
||||
|
||||
trigger_sync_seed_assets(http, api_base)
|
||||
|
||||
# Scanner emits tags as ``[root, "<dir1>/<dir2>/..."]`` — the second tag
|
||||
# is the slash-joined parent subpath. For ``<root>/unit-tests/<scope>/a/b/<name>``
|
||||
# the second tag is ``"unit-tests/<scope>/a/b"``.
|
||||
r1 = http.get(
|
||||
api_base + "/api/assets",
|
||||
params={"include_tags": f"unit-tests,{scope}", "name_contains": name},
|
||||
params={
|
||||
"include_tags": f"unit-tests/{scope}/a/b",
|
||||
"name_contains": name,
|
||||
},
|
||||
timeout=120,
|
||||
)
|
||||
body = r1.json()
|
||||
|
||||
69
tests-unit/assets_test/test_helpers.py
Normal file
69
tests-unit/assets_test/test_helpers.py
Normal file
@ -0,0 +1,69 @@
|
||||
"""Unit tests for app.assets.helpers."""
|
||||
|
||||
from app.assets.helpers import expand_bucket_prefixes
|
||||
|
||||
|
||||
class TestExpandBucketPrefixes:
|
||||
def test_flat_category_unchanged(self):
|
||||
# `checkpoints` is already a standalone token, no expansion needed.
|
||||
assert expand_bucket_prefixes(["models", "checkpoints"]) == [
|
||||
"models",
|
||||
"checkpoints",
|
||||
]
|
||||
|
||||
def test_nested_category_inserts_bucket(self):
|
||||
# Path-derived shape for `models/checkpoints/flux/foo.safetensors` —
|
||||
# the standalone bucket has to be present so the FE set-membership
|
||||
# filter (`include_tags=models,checkpoints`) matches the asset.
|
||||
assert expand_bucket_prefixes(["models", "checkpoints/flux"]) == [
|
||||
"models",
|
||||
"checkpoints/flux",
|
||||
"checkpoints",
|
||||
]
|
||||
|
||||
def test_deeply_nested_only_first_segment_expands(self):
|
||||
# Only the FIRST slash segment ever gets emitted as a standalone —
|
||||
# intermediate path segments don't have routing significance.
|
||||
assert expand_bucket_prefixes(
|
||||
["models", "diffusers/kolors/text_encoder"]
|
||||
) == ["models", "diffusers/kolors/text_encoder", "diffusers"]
|
||||
|
||||
def test_unknown_prefix_does_not_expand(self):
|
||||
# Free-form user labels with slashes whose first segment is not a
|
||||
# registered bucket pass through opaquely.
|
||||
assert expand_bucket_prefixes(["models", "my-org/team-a"]) == [
|
||||
"models",
|
||||
"my-org/team-a",
|
||||
]
|
||||
|
||||
def test_idempotent(self):
|
||||
# Re-applying the helper is a no-op once the bucket is in the set.
|
||||
expanded = expand_bucket_prefixes(["models", "checkpoints/flux"])
|
||||
assert expand_bucket_prefixes(expanded) == expanded
|
||||
|
||||
def test_does_not_duplicate_existing_bucket(self):
|
||||
# If the caller already supplied the standalone bucket, don't add a
|
||||
# second copy.
|
||||
assert expand_bucket_prefixes(
|
||||
["models", "checkpoints/flux", "checkpoints"]
|
||||
) == ["models", "checkpoints/flux", "checkpoints"]
|
||||
|
||||
def test_preserves_caller_order(self):
|
||||
# User tags after path tags must stay after; the inserted bucket
|
||||
# token slots in immediately after its slash-joined parent so the
|
||||
# microsecond stagger lands it at path-tier before user-tier.
|
||||
assert expand_bucket_prefixes(
|
||||
["models", "loras/style", "favorite", "v2"]
|
||||
) == ["models", "loras/style", "loras", "favorite", "v2"]
|
||||
|
||||
def test_empty_input(self):
|
||||
assert expand_bucket_prefixes([]) == []
|
||||
|
||||
def test_input_root_with_subpath_no_expansion(self):
|
||||
# `portraits` isn't a registered model category, so the input
|
||||
# subpath stays opaque (FE filter doesn't have a checkpoint-loader
|
||||
# analogue for input subfolders).
|
||||
assert expand_bucket_prefixes(["input", "portraits/2026"]) == [
|
||||
"input",
|
||||
"portraits/2026",
|
||||
]
|
||||
@ -29,7 +29,10 @@ def create_seed_file(comfy_tmp_base_dir: Path):
|
||||
def find_asset(http: requests.Session, api_base: str):
|
||||
"""Query API for assets matching scope and optional name."""
|
||||
def _find(scope: str, name: str | None = None) -> list[dict]:
|
||||
params = {"include_tags": f"unit-tests,{scope}"}
|
||||
# Scanner now emits tags as ``[root, "<dir1>/<dir2>/..."]`` rather than
|
||||
# one tag per directory. For files at ``<root>/unit-tests/<scope>/...``
|
||||
# the second tag is exactly ``"unit-tests/<scope>"``.
|
||||
params = {"include_tags": f"unit-tests/{scope}"}
|
||||
if name:
|
||||
params["name_contains"] = name
|
||||
r = http.get(f"{api_base}/api/assets", params=params, timeout=120)
|
||||
@ -138,4 +141,7 @@ def test_special_chars_in_path_escaped_correctly(
|
||||
trigger_sync_seed_assets(http, api_base)
|
||||
trigger_sync_seed_assets(http, api_base)
|
||||
|
||||
assert find_asset(scope.split("/")[0], fp.name), "Asset with special chars should survive"
|
||||
# Scanner emits the full parent subpath as a single slash-joined tag, so
|
||||
# the lookup tag is ``unit-tests/<scope>`` even when <scope> itself
|
||||
# contains a slash (parent + special-char dirname).
|
||||
assert find_asset(scope, fp.name), "Asset with special chars should survive"
|
||||
|
||||
135
tests-unit/assets_test/test_user_tag_http_smoke.py
Normal file
135
tests-unit/assets_test/test_user_tag_http_smoke.py
Normal file
@ -0,0 +1,135 @@
|
||||
"""HTTP-layer smoke test: user-added tags via POST /api/assets/{id}/tags
|
||||
land after path tags when read back via GET /api/assets.
|
||||
|
||||
Exercises the full route handler -> service -> query path that the unit
|
||||
tests at tests-unit/assets_test/queries/test_asset_info.py only cover at
|
||||
the service layer.
|
||||
"""
|
||||
import json
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def smoke_asset(http: requests.Session, api_base: str):
|
||||
"""Upload a single asset into models/checkpoints/unit-tests/smoke
|
||||
and delete it on teardown."""
|
||||
name = "smoke_user_tag.safetensors"
|
||||
tags = ["models", "checkpoints", "unit-tests", "smoke"]
|
||||
files = {"file": (name, b"S" * 4096, "application/octet-stream")}
|
||||
form_data = {
|
||||
"tags": json.dumps(tags),
|
||||
"name": name,
|
||||
"user_metadata": json.dumps({}),
|
||||
}
|
||||
r = http.post(api_base + "/api/assets", files=files, data=form_data, timeout=120)
|
||||
assert r.status_code == 201, r.text
|
||||
body = r.json()
|
||||
yield body
|
||||
http.delete(
|
||||
f"{api_base}/api/assets/{body['id']}?delete_content=true", timeout=30
|
||||
)
|
||||
|
||||
|
||||
def _fetch_asset_tags(http, api_base, ref_id):
|
||||
r = http.get(f"{api_base}/api/assets/{ref_id}", timeout=30)
|
||||
assert r.status_code == 200, r.text
|
||||
return r.json()["tags"]
|
||||
|
||||
|
||||
def test_user_tag_lands_after_path_tags_via_http(
|
||||
http: requests.Session, api_base: str, smoke_asset: dict
|
||||
):
|
||||
ref_id = smoke_asset["id"]
|
||||
|
||||
initial_tags = _fetch_asset_tags(http, api_base, ref_id)
|
||||
# Path tags should already be at the front in upload order.
|
||||
assert initial_tags[:2] == ["models", "checkpoints"]
|
||||
|
||||
# Add a user tag that would jump to position 0 under alphabetical sort.
|
||||
r = http.post(
|
||||
f"{api_base}/api/assets/{ref_id}/tags",
|
||||
json={"tags": ["aaa-user-tag"]},
|
||||
timeout=30,
|
||||
)
|
||||
assert r.status_code in (200, 201), r.text
|
||||
|
||||
tags_after = _fetch_asset_tags(http, api_base, ref_id)
|
||||
# Path tags must still be at the front; user tag goes to the end.
|
||||
assert tags_after[0] == "models"
|
||||
assert tags_after[1] == "checkpoints"
|
||||
assert "aaa-user-tag" in tags_after
|
||||
assert tags_after[-1] == "aaa-user-tag"
|
||||
|
||||
|
||||
def test_user_tag_batch_lands_after_path_tags_via_http(
|
||||
http: requests.Session, api_base: str, smoke_asset: dict
|
||||
):
|
||||
ref_id = smoke_asset["id"]
|
||||
|
||||
# Add three user tags in a single request, in non-alphabetical input
|
||||
# order. They should all land after the path tags (microsecond stagger
|
||||
# in set_reference_tags / add_tags_to_reference is what makes this
|
||||
# work — without it, "aaa" would jump to position 0).
|
||||
r = http.post(
|
||||
f"{api_base}/api/assets/{ref_id}/tags",
|
||||
json={"tags": ["zzz-z", "favorite", "aaa-experiment"]},
|
||||
timeout=30,
|
||||
)
|
||||
assert r.status_code in (200, 201), r.text
|
||||
|
||||
tags_after = _fetch_asset_tags(http, api_base, ref_id)
|
||||
assert tags_after[0] == "models"
|
||||
assert tags_after[1] == "checkpoints"
|
||||
user_tail = tags_after[len({"models", "checkpoints", "unit-tests", "smoke"}):]
|
||||
assert set(user_tail) >= {"zzz-z", "favorite", "aaa-experiment"}
|
||||
# Critically: alphabetical sort would put 'aaa-experiment' at position 0.
|
||||
assert tags_after.index("aaa-experiment") > tags_after.index("models")
|
||||
assert tags_after.index("aaa-experiment") > tags_after.index("checkpoints")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def nested_checkpoint_asset(http: requests.Session, api_base: str):
|
||||
"""Upload a checkpoint at the slash-joined path shape cloud emits
|
||||
(`models/checkpoints/flux/...`), then delete it on teardown.
|
||||
"""
|
||||
name = "nested_checkpoint.safetensors"
|
||||
tags = ["models", "checkpoints/flux"]
|
||||
files = {"file": (name, b"S" * 4096, "application/octet-stream")}
|
||||
form_data = {
|
||||
"tags": json.dumps(tags),
|
||||
"name": name,
|
||||
"user_metadata": json.dumps({}),
|
||||
}
|
||||
r = http.post(api_base + "/api/assets", files=files, data=form_data, timeout=120)
|
||||
assert r.status_code == 201, r.text
|
||||
body = r.json()
|
||||
yield body
|
||||
http.delete(
|
||||
f"{api_base}/api/assets/{body['id']}?delete_content=true", timeout=30
|
||||
)
|
||||
|
||||
|
||||
def test_nested_checkpoint_satisfies_fe_set_filter(
|
||||
http: requests.Session, api_base: str, nested_checkpoint_asset: dict
|
||||
):
|
||||
"""The case Simon flagged: a nested-path checkpoint must still match
|
||||
`include_tags=models,checkpoints` — the FE combo-widget filter.
|
||||
"""
|
||||
ref_id = nested_checkpoint_asset["id"]
|
||||
|
||||
stored = _fetch_asset_tags(http, api_base, ref_id)
|
||||
# tag[1] keeps cloud's slash-joined positional contract; tag[2] holds
|
||||
# the standalone bucket the FE filter looks for.
|
||||
assert stored[:3] == ["models", "checkpoints/flux", "checkpoints"]
|
||||
|
||||
# The actual FE query — exact set-membership across both tokens.
|
||||
r = http.get(
|
||||
f"{api_base}/api/assets",
|
||||
params=[("include_tags", "models"), ("include_tags", "checkpoints")],
|
||||
timeout=30,
|
||||
)
|
||||
assert r.status_code == 200, r.text
|
||||
returned_ids = {a["id"] for a in r.json()["assets"]}
|
||||
assert ref_id in returned_ids
|
||||
@ -14,7 +14,6 @@ from tests.execution.test_execution import ComfyClient, run_warmup
|
||||
class TestAsyncNodes:
|
||||
@fixture(scope="class", autouse=True, params=[
|
||||
(False, 0),
|
||||
(True, 0),
|
||||
(True, 100),
|
||||
])
|
||||
def _server(self, args_pytest, request):
|
||||
@ -29,6 +28,8 @@ class TestAsyncNodes:
|
||||
use_lru, lru_size = request.param
|
||||
if use_lru:
|
||||
pargs += ['--cache-lru', str(lru_size)]
|
||||
else:
|
||||
pargs += ['--cache-classic']
|
||||
# Running server with args: pargs
|
||||
p = subprocess.Popen(pargs)
|
||||
yield
|
||||
|
||||
@ -183,8 +183,7 @@ class TestExecution:
|
||||
# Initialize server and client
|
||||
#
|
||||
@fixture(scope="class", autouse=True, params=[
|
||||
{ "extra_args" : [], "should_cache_results" : True },
|
||||
{ "extra_args" : ["--cache-lru", 0], "should_cache_results" : True },
|
||||
{ "extra_args" : ["--cache-classic"], "should_cache_results" : True },
|
||||
{ "extra_args" : ["--cache-lru", 100], "should_cache_results" : True },
|
||||
{ "extra_args" : ["--cache-none"], "should_cache_results" : False },
|
||||
])
|
||||
|
||||
Reference in New Issue
Block a user