[Frontend] Use engine argument to control MM cache size (#22441)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-08-08 00:47:10 +08:00
committed by GitHub
parent 8c9da6be22
commit 139d155781
13 changed files with 101 additions and 47 deletions

View File

@@ -261,7 +261,7 @@ def build_model_context(
model_config_kwargs: Optional[dict[str, Any]] = None,
mm_processor_kwargs: Optional[dict[str, Any]] = None,
limit_mm_per_prompt: Optional[dict[str, int]] = None,
-disable_mm_preprocessor_cache: bool = True,
+mm_processor_cache_gb: int = 0,
):
"""Creates an InputContext for a given model.
@@ -291,7 +291,7 @@ def build_model_context(
seed=0,
mm_processor_kwargs=mm_processor_kwargs,
limit_mm_per_prompt=limit_mm_per_prompt,
-disable_mm_preprocessor_cache=disable_mm_preprocessor_cache,
+mm_processor_cache_gb=mm_processor_cache_gb,
hf_overrides=model_info.hf_overrides,
**model_config_kwargs,
)