[Frontend] Use engine argument to control MM cache size (#22441)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -261,7 +261,7 @@ def build_model_context(
|
||||
model_config_kwargs: Optional[dict[str, Any]] = None,
|
||||
mm_processor_kwargs: Optional[dict[str, Any]] = None,
|
||||
limit_mm_per_prompt: Optional[dict[str, int]] = None,
|
||||
-disable_mm_preprocessor_cache: bool = True,
|
||||
+mm_processor_cache_gb: int = 0,
|
||||
):
|
||||
"""Creates an InputContext for a given model.
|
||||
|
||||
@@ -291,7 +291,7 @@ def build_model_context(
|
||||
seed=0,
|
||||
mm_processor_kwargs=mm_processor_kwargs,
|
||||
limit_mm_per_prompt=limit_mm_per_prompt,
|
||||
-disable_mm_preprocessor_cache=disable_mm_preprocessor_cache,
|
||||
+mm_processor_cache_gb=mm_processor_cache_gb,
|
||||
hf_overrides=model_info.hf_overrides,
|
||||
**model_config_kwargs,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user