[Frontend] Use engine argument to control MM cache size (#22441)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-08-08 00:47:10 +08:00
committed by GitHub
parent 8c9da6be22
commit 139d155781
13 changed files with 101 additions and 47 deletions

View File

@@ -68,7 +68,7 @@ def run_simple_demo(args: argparse.Namespace):
max_model_len=4096,
max_num_seqs=2,
tensor_parallel_size=2,
-disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache,
+mm_processor_cache_gb=0 if args.disable_mm_processor_cache else 4,
)
prompt = "Describe this image in one sentence."
@@ -105,7 +105,7 @@ def run_advanced_demo(args: argparse.Namespace):
limit_mm_per_prompt={"image": max_img_per_msg},
max_model_len=max_img_per_msg * max_tokens_per_img,
tensor_parallel_size=2,
-disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache,
+mm_processor_cache_gb=0 if args.disable_mm_processor_cache else 4,
)
prompt = "Describe the following image."
@@ -164,7 +164,7 @@ def parse_args():
)
parser.add_argument(
-"--disable-mm-preprocessor-cache",
+"--disable-mm-processor-cache",
action="store_true",
help="If True, disables caching of multi-modal processor.",
)

View File

@@ -1563,7 +1563,7 @@ def parse_args():
)
parser.add_argument(
-"--disable-mm-preprocessor-cache",
+"--disable-mm-processor-cache",
action="store_true",
help="If True, disables caching of multi-modal processor.",
)
@@ -1603,7 +1603,7 @@ def main(args):
engine_args = asdict(req_data.engine_args) | {
"seed": args.seed,
-"disable_mm_preprocessor_cache": args.disable_mm_preprocessor_cache,
+"mm_processor_cache_gb": 0 if args.disable_mm_processor_cache else 4,
}
llm = LLM(**engine_args)