[Frontend] Use engine argument to control MM cache size (#22441)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@ -68,7 +68,7 @@ def run_simple_demo(args: argparse.Namespace):
|
||||
max_model_len=4096,
|
||||
max_num_seqs=2,
|
||||
tensor_parallel_size=2,
|
||||
disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache,
|
||||
mm_processor_cache_gb=0 if args.disable_mm_processor_cache else 4,
|
||||
)
|
||||
|
||||
prompt = "Describe this image in one sentence."
|
||||
@ -105,7 +105,7 @@ def run_advanced_demo(args: argparse.Namespace):
|
||||
limit_mm_per_prompt={"image": max_img_per_msg},
|
||||
max_model_len=max_img_per_msg * max_tokens_per_img,
|
||||
tensor_parallel_size=2,
|
||||
disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache,
|
||||
mm_processor_cache_gb=0 if args.disable_mm_processor_cache else 4,
|
||||
)
|
||||
|
||||
prompt = "Describe the following image."
|
||||
@ -164,7 +164,7 @@ def parse_args():
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--disable-mm-preprocessor-cache",
|
||||
"--disable-mm-processor-cache",
|
||||
action="store_true",
|
||||
help="If True, disables caching of multi-modal processor.",
|
||||
)
|
||||
|
||||
@ -1563,7 +1563,7 @@ def parse_args():
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--disable-mm-preprocessor-cache",
|
||||
"--disable-mm-processor-cache",
|
||||
action="store_true",
|
||||
help="If True, disables caching of multi-modal processor.",
|
||||
)
|
||||
@ -1603,7 +1603,7 @@ def main(args):
|
||||
|
||||
engine_args = asdict(req_data.engine_args) | {
|
||||
"seed": args.seed,
|
||||
"disable_mm_preprocessor_cache": args.disable_mm_preprocessor_cache,
|
||||
"mm_processor_cache_gb": 0 if args.disable_mm_processor_cache else 4,
|
||||
}
|
||||
llm = LLM(**engine_args)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user