[Bugfix][VLM] Fix failing Phi-4-MM multi-images tests and add vision-speech test (#16424)
Signed-off-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
@ -199,13 +199,6 @@ def main(args):
|
||||
engine_args = asdict(req_data.engine_args) | {"seed": args.seed}
|
||||
llm = LLM(**engine_args)
|
||||
|
||||
# To maintain code compatibility in this script, we add LoRA here.
|
||||
# You can also add LoRA using:
|
||||
# llm.generate(prompts, lora_request=lora_request,...)
|
||||
if req_data.lora_requests:
|
||||
for lora_request in req_data.lora_requests:
|
||||
llm.llm_engine.add_lora(lora_request=lora_request)
|
||||
|
||||
# We set temperature to 0.2 so that outputs can be different
|
||||
# even when all prompts are identical when running batch inference.
|
||||
sampling_params = SamplingParams(temperature=0.2,
|
||||
@ -226,8 +219,15 @@ def main(args):
|
||||
if args.num_prompts > 1:
|
||||
# Batch inference
|
||||
inputs = [inputs] * args.num_prompts
|
||||
# Add LoRA request if applicable
|
||||
lora_request = (req_data.lora_requests *
|
||||
args.num_prompts if req_data.lora_requests else None)
|
||||
|
||||
outputs = llm.generate(inputs, sampling_params=sampling_params)
|
||||
outputs = llm.generate(
|
||||
inputs,
|
||||
sampling_params=sampling_params,
|
||||
lora_request=lora_request,
|
||||
)
|
||||
|
||||
for o in outputs:
|
||||
generated_text = o.outputs[0].text
|
||||
|
||||
@ -8,6 +8,7 @@ on HuggingFace model repository.
|
||||
"""
|
||||
import os
|
||||
import random
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import asdict
|
||||
from typing import NamedTuple, Optional
|
||||
|
||||
@ -1055,6 +1056,20 @@ def apply_image_repeat(image_repeat_prob, num_prompts, data,
|
||||
return inputs
|
||||
|
||||
|
||||
@contextmanager
def time_counter(enable: bool):
    """Optionally report how long the body of the ``with`` block took.

    Args:
        enable: When true, the elapsed time of the ``with`` body is printed
            between two 50-character dashed separator lines once the body
            finishes. When false, the body runs with no timing and no output.

    Yields:
        None. The context manager exists only for its timing side effect.

    Note:
        If the body raises, the exception propagates out of the ``yield``
        and no timing is printed.
    """
    if not enable:
        yield
        return

    import time

    # perf_counter() is a monotonic, high-resolution clock, so the measured
    # interval cannot be skewed (or go negative) when the system wall clock
    # is adjusted while generation is running, unlike time.time().
    start_time = time.perf_counter()
    yield
    elapsed_time = time.perf_counter() - start_time

    print("-" * 50)
    print("-- generate time = {}".format(elapsed_time))
    print("-" * 50)
|
||||
|
||||
|
||||
def main(args):
|
||||
model = args.model_type
|
||||
if model not in model_example_map:
|
||||
@ -1113,17 +1128,16 @@ def main(args):
|
||||
},
|
||||
} for i in range(args.num_prompts)]
|
||||
|
||||
if args.time_generate:
|
||||
import time
|
||||
start_time = time.time()
|
||||
outputs = llm.generate(inputs, sampling_params=sampling_params)
|
||||
elapsed_time = time.time() - start_time
|
||||
print("-" * 50)
|
||||
print("-- generate time = {}".format(elapsed_time))
|
||||
print("-" * 50)
|
||||
# Add LoRA request if applicable
|
||||
lora_request = (req_data.lora_requests *
|
||||
args.num_prompts if req_data.lora_requests else None)
|
||||
|
||||
else:
|
||||
outputs = llm.generate(inputs, sampling_params=sampling_params)
|
||||
with time_counter(args.time_generate):
|
||||
outputs = llm.generate(
|
||||
inputs,
|
||||
sampling_params=sampling_params,
|
||||
lora_request=lora_request,
|
||||
)
|
||||
|
||||
print("-" * 50)
|
||||
for o in outputs:
|
||||
|
||||
@ -661,13 +661,6 @@ def run_generate(model, question: str, image_urls: list[str],
|
||||
engine_args = asdict(req_data.engine_args) | {"seed": args.seed}
|
||||
llm = LLM(**engine_args)
|
||||
|
||||
# To maintain code compatibility in this script, we add LoRA here.
|
||||
# You can also add LoRA using:
|
||||
# llm.generate(prompts, lora_request=lora_request,...)
|
||||
if req_data.lora_requests:
|
||||
for lora_request in req_data.lora_requests:
|
||||
llm.llm_engine.add_lora(lora_request=lora_request)
|
||||
|
||||
sampling_params = SamplingParams(temperature=0.0,
|
||||
max_tokens=256,
|
||||
stop_token_ids=req_data.stop_token_ids)
|
||||
@ -679,7 +672,9 @@ def run_generate(model, question: str, image_urls: list[str],
|
||||
"image": req_data.image_data
|
||||
},
|
||||
},
|
||||
sampling_params=sampling_params)
|
||||
sampling_params=sampling_params,
|
||||
lora_request=req_data.lora_requests,
|
||||
)
|
||||
|
||||
print("-" * 50)
|
||||
for o in outputs:
|
||||
@ -724,6 +719,7 @@ def run_chat(model: str, question: str, image_urls: list[str],
|
||||
}],
|
||||
sampling_params=sampling_params,
|
||||
chat_template=req_data.chat_template,
|
||||
lora_request=req_data.lora_requests,
|
||||
)
|
||||
|
||||
print("-" * 50)
|
||||
|
||||
Reference in New Issue
Block a user