[Misc] Split up pooling tasks (#10820)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -93,7 +93,7 @@ def _run_test(
|
||||
# if we run HF first, the cuda initialization will be done and it
|
||||
# will hurt multiprocessing backend with fork method (the default method).
|
||||
with vllm_runner(model,
|
||||
-task="embedding",
|
||||
+task="embed",
|
||||
dtype=dtype,
|
||||
enforce_eager=True,
|
||||
max_model_len=8192) as vllm_model:
|
||||
|
||||
@@ -47,7 +47,7 @@ def _run_test(
|
||||
# if we run HF first, the cuda initialization will be done and it
|
||||
# will hurt multiprocessing backend with fork method (the default method).
|
||||
with vllm_runner(model,
|
||||
-task="embedding",
|
||||
+task="embed",
|
||||
dtype=dtype,
|
||||
max_model_len=4096,
|
||||
enforce_eager=True) as vllm_model:
|
||||
|
||||
@@ -39,7 +39,7 @@ def _run_test(
|
||||
# vLLM needs a fresh new process without cuda initialization.
|
||||
# if we run HF first, the cuda initialization will be done and it
|
||||
# will hurt multiprocessing backend with fork method (the default method).
|
||||
-with vllm_runner(model, task="embedding", dtype=dtype,
|
||||
+with vllm_runner(model, task="embed", dtype=dtype,
|
||||
enforce_eager=True) as vllm_model:
|
||||
vllm_outputs = vllm_model.encode(input_texts, images=input_images)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user