diff --git a/tests/models/language/generation_ppl_test/ppl_utils.py b/tests/models/language/generation_ppl_test/ppl_utils.py index dcef365e99..43f6066b1c 100644 --- a/tests/models/language/generation_ppl_test/ppl_utils.py +++ b/tests/models/language/generation_ppl_test/ppl_utils.py @@ -51,7 +51,6 @@ def wikitext_ppl_test( gpu_memory_utilization=0.7, max_model_len=max_length, max_num_seqs=1, - enforce_eager=True, **vllm_extra_kwargs, ) as vllm_model: # Use max_num_seqs=1 to avoid OOM, diff --git a/tests/models/language/pooling_mteb_test/mteb_utils.py b/tests/models/language/pooling_mteb_test/mteb_utils.py index a4a7f1b48d..d96dc90416 100644 --- a/tests/models/language/pooling_mteb_test/mteb_utils.py +++ b/tests/models/language/pooling_mteb_test/mteb_utils.py @@ -192,7 +192,6 @@ def mteb_test_embed_models( model_info.name, runner="pooling", max_model_len=None, - enforce_eager=True, **vllm_extra_kwargs, ) as vllm_model: model_config = vllm_model.llm.llm_engine.model_config @@ -349,7 +348,6 @@ def mteb_test_rerank_models( runner="pooling", max_model_len=None, max_num_seqs=8, - enforce_eager=True, **vllm_extra_kwargs, ) as vllm_model: model_config = vllm_model.llm.llm_engine.model_config