[CI] Speed up model unit tests in CI (#24253)
Signed-off-by: Andrew Feldman <afeldman@redhat.com>
This commit is contained in:
@ -38,7 +38,7 @@ AITER_MODEL_LIST = [
|
||||
[
|
||||
pytest.param(
|
||||
"bigscience/bloom-560m", # bloom - testing alibi slopes
|
||||
marks=[pytest.mark.core_model],
|
||||
marks=[pytest.mark.core_model, pytest.mark.slow_test],
|
||||
),
|
||||
pytest.param(
|
||||
"openai-community/gpt2", # gpt2
|
||||
@ -49,7 +49,10 @@ AITER_MODEL_LIST = [
|
||||
pytest.param("EleutherAI/pythia-70m"), # gpt_neox
|
||||
pytest.param(
|
||||
"google/gemma-1.1-2b-it", # gemma
|
||||
marks=[pytest.mark.core_model, pytest.mark.cpu_model],
|
||||
marks=[
|
||||
pytest.mark.core_model, pytest.mark.cpu_model,
|
||||
pytest.mark.slow_test
|
||||
],
|
||||
),
|
||||
pytest.param(
|
||||
"zai-org/chatglm3-6b", # chatglm (text-only)
|
||||
@ -70,14 +73,17 @@ AITER_MODEL_LIST = [
|
||||
),
|
||||
pytest.param(
|
||||
"microsoft/phi-2", # phi
|
||||
marks=[pytest.mark.core_model],
|
||||
marks=[pytest.mark.core_model, pytest.mark.slow_test],
|
||||
),
|
||||
pytest.param(
|
||||
"Qwen/Qwen-7B-Chat", # qwen (text-only)
|
||||
),
|
||||
pytest.param(
|
||||
"Qwen/Qwen2.5-0.5B-Instruct", # qwen2
|
||||
marks=[pytest.mark.core_model, pytest.mark.cpu_model],
|
||||
marks=[
|
||||
pytest.mark.core_model, pytest.mark.cpu_model,
|
||||
pytest.mark.slow_test
|
||||
],
|
||||
),
|
||||
pytest.param(
|
||||
"Qwen/Qwen3-8B", # qwen (text-only)
|
||||
|
||||
@ -11,7 +11,10 @@ from vllm.platforms import current_platform
|
||||
"model",
|
||||
[
|
||||
pytest.param("jason9693/Qwen2.5-1.5B-apeach",
|
||||
marks=[pytest.mark.core_model, pytest.mark.cpu_model]),
|
||||
marks=[
|
||||
pytest.mark.core_model, pytest.mark.cpu_model,
|
||||
pytest.mark.slow_test
|
||||
]),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("dtype",
|
||||
|
||||
@ -19,7 +19,7 @@ from ...utils import check_embeddings_close
|
||||
# model code with bidirectional attention.
|
||||
# [Decoder-only]
|
||||
pytest.param("BAAI/bge-multilingual-gemma2",
|
||||
marks=[pytest.mark.core_model]),
|
||||
marks=[pytest.mark.core_model, pytest.mark.slow_test]),
|
||||
pytest.param(
|
||||
"intfloat/e5-mistral-7b-instruct",
|
||||
# CPU v1 doesn't support sliding window
|
||||
@ -29,7 +29,10 @@ from ...utils import check_embeddings_close
|
||||
# [Encoder-only]
|
||||
pytest.param(
|
||||
"BAAI/bge-base-en-v1.5",
|
||||
marks=[pytest.mark.core_model, pytest.mark.cpu_model],
|
||||
marks=[
|
||||
pytest.mark.core_model, pytest.mark.cpu_model,
|
||||
pytest.mark.slow_test
|
||||
],
|
||||
),
|
||||
pytest.param("sentence-transformers/all-MiniLM-L12-v2"),
|
||||
pytest.param("intfloat/multilingual-e5-small"),
|
||||
|
||||
@ -18,6 +18,26 @@ from .registry import (_TRANSFORMERS_BACKEND_MODELS, AUTO_EXAMPLE_MODELS,
|
||||
HF_EXAMPLE_MODELS, HfExampleModels)
|
||||
from .utils import dummy_hf_overrides
|
||||
|
||||
# This minimal list of model architectures is smaller than the total list of
# supported models. The intention is that in the "typical" regression testing
# scenario, we only test initializing these models. This subset was chosen
# to include representative examples of model varieties/workloads (conditional
# generation, sequence classification, causal LM, ranking, chat, reward model,
# multimodal, geospatial, voice, embedding, MTP).
MINIMAL_MODEL_ARCH_LIST = [
    "LlavaForConditionalGeneration",
    "Llama4ForConditionalGeneration",
    "BertForSequenceClassification",
    "Gemma3nForCausalLM",
    "JinaVLForRanking",
    "InternVLChatModel",
    "InternLM2ForRewardModel",
    "TransformersForMultimodalLM",
    "PrithviGeoSpatialMAE",
    "UltravoxModel",
    "DeepSeekMTPModel",
    "XLMRobertaModel",
]

# This list is the complement of the minimal list above. The intention is that
# this list of models is only tested in a "special case", i.e. most PRs should
# not test these models.
#
# The set difference is sorted before being used for parametrization: the
# iteration order of a set of strings is not stable across processes (hash
# randomization), which would make the generated test order nondeterministic
# and can cause parallel test workers to collect tests in different orders.
OTHER_MODEL_ARCH_LIST = sorted(
    set(HF_EXAMPLE_MODELS.get_supported_archs()) -
    set(MINIMAL_MODEL_ARCH_LIST))
|
||||
|
||||
|
||||
@create_new_process_for_each_test()
|
||||
def can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch,
|
||||
@ -101,8 +121,23 @@ def can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch,
|
||||
max_num_seqs=model_info.max_num_seqs)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model_arch", HF_EXAMPLE_MODELS.get_supported_archs())
|
||||
def test_can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch):
|
||||
@pytest.mark.parametrize("model_arch", MINIMAL_MODEL_ARCH_LIST)
def test_can_initialize_small_subset(model_arch: str,
                                     monkeypatch: pytest.MonkeyPatch):
    """Check that every architecture in the minimal subset can be initialized.

    Parametrized over ``MINIMAL_MODEL_ARCH_LIST``; each architecture is handed
    to ``can_initialize`` together with ``HF_EXAMPLE_MODELS``.
    """
    # Per the skip message, this test does not yet support V1-only models,
    # so this architecture is skipped rather than initialized.
    is_unsupported_arch = model_arch == "Lfm2ForCausalLM"
    if is_unsupported_arch:
        pytest.skip("Skipping until test supports V1-only models")

    can_initialize(model_arch, monkeypatch, HF_EXAMPLE_MODELS)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model_arch", OTHER_MODEL_ARCH_LIST)
def test_can_initialize_large_subset(model_arch: str,
                                     monkeypatch: pytest.MonkeyPatch):
    """Check that the remaining supported architectures can be initialized.

    Parametrized over ``OTHER_MODEL_ARCH_LIST``, i.e. the supported
    architectures that are not covered by the "small subset" test.
    """
    skip_reason = "Skipping until test supports V1-only models"
    # This architecture is skipped (see skip_reason) instead of initialized.
    if model_arch == "Lfm2ForCausalLM":
        pytest.skip(skip_reason)

    can_initialize(model_arch, monkeypatch, HF_EXAMPLE_MODELS)
|
||||
|
||||
Reference in New Issue
Block a user