[CI] Speed up model unit tests in CI (#24253)

Signed-off-by: Andrew Feldman <afeldman@redhat.com>
2025-09-12 13:36:50 -04:00
parent 9d2a44606d
commit c8c42597ab
6 changed files with 123 additions and 21 deletions
--- a/tests/models/language/generation/test_common.py
+++ b/tests/models/language/generation/test_common.py
@ -38,7 +38,7 @@ AITER_MODEL_LIST = [
    [
        pytest.param(
            "bigscience/bloom-560m",  # bloom - testing alibi slopes
-            marks=[pytest.mark.core_model],
+            marks=[pytest.mark.core_model, pytest.mark.slow_test],
        ),
        pytest.param(
            "openai-community/gpt2",  # gpt2
@ -49,7 +49,10 @@ AITER_MODEL_LIST = [
        pytest.param("EleutherAI/pythia-70m"),  # gpt_neox
        pytest.param(
            "google/gemma-1.1-2b-it",  # gemma
-            marks=[pytest.mark.core_model, pytest.mark.cpu_model],
+            marks=[
+                pytest.mark.core_model, pytest.mark.cpu_model,
+                pytest.mark.slow_test
+            ],
        ),
        pytest.param(
            "zai-org/chatglm3-6b",  # chatglm (text-only)
@ -70,14 +73,17 @@ AITER_MODEL_LIST = [
        ),
        pytest.param(
            "microsoft/phi-2",  # phi
-            marks=[pytest.mark.core_model],
+            marks=[pytest.mark.core_model, pytest.mark.slow_test],
        ),
        pytest.param(
            "Qwen/Qwen-7B-Chat",  # qwen (text-only)
        ),
        pytest.param(
            "Qwen/Qwen2.5-0.5B-Instruct",  # qwen2
-            marks=[pytest.mark.core_model, pytest.mark.cpu_model],
+            marks=[
+                pytest.mark.core_model, pytest.mark.cpu_model,
+                pytest.mark.slow_test
+            ],
        ),
        pytest.param(
            "Qwen/Qwen3-8B",  # qwen (text-only)
--- a/tests/models/language/pooling/test_classification.py
+++ b/tests/models/language/pooling/test_classification.py
@ -11,7 +11,10 @@ from vllm.platforms import current_platform
    "model",
    [
        pytest.param("jason9693/Qwen2.5-1.5B-apeach",
-                     marks=[pytest.mark.core_model, pytest.mark.cpu_model]),
+                     marks=[
+                         pytest.mark.core_model, pytest.mark.cpu_model,
+                         pytest.mark.slow_test
+                     ]),
    ],
 )
@pytest.mark.parametrize("dtype",
--- a/tests/models/language/pooling/test_embedding.py
+++ b/tests/models/language/pooling/test_embedding.py
@ -19,7 +19,7 @@ from ...utils import check_embeddings_close
        # model code with bidirectional attention.
        # [Decoder-only]
        pytest.param("BAAI/bge-multilingual-gemma2",
-                     marks=[pytest.mark.core_model]),
+                     marks=[pytest.mark.core_model, pytest.mark.slow_test]),
        pytest.param(
            "intfloat/e5-mistral-7b-instruct",
            # CPU v1 doesn't support sliding window
@ -29,7 +29,10 @@ from ...utils import check_embeddings_close
        # [Encoder-only]
        pytest.param(
            "BAAI/bge-base-en-v1.5",
-            marks=[pytest.mark.core_model, pytest.mark.cpu_model],
+            marks=[
+                pytest.mark.core_model, pytest.mark.cpu_model,
+                pytest.mark.slow_test
+            ],
        ),
        pytest.param("sentence-transformers/all-MiniLM-L12-v2"),
        pytest.param("intfloat/multilingual-e5-small"),
--- a/tests/models/test_initialization.py
+++ b/tests/models/test_initialization.py
@ -18,6 +18,26 @@ from .registry import (_TRANSFORMERS_BACKEND_MODELS, AUTO_EXAMPLE_MODELS,
                       HF_EXAMPLE_MODELS, HfExampleModels)
 from .utils import dummy_hf_overrides

+# This minimal list of model architectures is smaller than the total list of
+# supported models. The intention is that in the "typical" regression testing
+# scenario, we only test initializing these models. This subset was chosen
+# to include representative examples of model varieties/workloads (conditional
+# generation, sequence classification, causal LM, ranking, chat, reward model,
+# multimodal, geospatial, voice, embedding, MTP)
+MINIMAL_MODEL_ARCH_LIST = [
+    "LlavaForConditionalGeneration", "Llama4ForConditionalGeneration",
+    "BertForSequenceClassification", "Gemma3nForCausalLM", "JinaVLForRanking",
+    "InternVLChatModel", "InternLM2ForRewardModel",
+    "TransformersForMultimodalLM", "PrithviGeoSpatialMAE", "UltravoxModel",
+    "DeepSeekMTPModel", "XLMRobertaModel"
+]
+
+# This list is the complement of the minimal list above. The intention is that
+# this list of models is only tested in a "special case" i.e. most PRs should
+# not test these models
+OTHER_MODEL_ARCH_LIST = (set(HF_EXAMPLE_MODELS.get_supported_archs()) -
+                         set(MINIMAL_MODEL_ARCH_LIST))
+

@create_new_process_for_each_test()
 def can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch,
@ -101,8 +121,23 @@ def can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch,
            max_num_seqs=model_info.max_num_seqs)


-@pytest.mark.parametrize("model_arch", HF_EXAMPLE_MODELS.get_supported_archs())
-def test_can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch):
+@pytest.mark.parametrize("model_arch", MINIMAL_MODEL_ARCH_LIST)
+def test_can_initialize_small_subset(model_arch: str,
+                                     monkeypatch: pytest.MonkeyPatch):
+    """Test initializing small subset of supported models"""
+    if model_arch == "Lfm2ForCausalLM":
+        pytest.skip("Skipping until test supports V1-only models")
+    can_initialize(model_arch, monkeypatch, HF_EXAMPLE_MODELS)
+
+
+@pytest.mark.parametrize("model_arch", OTHER_MODEL_ARCH_LIST)
+def test_can_initialize_large_subset(model_arch: str,
+                                     monkeypatch: pytest.MonkeyPatch):
+    """Test initializing large subset of supported models
+    
+    This test covers the complement of the tests covered in the "small subset"
+    test.
+    """
    if model_arch == "Lfm2ForCausalLM":
        pytest.skip("Skipping until test supports V1-only models")
    can_initialize(model_arch, monkeypatch, HF_EXAMPLE_MODELS)