commit 7721ef1786
parent 8369b7c2a9
Author: Li, Jiang (committed via GitHub)
Date: 2025-07-08 13:13:44 +08:00

[CI/Build][CPU] Fix CPU CI and remove all CPU V0 files (#20560)

Signed-off-by: jiang1.li <jiang1.li@intel.com>

9 changed files with 785 additions and 839 deletions
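
Several hunks below reference a run_with_both_engines fixture and skip_v0 /
skip_v1 marks; together they run each test once per engine and let a single
param opt out of one of them. A minimal sketch of how such a fixture can
work (the body here is an assumption for illustration, not vLLM's actual
implementation):

    import pytest

    @pytest.fixture(params=["0", "1"], ids=["v0", "v1"])
    def run_with_both_engines(request, monkeypatch):
        # Hypothetical reconstruction: honor skip_v0/skip_v1 marks on the
        # requesting test, then pin the engine via the env var vLLM reads.
        if request.param == "0" and request.node.get_closest_marker("skip_v0"):
            pytest.skip("test is marked skip_v0")
        if request.param == "1" and request.node.get_closest_marker("skip_v1"):
            pytest.skip("test is marked skip_v1")
        monkeypatch.setenv("VLLM_USE_V1", request.param)
        yield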

View File

@@ -39,7 +39,7 @@ AITER_MODEL_LIST = [
     [
         pytest.param(
             "bigscience/bloom-560m",  # bloom - testing alibi slopes
-            marks=[pytest.mark.core_model, pytest.mark.cpu_model],
+            marks=[pytest.mark.core_model],
         ),
         pytest.param(
             "openai-community/gpt2",  # gpt2
@@ -87,7 +87,11 @@ AITER_MODEL_LIST = [
         pytest.param("bigcode/starcoder2-3b"),  # starcoder2
         pytest.param(
             "TitanML/tiny-mixtral",  # mixtral
-            marks=[pytest.mark.core_model, pytest.mark.cpu_model],
+            marks=[pytest.mark.core_model],
         ),
+        pytest.param(
+            "Qwen/Qwen1.5-MoE-A2.7B-Chat",
+            marks=[pytest.mark.cpu_model],
+        )
     ])
 @pytest.mark.parametrize("max_tokens", [32])
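
Net effect of the two hunks above: bloom-560m and tiny-mixtral drop out of
the CPU suite, while Qwen1.5-MoE-A2.7B-Chat joins it. For reference, a
minimal runnable sketch of the marker mechanism being edited (the
"pytest -m cpu_model" selection expression is an assumption about how the
CPU pipeline filters tests, and the test body is a placeholder):

    import pytest

    @pytest.mark.parametrize("model", [
        pytest.param("openai-community/gpt2"),  # unmarked: runs in every suite
        pytest.param(
            "Qwen/Qwen1.5-MoE-A2.7B-Chat",
            # collected only when the run selects tests with -m cpu_model
            marks=[pytest.mark.cpu_model],
        ),
    ])
    def test_model_loads(model: str) -> None:
        assert model  # placeholder; the real tests generate and compare text

Custom marks such as cpu_model and core_model should be registered (e.g. in
pyproject.toml or pytest.ini) so pytest does not warn about unknown marks.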

View File

@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import os
 from typing import Optional
 
 import pytest
@@ -29,8 +28,10 @@ def v1(run_with_both_engines):
     # [Decoder-only]
     pytest.param("BAAI/bge-multilingual-gemma2",
                  marks=[pytest.mark.core_model]),
-    pytest.param("intfloat/e5-mistral-7b-instruct",
-                 marks=[pytest.mark.core_model, pytest.mark.cpu_model]),
+    pytest.param(
+        "intfloat/e5-mistral-7b-instruct",
+        # CPU v1 doesn't support sliding window
+        marks=[pytest.mark.core_model]),
     # the qwen models interfere with each other (see PR
     # https://github.com/vllm-project/vllm/pull/18720).
     # To avoid this problem, for now we skip v0 since it will be
@@ -38,11 +39,13 @@ def v1(run_with_both_engines):
     pytest.param("ssmits/Qwen2-7B-Instruct-embed-base",
                  marks=[pytest.mark.skip_v0, pytest.mark.cpu_model]),
     # [Encoder-only]
-    pytest.param("BAAI/bge-base-en-v1.5",
-                 marks=[
-                     pytest.mark.core_model, pytest.mark.cpu_model,
-                     pytest.mark.skip_v1
-                 ]),
+    pytest.param(
+        "BAAI/bge-base-en-v1.5",
+        marks=[
+            # CPU only supports V1
+            pytest.mark.core_model,
+            pytest.mark.skip_v1
+        ]),
     pytest.param("sentence-transformers/all-MiniLM-L12-v2",
                  marks=[pytest.mark.skip_v1]),
     pytest.param("intfloat/multilingual-e5-small",
@@ -61,10 +64,6 @@ def test_models(
     model,
     monkeypatch,
 ) -> None:
-    if model == "intfloat/e5-mistral-7b-instruct" and current_platform.is_cpu(
-    ) and os.environ.get("VLLM_USE_V1", "0") == "1":
-        pytest.skip("CPU V1 doesn't support sliding window")
-
     if model == "BAAI/bge-multilingual-gemma2" and current_platform.is_rocm():
         # ROCm Triton FA does not currently support sliding window attention
         # switch to use ROCm CK FA backend
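
The deleted guard above duplicated what the marker changes already achieve:
once pytest.mark.cpu_model is gone from the e5-mistral param, a CPU pipeline
that selects tests with -m cpu_model never collects that case, so no runtime
check is needed. A small sketch contrasting the two styles (the test body is
a placeholder):

    import os

    import pytest

    def test_runtime_skip_style() -> None:
        # Before this commit: the skip is decided inside the test body, so
        # the test is still collected and its fixtures set up before bailing.
        if os.environ.get("VLLM_USE_V1", "0") == "1":
            pytest.skip("CPU V1 doesn't support sliding window")

    # After this commit: the param simply omits pytest.mark.cpu_model, so a
    # marker-filtered CPU run deselects the model at collection time instead.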

View File

@@ -1,5 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import os
+
 import pytest
 import torch
 import torch.nn.functional as F
@@ -84,6 +86,9 @@ def test_prm_models(
     dtype: str,
     monkeypatch,
 ) -> None:
+    if current_platform.is_cpu() and os.environ.get("VLLM_USE_V1", "0") == "0":
+        pytest.skip("CPU only supports V1")
+
     if current_platform.is_rocm():
         # ROCm Triton FA does not currently support sliding window attention
         # switch to use ROCm CK FA backend
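
The guard added here is the pattern that replaces the per-model checks
removed from the embedding tests: skip the whole test early when the CPU
platform is asked to run the legacy V0 engine. It could be factored into a
small helper; a sketch (the helper name is hypothetical, while
current_platform is vLLM's real platform probe):

    import os

    import pytest
    from vllm.platforms import current_platform

    def skip_if_cpu_v0() -> None:
        # The CPU backend only supports the V1 engine, so bail out whenever
        # the suite is launched with VLLM_USE_V1=0.
        if (current_platform.is_cpu()
                and os.environ.get("VLLM_USE_V1", "0") == "0"):
            pytest.skip("CPU only supports V1")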