[Hardware][CPU] using current_platform.is_cpu (#9536)

2024-10-22 15:50:43 +08:00
parent 0d02747f2e
commit 3ddbe25502
17 changed files with 60 additions and 64 deletions
--- a/tests/worker/test_encoder_decoder_model_runner.py
+++ b/tests/worker/test_encoder_decoder_model_runner.py
@@ -5,8 +5,9 @@ import pytest
 import torch

 from vllm.engine.arg_utils import EngineArgs
+from vllm.platforms import current_platform
 from vllm.sequence import SamplingParams, SequenceData, SequenceGroupMetadata
-from vllm.utils import is_cpu, make_tensor_with_pad
+from vllm.utils import make_tensor_with_pad
 from vllm.worker.enc_dec_model_runner import EncoderDecoderModelRunner
 from vllm.worker.model_runner import _get_graph_batch_size

@@ -31,7 +32,7 @@ def _create_model_runner(model: str, *args,
    return model_runner


-@pytest.mark.skipif(condition=is_cpu(),
+@pytest.mark.skipif(condition=current_platform.is_cpu(),
                    reason="CPU backend is currently "
                    "unsupported for encoder/ "
                    "decoder models")
@@ -74,7 +75,7 @@ def test_empty_seq_group():
    assert return_seq_lens is None


-@pytest.mark.skipif(condition=is_cpu(),
+@pytest.mark.skipif(condition=current_platform.is_cpu(),
                    reason="CPU backend is currently "
                    "unsupported for encoder/ "
                    "decoder models")
@@ -264,7 +265,7 @@ def test_prepare_prompt(batch_size):
    assert torch.equal(actual, expected)


-@pytest.mark.skipif(condition=is_cpu(),
+@pytest.mark.skipif(condition=current_platform.is_cpu(),
                    reason="CPU backend is currently "
                    "unsupported for encoder/ "
                    "decoder models")
@@ -490,7 +491,7 @@ def test_prepare_decode(batch_size, multiple_seqs_per_seq_group):
 def test_prepare_decode_cuda_graph(batch_size, multiple_seqs_per_seq_group):
    """
    Tests that for encoder-decoder models with CUDA Graph capture and replay
-    enabled, the tensors used during the decode phase are correctly padded 
+    enabled, the tensors used during the decode phase are correctly padded
    for varying input batch sizes.
    """
    model_runner = _create_model_runner(