[KV offload] Enable CPU KV offload on CUDA-alike platforms (#27770)
Signed-off-by: zhewenli <zhewenli@meta.com>
@@ -12,7 +12,6 @@ from tqdm import tqdm
 from vllm import LLM, SamplingParams, TokensPrompt
 from vllm.config import KVEventsConfig, KVTransferConfig
 from vllm.distributed.kv_events import BlockStored, KVEventBatch
-from vllm.platforms import current_platform

 CPU_BLOCK_SIZES = [16, 48]

@@ -64,9 +63,6 @@ class MockSubscriber:
         self.sub.close()


-@pytest.mark.skipif(
-    not current_platform.is_cuda(), reason="CPU offloading only supported on CUDA"
-)
 @pytest.mark.parametrize("cpu_block_size", CPU_BLOCK_SIZES)
 def test_cpu_offloading(cpu_block_size: int) -> None:
     """
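The skipif removed above is what had restricted this test to CUDA; with the platform gate relaxed (next hunk), the test can exercise CPU offload on any CUDA-alike GPU. For context, a minimal sketch of how a CPU-offloading run is typically wired up with the imports this test uses; the model name and the kv_connector_extra_config key names are illustrative assumptions, not taken from this diff:

from vllm import LLM, SamplingParams
from vllm.config import KVTransferConfig

# Route KV blocks through the CPU-offloading connector. The extra-config
# key names ("num_cpu_blocks", "block_size") are assumed for illustration.
kv_transfer_config = KVTransferConfig(
    kv_connector="OffloadingConnector",
    kv_role="kv_both",
    kv_connector_extra_config={"num_cpu_blocks": 128, "block_size": 16},
)
llm = LLM(model="facebook/opt-125m", kv_transfer_config=kv_transfer_config)
outputs = llm.generate(["Hello, my name is"], SamplingParams(max_tokens=16))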
@@ -51,9 +51,9 @@ class CPUOffloadingSpec(OffloadingSpec):
         self, kv_caches: dict[str, torch.Tensor]
     ) -> Iterator[tuple[type[LoadStoreSpec], type[LoadStoreSpec], OffloadingHandler]]:
         if not self._handler:
-            if not current_platform.is_cuda():
+            if not current_platform.is_cuda_alike():
                 raise Exception(
-                    "CPU Offloading is currently only supported on CUDA GPUs"
+                    "CPU Offloading is currently only supported on CUDA-alike GPUs"
                 )

             layer_names = list(kv_caches.keys())
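The core of the change is the platform gate above: current_platform.is_cuda_alike() reports True for ROCm as well as CUDA (whereas is_cuda() is CUDA-only), which is what lets CPU KV offload run on AMD GPUs too. A standalone sketch of the gate, with the helper name invented for illustration:

from vllm.platforms import current_platform

def _gate_cpu_offload() -> None:
    # Hypothetical helper mirroring the check in CPUOffloadingSpec above.
    # is_cuda_alike() is True on CUDA and ROCm builds; any other platform
    # (CPU, TPU, XPU, ...) still raises.
    if not current_platform.is_cuda_alike():
        raise Exception(
            "CPU Offloading is currently only supported on CUDA-alike GPUs"
        )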