diff --git a/vllm/v1/executor/ray_utils.py b/vllm/v1/executor/ray_utils.py index 382f008266..9385e55b06 100644 --- a/vllm/v1/executor/ray_utils.py +++ b/vllm/v1/executor/ray_utils.py @@ -322,7 +322,7 @@ def initialize_ray_cluster( # Prevalidate GPU requirements before Ray processing if current_platform.is_cuda() and parallel_config.world_size > 1: - from vllm.utils import cuda_device_count_stateless + from vllm.utils.torch_utils import cuda_device_count_stateless available_gpus = cuda_device_count_stateless() if parallel_config.world_size > available_gpus: