diff --git a/vllm/v1/executor/ray_utils.py b/vllm/v1/executor/ray_utils.py
index 382f008266..9385e55b06 100644
--- a/vllm/v1/executor/ray_utils.py
+++ b/vllm/v1/executor/ray_utils.py
@@ -322,7 +322,7 @@ def initialize_ray_cluster(
 
     # Prevalidate GPU requirements before Ray processing
     if current_platform.is_cuda() and parallel_config.world_size > 1:
-        from vllm.utils import cuda_device_count_stateless
+        from vllm.utils.torch_utils import cuda_device_count_stateless
 
         available_gpus = cuda_device_count_stateless()
         if parallel_config.world_size > available_gpus: