Minor

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
2024-12-21 17:28:21 -08:00
parent 8a4180c8b6
commit 03b1e6fdbd
1 changed file with 10 additions and 1 deletion
--- a/csrc/prepare_inputs/copy_subranges.cu
+++ b/csrc/prepare_inputs/copy_subranges.cu
@@ -55,7 +55,16 @@ void copy_subranges(torch::Tensor& matrix_src, torch::Tensor& matrix_diff,

  // One thread block per row.
  int blocks = n;
-  int threads = 1024;
+  int threads;
+  if (blocks < 128) {
+    threads = 1024;
+  } else if (blocks < 256) {
+    threads = 512;
+  } else if (blocks < 512) {
+    threads = 256;
+  } else {
+    threads = 128;
+  }
  const at::cuda::OptionalCUDAGuard device_guard(device_of(matrix_tgt));
  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  vllm::copy_subranges_kernel<<<blocks, threads, 0, stream>>>(