Fix INT8 quantization error on Blackwell GPUs (SM100+) (#25935)

Signed-off-by: padg9912 <phone.and.desktop@gmail.com>
2025-09-30 22:19:53 -04:00
parent 1244948885
commit 99028fda44
2 changed files with 9 additions and 2 deletions
--- a/csrc/quantization/cutlass_w8a8/c3x/scaled_mm_helper.hpp
+++ b/csrc/quantization/cutlass_w8a8/c3x/scaled_mm_helper.hpp
@@ -25,7 +25,10 @@ void dispatch_scaled_mm(torch::Tensor& c, torch::Tensor const& a,
      if constexpr (!std::is_same_v<Int8Func, std::nullptr_t>) {
        int8_func(c, a, b, a_scales, b_scales, bias);
      } else {
-        TORCH_CHECK(false, "Int8 not supported for this architecture");
+        int32_t version_num = get_sm_version_num();
+        TORCH_CHECK(
+            false, "Int8 not supported on SM", version_num,
+            ". Use FP8 quantization instead, or run on older arch (SM < 100).");
      }
    }
  } else {