Treat negative zero as equivalent to positive zero in sm90_sparse_gemm_compressor.hpp (#2110)

* Treat negative zero as zero in the sparse gemm compressor
  Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
* format
  Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
* Apply patch
  Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
* sm90_sparse_gemm_compressor.hpp
* test/unit/transform/CMakeLists.txt
* test/unit/transform/device/sm90_sparse_gemm_compressor_legacy.hpp
* include/cutlass/numeric_types.h

---------

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
Co-authored-by: Haicheng Wu <haichengw@nvidia.com>
Co-authored-by: Haicheng Wu <57973641+hwu36@users.noreply.github.com>
2025-03-20 22:44:17 -07:00
parent 3fe62887d8
commit 8c4d1dc47d
5 changed files with 68 additions and 24 deletions
--- a/test/unit/transform/CMakeLists.txt
+++ b/test/unit/transform/CMakeLists.txt
@@ -27,6 +27,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 add_subdirectory(threadblock)
+add_subdirectory(device)
 add_subdirectory(kernel)

 add_custom_target(
--- a/test/unit/transform/device/sm90_sparse_gemm_compressor_legacy.hpp
+++ b/test/unit/transform/device/sm90_sparse_gemm_compressor_legacy.hpp
@@ -155,7 +155,9 @@ namespace detail {
          int offset = chunk_idx * LogicalElemsAPerChunk + subchunk_idx * ElemsARawPerElementAMmaRaw + elem_idx;
          subchunk_elems[elem_idx] = offset < effective_elems ? tensorA(offset) : ElementA(0);
          
-          if (subchunk_elems[elem_idx] != ElementA(0)) {
+          ElementA zero = static_cast<ElementA>(0);
+          ElementA minus_zero = static_cast<ElementA>(ElementA(1) << cutlass::sizeof_bits_v<ElementA> - 1);
+          if (subchunk_elems[elem_idx] != zero && subchunk_elems[elem_idx] != minus_zero) {
            if (non_zero_cnt >= PhysicalSubChunk) {
              #ifdef  __CUDA_ARCH__
                asm volatile ("brkpt;\n" ::);