Treat negative zero as equivalent to positive zero in sm90_sparse_gemm_compressor.hpp (#2110)

* Treat negative zero as zero in the sparse gemm compressor

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>

* format

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>

* Apply patch

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>

* sm90_sparse_gemm_compressor.hpp

* test/unit/transform/CMakeLists.txt

* test/unit/transform/device/sm90_sparse_gemm_compressor_legacy.hpp

* include/cutlass/numeric_types.h

---------

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
Co-authored-by: Haicheng Wu <haichengw@nvidia.com>
Co-authored-by: Haicheng Wu <57973641+hwu36@users.noreply.github.com>
This commit is contained in:
Tyler Michael Smith
2025-03-20 22:44:17 -07:00
committed by GitHub
parent 3fe62887d8
commit 8c4d1dc47d
5 changed files with 68 additions and 24 deletions

View File

@@ -27,6 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add_subdirectory(threadblock)
add_subdirectory(device)
add_subdirectory(kernel)
add_custom_target(

View File

@@ -155,7 +155,9 @@ namespace detail {
int offset = chunk_idx * LogicalElemsAPerChunk + subchunk_idx * ElemsARawPerElementAMmaRaw + elem_idx;
subchunk_elems[elem_idx] = offset < effective_elems ? tensorA(offset) : ElementA(0);
-if (subchunk_elems[elem_idx] != ElementA(0)) {
+ElementA zero = static_cast<ElementA>(0);
+ElementA minus_zero = static_cast<ElementA>(ElementA(1) << cutlass::sizeof_bits_v<ElementA> - 1);
+if (subchunk_elems[elem_idx] != zero && subchunk_elems[elem_idx] != minus_zero) {
if (non_zero_cnt >= PhysicalSubChunk) {
#ifdef __CUDA_ARCH__
asm volatile ("brkpt;\n" ::);