Treat negative zero as equivalent to positive zero in sm90_sparse_gemm_compressor.hpp (#2110)
* Treat negative zero as zero in the sparse gemm compressor
  Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
* format
  Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
* Apply patch
  Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
* sm90_sparse_gemm_compressor.hpp
* test/unit/transform/CMakeLists.txt
* test/unit/transform/device/sm90_sparse_gemm_compressor_legacy.hpp
* include/cutlass/numeric_types.h
---------
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
Co-authored-by: Haicheng Wu <haichengw@nvidia.com>
Co-authored-by: Haicheng Wu <57973641+hwu36@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
3fe62887d8
commit
8c4d1dc47d
@ -27,6 +27,7 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
add_subdirectory(threadblock)
|
||||
add_subdirectory(device)
|
||||
add_subdirectory(kernel)
|
||||
|
||||
add_custom_target(
|
||||
|
||||
@ -155,7 +155,9 @@ namespace detail {
|
||||
int offset = chunk_idx * LogicalElemsAPerChunk + subchunk_idx * ElemsARawPerElementAMmaRaw + elem_idx;
|
||||
subchunk_elems[elem_idx] = offset < effective_elems ? tensorA(offset) : ElementA(0);
|
||||
|
||||
if (subchunk_elems[elem_idx] != ElementA(0)) {
|
||||
ElementA zero = static_cast<ElementA>(0);
|
||||
ElementA minus_zero = static_cast<ElementA>(ElementA(1) << cutlass::sizeof_bits_v<ElementA> - 1);
|
||||
if (subchunk_elems[elem_idx] != zero && subchunk_elems[elem_idx] != minus_zero) {
|
||||
if (non_zero_cnt >= PhysicalSubChunk) {
|
||||
#ifdef __CUDA_ARCH__
|
||||
asm volatile ("brkpt;\n" ::);
|
||||
|
||||
Reference in New Issue
Block a user