CUTLASS 3.2.1 (#1113)

* Updates for 3.2.1 release.

* Minor fix in gemm op profiler for raster order.

* Add scheduler mapping for raster order in the kernels.
This commit is contained in:
ANIKET SHIVAM
2023-09-26 14:24:26 -07:00
committed by GitHub
parent e0aaa3c3b3
commit 90d3b0fb18
428 changed files with 22253 additions and 21762 deletions

View File

@@ -42,8 +42,6 @@
#include "conv2d_testbed_interleaved.h"
#if defined(CUTLASS_ARCH_MMA_SM75_SUPPORTED)
TEST(SM75_Device_Conv2d_Fprop_Analytic_ImplicitGemm_s4ncxhwx_s4cxrskx_s4ncxhwx_tensor_op_s32,
128x128_128x2_64x64x128) {

View File

@@ -42,7 +42,6 @@
#include "conv2d_testbed.h"
#if defined(CUTLASS_ARCH_MMA_SM75_SUPPORTED)
TEST(SM75_Device_Conv2d_Fprop_Analytic_ImplicitGemm_s4nhwc_s4nhwc_s32nhwc_tensor_op_s32,
128x128_64x3_64x64x64) {

View File

@@ -42,7 +42,6 @@
#include "conv2d_testbed_interleaved.h"
#if defined(CUTLASS_ARCH_MMA_SM75_SUPPORTED)
TEST(SM75_Device_Conv2d_Fprop_Analytic_ImplicitGemm_s8ncxhwx_s8cxrskx_s8ncxhwx_tensor_op_s32,
128x128_64x2_64x64x64) {

View File

@@ -42,7 +42,6 @@
#include "conv2d_testbed.h"
#if defined(CUTLASS_ARCH_MMA_SM75_SUPPORTED)
TEST(SM75_Device_Conv2d_Fprop_Analytic_ImplicitGemm_s8nhwc_s8nhwc_s32nhwc_tensor_op_s32,
128x128_64x2_64x64x64) {