Release v4.0.0 (#2294)

This commit is contained in:
Kihiro Bando
2025-05-13 15:55:29 -04:00
committed by GitHub
parent ad7b2f5e84
commit f115c3f854
299 changed files with 51495 additions and 4413 deletions

View File

@ -658,7 +658,6 @@ cutlass_test_unit_gemm_device_add_executable(
# Syrk SM80 complex f64 tests
syrk_cf64n_cf64t_tensor_op_f64_sm80.cu
syrk_cf64n_cf64n_tensor_op_f64_sm80.cu
syrk_cf64n_cf64t_tensor_op_f64_gaussian_sm80.cu
# Syrk SM80 complex f32 tests
syrk_cf32n_cf32t_tensor_op_f32_sm80.cu
@ -703,7 +702,6 @@ cutlass_test_unit_gemm_device_add_executable(
# Trmm SM80 complex f64 tests
trmm_cf64n_cf64n_cf64t_tensor_op_f64_sm80.cu
trmm_cf64n_cf64n_cf64t_tensor_op_f64_gaussian_sm80.cu
# Trmm SM80 complex f32 tests
trmm_cf32n_cf32n_cf32t_tensor_op_f32_sm80.cu
@ -776,7 +774,6 @@ cutlass_test_unit_gemm_device_add_executable(
# Symm SM80 complex f64 tests
symm_cf64n_cf64n_cf64n_tensor_op_ls_f64_sm80.cu
symm_cf64n_cf64n_cf64n_tensor_op_rs_f64_sm80.cu
symm_cf64n_cf64n_cf64n_tensor_op_ls_f64_gaussian_sm80.cu
# Symm SM80 complex f32 tests
symm_cf32n_cf32n_tensor_op_f32_ls_sm80.cu
@ -793,7 +790,6 @@ cutlass_test_unit_gemm_device_add_executable(
# Hemm SM80 complex f64 tests
hemm_cf64h_cf64n_cf64n_tensor_op_ls_f64_sm80.cu
hemm_cf64h_cf64n_cf64n_tensor_op_rs_f64_sm80.cu
hemm_cf64h_cf64n_cf64n_tensor_op_ls_f64_gaussian_sm80.cu
# Hemm SM80 complex f32 tests
hemm_cf32h_cf32n_tensor_op_f32_ls_sm80.cu
@ -805,6 +801,20 @@ cutlass_test_unit_gemm_device_add_executable(
hemm_cf64_cf64_cf64_tensor_op_f64_sm90.cu
)
# Register the four *_gaussian_sm80.cu BLAS3 tests (syrk, trmm, symm, hemm) as
# their own executable, cutlass_test_unit_gemm_device_blas3_gaussian. The target
# is only created when CUTLASS_NVCC_ARCHS does not match any of the listed
# 101/103 architecture spellings.
# NOTE(review): MATCHES is a regular-expression search, so the bare `101` and
# `103` alternatives already match the `a`/`f`-suffixed spellings as substrings.
if (NOT CUTLASS_NVCC_ARCHS MATCHES 101|101a|101f|103|103a|103f)
cutlass_test_unit_gemm_device_add_executable(
cutlass_test_unit_gemm_device_blas3_gaussian
# NOTE(review): BATCH_SOURCES / BATCH_SIZE are interpreted by the helper, which
# is defined elsewhere; presumably they control how the sources below are
# grouped for compilation - confirm against the helper's definition.
BATCH_SOURCES ON
BATCH_SIZE 4
syrk_cf64n_cf64t_tensor_op_f64_gaussian_sm80.cu
trmm_cf64n_cf64n_cf64t_tensor_op_f64_gaussian_sm80.cu
symm_cf64n_cf64n_cf64n_tensor_op_ls_f64_gaussian_sm80.cu
hemm_cf64h_cf64n_cf64n_tensor_op_ls_f64_gaussian_sm80.cu
)
endif()
cutlass_test_unit_gemm_device_add_executable(
cutlass_test_unit_gemm_device_grouped_blas3
@ -930,6 +940,13 @@ cutlass_test_unit_gemm_device_add_executable(
# 8 unit tests
sm100_gemm_f6_f6_f32_tensor_op_f32_ptr_array.cu
)
# Register a dedicated test executable, cutlass_test_unit_blockwise_gemm_sm100,
# built from the single source sm100_gemm_f8_f8_f8_tensor_op_f32_blockwise.cu.
# NOTE(review): this call sits inside a conditional whose opening `if` is not
# visible here; confirm which architectures that guard enables.
cutlass_test_unit_gemm_device_add_executable(
cutlass_test_unit_blockwise_gemm_sm100
sm100_gemm_f8_f8_f8_tensor_op_f32_blockwise.cu
)
endif()

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 3.2
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x512_0_vs64_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x512_0_vs64_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,7 +512,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -588,7 +588,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 6.2
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x512_0_vs64_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x512_0_vs64_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -664,8 +664,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -675,8 +675,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -686,8 +686,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -697,8 +697,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 3.2
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x512_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x512_0_vs64_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x512_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x512_0_vs64_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -708,8 +708,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -719,8 +719,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -730,8 +730,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -741,8 +741,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 6.2
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x512_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x512_0_vs64_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x512_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x512_0_vs64_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -753,7 +753,7 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x128x256_0_vs64_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x128x256_0_vs64_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -829,7 +829,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x192x256_0_vs64_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x192x256_0_vs64_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -905,7 +905,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x256_0_vs64_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x256_0_vs64_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -981,7 +981,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 3.2
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x512_0_vs64_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x512_0_vs64_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1057,7 +1057,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x128x256_0_vs64_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x128x256_0_vs64_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1133,7 +1133,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x192x256_0_vs64_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x192x256_0_vs64_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1209,7 +1209,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x256_0_vs64_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x256_0_vs64_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1285,7 +1285,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 6.2
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x512_0_vs64_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x512_0_vs64_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1361,8 +1361,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x128x256_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x128x256_0_vs64_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x128x256_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x128x256_0_vs64_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1372,8 +1372,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x192x256_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x192x256_0_vs64_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x192x256_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x192x256_0_vs64_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1383,8 +1383,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x256_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x256_0_vs64_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x256_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x256_0_vs64_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1394,8 +1394,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 3.2
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x512_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x512_0_vs64_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x512_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x512_0_vs64_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1405,8 +1405,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x128x256_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x128x256_0_vs64_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x128x256_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x128x256_0_vs64_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1416,8 +1416,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x192x256_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x192x256_0_vs64_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x192x256_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x192x256_0_vs64_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1427,8 +1427,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x256_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x256_0_vs64_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x256_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x256_0_vs64_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1438,8 +1438,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 6.2
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x512_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x512_0_vs64_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x512_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x512_0_vs64_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 3.2
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x512_0_vs64_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x512_0_vs64_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,7 +512,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -588,7 +588,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 6.2
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x512_0_vs64_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x512_0_vs64_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -664,8 +664,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -675,8 +675,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -686,8 +686,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -697,8 +697,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 3.2
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x512_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x512_0_vs64_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x512_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x512_0_vs64_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -708,8 +708,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -719,8 +719,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -730,8 +730,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -741,8 +741,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 6.2
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x512_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x512_0_vs64_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x512_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x512_0_vs64_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -753,7 +753,7 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x128x256_0_vs64_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x128x256_0_vs64_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -829,7 +829,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x192x256_0_vs64_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x192x256_0_vs64_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -905,7 +905,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x256_0_vs64_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x256_0_vs64_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -981,7 +981,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 3.2
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x512_0_vs64_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x512_0_vs64_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1057,7 +1057,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x128x256_0_vs64_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x128x256_0_vs64_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1133,7 +1133,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x192x256_0_vs64_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x192x256_0_vs64_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1209,7 +1209,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x256_0_vs64_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x256_0_vs64_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1285,7 +1285,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 6.2
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x512_0_vs64_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x512_0_vs64_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1361,8 +1361,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x128x256_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x128x256_0_vs64_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x128x256_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x128x256_0_vs64_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1372,8 +1372,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x192x256_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x192x256_0_vs64_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x192x256_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x192x256_0_vs64_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1383,8 +1383,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x256_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x256_0_vs64_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x256_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x256_0_vs64_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1394,8 +1394,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 3.2
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x512_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x512_0_vs64_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x512_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x512_0_vs64_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1405,8 +1405,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x128x256_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x128x256_0_vs64_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x128x256_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x128x256_0_vs64_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1416,8 +1416,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x192x256_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x192x256_0_vs64_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x192x256_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x192x256_0_vs64_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1427,8 +1427,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x256_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x256_0_vs64_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x256_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x256_0_vs64_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1438,8 +1438,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 6.2
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x512_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x512_0_vs64_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x512_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x512_0_vs64_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,8 +512,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -523,8 +523,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -534,8 +534,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -545,8 +545,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -556,8 +556,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -567,8 +567,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -578,7 +578,7 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -654,7 +654,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -730,7 +730,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -806,7 +806,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -882,7 +882,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -958,7 +958,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1034,8 +1034,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1045,8 +1045,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1056,8 +1056,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1067,8 +1067,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1078,8 +1078,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1089,8 +1089,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f16_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,8 +512,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -523,8 +523,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -534,8 +534,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -545,8 +545,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -556,8 +556,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -567,8 +567,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f16_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -578,7 +578,7 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -654,7 +654,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -730,7 +730,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -806,7 +806,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -882,7 +882,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -958,7 +958,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1034,8 +1034,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1045,8 +1045,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1056,8 +1056,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1067,8 +1067,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1078,8 +1078,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1089,8 +1089,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x128x256_0_vs64_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x128x256_0_vs64_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x192x256_0_vs64_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x192x256_0_vs64_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x256_0_vs64_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x256_0_vs64_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 3.2
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x512_0_vs64_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x512_0_vs64_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x128x256_0_vs64_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x128x256_0_vs64_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x192x256_0_vs64_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x192x256_0_vs64_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,7 +512,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x256_0_vs64_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x256_0_vs64_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -588,7 +588,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 6.2
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x512_0_vs64_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x512_0_vs64_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -664,8 +664,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x128x256_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x128x256_0_vs64_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x128x256_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x128x256_0_vs64_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -675,8 +675,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x192x256_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x192x256_0_vs64_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x192x256_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x192x256_0_vs64_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -686,8 +686,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x256_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x256_0_vs64_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x256_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x256_0_vs64_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -697,8 +697,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 3.2
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x512_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x512_0_vs64_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x512_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x512_0_vs64_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -708,8 +708,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x128x256_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x128x256_0_vs64_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x128x256_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x128x256_0_vs64_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -719,8 +719,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x192x256_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x192x256_0_vs64_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x192x256_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x192x256_0_vs64_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -730,8 +730,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x256_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x256_0_vs64_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x256_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x256_0_vs64_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -741,8 +741,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 6.2
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x512_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x512_0_vs64_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x512_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x512_0_vs64_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -753,7 +753,7 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -829,7 +829,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -905,7 +905,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -981,7 +981,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 3.2
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x512_0_vs64_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x512_0_vs64_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1057,7 +1057,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1133,7 +1133,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1209,7 +1209,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1285,7 +1285,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 6.2
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x512_0_vs64_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x512_0_vs64_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1361,8 +1361,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1372,8 +1372,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1383,8 +1383,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1394,8 +1394,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 3.2
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x512_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x512_0_vs64_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x512_0_vs64_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x512_0_vs64_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1405,8 +1405,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1416,8 +1416,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1427,8 +1427,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1438,8 +1438,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 6.2
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x512_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x512_0_vs64_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x512_0_vs64_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x512_0_vs64_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x128x256_0_vs64_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x128x256_0_vs64_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x192x256_0_vs64_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x192x256_0_vs64_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x256_0_vs64_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x256_0_vs64_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 3.2
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x512_0_vs64_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x512_0_vs64_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x128x256_0_vs64_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x128x256_0_vs64_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x192x256_0_vs64_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x192x256_0_vs64_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,7 +512,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x256_0_vs64_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x256_0_vs64_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -588,7 +588,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 6.2
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x512_0_vs64_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x512_0_vs64_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -664,8 +664,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x128x256_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x128x256_0_vs64_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x128x256_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x128x256_0_vs64_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -675,8 +675,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x192x256_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x192x256_0_vs64_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x192x256_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x192x256_0_vs64_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -686,8 +686,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x256_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x256_0_vs64_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x256_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x256_0_vs64_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -697,8 +697,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 3.2
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x512_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x512_0_vs64_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x512_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x512_0_vs64_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -708,8 +708,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x128x256_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x128x256_0_vs64_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x128x256_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x128x256_0_vs64_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -719,8 +719,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x192x256_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x192x256_0_vs64_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x192x256_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x192x256_0_vs64_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -730,8 +730,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x256_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x256_0_vs64_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x256_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x256_0_vs64_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -741,8 +741,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 6.2
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x512_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x512_0_vs64_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x512_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x512_0_vs64_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -753,7 +753,7 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -829,7 +829,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -905,7 +905,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -981,7 +981,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 3.2
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x512_0_vs64_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x512_0_vs64_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1057,7 +1057,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1133,7 +1133,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1209,7 +1209,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1286,7 +1286,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 6.2
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x512_0_vs64_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x512_0_vs64_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1363,8 +1363,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1374,8 +1374,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1385,8 +1385,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1396,8 +1396,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 3.2
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x512_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x512_0_vs64_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x512_0_vs64_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x512_0_vs64_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1407,8 +1407,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1418,8 +1418,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1429,8 +1429,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1440,8 +1440,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32
}
// 6.2
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x512_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x512_0_vs64_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x512_0_vs64_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x512_0_vs64_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,8 +512,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -523,8 +523,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -534,8 +534,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -545,8 +545,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -556,8 +556,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -567,8 +567,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_f32_f32_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -578,7 +578,7 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -654,7 +654,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -730,7 +730,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -806,7 +806,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -882,7 +882,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -958,7 +958,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1034,8 +1034,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1045,8 +1045,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1056,8 +1056,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1067,8 +1067,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1078,8 +1078,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1089,8 +1089,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,8 +512,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -523,8 +523,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -534,8 +534,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -545,8 +545,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -556,8 +556,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -567,8 +567,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -578,7 +578,7 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_
}
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -654,7 +654,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -730,7 +730,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -806,7 +806,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -882,7 +882,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -958,7 +958,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1034,8 +1034,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1045,8 +1045,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1056,8 +1056,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1067,8 +1067,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1078,8 +1078,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1089,8 +1089,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe2m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,8 +512,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -523,8 +523,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -534,8 +534,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -545,8 +545,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -556,8 +556,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -567,8 +567,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -578,7 +578,7 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_
}
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -654,7 +654,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -730,7 +730,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -806,7 +806,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -882,7 +882,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -958,7 +958,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1034,8 +1034,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1045,8 +1045,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1056,8 +1056,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1067,8 +1067,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1078,8 +1078,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1089,8 +1089,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m1_ue8m0xe4m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,8 +512,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -523,8 +523,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -534,8 +534,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -545,8 +545,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -556,8 +556,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -567,8 +567,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -578,7 +578,7 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_
}
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -654,7 +654,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -730,7 +730,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -806,7 +806,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -882,7 +882,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -958,7 +958,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1034,8 +1034,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1045,8 +1045,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1056,8 +1056,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1067,8 +1067,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1078,8 +1078,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1089,8 +1089,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,8 +512,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -523,8 +523,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -534,8 +534,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -545,8 +545,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -556,8 +556,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -567,8 +567,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -578,7 +578,7 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_
}
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -654,7 +654,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -730,7 +730,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -806,7 +806,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -882,7 +882,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -958,7 +958,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1034,8 +1034,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1045,8 +1045,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1056,8 +1056,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1067,8 +1067,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1078,8 +1078,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1089,8 +1089,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe3m2_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,8 +512,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -523,8 +523,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -534,8 +534,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -545,8 +545,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -556,8 +556,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -567,8 +567,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -578,7 +578,7 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_
}
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -654,7 +654,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -730,7 +730,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -806,7 +806,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -882,7 +882,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -958,7 +958,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1034,8 +1034,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1045,8 +1045,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1056,8 +1056,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1067,8 +1067,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1078,8 +1078,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1089,8 +1089,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe2m3_ue8m0xe4m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,8 +512,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -523,8 +523,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -534,8 +534,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -545,8 +545,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -556,8 +556,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -567,8 +567,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -578,7 +578,7 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -654,7 +654,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -730,7 +730,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -806,7 +806,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -882,7 +882,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -958,7 +958,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1034,8 +1034,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1045,8 +1045,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1056,8 +1056,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1067,8 +1067,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1078,8 +1078,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1089,8 +1089,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f16_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,8 +512,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -523,8 +523,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -534,8 +534,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -545,8 +545,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -556,8 +556,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -567,8 +567,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f16_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -578,7 +578,7 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -654,7 +654,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -730,7 +730,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -806,7 +806,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -882,7 +882,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -958,7 +958,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1034,8 +1034,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1045,8 +1045,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1056,8 +1056,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1067,8 +1067,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1078,8 +1078,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1089,8 +1089,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,8 +512,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -523,8 +523,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -534,8 +534,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -545,8 +545,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -556,8 +556,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -567,8 +567,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_f32_f32_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -578,7 +578,7 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -654,7 +654,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -730,7 +730,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -806,7 +806,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -882,7 +882,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -958,7 +958,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1034,8 +1034,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1045,8 +1045,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1056,8 +1056,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1067,8 +1067,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1078,8 +1078,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1089,8 +1089,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m1_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,8 +512,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -523,8 +523,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -534,8 +534,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -545,8 +545,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -556,8 +556,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -567,8 +567,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -578,7 +578,7 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_
}
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -654,7 +654,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -730,7 +730,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -806,7 +806,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -882,7 +882,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -958,7 +958,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -1034,8 +1034,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1045,8 +1045,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1056,8 +1056,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1067,8 +1067,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1078,8 +1078,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -1089,8 +1089,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe2m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x128x256_0_vs64_tnn_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x128x256_0_vs64_tnn_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x192x256_0_vs64_tnn_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x192x256_0_vs64_tnn_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x256x256_0_vs64_tnn_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x256x256_0_vs64_tnn_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x128x256_0_vs64_tnn_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x128x256_0_vs64_tnn_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x192x256_0_vs64_tnn_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x192x256_0_vs64_tnn_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x256x256_0_vs64_tnn_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x256x256_0_vs64_tnn_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,8 +512,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x128x256_0_vs64_tnn_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x128x256_0_vs64_tnn_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x128x256_0_vs64_tnn_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x128x256_0_vs64_tnn_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -523,8 +523,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x192x256_0_vs64_tnn_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x192x256_0_vs64_tnn_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x192x256_0_vs64_tnn_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x192x256_0_vs64_tnn_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -534,8 +534,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x256x256_0_vs64_tnn_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x256x256_0_vs64_tnn_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x256x256_0_vs64_tnn_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x256x256_0_vs64_tnn_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -545,8 +545,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x128x256_0_vs64_tnn_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x128x256_0_vs64_tnn_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x128x256_0_vs64_tnn_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x128x256_0_vs64_tnn_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -556,8 +556,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x192x256_0_vs64_tnn_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x192x256_0_vs64_tnn_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x192x256_0_vs64_tnn_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x192x256_0_vs64_tnn_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -567,8 +567,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x256x256_0_vs64_tnn_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x256x256_0_vs64_tnn_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x256x256_0_vs64_tnn_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x256x256_0_vs64_tnn_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,8 +512,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -523,8 +523,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -534,8 +534,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -545,8 +545,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -556,8 +556,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -567,8 +567,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_f16_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -60,7 +60,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_nnn_align32_q_1sm_epiVs64n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_nnn_align32_q_1sm_epiVs64n {
using LayoutA = cutlass::layout::ColumnMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -141,7 +141,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_nnn_align32_q_1sm_epiVs64n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_nnn_align32_q_1sm_epiVs64n {
using LayoutA = cutlass::layout::ColumnMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -222,7 +222,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_nnn_align32_q_1sm_epiVs64n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_nnn_align32_q_1sm_epiVs64n {
using LayoutA = cutlass::layout::ColumnMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -303,7 +303,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_nnn_align32_q_2sm_epiVs64n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_nnn_align32_q_2sm_epiVs64n {
using LayoutA = cutlass::layout::ColumnMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -384,7 +384,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_nnn_align32_q_2sm_epiVs64n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_nnn_align32_q_2sm_epiVs64n {
using LayoutA = cutlass::layout::ColumnMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -465,7 +465,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_nnn_align32_q_2sm_epiVs64n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_nnn_align32_q_2sm_epiVs64n {
using LayoutA = cutlass::layout::ColumnMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -546,8 +546,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_nnn_align32_q_1sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_nnn_align32_q_1sm_epiVs64n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_nnn_align32_q_1sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_nnn_align32_q_1sm_epiVs64n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -557,8 +557,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_nnn_align32_q_1sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_nnn_align32_q_1sm_epiVs64n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_nnn_align32_q_1sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_nnn_align32_q_1sm_epiVs64n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -568,8 +568,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_nnn_align32_q_1sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_nnn_align32_q_1sm_epiVs64n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_nnn_align32_q_1sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_nnn_align32_q_1sm_epiVs64n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -579,8 +579,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_nnn_align32_q_2sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_nnn_align32_q_2sm_epiVs64n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_nnn_align32_q_2sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_nnn_align32_q_2sm_epiVs64n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -590,8 +590,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_nnn_align32_q_2sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_nnn_align32_q_2sm_epiVs64n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_nnn_align32_q_2sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_nnn_align32_q_2sm_epiVs64n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -601,8 +601,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_nnn_align32_q_2sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_nnn_align32_q_2sm_epiVs64n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_nnn_align32_q_2sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_nnn_align32_q_2sm_epiVs64n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -60,7 +60,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_nnt_align32_q_1sm_epiVs64t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_nnt_align32_q_1sm_epiVs64t {
using LayoutA = cutlass::layout::ColumnMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -141,7 +141,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_nnt_align32_q_1sm_epiVs64t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_nnt_align32_q_1sm_epiVs64t {
using LayoutA = cutlass::layout::ColumnMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -222,7 +222,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_nnt_align32_q_1sm_epiVs64t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_nnt_align32_q_1sm_epiVs64t {
using LayoutA = cutlass::layout::ColumnMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -303,7 +303,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_nnt_align32_q_2sm_epiVs64t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_nnt_align32_q_2sm_epiVs64t {
using LayoutA = cutlass::layout::ColumnMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -384,7 +384,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_nnt_align32_q_2sm_epiVs64t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_nnt_align32_q_2sm_epiVs64t {
using LayoutA = cutlass::layout::ColumnMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -465,7 +465,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_nnt_align32_q_2sm_epiVs64t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_nnt_align32_q_2sm_epiVs64t {
using LayoutA = cutlass::layout::ColumnMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -546,8 +546,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_nnt_align32_q_1sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_nnt_align32_q_1sm_epiVs64t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_nnt_align32_q_1sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_nnt_align32_q_1sm_epiVs64t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -557,8 +557,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_nnt_align32_q_1sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_nnt_align32_q_1sm_epiVs64t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_nnt_align32_q_1sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_nnt_align32_q_1sm_epiVs64t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -568,8 +568,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_nnt_align32_q_1sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_nnt_align32_q_1sm_epiVs64t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_nnt_align32_q_1sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_nnt_align32_q_1sm_epiVs64t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -579,8 +579,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_nnt_align32_q_2sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_nnt_align32_q_2sm_epiVs64t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_nnt_align32_q_2sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_nnt_align32_q_2sm_epiVs64t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -590,8 +590,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_nnt_align32_q_2sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_nnt_align32_q_2sm_epiVs64t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_nnt_align32_q_2sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_nnt_align32_q_2sm_epiVs64t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -601,8 +601,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_nnt_align32_q_2sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_nnt_align32_q_2sm_epiVs64t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_nnt_align32_q_2sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_nnt_align32_q_2sm_epiVs64t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -60,7 +60,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_tnn_align32_q_1sm_epiVs64n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_tnn_align32_q_1sm_epiVs64n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -141,7 +141,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_tnn_align32_q_1sm_epiVs64n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_tnn_align32_q_1sm_epiVs64n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -222,7 +222,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_tnn_align32_q_1sm_epiVs64n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_tnn_align32_q_1sm_epiVs64n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -303,7 +303,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_tnn_align32_q_2sm_epiVs64n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_tnn_align32_q_2sm_epiVs64n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -384,7 +384,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_tnn_align32_q_2sm_epiVs64n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_tnn_align32_q_2sm_epiVs64n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -465,7 +465,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_tnn_align32_q_2sm_epiVs64n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_tnn_align32_q_2sm_epiVs64n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -546,8 +546,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_tnn_align32_q_1sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_tnn_align32_q_1sm_epiVs64n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_tnn_align32_q_1sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_tnn_align32_q_1sm_epiVs64n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -557,8 +557,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_tnn_align32_q_1sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_tnn_align32_q_1sm_epiVs64n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_tnn_align32_q_1sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_tnn_align32_q_1sm_epiVs64n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -568,8 +568,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_tnn_align32_q_1sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_tnn_align32_q_1sm_epiVs64n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_tnn_align32_q_1sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_tnn_align32_q_1sm_epiVs64n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -579,8 +579,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_tnn_align32_q_2sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_tnn_align32_q_2sm_epiVs64n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_tnn_align32_q_2sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_tnn_align32_q_2sm_epiVs64n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -590,8 +590,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_tnn_align32_q_2sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_tnn_align32_q_2sm_epiVs64n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_tnn_align32_q_2sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_tnn_align32_q_2sm_epiVs64n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -601,8 +601,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_tnn_align32_q_2sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_tnn_align32_q_2sm_epiVs64n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_tnn_align32_q_2sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_tnn_align32_q_2sm_epiVs64n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -60,7 +60,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_tnt_align32_q_1sm_epiVs64t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_tnt_align32_q_1sm_epiVs64t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -141,7 +141,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_tnt_align32_q_1sm_epiVs64t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_tnt_align32_q_1sm_epiVs64t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -222,7 +222,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_tnt_align32_q_1sm_epiVs64t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_tnt_align32_q_1sm_epiVs64t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -303,7 +303,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_tnt_align32_q_2sm_epiVs64t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_tnt_align32_q_2sm_epiVs64t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -384,7 +384,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_tnt_align32_q_2sm_epiVs64t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_tnt_align32_q_2sm_epiVs64t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -465,7 +465,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_tnt_align32_q_2sm_epiVs64t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_tnt_align32_q_2sm_epiVs64t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -546,8 +546,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_tnt_align32_q_1sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_tnt_align32_q_1sm_epiVs64t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_tnt_align32_q_1sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_tnt_align32_q_1sm_epiVs64t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -557,8 +557,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_tnt_align32_q_1sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_tnt_align32_q_1sm_epiVs64t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_tnt_align32_q_1sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_tnt_align32_q_1sm_epiVs64t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -568,8 +568,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_tnt_align32_q_1sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_tnt_align32_q_1sm_epiVs64t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_tnt_align32_q_1sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_tnt_align32_q_1sm_epiVs64t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -579,8 +579,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_tnt_align32_q_2sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_tnt_align32_q_2sm_epiVs64t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_tnt_align32_q_2sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_tnt_align32_q_2sm_epiVs64t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -590,8 +590,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_tnt_align32_q_2sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_tnt_align32_q_2sm_epiVs64t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_tnt_align32_q_2sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_tnt_align32_q_2sm_epiVs64t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -601,8 +601,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_tnt_align32_q_2sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_tnt_align32_q_2sm_epiVs64t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_tnt_align32_q_2sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_tnt_align32_q_2sm_epiVs64t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
// 6. 256x256_tnt_vs64in
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -125,7 +125,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -194,7 +194,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -263,7 +263,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -332,7 +332,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -401,7 +401,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -470,8 +470,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -481,8 +481,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -492,8 +492,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -503,8 +503,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -514,8 +514,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -525,8 +525,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_e4m3_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -60,7 +60,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_ttn_align32_q_1sm_epiVs64n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_ttn_align32_q_1sm_epiVs64n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::RowMajor;
@ -141,7 +141,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_ttn_align32_q_1sm_epiVs64n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_ttn_align32_q_1sm_epiVs64n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::RowMajor;
@ -222,7 +222,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_ttn_align32_q_1sm_epiVs64n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_ttn_align32_q_1sm_epiVs64n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::RowMajor;
@ -303,7 +303,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_ttn_align32_q_2sm_epiVs64n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_ttn_align32_q_2sm_epiVs64n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::RowMajor;
@ -384,7 +384,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_ttn_align32_q_2sm_epiVs64n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_ttn_align32_q_2sm_epiVs64n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::RowMajor;
@ -465,7 +465,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_ttn_align32_q_2sm_epiVs64n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_ttn_align32_q_2sm_epiVs64n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::RowMajor;
@ -546,8 +546,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_ttn_align32_q_1sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_ttn_align32_q_1sm_epiVs64n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_ttn_align32_q_1sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_ttn_align32_q_1sm_epiVs64n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -557,8 +557,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_ttn_align32_q_1sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_ttn_align32_q_1sm_epiVs64n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_ttn_align32_q_1sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_ttn_align32_q_1sm_epiVs64n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -568,8 +568,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_ttn_align32_q_1sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_ttn_align32_q_1sm_epiVs64n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_ttn_align32_q_1sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_ttn_align32_q_1sm_epiVs64n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -579,8 +579,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_ttn_align32_q_2sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_ttn_align32_q_2sm_epiVs64n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_ttn_align32_q_2sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_ttn_align32_q_2sm_epiVs64n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -590,8 +590,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_ttn_align32_q_2sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_ttn_align32_q_2sm_epiVs64n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_ttn_align32_q_2sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_ttn_align32_q_2sm_epiVs64n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -601,8 +601,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_ttn_align32_q_2sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_ttn_align32_q_2sm_epiVs64n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_ttn_align32_q_2sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_ttn_align32_q_2sm_epiVs64n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -60,7 +60,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_ttt_align32_q_1sm_epiVs64t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_ttt_align32_q_1sm_epiVs64t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::RowMajor;
@ -141,7 +141,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_ttt_align32_q_1sm_epiVs64t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_ttt_align32_q_1sm_epiVs64t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::RowMajor;
@ -222,7 +222,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_ttt_align32_q_1sm_epiVs64t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_ttt_align32_q_1sm_epiVs64t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::RowMajor;
@ -303,7 +303,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_ttt_align32_q_2sm_epiVs64t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_ttt_align32_q_2sm_epiVs64t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::RowMajor;
@ -384,7 +384,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_ttt_align32_q_2sm_epiVs64t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_ttt_align32_q_2sm_epiVs64t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::RowMajor;
@ -465,7 +465,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_ttt_align32_q_2sm_epiVs64t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_ttt_align32_q_2sm_epiVs64t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::RowMajor;
@ -546,8 +546,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_ttt_align32_q_1sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_ttt_align32_q_1sm_epiVs64t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_ttt_align32_q_1sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x128x256_0_vs64_ttt_align32_q_1sm_epiVs64t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -557,8 +557,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_ttt_align32_q_1sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_ttt_align32_q_1sm_epiVs64t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_ttt_align32_q_1sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x192x256_0_vs64_ttt_align32_q_1sm_epiVs64t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -568,8 +568,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_ttt_align32_q_1sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_ttt_align32_q_1sm_epiVs64t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_ttt_align32_q_1sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_128x256x256_0_vs64_ttt_align32_q_1sm_epiVs64t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -579,8 +579,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_ttt_align32_q_2sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_ttt_align32_q_2sm_epiVs64t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_ttt_align32_q_2sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x128x256_0_vs64_ttt_align32_q_2sm_epiVs64t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -590,8 +590,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_ttt_align32_q_2sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_ttt_align32_q_2sm_epiVs64t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_ttt_align32_q_2sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x192x256_0_vs64_ttt_align32_q_2sm_epiVs64t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -601,8 +601,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_ttt_align32_q_2sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_ttt_align32_q_2sm_epiVs64t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_ttt_align32_q_2sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f16_ue8m0xe4m3_256x256x256_0_vs64_ttt_align32_q_2sm_epiVs64t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x128x256_0_vs64_tnn_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x128x256_0_vs64_tnn_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x192x256_0_vs64_tnn_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x192x256_0_vs64_tnn_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x256x256_0_vs64_tnn_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x256x256_0_vs64_tnn_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x128x256_0_vs64_tnn_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x128x256_0_vs64_tnn_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x192x256_0_vs64_tnn_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x192x256_0_vs64_tnn_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x256x256_0_vs64_tnn_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x256x256_0_vs64_tnn_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,8 +512,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x128x256_0_vs64_tnn_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x128x256_0_vs64_tnn_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x128x256_0_vs64_tnn_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x128x256_0_vs64_tnn_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -523,8 +523,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x192x256_0_vs64_tnn_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x192x256_0_vs64_tnn_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x192x256_0_vs64_tnn_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x192x256_0_vs64_tnn_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -534,8 +534,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x256x256_0_vs64_tnn_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x256x256_0_vs64_tnn_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x256x256_0_vs64_tnn_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x256x256_0_vs64_tnn_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -545,8 +545,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x128x256_0_vs64_tnn_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x128x256_0_vs64_tnn_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x128x256_0_vs64_tnn_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x128x256_0_vs64_tnn_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -556,8 +556,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x192x256_0_vs64_tnn_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x192x256_0_vs64_tnn_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x192x256_0_vs64_tnn_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x192x256_0_vs64_tnn_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -567,8 +567,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x256x256_0_vs64_tnn_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x256x256_0_vs64_tnn_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x256x256_0_vs64_tnn_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x256x256_0_vs64_tnn_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,8 +512,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -523,8 +523,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -534,8 +534,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -545,8 +545,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -556,8 +556,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -567,8 +567,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_f32_f32_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x128x256_0_vs64_tnn_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x128x256_0_vs64_tnn_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x192x256_0_vs64_tnn_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x192x256_0_vs64_tnn_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x256x256_0_vs64_tnn_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x256x256_0_vs64_tnn_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x128x256_0_vs64_tnn_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x128x256_0_vs64_tnn_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x192x256_0_vs64_tnn_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x192x256_0_vs64_tnn_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x256x256_0_vs64_tnn_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x256x256_0_vs64_tnn_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,8 +512,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x128x256_0_vs64_tnn_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x128x256_0_vs64_tnn_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x128x256_0_vs64_tnn_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x128x256_0_vs64_tnn_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -523,8 +523,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x192x256_0_vs64_tnn_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x192x256_0_vs64_tnn_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x192x256_0_vs64_tnn_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x192x256_0_vs64_tnn_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -534,8 +534,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x256x256_0_vs64_tnn_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x256x256_0_vs64_tnn_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x256x256_0_vs64_tnn_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x256x256_0_vs64_tnn_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -545,8 +545,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x128x256_0_vs64_tnn_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x128x256_0_vs64_tnn_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x128x256_0_vs64_tnn_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x128x256_0_vs64_tnn_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -556,8 +556,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x192x256_0_vs64_tnn_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x192x256_0_vs64_tnn_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x192x256_0_vs64_tnn_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x192x256_0_vs64_tnn_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -567,8 +567,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x256x256_0_vs64_tnn_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x256x256_0_vs64_tnn_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x256x256_0_vs64_tnn_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x256x256_0_vs64_tnn_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,8 +512,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -523,8 +523,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -534,8 +534,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -545,8 +545,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -556,8 +556,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -567,8 +567,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f16_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x128x256_0_vs64_tnn_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x128x256_0_vs64_tnn_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x192x256_0_vs64_tnn_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x192x256_0_vs64_tnn_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x256x256_0_vs64_tnn_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x256x256_0_vs64_tnn_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x128x256_0_vs64_tnn_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x128x256_0_vs64_tnn_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x192x256_0_vs64_tnn_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x192x256_0_vs64_tnn_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x256x256_0_vs64_tnn_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x256x256_0_vs64_tnn_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,8 +512,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x128x256_0_vs64_tnn_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x128x256_0_vs64_tnn_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x128x256_0_vs64_tnn_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x128x256_0_vs64_tnn_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -523,8 +523,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x192x256_0_vs64_tnn_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x192x256_0_vs64_tnn_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x192x256_0_vs64_tnn_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x192x256_0_vs64_tnn_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -534,8 +534,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x256x256_0_vs64_tnn_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x256x256_0_vs64_tnn_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x256x256_0_vs64_tnn_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x256x256_0_vs64_tnn_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -545,8 +545,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x128x256_0_vs64_tnn_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x128x256_0_vs64_tnn_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x128x256_0_vs64_tnn_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x128x256_0_vs64_tnn_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -556,8 +556,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x192x256_0_vs64_tnn_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x192x256_0_vs64_tnn_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x192x256_0_vs64_tnn_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x192x256_0_vs64_tnn_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -567,8 +567,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x256x256_0_vs64_tnn_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x256x256_0_vs64_tnn_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x256x256_0_vs64_tnn_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x256x256_0_vs64_tnn_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,8 +512,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x128x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -523,8 +523,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x192x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -534,8 +534,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_128x256x256_0_vs64_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -545,8 +545,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x128x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -556,8 +556,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x192x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -567,8 +567,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_f32_256x256x256_0_vs64_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -60,7 +60,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x128x256_0_vs64_tnn_align32_q_1sm_epiVs64n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x128x256_0_vs64_tnn_align32_q_1sm_epiVs64n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -141,7 +141,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x192x256_0_vs64_tnn_align32_q_1sm_epiVs64n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x192x256_0_vs64_tnn_align32_q_1sm_epiVs64n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -222,7 +222,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x256x256_0_vs64_tnn_align32_q_1sm_epiVs64n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x256x256_0_vs64_tnn_align32_q_1sm_epiVs64n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -303,7 +303,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x128x256_0_vs64_tnn_align32_q_2sm_epiVs64n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x128x256_0_vs64_tnn_align32_q_2sm_epiVs64n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -384,7 +384,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x192x256_0_vs64_tnn_align32_q_2sm_epiVs64n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x192x256_0_vs64_tnn_align32_q_2sm_epiVs64n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -465,7 +465,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x256x256_0_vs64_tnn_align32_q_2sm_epiVs64n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x256x256_0_vs64_tnn_align32_q_2sm_epiVs64n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -546,8 +546,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x128x256_0_vs64_tnn_align32_q_1sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x128x256_0_vs64_tnn_align32_q_1sm_epiVs64n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x128x256_0_vs64_tnn_align32_q_1sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x128x256_0_vs64_tnn_align32_q_1sm_epiVs64n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -557,8 +557,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x192x256_0_vs64_tnn_align32_q_1sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x192x256_0_vs64_tnn_align32_q_1sm_epiVs64n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x192x256_0_vs64_tnn_align32_q_1sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x192x256_0_vs64_tnn_align32_q_1sm_epiVs64n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -568,8 +568,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x256x256_0_vs64_tnn_align32_q_1sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x256x256_0_vs64_tnn_align32_q_1sm_epiVs64n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x256x256_0_vs64_tnn_align32_q_1sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x256x256_0_vs64_tnn_align32_q_1sm_epiVs64n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -579,8 +579,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x128x256_0_vs64_tnn_align32_q_2sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x128x256_0_vs64_tnn_align32_q_2sm_epiVs64n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x128x256_0_vs64_tnn_align32_q_2sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x128x256_0_vs64_tnn_align32_q_2sm_epiVs64n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -590,8 +590,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x192x256_0_vs64_tnn_align32_q_2sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x192x256_0_vs64_tnn_align32_q_2sm_epiVs64n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x192x256_0_vs64_tnn_align32_q_2sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x192x256_0_vs64_tnn_align32_q_2sm_epiVs64n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -601,8 +601,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x256x256_0_vs64_tnn_align32_q_2sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x256x256_0_vs64_tnn_align32_q_2sm_epiVs64n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x256x256_0_vs64_tnn_align32_q_2sm_epiVs64n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x256x256_0_vs64_tnn_align32_q_2sm_epiVs64n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -60,7 +60,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x128x256_0_vs64_tnt_align32_q_1sm_epiVs64t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x128x256_0_vs64_tnt_align32_q_1sm_epiVs64t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -141,7 +141,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x192x256_0_vs64_tnt_align32_q_1sm_epiVs64t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x192x256_0_vs64_tnt_align32_q_1sm_epiVs64t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -222,7 +222,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x256x256_0_vs64_tnt_align32_q_1sm_epiVs64t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x256x256_0_vs64_tnt_align32_q_1sm_epiVs64t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -303,7 +303,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x128x256_0_vs64_tnt_align32_q_2sm_epiVs64t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x128x256_0_vs64_tnt_align32_q_2sm_epiVs64t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -384,7 +384,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x192x256_0_vs64_tnt_align32_q_2sm_epiVs64t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x192x256_0_vs64_tnt_align32_q_2sm_epiVs64t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -465,7 +465,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x256x256_0_vs64_tnt_align32_q_2sm_epiVs64t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x256x256_0_vs64_tnt_align32_q_2sm_epiVs64t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -546,8 +546,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x128x256_0_vs64_tnt_align32_q_1sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x128x256_0_vs64_tnt_align32_q_1sm_epiVs64t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x128x256_0_vs64_tnt_align32_q_1sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x128x256_0_vs64_tnt_align32_q_1sm_epiVs64t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -557,8 +557,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x192x256_0_vs64_tnt_align32_q_1sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x192x256_0_vs64_tnt_align32_q_1sm_epiVs64t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x192x256_0_vs64_tnt_align32_q_1sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x192x256_0_vs64_tnt_align32_q_1sm_epiVs64t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -568,8 +568,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x256x256_0_vs64_tnt_align32_q_1sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x256x256_0_vs64_tnt_align32_q_1sm_epiVs64t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x256x256_0_vs64_tnt_align32_q_1sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_128x256x256_0_vs64_tnt_align32_q_1sm_epiVs64t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -579,8 +579,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x128x256_0_vs64_tnt_align32_q_2sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x128x256_0_vs64_tnt_align32_q_2sm_epiVs64t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x128x256_0_vs64_tnt_align32_q_2sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x128x256_0_vs64_tnt_align32_q_2sm_epiVs64t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -590,8 +590,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x192x256_0_vs64_tnt_align32_q_2sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x192x256_0_vs64_tnt_align32_q_2sm_epiVs64t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x192x256_0_vs64_tnt_align32_q_2sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x192x256_0_vs64_tnt_align32_q_2sm_epiVs64t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -601,8 +601,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x192x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x256x256_0_vs64_tnt_align32_q_2sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x64bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x256x256_0_vs64_tnt_align32_q_2sm_epiVs64t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x256x256_0_vs64_tnt_align32_q_2sm_epiVs64t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue8m0xe4m3_ue8m0xe4m3_f32_void_ue8m0xe4m3_256x256x256_0_vs64_tnt_align32_q_2sm_epiVs64t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x128x256_0_vs32_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x128x256_0_vs32_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x192x256_0_vs32_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x192x256_0_vs32_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x256x256_0_vs32_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x256x256_0_vs32_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 3.2
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x256x512_0_vs32_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x256x512_0_vs32_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x128x256_0_vs32_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x128x256_0_vs32_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x192x256_0_vs32_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x192x256_0_vs32_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,7 +512,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x256x256_0_vs32_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x256x256_0_vs32_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -588,7 +588,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 6.2
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x256x512_0_vs32_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x256x512_0_vs32_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -664,8 +664,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x128x256_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x128x256_0_vs32_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x128x256_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x128x256_0_vs32_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -675,8 +675,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x192x256_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x192x256_0_vs32_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x192x256_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x192x256_0_vs32_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -686,8 +686,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x256x256_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x256x256_0_vs32_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x256x256_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x256x256_0_vs32_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -697,8 +697,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 3.2
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x256x512_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x256x512_0_vs32_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x256x512_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x256x512_0_vs32_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -708,8 +708,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x128x256_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x128x256_0_vs32_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x128x256_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x128x256_0_vs32_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -719,8 +719,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x192x256_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x192x256_0_vs32_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x192x256_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x192x256_0_vs32_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -730,8 +730,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x256x256_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x256x256_0_vs32_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x256x256_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x256x256_0_vs32_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -741,8 +741,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 6.2
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x256x512_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x256x512_0_vs32_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x256x512_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x256x512_0_vs32_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x128x256_0_vs32_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x128x256_0_vs32_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x192x256_0_vs32_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x192x256_0_vs32_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x256x256_0_vs32_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x256x256_0_vs32_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 3.2
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x256x512_0_vs32_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x256x512_0_vs32_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x128x256_0_vs32_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x128x256_0_vs32_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x192x256_0_vs32_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x192x256_0_vs32_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,7 +512,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x256x256_0_vs32_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x256x256_0_vs32_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -588,7 +588,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 6.2
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x256x512_0_vs32_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x256x512_0_vs32_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -664,8 +664,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x128x256_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x128x256_0_vs32_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x128x256_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x128x256_0_vs32_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -675,8 +675,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x192x256_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x192x256_0_vs32_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x192x256_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x192x256_0_vs32_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -686,8 +686,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x256x256_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x256x256_0_vs32_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x256x256_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x256x256_0_vs32_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -697,8 +697,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 3.2
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x256x512_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x256x512_0_vs32_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x256x512_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_128x256x512_0_vs32_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -708,8 +708,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x128x256_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x128x256_0_vs32_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x128x256_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x128x256_0_vs32_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -719,8 +719,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x192x256_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x192x256_0_vs32_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x192x256_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x192x256_0_vs32_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -730,8 +730,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x256x256_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x256x256_0_vs32_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x256x256_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x256x256_0_vs32_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -741,8 +741,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 6.2
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x256x512_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x256x512_0_vs32_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x256x512_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_f16_256x256x512_0_vs32_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -60,7 +60,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x128x256_0_vs32_tnn_align64_o_1sm_epiVs32n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x128x256_0_vs32_tnn_align64_o_1sm_epiVs32n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -141,7 +141,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x192x256_0_vs32_tnn_align64_o_1sm_epiVs32n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x192x256_0_vs32_tnn_align64_o_1sm_epiVs32n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -222,7 +222,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x256x256_0_vs32_tnn_align64_o_1sm_epiVs32n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x256x256_0_vs32_tnn_align64_o_1sm_epiVs32n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -303,7 +303,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 3.2
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x256x512_0_vs32_tnn_align64_o_1sm_epiVs32n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x256x512_0_vs32_tnn_align64_o_1sm_epiVs32n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -384,7 +384,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x128x256_0_vs32_tnn_align64_o_2sm_epiVs32n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x128x256_0_vs32_tnn_align64_o_2sm_epiVs32n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -465,7 +465,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x192x256_0_vs32_tnn_align64_o_2sm_epiVs32n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x192x256_0_vs32_tnn_align64_o_2sm_epiVs32n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -546,7 +546,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x256x256_0_vs32_tnn_align64_o_2sm_epiVs32n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x256x256_0_vs32_tnn_align64_o_2sm_epiVs32n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -627,7 +627,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 6.2
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x256x512_0_vs32_tnn_align64_o_2sm_epiVs32n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x256x512_0_vs32_tnn_align64_o_2sm_epiVs32n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -708,8 +708,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x128x256_0_vs32_tnn_align64_o_1sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x128x256_0_vs32_tnn_align64_o_1sm_epiVs32n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x128x256_0_vs32_tnn_align64_o_1sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x128x256_0_vs32_tnn_align64_o_1sm_epiVs32n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -719,8 +719,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x192x256_0_vs32_tnn_align64_o_1sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x192x256_0_vs32_tnn_align64_o_1sm_epiVs32n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x192x256_0_vs32_tnn_align64_o_1sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x192x256_0_vs32_tnn_align64_o_1sm_epiVs32n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -730,8 +730,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x256x256_0_vs32_tnn_align64_o_1sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x256x256_0_vs32_tnn_align64_o_1sm_epiVs32n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x256x256_0_vs32_tnn_align64_o_1sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x256x256_0_vs32_tnn_align64_o_1sm_epiVs32n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -741,8 +741,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 3.2
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x256x512_0_vs32_tnn_align64_o_1sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x256x512_0_vs32_tnn_align64_o_1sm_epiVs32n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x256x512_0_vs32_tnn_align64_o_1sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x256x512_0_vs32_tnn_align64_o_1sm_epiVs32n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -752,8 +752,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x128x256_0_vs32_tnn_align64_o_2sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x128x256_0_vs32_tnn_align64_o_2sm_epiVs32n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x128x256_0_vs32_tnn_align64_o_2sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x128x256_0_vs32_tnn_align64_o_2sm_epiVs32n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -763,8 +763,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x192x256_0_vs32_tnn_align64_o_2sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x192x256_0_vs32_tnn_align64_o_2sm_epiVs32n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x192x256_0_vs32_tnn_align64_o_2sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x192x256_0_vs32_tnn_align64_o_2sm_epiVs32n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -774,8 +774,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x256x256_0_vs32_tnn_align64_o_2sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x256x256_0_vs32_tnn_align64_o_2sm_epiVs32n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x256x256_0_vs32_tnn_align64_o_2sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x256x256_0_vs32_tnn_align64_o_2sm_epiVs32n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -785,8 +785,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 6.2
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x256x512_0_vs32_tnn_align64_o_2sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x256x512_0_vs32_tnn_align64_o_2sm_epiVs32n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x256x512_0_vs32_tnn_align64_o_2sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x256x512_0_vs32_tnn_align64_o_2sm_epiVs32n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -60,7 +60,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x128x256_0_vs32_tnt_align64_o_1sm_epiVs32t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x128x256_0_vs32_tnt_align64_o_1sm_epiVs32t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -141,7 +141,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x192x256_0_vs32_tnt_align64_o_1sm_epiVs32t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x192x256_0_vs32_tnt_align64_o_1sm_epiVs32t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -222,7 +222,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x256x256_0_vs32_tnt_align64_o_1sm_epiVs32t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x256x256_0_vs32_tnt_align64_o_1sm_epiVs32t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -303,7 +303,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 3.2
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x256x512_0_vs32_tnt_align64_o_1sm_epiVs32t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x256x512_0_vs32_tnt_align64_o_1sm_epiVs32t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -384,7 +384,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x128x256_0_vs32_tnt_align64_o_2sm_epiVs32t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x128x256_0_vs32_tnt_align64_o_2sm_epiVs32t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -465,7 +465,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x192x256_0_vs32_tnt_align64_o_2sm_epiVs32t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x192x256_0_vs32_tnt_align64_o_2sm_epiVs32t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -546,7 +546,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x256x256_0_vs32_tnt_align64_o_2sm_epiVs32t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x256x256_0_vs32_tnt_align64_o_2sm_epiVs32t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -627,7 +627,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 6.2
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x256x512_0_vs32_tnt_align64_o_2sm_epiVs32t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x256x512_0_vs32_tnt_align64_o_2sm_epiVs32t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -708,8 +708,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x128x256_0_vs32_tnt_align64_o_1sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x128x256_0_vs32_tnt_align64_o_1sm_epiVs32t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x128x256_0_vs32_tnt_align64_o_1sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x128x256_0_vs32_tnt_align64_o_1sm_epiVs32t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -719,8 +719,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x192x256_0_vs32_tnt_align64_o_1sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x192x256_0_vs32_tnt_align64_o_1sm_epiVs32t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x192x256_0_vs32_tnt_align64_o_1sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x192x256_0_vs32_tnt_align64_o_1sm_epiVs32t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -730,8 +730,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x256x256_0_vs32_tnt_align64_o_1sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x256x256_0_vs32_tnt_align64_o_1sm_epiVs32t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x256x256_0_vs32_tnt_align64_o_1sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x256x256_0_vs32_tnt_align64_o_1sm_epiVs32t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -741,8 +741,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 3.2
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x256x512_0_vs32_tnt_align64_o_1sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x256x512_0_vs32_tnt_align64_o_1sm_epiVs32t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x256x512_0_vs32_tnt_align64_o_1sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_128x256x512_0_vs32_tnt_align64_o_1sm_epiVs32t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -752,8 +752,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x128x256_0_vs32_tnt_align64_o_2sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x128x256_0_vs32_tnt_align64_o_2sm_epiVs32t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x128x256_0_vs32_tnt_align64_o_2sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x128x256_0_vs32_tnt_align64_o_2sm_epiVs32t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -763,8 +763,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x192x256_0_vs32_tnt_align64_o_2sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x192x256_0_vs32_tnt_align64_o_2sm_epiVs32t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x192x256_0_vs32_tnt_align64_o_2sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x192x256_0_vs32_tnt_align64_o_2sm_epiVs32t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -774,8 +774,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x256x256_0_vs32_tnt_align64_o_2sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x256x256_0_vs32_tnt_align64_o_2sm_epiVs32t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x256x256_0_vs32_tnt_align64_o_2sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x256x256_0_vs32_tnt_align64_o_2sm_epiVs32t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -785,8 +785,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 6.2
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x256x512_0_vs32_tnt_align64_o_2sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x256x512_0_vs32_tnt_align64_o_2sm_epiVs32t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x256x512_0_vs32_tnt_align64_o_2sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_ue4m3xe2m1_256x256x512_0_vs32_tnt_align64_o_2sm_epiVs32t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_128x128x256_0_vs32_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_128x128x256_0_vs32_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_128x192x256_0_vs32_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_128x192x256_0_vs32_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_128x256x256_0_vs32_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_128x256x256_0_vs32_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_256x128x256_0_vs32_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_256x128x256_0_vs32_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_256x192x256_0_vs32_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_256x192x256_0_vs32_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_256x256x256_0_vs32_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_256x256x256_0_vs32_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,8 +512,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_128x128x256_0_vs32_tnt_align64_o_1sm, streamk) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_128x128x256_0_vs32_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_128x128x256_0_vs32_tnt_align64_o_1sm, streamk) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_128x128x256_0_vs32_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -523,8 +523,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_128x192x256_0_vs32_tnt_align64_o_1sm, streamk) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_128x192x256_0_vs32_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_128x192x256_0_vs32_tnt_align64_o_1sm, streamk) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_128x192x256_0_vs32_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -534,8 +534,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_128x256x256_0_vs32_tnt_align64_o_1sm, streamk) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_128x256x256_0_vs32_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_128x256x256_0_vs32_tnt_align64_o_1sm, streamk) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_128x256x256_0_vs32_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -545,8 +545,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_256x128x256_0_vs32_tnt_align64_o_2sm, streamk) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_256x128x256_0_vs32_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_256x128x256_0_vs32_tnt_align64_o_2sm, streamk) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_256x128x256_0_vs32_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -556,8 +556,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_256x192x256_0_vs32_tnt_align64_o_2sm, streamk) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_256x192x256_0_vs32_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_256x192x256_0_vs32_tnt_align64_o_2sm, streamk) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_256x192x256_0_vs32_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -567,8 +567,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_256x256x256_0_vs32_tnt_align64_o_2sm, streamk) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_256x256x256_0_vs32_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_256x256x256_0_vs32_tnt_align64_o_2sm, streamk) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f16_e2m1_256x256x256_0_vs32_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x128x256_0_vs32_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x128x256_0_vs32_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x192x256_0_vs32_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x192x256_0_vs32_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x256x256_0_vs32_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x256x256_0_vs32_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 3.2
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x256x512_0_vs32_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x256x512_0_vs32_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x128x256_0_vs32_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x128x256_0_vs32_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x192x256_0_vs32_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x192x256_0_vs32_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,7 +512,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x256x256_0_vs32_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x256x256_0_vs32_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -588,7 +588,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 6.2
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x256x512_0_vs32_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x256x512_0_vs32_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -664,8 +664,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x128x256_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x128x256_0_vs32_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x128x256_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x128x256_0_vs32_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -675,8 +675,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x192x256_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x192x256_0_vs32_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x192x256_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x192x256_0_vs32_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -686,8 +686,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x256x256_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x256x256_0_vs32_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x256x256_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x256x256_0_vs32_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -697,8 +697,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 3.2
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x256x512_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x256x512_0_vs32_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x256x512_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x256x512_0_vs32_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -708,8 +708,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x128x256_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x128x256_0_vs32_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x128x256_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x128x256_0_vs32_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -719,8 +719,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x192x256_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x192x256_0_vs32_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x192x256_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x192x256_0_vs32_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -730,8 +730,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x256x256_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x256x256_0_vs32_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x256x256_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x256x256_0_vs32_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -741,8 +741,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 6.2
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x256x512_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x256x512_0_vs32_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x256x512_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x256x512_0_vs32_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x128x256_0_vs32_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x128x256_0_vs32_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x192x256_0_vs32_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x192x256_0_vs32_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x256x256_0_vs32_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x256x256_0_vs32_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 3.2
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x256x512_0_vs32_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x256x512_0_vs32_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x128x256_0_vs32_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x128x256_0_vs32_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x192x256_0_vs32_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x192x256_0_vs32_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,7 +512,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x256x256_0_vs32_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x256x256_0_vs32_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -589,7 +589,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 6.2
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x256x512_0_vs32_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x256x512_0_vs32_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -666,8 +666,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x128x256_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x128x256_0_vs32_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x128x256_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x128x256_0_vs32_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -677,8 +677,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x192x256_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x192x256_0_vs32_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x192x256_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x192x256_0_vs32_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -688,8 +688,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x256x256_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x256x256_0_vs32_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x256x256_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x256x256_0_vs32_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -699,8 +699,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 3.2
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x256x512_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x256x512_0_vs32_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x256x512_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_128x256x512_0_vs32_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -710,8 +710,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x128x256_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x128x256_0_vs32_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x128x256_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x128x256_0_vs32_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -721,8 +721,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x192x256_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x192x256_0_vs32_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x192x256_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x192x256_0_vs32_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -732,8 +732,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x256x256_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x256x256_0_vs32_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x256x256_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x256x256_0_vs32_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -743,8 +743,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 6.2
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x256x512_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x256x512_0_vs32_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x256x512_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_f32_f32_256x256x512_0_vs32_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x128x256_0_vs32_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x128x256_0_vs32_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x192x256_0_vs32_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x192x256_0_vs32_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x256x256_0_vs32_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x256x256_0_vs32_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 3.2
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x256x512_0_vs32_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x256x512_0_vs32_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x128x256_0_vs32_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x128x256_0_vs32_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x192x256_0_vs32_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x192x256_0_vs32_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,7 +512,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x256x256_0_vs32_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x256x256_0_vs32_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -588,7 +588,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 6.2
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x256x512_0_vs32_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x256x512_0_vs32_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -664,8 +664,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x128x256_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x128x256_0_vs32_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x128x256_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x128x256_0_vs32_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -675,8 +675,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x192x256_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x192x256_0_vs32_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x192x256_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x192x256_0_vs32_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -686,8 +686,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x256x256_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x256x256_0_vs32_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x256x256_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x256x256_0_vs32_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -697,8 +697,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 3.2
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x256x512_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x256x512_0_vs32_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x256x512_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x256x512_0_vs32_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -708,8 +708,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x128x256_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x128x256_0_vs32_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x128x256_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x128x256_0_vs32_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -719,8 +719,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x192x256_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x192x256_0_vs32_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x192x256_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x192x256_0_vs32_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -730,8 +730,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x256x256_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x256x256_0_vs32_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x256x256_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x256x256_0_vs32_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -741,8 +741,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 6.2
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x256x512_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x256x512_0_vs32_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x256x512_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x256x512_0_vs32_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x128x256_0_vs32_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x128x256_0_vs32_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x192x256_0_vs32_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x192x256_0_vs32_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x256x256_0_vs32_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x256x256_0_vs32_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 3.2
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x256x512_0_vs32_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x256x512_0_vs32_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x128x256_0_vs32_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x128x256_0_vs32_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x192x256_0_vs32_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x192x256_0_vs32_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,7 +512,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x256x256_0_vs32_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x256x256_0_vs32_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -588,7 +588,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 6.2
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x256x512_0_vs32_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x256x512_0_vs32_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -664,8 +664,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x128x256_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x128x256_0_vs32_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x128x256_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x128x256_0_vs32_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -675,8 +675,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x192x256_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x192x256_0_vs32_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x192x256_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x192x256_0_vs32_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -686,8 +686,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x256x256_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x256x256_0_vs32_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x256x256_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x256x256_0_vs32_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -697,8 +697,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 3.2
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x256x512_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x256x512_0_vs32_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x256x512_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_128x256x512_0_vs32_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -708,8 +708,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x128x256_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x128x256_0_vs32_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x128x256_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x128x256_0_vs32_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -719,8 +719,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x192x256_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x192x256_0_vs32_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x192x256_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x192x256_0_vs32_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -730,8 +730,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x256x256_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x256x256_0_vs32_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x256x256_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x256x256_0_vs32_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -741,8 +741,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 6.2
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x256x512_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x256x512_0_vs32_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x256x512_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f16_256x256x512_0_vs32_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x128x256_0_vs32_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x128x256_0_vs32_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x192x256_0_vs32_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x192x256_0_vs32_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x256x256_0_vs32_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x256x256_0_vs32_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 3.2
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x256x512_0_vs32_tnn_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x256x512_0_vs32_tnn_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x128x256_0_vs32_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x128x256_0_vs32_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x192x256_0_vs32_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x192x256_0_vs32_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,7 +512,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x256x256_0_vs32_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x256x256_0_vs32_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -588,7 +588,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 6.2
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x256x512_0_vs32_tnn_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x256x512_0_vs32_tnn_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -664,8 +664,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x128x256_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x128x256_0_vs32_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x128x256_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x128x256_0_vs32_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -675,8 +675,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x192x256_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x192x256_0_vs32_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x192x256_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x192x256_0_vs32_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -686,8 +686,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x256x256_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x256x256_0_vs32_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x256x256_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x256x256_0_vs32_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -697,8 +697,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 3.2
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x256x512_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x256x512_0_vs32_tnn_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x256x512_0_vs32_tnn_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x256x512_0_vs32_tnn_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -708,8 +708,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x128x256_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x128x256_0_vs32_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x128x256_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x128x256_0_vs32_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -719,8 +719,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x192x256_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x192x256_0_vs32_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x192x256_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x192x256_0_vs32_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -730,8 +730,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x256x256_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x256x256_0_vs32_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x256x256_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x256x256_0_vs32_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -741,8 +741,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 6.2
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x256x512_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x256x512_0_vs32_tnn_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x256x512_0_vs32_tnn_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x256x512_0_vs32_tnn_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -56,7 +56,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x128x256_0_vs32_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x128x256_0_vs32_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x192x256_0_vs32_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x192x256_0_vs32_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -208,7 +208,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x256x256_0_vs32_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x256x256_0_vs32_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -284,7 +284,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 3.2
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x256x512_0_vs32_tnt_align64_o_1sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x256x512_0_vs32_tnt_align64_o_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -360,7 +360,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x128x256_0_vs32_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x128x256_0_vs32_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -436,7 +436,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x192x256_0_vs32_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x192x256_0_vs32_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -512,7 +512,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x256x256_0_vs32_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x256x256_0_vs32_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -589,7 +589,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 6.2
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x256x512_0_vs32_tnt_align64_o_2sm {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x256x512_0_vs32_tnt_align64_o_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -666,8 +666,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x128x256_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x128x256_0_vs32_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x128x256_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x128x256_0_vs32_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -677,8 +677,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x192x256_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x192x256_0_vs32_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x192x256_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x192x256_0_vs32_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -688,8 +688,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x256x256_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x256x256_0_vs32_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x256x256_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x256x256_0_vs32_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -699,8 +699,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 3.2
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x256x512_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x256x512_0_vs32_tnt_align64_o_1sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x256x512_0_vs32_tnt_align64_o_1sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_128x256x512_0_vs32_tnt_align64_o_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -710,8 +710,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x128x256_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x128x256_0_vs32_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x128x256_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x128x256_0_vs32_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -721,8 +721,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x192x256_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x192x256_0_vs32_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x192x256_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x192x256_0_vs32_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -732,8 +732,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x256x256_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x256x256_0_vs32_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x256x256_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x256x256_0_vs32_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -743,8 +743,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 6.2
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x256x512_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x256x512_0_vs32_tnt_align64_o_2sm;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x256x512_0_vs32_tnt_align64_o_2sm, functional) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_f32_256x256x512_0_vs32_tnt_align64_o_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -60,7 +60,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x128x256_0_vs32_tnn_align64_o_1sm_epiVs32n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x128x256_0_vs32_tnn_align64_o_1sm_epiVs32n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -141,7 +141,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x192x256_0_vs32_tnn_align64_o_1sm_epiVs32n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x192x256_0_vs32_tnn_align64_o_1sm_epiVs32n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -222,7 +222,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x256x256_0_vs32_tnn_align64_o_1sm_epiVs32n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x256x256_0_vs32_tnn_align64_o_1sm_epiVs32n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -303,7 +303,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 3.2
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x256x512_0_vs32_tnn_align64_o_1sm_epiVs32n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x256x512_0_vs32_tnn_align64_o_1sm_epiVs32n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -384,7 +384,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x128x256_0_vs32_tnn_align64_o_2sm_epiVs32n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x128x256_0_vs32_tnn_align64_o_2sm_epiVs32n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -465,7 +465,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x192x256_0_vs32_tnn_align64_o_2sm_epiVs32n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x192x256_0_vs32_tnn_align64_o_2sm_epiVs32n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -546,7 +546,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x256x256_0_vs32_tnn_align64_o_2sm_epiVs32n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x256x256_0_vs32_tnn_align64_o_2sm_epiVs32n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -627,7 +627,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 6.2
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x256x512_0_vs32_tnn_align64_o_2sm_epiVs32n {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x256x512_0_vs32_tnn_align64_o_2sm_epiVs32n {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -708,8 +708,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x128x256_0_vs32_tnn_align64_o_1sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x128x256_0_vs32_tnn_align64_o_1sm_epiVs32n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x128x256_0_vs32_tnn_align64_o_1sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x128x256_0_vs32_tnn_align64_o_1sm_epiVs32n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -719,8 +719,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x192x256_0_vs32_tnn_align64_o_1sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x192x256_0_vs32_tnn_align64_o_1sm_epiVs32n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x192x256_0_vs32_tnn_align64_o_1sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x192x256_0_vs32_tnn_align64_o_1sm_epiVs32n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -730,8 +730,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x256x256_0_vs32_tnn_align64_o_1sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x256x256_0_vs32_tnn_align64_o_1sm_epiVs32n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x256x256_0_vs32_tnn_align64_o_1sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x256x256_0_vs32_tnn_align64_o_1sm_epiVs32n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -741,8 +741,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 3.2
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x256x512_0_vs32_tnn_align64_o_1sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x256x512_0_vs32_tnn_align64_o_1sm_epiVs32n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x256x512_0_vs32_tnn_align64_o_1sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x256x512_0_vs32_tnn_align64_o_1sm_epiVs32n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -752,8 +752,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x128x256_0_vs32_tnn_align64_o_2sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x128x256_0_vs32_tnn_align64_o_2sm_epiVs32n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x128x256_0_vs32_tnn_align64_o_2sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x128x256_0_vs32_tnn_align64_o_2sm_epiVs32n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -763,8 +763,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x192x256_0_vs32_tnn_align64_o_2sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x192x256_0_vs32_tnn_align64_o_2sm_epiVs32n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x192x256_0_vs32_tnn_align64_o_2sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x192x256_0_vs32_tnn_align64_o_2sm_epiVs32n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -774,8 +774,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x256x256_0_vs32_tnn_align64_o_2sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x256x256_0_vs32_tnn_align64_o_2sm_epiVs32n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x256x256_0_vs32_tnn_align64_o_2sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x256x256_0_vs32_tnn_align64_o_2sm_epiVs32n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -785,8 +785,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 6.2
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x256x512_0_vs32_tnn_align64_o_2sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x256x512_0_vs32_tnn_align64_o_2sm_epiVs32n;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x256x512_0_vs32_tnn_align64_o_2sm_epiVs32n, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x256x512_0_vs32_tnn_align64_o_2sm_epiVs32n;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -60,7 +60,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x128x256_0_vs32_tnt_align64_o_1sm_epiVs32t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x128x256_0_vs32_tnt_align64_o_1sm_epiVs32t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -141,7 +141,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 2.
namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x192x256_0_vs32_tnt_align64_o_1sm_epiVs32t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x192x256_0_vs32_tnt_align64_o_1sm_epiVs32t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -222,7 +222,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 3.
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x256x256_0_vs32_tnt_align64_o_1sm_epiVs32t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x256x256_0_vs32_tnt_align64_o_1sm_epiVs32t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -303,7 +303,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 3.2
namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x256x512_0_vs32_tnt_align64_o_1sm_epiVs32t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x256x512_0_vs32_tnt_align64_o_1sm_epiVs32t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -384,7 +384,7 @@ namespace cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 4.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x128x256_0_vs32_tnt_align64_o_2sm_epiVs32t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x128x256_0_vs32_tnt_align64_o_2sm_epiVs32t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -465,7 +465,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 5.
namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x192x256_0_vs32_tnt_align64_o_2sm_epiVs32t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x192x256_0_vs32_tnt_align64_o_2sm_epiVs32t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -546,7 +546,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 6.
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x256x256_0_vs32_tnt_align64_o_2sm_epiVs32t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x256x256_0_vs32_tnt_align64_o_2sm_epiVs32t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -627,7 +627,7 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 6.2
namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x256x512_0_vs32_tnt_align64_o_2sm_epiVs32t {
namespace cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x256x512_0_vs32_tnt_align64_o_2sm_epiVs32t {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -708,8 +708,8 @@ namespace cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m
}
// 1.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x128x256_0_vs32_tnt_align64_o_1sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x128x256_0_vs32_tnt_align64_o_1sm_epiVs32t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x128x256_0_vs32_tnt_align64_o_1sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x128x256_0_vs32_tnt_align64_o_1sm_epiVs32t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -719,8 +719,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 2.
TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x192x256_0_vs32_tnt_align64_o_1sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x192x256_0_vs32_tnt_align64_o_1sm_epiVs32t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x192x256_0_vs32_tnt_align64_o_1sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x192x256_0_vs32_tnt_align64_o_1sm_epiVs32t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -730,8 +730,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 3.
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x256x256_0_vs32_tnt_align64_o_1sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x256x256_0_vs32_tnt_align64_o_1sm_epiVs32t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x256x256_0_vs32_tnt_align64_o_1sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x256x256_0_vs32_tnt_align64_o_1sm_epiVs32t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -741,8 +741,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 3.2
TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x256x512_0_vs32_tnt_align64_o_1sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x256x512_0_vs32_tnt_align64_o_1sm_epiVs32t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x256x512_0_vs32_tnt_align64_o_1sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_128x256x512_0_vs32_tnt_align64_o_1sm_epiVs32t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -752,8 +752,8 @@ TEST(cutlass3x_sm100_bssptensorop_s128x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 4.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x128x256_0_vs32_tnt_align64_o_2sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x128x256_0_vs32_tnt_align64_o_2sm_epiVs32t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x128x256_0_vs32_tnt_align64_o_2sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x128x256_0_vs32_tnt_align64_o_2sm_epiVs32t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -763,8 +763,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 5.
TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x192x256_0_vs32_tnt_align64_o_2sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x192x256_0_vs32_tnt_align64_o_2sm_epiVs32t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x192x256_0_vs32_tnt_align64_o_2sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x192x256_0_vs32_tnt_align64_o_2sm_epiVs32t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -774,8 +774,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x128x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 6.
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x256x256_0_vs32_tnt_align64_o_2sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x256x256_0_vs32_tnt_align64_o_2sm_epiVs32t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x256x256_0_vs32_tnt_align64_o_2sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x256x256_0_vs32_tnt_align64_o_2sm_epiVs32t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -785,8 +785,8 @@ TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32
}
// 6.2
TEST(cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x256x512_0_vs32_tnt_align64_o_2sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_s256x256x128bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x256x512_0_vs32_tnt_align64_o_2sm_epiVs32t;
TEST(cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x256x512_0_vs32_tnt_align64_o_2sm_epiVs32t, sfd_fusion) {
namespace gemm = cutlass3x_sm100_bssptensorop_bsspgemm_ue4m3xe2m1_ue4m3xe2m1_f32_void_ue4m3xe2m1_256x256x512_0_vs32_tnt_align64_o_2sm_epiVs32t;
EXPECT_TRUE(test::gemm::device::TestSmallFusion<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -0,0 +1,320 @@
/***************************************************************************************************
* Copyright (c) 2025 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: BSD-3-Clause
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
**************************************************************************************************/
/*! \file
\brief Tests for the device-wide GEMM interface using SM100 blockwise/groupwise scale factors
*/
#include <iostream>
#include <thrust/universal_vector.h>
#include <thrust/generate.h>
#include <thrust/random.h>
#include "cutlass/cutlass.h"
#include "cute/tensor.hpp"
#include "cute/atom/mma_atom.hpp"
#include "cutlass/numeric_types.h"
#include "cutlass/gemm/device/gemm_universal_adapter.h"
#include "cutlass/gemm/kernel/gemm_universal.hpp"
#include "cutlass/gemm/collective/collective_builder.hpp"
#include "cutlass/epilogue/dispatch_policy.hpp"
#include "cutlass/epilogue/collective/collective_builder.hpp"
#include "cutlass/epilogue/thread/activation.h"
#include "../../common/cutlass_unit_test.h"
#include "cutlass/util/packed_stride.hpp"
#include "cutlass/util/reference/host/gett.hpp"
#include "cutlass/util/device_memory.h"
using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
template<cute::UMMA::Major SFAMajor,
cute::UMMA::Major SFBMajor,
int ScaleGranularityM,
int ScaleGranularityN,
int ScaleGranularityK,
bool Is2SM,
class LayoutA,
class LayoutB,
class LayoutCD,
class MmaTileShape,
class ClusterShape>
bool groupwise_test(
Int<ScaleGranularityM>, Int<ScaleGranularityN>, Int<ScaleGranularityK>, C<Is2SM>,
LayoutA, LayoutB, LayoutCD,
MmaTileShape, ClusterShape) {
using ScaleConfig = cutlass::detail::Sm100BlockwiseScaleConfig<ScaleGranularityM, ScaleGranularityN, ScaleGranularityK, SFAMajor, SFBMajor>;
using LayoutSFA = decltype(ScaleConfig::deduce_layoutSFA()); // Layout type for SFA matrix operand
using LayoutSFB = decltype(ScaleConfig::deduce_layoutSFB()); // Layout type for SFB matrix operand
using CollectiveEpilogue = typename cutlass::epilogue::collective::CollectiveBuilder<
cutlass::arch::Sm100, cutlass::arch::OpClassTensorOp,
MmaTileShape, ClusterShape,
cutlass::epilogue::collective::EpilogueTileAuto,
float, float,
cutlass::float_e4m3_t, LayoutCD, 16,
cutlass::float_e4m3_t, LayoutCD, 16,
conditional_t<Is2SM, cutlass::epilogue::TmaWarpSpecialized2Sm, cutlass::epilogue::TmaWarpSpecialized1Sm>
>::CollectiveOp;
using CollectiveMainloop =
typename cutlass::gemm::collective::CollectiveBuilder<
cutlass::arch::Sm100, cutlass::arch::OpClassTensorOp,
cutlass::float_e4m3_t, cute::tuple<LayoutA, LayoutSFA>, 16,
cutlass::float_e4m3_t, cute::tuple<LayoutB, LayoutSFB>, 16,
float,
MmaTileShape, ClusterShape,
cutlass::gemm::collective::StageCountAutoCarveout<sizeof(typename CollectiveEpilogue::SharedStorage)>,
conditional_t<Is2SM, cutlass::gemm::KernelTmaWarpSpecializedBlockwise2SmSm100, cutlass::gemm::KernelTmaWarpSpecializedBlockwise1SmSm100>
>::CollectiveOp;
using GemmKernel = cutlass::gemm::kernel::GemmUniversal<
cute::Shape<int,int,int,int>,
CollectiveMainloop,
CollectiveEpilogue>;
using Gemm = cutlass::gemm::device::GemmUniversalAdapter<GemmKernel>;
using StrideA = typename Gemm::GemmKernel::StrideA;
using StrideB = typename Gemm::GemmKernel::StrideB;
using StrideC = typename Gemm::GemmKernel::StrideC;
using StrideD = typename Gemm::GemmKernel::StrideD;
/// Initialization
StrideA stride_A;
StrideB stride_B;
StrideC stride_C;
StrideD stride_D;
// Strides just iterate over scalars and have no zeros
LayoutSFA layout_SFA;
LayoutSFB layout_SFB;
int alignment_M = max(max((is_same_v<LayoutA, cutlass::layout::ColumnMajor> ? 16 : 1) ,
(SFAMajor == UMMA::Major::MN ? CollectiveMainloop::AlignmentSFA : 1)),
(is_same_v<LayoutCD, cutlass::layout::ColumnMajor> ? 16 : 1));
int alignment_N = max(max((is_same_v<LayoutB, cutlass::layout::RowMajor> ? 16 : 1) ,
(SFBMajor == UMMA::Major::MN ? CollectiveMainloop::AlignmentSFB : 1)),
(is_same_v<LayoutCD, cutlass::layout::RowMajor> ? 16 : 1));
int alignment_K = max(max((is_same_v<LayoutA, cutlass::layout::RowMajor> ? 16 : 1) ,
(SFAMajor == UMMA::Major::K ? CollectiveMainloop::AlignmentSFA : 1)),
max((is_same_v<LayoutB, cutlass::layout::ColumnMajor> ? 16 : 1) ,
(SFBMajor == UMMA::Major::K ? CollectiveMainloop::AlignmentSFB : 1)));
int M = 1024 + alignment_M;
int N = 1024 + alignment_N;
int K = 512 + alignment_K;
EXPECT_TRUE(M % alignment_M == 0);
EXPECT_TRUE(N % alignment_N == 0);
EXPECT_TRUE(K % alignment_K == 0);
stride_A = cutlass::make_cute_packed_stride(StrideA{}, cute::make_shape(M, K, 1));
stride_B = cutlass::make_cute_packed_stride(StrideB{}, cute::make_shape(N, K, 1));
stride_C = cutlass::make_cute_packed_stride(StrideC{}, cute::make_shape(M, N, 1));
stride_D = cutlass::make_cute_packed_stride(StrideD{}, cute::make_shape(M, N, 1));
layout_SFA = ScaleConfig::tile_atom_to_shape_SFA(make_shape(M, N, K, 1));
layout_SFB = ScaleConfig::tile_atom_to_shape_SFB(make_shape(M, N, K, 1));
thrust::universal_vector<cutlass::float_e4m3_t> tensor_A(M * K);
thrust::universal_vector<float> tensor_SFA(cute::size(cute::filter_zeros(layout_SFA)));
thrust::universal_vector<cutlass::float_e4m3_t> tensor_B(N * K);
thrust::universal_vector<float> tensor_SFB(cute::size(cute::filter_zeros(layout_SFB)));
thrust::universal_vector<cutlass::float_e4m3_t> tensor_C(M * N);
thrust::universal_vector<cutlass::float_e4m3_t> tensor_D(M * N);
thrust::universal_vector<cutlass::float_e4m3_t> tensor_ref_D(M * N);
thrust::random::default_random_engine engine(2025);
thrust::random::uniform_int_distribution<int> dist(-2, 2);
std::generate(tensor_A.begin(), tensor_A.end(), [&] () {
return static_cast<cutlass::float_e4m3_t>(dist(engine));
});
std::generate(tensor_SFA.begin(), tensor_SFA.end(), [&] () {
return static_cast<float>(dist(engine));
});
std::generate(tensor_B.begin(), tensor_B.end(), [&] () {
return static_cast<cutlass::float_e4m3_t>(dist(engine));
});
std::generate(tensor_SFB.begin(), tensor_SFB.end(), [&] () {
return static_cast<float>(dist(engine));
});
std::generate(tensor_C.begin(), tensor_C.end(), [&] () {
return static_cast<cutlass::float_e4m3_t>(dist(engine));
});
typename Gemm::Arguments arguments {
cutlass::gemm::GemmUniversalMode::kGemm,
{M, N, K, 1},
{thrust::raw_pointer_cast(tensor_A.data()), stride_A,
thrust::raw_pointer_cast(tensor_B.data()), stride_B,
thrust::raw_pointer_cast(tensor_SFA.data()), layout_SFA,
thrust::raw_pointer_cast(tensor_SFB.data()), layout_SFB},
{
{}, // epilogue.thread
thrust::raw_pointer_cast(tensor_C.data()), stride_C,
thrust::raw_pointer_cast(tensor_D.data()), stride_D
}
};
auto &fusion_args = arguments.epilogue.thread;
fusion_args.alpha = 1.0f;
fusion_args.beta = 1.0f;
size_t workspace_size = Gemm::get_workspace_size(arguments);
cutlass::device_memory::allocation<uint8_t> workspace(workspace_size);
Gemm gemm;
EXPECT_TRUE(gemm.can_implement(arguments) == cutlass::Status::kSuccess);
EXPECT_TRUE(gemm.initialize(arguments, workspace.get()) == cutlass::Status::kSuccess);
EXPECT_TRUE(gemm.run() == cutlass::Status::kSuccess);
EXPECT_TRUE(cudaDeviceSynchronize() == cudaSuccess);
auto A = cute::make_tensor(thrust::raw_pointer_cast(tensor_A.data()),
cute::make_layout(cute::make_shape(M, K, 1), stride_A));
auto B = cute::make_tensor(thrust::raw_pointer_cast(tensor_B.data()),
cute::make_layout(cute::make_shape(N, K, 1), stride_B));
auto C = cute::make_tensor(thrust::raw_pointer_cast(tensor_C.data()),
cute::make_layout(cute::make_shape(M, N, 1), stride_C));
auto D = cute::make_tensor(thrust::raw_pointer_cast(tensor_ref_D.data()),
cute::make_layout(cute::make_shape(M, N, 1), stride_D));
auto SFA = cute::make_tensor(thrust::raw_pointer_cast(tensor_SFA.data()), layout_SFA);
auto SFB = cute::make_tensor(thrust::raw_pointer_cast(tensor_SFB.data()), layout_SFB);
cutlass::reference::host::GettBlockScalingMainloopParams<
float,
decltype(A),
decltype(SFA),
decltype(B),
decltype(SFB)
> mainloop_params{A, SFA, B, SFB};
cutlass::reference::host::GettEpilogueParams<
float,
float,
float,
float,
decltype(C),
decltype(D)
> epilogue_params;
epilogue_params.C = C;
epilogue_params.D = D;
epilogue_params.alpha = 1.0f;
epilogue_params.beta = 1.0f;
// get reference result
cutlass::reference::host::Gemm3x(mainloop_params, epilogue_params);
// compare_reference
bool equal = true;
for (size_t i = 0; i < tensor_ref_D.size(); ++i) {
equal &= (tensor_ref_D[i] == tensor_D[i]);
}
return equal;
}
// 1-SM blockwise-scaled e4m3 GEMM: 128x128x128 MMA tile, 1x1x1 cluster.
// Runs groupwise_test with the configuration below and expects it to report success.
TEST(SM100_Device_Gemm_e4m3t_e4m3n_e4m3t_tensorop_1sm_f32_align16_blockwise, 128x128x128_1x1x1_2x2x32_scale) {
  // Leading three integral constants; presumably the scale granularity along
  // M, N and K (matches the "2x2x32_scale" suffix of the test name) -- confirm
  // against groupwise_test's declaration.
  using ScaleM = Int<2>;
  using ScaleN = Int<2>;
  using ScaleK = Int<32>;
  // false_type: this test lives in the "1sm" suite.
  using Use2Sm = false_type;
  // Layout tags, in the order they are handed to groupwise_test.
  using FirstLayout  = cutlass::layout::RowMajor;
  using SecondLayout = cutlass::layout::ColumnMajor;
  using ThirdLayout  = cutlass::layout::RowMajor;
  using TileShape    = Shape<_128,_128,_128>;
  using Cluster      = Shape<_1,_1,_1>;

  // Template arguments pick the major modes used for the scale-factor tensors.
  bool const passed = groupwise_test<UMMA::Major::MN, UMMA::Major::K>(
      ScaleM{}, ScaleN{}, ScaleK{}, Use2Sm{},
      FirstLayout{}, SecondLayout{},
      ThirdLayout{},
      TileShape{},
      Cluster{});
  EXPECT_TRUE(passed);
}
// 2-SM blockwise-scaled e4m3 GEMM: 256x128x128 MMA tile, 2x1x1 cluster.
// Runs groupwise_test with the configuration below and expects it to report success.
TEST(SM100_Device_Gemm_e4m3t_e4m3n_e4m3t_tensorop_2sm_f32_align16_blockwise, 256x128x128_2x1x1_64x4x32_scale) {
  // Leading three integral constants; presumably the scale granularity along
  // M, N and K (matches the "64x4x32_scale" suffix of the test name) -- confirm
  // against groupwise_test's declaration.
  using ScaleM = Int<64>;
  using ScaleN = Int<4>;
  using ScaleK = Int<32>;
  // true_type: this test lives in the "2sm" suite.
  using Use2Sm = true_type;
  // Layout tags, in the order they are handed to groupwise_test.
  using FirstLayout  = cutlass::layout::RowMajor;
  using SecondLayout = cutlass::layout::ColumnMajor;
  using ThirdLayout  = cutlass::layout::RowMajor;
  using TileShape    = Shape<_256,_128,_128>;
  using Cluster      = Shape<_2,_1,_1>;

  // Template arguments pick the major modes used for the scale-factor tensors.
  bool const passed = groupwise_test<UMMA::Major::MN, UMMA::Major::MN>(
      ScaleM{}, ScaleN{}, ScaleK{}, Use2Sm{},
      FirstLayout{}, SecondLayout{},
      ThirdLayout{},
      TileShape{},
      Cluster{});
  EXPECT_TRUE(passed);
}
// 1-SM blockwise-scaled e4m3 GEMM: 128x128x128 MMA tile, 1x1x1 cluster.
// Runs groupwise_test with the configuration below and expects it to report success.
TEST(SM100_Device_Gemm_e4m3t_e4m3n_e4m3t_tensorop_1sm_f32_align16_blockwise, 128x128x128_1x1x1_1x128x128_scale) {
  // Leading three integral constants; presumably the scale granularity along
  // M, N and K (matches the "1x128x128_scale" suffix of the test name) -- confirm
  // against groupwise_test's declaration.
  using ScaleM = Int<1>;
  using ScaleN = Int<128>;
  using ScaleK = Int<128>;
  // false_type: this test lives in the "1sm" suite.
  using Use2Sm = false_type;
  // Layout tags, in the order they are handed to groupwise_test.
  using FirstLayout  = cutlass::layout::RowMajor;
  using SecondLayout = cutlass::layout::ColumnMajor;
  using ThirdLayout  = cutlass::layout::RowMajor;
  using TileShape    = Shape<_128,_128,_128>;
  using Cluster      = Shape<_1,_1,_1>;

  // Template arguments pick the major modes used for the scale-factor tensors.
  bool const passed = groupwise_test<UMMA::Major::MN, UMMA::Major::K>(
      ScaleM{}, ScaleN{}, ScaleK{}, Use2Sm{},
      FirstLayout{}, SecondLayout{},
      ThirdLayout{},
      TileShape{},
      Cluster{});
  EXPECT_TRUE(passed);
}
// 2-SM blockwise-scaled e4m3 GEMM: 256x128x128 MMA tile, 2x1x1 cluster.
// Runs groupwise_test with the configuration below and expects it to report success.
TEST(SM100_Device_Gemm_e4m3t_e4m3n_e4m3t_tensorop_2sm_f32_align16_blockwise, 256x128x128_2x1x1_1x128x128_scale) {
  // Leading three integral constants; presumably the scale granularity along
  // M, N and K (matches the "1x128x128_scale" suffix of the test name) -- confirm
  // against groupwise_test's declaration.
  using ScaleM = Int<1>;
  using ScaleN = Int<128>;
  using ScaleK = Int<128>;
  // true_type: this test lives in the "2sm" suite.
  using Use2Sm = true_type;
  // Layout tags, in the order they are handed to groupwise_test.
  using FirstLayout  = cutlass::layout::RowMajor;
  using SecondLayout = cutlass::layout::ColumnMajor;
  using ThirdLayout  = cutlass::layout::RowMajor;
  using TileShape    = Shape<_256,_128,_128>;
  using Cluster      = Shape<_2,_1,_1>;

  // Template arguments pick the major modes used for the scale-factor tensors.
  bool const passed = groupwise_test<UMMA::Major::MN, UMMA::Major::MN>(
      ScaleM{}, ScaleN{}, ScaleK{}, Use2Sm{},
      FirstLayout{}, SecondLayout{},
      ThirdLayout{},
      TileShape{},
      Cluster{});
  EXPECT_TRUE(passed);
}
// 2-SM blockwise-scaled e4m3 GEMM: 256x128x128 MMA tile, 2x1x1 cluster.
// Runs groupwise_test with the configuration below and expects it to report success.
TEST(SM100_Device_Gemm_e4m3t_e4m3n_e4m3t_tensorop_2sm_f32_align16_blockwise, 256x128x128_2x1x1_64x64x64_scale) {
  // Leading three integral constants; presumably the scale granularity along
  // M, N and K (matches the "64x64x64_scale" suffix of the test name) -- confirm
  // against groupwise_test's declaration.
  using ScaleM = Int<64>;
  using ScaleN = Int<64>;
  using ScaleK = Int<64>;
  // true_type: this test lives in the "2sm" suite.
  using Use2Sm = true_type;
  // Layout tags, in the order they are handed to groupwise_test.
  using FirstLayout  = cutlass::layout::RowMajor;
  using SecondLayout = cutlass::layout::ColumnMajor;
  using ThirdLayout  = cutlass::layout::RowMajor;
  using TileShape    = Shape<_256,_128,_128>;
  using Cluster      = Shape<_2,_1,_1>;

  // Template arguments pick the major modes used for the scale-factor tensors.
  bool const passed = groupwise_test<UMMA::Major::MN, UMMA::Major::MN>(
      ScaleM{}, ScaleN{}, ScaleK{}, Use2Sm{},
      FirstLayout{}, SecondLayout{},
      ThirdLayout{},
      TileShape{},
      Cluster{});
  EXPECT_TRUE(passed);
}
#endif // #if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)

View File

@ -54,7 +54,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -124,7 +124,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_f16_f16_128
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -194,7 +194,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_f16_f16_128
}
// 3.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -264,7 +264,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_f16_f16_256
}
// 4.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -334,8 +334,8 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_f16_f16_256
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -345,8 +345,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_f16_f16_128x128x
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -356,8 +356,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_f16_f16_128x256x
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -367,8 +367,8 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_f16_f16_256x128x
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -379,7 +379,7 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_f16_f16_256x256x
// 1.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_void_f16_128x128x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_f16_128x128x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -449,7 +449,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_void_f16_12
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_void_f16_128x256x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_f16_128x256x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -519,7 +519,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_void_f16_12
}
// 3.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_void_f16_256x128x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_f16_256x128x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -589,7 +589,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_void_f16_25
}
// 4.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_void_f16_256x256x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_f16_256x256x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -659,8 +659,8 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_void_f16_25
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_void_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_void_f16_128x128x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_f16_128x128x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -670,8 +670,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_void_f16_128x128
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_void_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_void_f16_128x256x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_f16_128x256x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -681,8 +681,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_void_f16_128x256
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_void_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_void_f16_256x128x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_f16_256x128x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -692,8 +692,8 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_void_f16_256x128
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_void_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_void_f16_256x256x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_f16_256x256x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -54,7 +54,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_f16_e4m3_128x128x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f16_e4m3_128x128x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -124,7 +124,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_f16_e4m3_12
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_f16_e4m3_128x256x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f16_e4m3_128x256x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -194,7 +194,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_f16_e4m3_12
}
// 3.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_f16_e4m3_256x128x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f16_e4m3_256x128x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -264,7 +264,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_f16_e4m3_25
}
// 4.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_f16_e4m3_256x256x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f16_e4m3_256x256x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -334,8 +334,8 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_f16_e4m3_25
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_f16_e4m3_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_f16_e4m3_128x128x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f16_e4m3_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f16_e4m3_128x128x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -345,8 +345,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_f16_e4m3_128x128
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_f16_e4m3_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_f16_e4m3_128x256x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f16_e4m3_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f16_e4m3_128x256x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -356,8 +356,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_f16_e4m3_128x256
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_f16_e4m3_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_f16_e4m3_256x128x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f16_e4m3_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f16_e4m3_256x128x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -367,8 +367,8 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_f16_e4m3_256x128
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_f16_e4m3_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_f16_e4m3_256x256x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f16_e4m3_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f16_e4m3_256x256x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -379,7 +379,7 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_f16_e4m3_256x256
// 1.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_void_e4m3_128x128x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_e4m3_128x128x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -449,7 +449,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_void_e4m3_1
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_void_e4m3_128x256x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_e4m3_128x256x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -519,7 +519,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_void_e4m3_1
}
// 3.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_void_e4m3_256x128x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_e4m3_256x128x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -589,7 +589,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_void_e4m3_2
}
// 4.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_void_e4m3_256x256x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_e4m3_256x256x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -659,8 +659,8 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_void_e4m3_2
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_void_e4m3_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_void_e4m3_128x128x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_e4m3_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_e4m3_128x128x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -670,8 +670,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_void_e4m3_128x12
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_void_e4m3_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_void_e4m3_128x256x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_e4m3_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_e4m3_128x256x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -681,8 +681,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_void_e4m3_128x25
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_void_e4m3_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_void_e4m3_256x128x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_e4m3_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_e4m3_256x128x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -692,8 +692,8 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_void_e4m3_256x12
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_void_e4m3_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_void_e4m3_256x256x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_e4m3_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_e4m3_256x256x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -54,7 +54,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_f32_f32_128x128x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f32_f32_128x128x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -124,7 +124,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_f32_f32_128
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_f32_f32_128x256x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f32_f32_128x256x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -194,7 +194,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_f32_f32_128
}
// 3.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_f32_f32_256x128x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f32_f32_256x128x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -264,7 +264,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_f32_f32_256
}
// 4.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_f32_f32_256x256x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f32_f32_256x256x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -334,8 +334,8 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_f32_f32_256
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_f32_f32_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_f32_f32_128x128x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f32_f32_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f32_f32_128x128x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -345,8 +345,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_f32_f32_128x128x
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_f32_f32_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_f32_f32_128x256x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f32_f32_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f32_f32_128x256x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -356,8 +356,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_f32_f32_128x256x
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_f32_f32_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_f32_f32_256x128x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f32_f32_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f32_f32_256x128x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -367,8 +367,8 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_f32_f32_256x128x
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_f32_f32_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_f32_f32_256x256x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f32_f32_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_f32_f32_256x256x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -379,7 +379,7 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_f32_f32_256x256x
// 1.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_void_f32_128x128x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_f32_128x128x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -449,7 +449,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_void_f32_12
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_void_f32_128x256x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_f32_128x256x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -519,7 +519,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_void_f32_12
}
// 3.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_void_f32_256x128x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_f32_256x128x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -589,7 +589,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_void_f32_25
}
// 4.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_void_f32_256x256x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_f32_256x256x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -659,8 +659,8 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_void_f32_25
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_void_f32_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_void_f32_128x128x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_f32_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_f32_128x128x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -670,8 +670,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e2m1_f32_void_f32_128x128
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_void_f32_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_void_f32_128x256x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_f32_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_f32_128x256x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -681,8 +681,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e2m1_f32_void_f32_128x256
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_void_f32_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_void_f32_256x128x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_f32_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_f32_256x128x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -692,8 +692,8 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e2m1_f32_void_f32_256x128
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_void_f32_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e2m1_f32_void_f32_256x256x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_f32_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e2m1_f32_void_f32_256x256x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -54,7 +54,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e3m2_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e3m2_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -124,7 +124,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e3m2_f32_f16_f16_128
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e3m2_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e3m2_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -194,7 +194,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e3m2_f32_f16_f16_128
}
// 3.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e3m2_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e3m2_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -264,7 +264,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e3m2_f32_f16_f16_256
}
// 4.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e3m2_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e3m2_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -334,8 +334,8 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e3m2_f32_f16_f16_256
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e3m2_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e3m2_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e3m2_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e3m2_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -345,8 +345,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e3m2_f32_f16_f16_128x128x
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e3m2_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e3m2_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e3m2_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e3m2_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -356,8 +356,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e3m2_f32_f16_f16_128x256x
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e3m2_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e3m2_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e3m2_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e3m2_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -367,8 +367,8 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e3m2_f32_f16_f16_256x128x
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e3m2_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e3m2_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e3m2_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e3m2_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -379,7 +379,7 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e3m2_f32_f16_f16_256x256x
// 1.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e3m2_f32_void_f16_128x128x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e3m2_f32_void_f16_128x128x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -449,7 +449,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e3m2_f32_void_f16_12
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e3m2_f32_void_f16_128x256x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e3m2_f32_void_f16_128x256x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -519,7 +519,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e3m2_f32_void_f16_12
}
// 3.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e3m2_f32_void_f16_256x128x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e3m2_f32_void_f16_256x128x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -589,7 +589,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e3m2_f32_void_f16_25
}
// 4.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e3m2_f32_void_f16_256x256x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e3m2_f32_void_f16_256x256x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -659,8 +659,8 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e3m2_f32_void_f16_25
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e3m2_f32_void_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e3m2_f32_void_f16_128x128x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e3m2_f32_void_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e3m2_f32_void_f16_128x128x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -670,8 +670,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e3m2_f32_void_f16_128x128
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e3m2_f32_void_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e3m2_f32_void_f16_128x256x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e3m2_f32_void_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e3m2_f32_void_f16_128x256x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -681,8 +681,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e3m2_f32_void_f16_128x256
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e3m2_f32_void_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e3m2_f32_void_f16_256x128x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e3m2_f32_void_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e3m2_f32_void_f16_256x128x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -692,8 +692,8 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e3m2_f32_void_f16_256x128
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e3m2_f32_void_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e3m2_f32_void_f16_256x256x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e3m2_f32_void_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e3m2_f32_void_f16_256x256x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -54,7 +54,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e4m3_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e4m3_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -124,7 +124,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e4m3_f32_f16_f16_128
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e4m3_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e4m3_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -194,7 +194,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e4m3_f32_f16_f16_128
}
// 3.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e4m3_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e4m3_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -264,7 +264,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e4m3_f32_f16_f16_256
}
// 4.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e4m3_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e4m3_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -334,8 +334,8 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e4m3_f32_f16_f16_256
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e4m3_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e4m3_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e4m3_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e4m3_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -345,8 +345,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e4m3_f32_f16_f16_128x128x
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e4m3_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e4m3_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e4m3_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e4m3_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -356,8 +356,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e4m3_f32_f16_f16_128x256x
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e4m3_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e4m3_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e4m3_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e4m3_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -367,8 +367,8 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e4m3_f32_f16_f16_256x128x
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e4m3_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e4m3_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e4m3_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e4m3_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -379,7 +379,7 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e4m3_f32_f16_f16_256x256x
// 1.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e4m3_f32_void_f16_128x128x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e4m3_f32_void_f16_128x128x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -449,7 +449,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e4m3_f32_void_f16_12
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e4m3_f32_void_f16_128x256x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e4m3_f32_void_f16_128x256x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -519,7 +519,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e4m3_f32_void_f16_12
}
// 3.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e4m3_f32_void_f16_256x128x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e4m3_f32_void_f16_256x128x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -589,7 +589,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e4m3_f32_void_f16_25
}
// 4.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e4m3_f32_void_f16_256x256x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e2m1_e4m3_f32_void_f16_256x256x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -659,8 +659,8 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e4m3_f32_void_f16_25
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e4m3_f32_void_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e4m3_f32_void_f16_128x128x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e4m3_f32_void_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e4m3_f32_void_f16_128x128x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -670,8 +670,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e2m1_e4m3_f32_void_f16_128x128
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e4m3_f32_void_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e4m3_f32_void_f16_128x256x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e4m3_f32_void_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e4m3_f32_void_f16_128x256x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -681,8 +681,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e2m1_e4m3_f32_void_f16_128x256
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e4m3_f32_void_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e4m3_f32_void_f16_256x128x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e4m3_f32_void_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e4m3_f32_void_f16_256x128x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -692,8 +692,8 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e2m1_e4m3_f32_void_f16_256x128
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e4m3_f32_void_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e2m1_e4m3_f32_void_f16_256x256x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e2m1_e4m3_f32_void_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e2m1_e4m3_f32_void_f16_256x256x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -54,7 +54,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e2m1_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e2m1_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -124,7 +124,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e2m1_f32_f16_f16_128
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e2m1_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e2m1_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -194,7 +194,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e2m1_f32_f16_f16_128
}
// 3.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e2m1_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e2m1_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -264,7 +264,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e2m1_f32_f16_f16_256
}
// 4.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e2m1_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e2m1_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -334,8 +334,8 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e2m1_f32_f16_f16_256
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e2m1_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e2m1_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e2m1_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e2m1_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -345,8 +345,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e2m1_f32_f16_f16_128x128x
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e2m1_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e2m1_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e2m1_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e2m1_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -356,8 +356,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e2m1_f32_f16_f16_128x256x
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e2m1_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e2m1_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e2m1_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e2m1_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -367,8 +367,8 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e2m1_f32_f16_f16_256x128x
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e2m1_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e2m1_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e2m1_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e2m1_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -379,7 +379,7 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e2m1_f32_f16_f16_256x256x
// 1.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e2m1_f32_void_f16_128x128x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e2m1_f32_void_f16_128x128x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -449,7 +449,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e2m1_f32_void_f16_12
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e2m1_f32_void_f16_128x256x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e2m1_f32_void_f16_128x256x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -519,7 +519,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e2m1_f32_void_f16_12
}
// 3.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e2m1_f32_void_f16_256x128x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e2m1_f32_void_f16_256x128x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -589,7 +589,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e2m1_f32_void_f16_25
}
// 4.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e2m1_f32_void_f16_256x256x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e2m1_f32_void_f16_256x256x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -659,8 +659,8 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e2m1_f32_void_f16_25
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e2m1_f32_void_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e2m1_f32_void_f16_128x128x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e2m1_f32_void_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e2m1_f32_void_f16_128x128x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -670,8 +670,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e2m1_f32_void_f16_128x128
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e2m1_f32_void_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e2m1_f32_void_f16_128x256x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e2m1_f32_void_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e2m1_f32_void_f16_128x256x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -681,8 +681,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e2m1_f32_void_f16_128x256
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e2m1_f32_void_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e2m1_f32_void_f16_256x128x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e2m1_f32_void_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e2m1_f32_void_f16_256x128x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -692,8 +692,8 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e2m1_f32_void_f16_256x128
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e2m1_f32_void_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e2m1_f32_void_f16_256x256x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e2m1_f32_void_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e2m1_f32_void_f16_256x256x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -54,7 +54,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -124,7 +124,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_f16_f16_128
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -194,7 +194,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_f16_f16_128
}
// 3.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -264,7 +264,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_f16_f16_256
}
// 4.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -334,8 +334,8 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_f16_f16_256
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -345,8 +345,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_f16_f16_128x128x
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -356,8 +356,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_f16_f16_128x256x
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -367,8 +367,8 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_f16_f16_256x128x
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -379,7 +379,7 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_f16_f16_256x256x
// 1.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_void_f16_128x128x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_f16_128x128x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -449,7 +449,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_void_f16_12
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_void_f16_128x256x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_f16_128x256x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -519,7 +519,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_void_f16_12
}
// 3.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_void_f16_256x128x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_f16_256x128x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -589,7 +589,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_void_f16_25
}
// 4.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_void_f16_256x256x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_f16_256x256x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -659,8 +659,8 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_void_f16_25
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_void_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_void_f16_128x128x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_f16_128x128x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -670,8 +670,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_void_f16_128x128
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_void_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_void_f16_128x256x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_f16_128x256x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -681,8 +681,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_void_f16_128x256
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_void_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_void_f16_256x128x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_f16_256x128x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -692,8 +692,8 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_void_f16_256x128
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_void_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_void_f16_256x256x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_f16_256x256x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -54,7 +54,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_f16_e4m3_128x128x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f16_e4m3_128x128x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -124,7 +124,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_f16_e4m3_12
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_f16_e4m3_128x256x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f16_e4m3_128x256x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -194,7 +194,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_f16_e4m3_12
}
// 3.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_f16_e4m3_256x128x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f16_e4m3_256x128x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -264,7 +264,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_f16_e4m3_25
}
// 4.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_f16_e4m3_256x256x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f16_e4m3_256x256x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -334,8 +334,8 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_f16_e4m3_25
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_f16_e4m3_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_f16_e4m3_128x128x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f16_e4m3_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f16_e4m3_128x128x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -345,8 +345,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_f16_e4m3_128x128
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_f16_e4m3_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_f16_e4m3_128x256x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f16_e4m3_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f16_e4m3_128x256x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -356,8 +356,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_f16_e4m3_128x256
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_f16_e4m3_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_f16_e4m3_256x128x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f16_e4m3_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f16_e4m3_256x128x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -367,8 +367,8 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_f16_e4m3_256x128
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_f16_e4m3_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_f16_e4m3_256x256x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f16_e4m3_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f16_e4m3_256x256x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -379,7 +379,7 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_f16_e4m3_256x256
// 1.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_void_e4m3_128x128x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_e4m3_128x128x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -449,7 +449,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_void_e4m3_1
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_void_e4m3_128x256x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_e4m3_128x256x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -519,7 +519,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_void_e4m3_1
}
// 3.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_void_e4m3_256x128x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_e4m3_256x128x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -589,7 +589,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_void_e4m3_2
}
// 4.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_void_e4m3_256x256x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_e4m3_256x256x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -659,8 +659,8 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_void_e4m3_2
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_void_e4m3_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_void_e4m3_128x128x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_e4m3_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_e4m3_128x128x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -670,8 +670,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_void_e4m3_128x12
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_void_e4m3_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_void_e4m3_128x256x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_e4m3_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_e4m3_128x256x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -681,8 +681,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_void_e4m3_128x25
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_void_e4m3_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_void_e4m3_256x128x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_e4m3_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_e4m3_256x128x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -692,8 +692,8 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_void_e4m3_256x12
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_void_e4m3_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_void_e4m3_256x256x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_e4m3_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_e4m3_256x256x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -54,7 +54,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_f32_f32_128x128x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f32_f32_128x128x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -124,7 +124,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_f32_f32_128
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_f32_f32_128x256x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f32_f32_128x256x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -194,7 +194,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_f32_f32_128
}
// 3.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_f32_f32_256x128x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f32_f32_256x128x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -264,7 +264,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_f32_f32_256
}
// 4.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_f32_f32_256x256x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f32_f32_256x256x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -334,8 +334,8 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_f32_f32_256
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_f32_f32_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_f32_f32_128x128x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f32_f32_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f32_f32_128x128x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -345,8 +345,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_f32_f32_128x128x
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_f32_f32_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_f32_f32_128x256x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f32_f32_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f32_f32_128x256x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -356,8 +356,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_f32_f32_128x256x
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_f32_f32_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_f32_f32_256x128x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f32_f32_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f32_f32_256x128x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -367,8 +367,8 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_f32_f32_256x128x
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_f32_f32_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_f32_f32_256x256x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f32_f32_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_f32_f32_256x256x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -379,7 +379,7 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_f32_f32_256x256x
// 1.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_void_f32_128x128x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_f32_128x128x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -449,7 +449,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_void_f32_12
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_void_f32_128x256x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_f32_128x256x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -519,7 +519,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_void_f32_12
}
// 3.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_void_f32_256x128x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_f32_256x128x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -589,7 +589,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_void_f32_25
}
// 4.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_void_f32_256x256x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_f32_256x256x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -659,8 +659,8 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_void_f32_25
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_void_f32_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_void_f32_128x128x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_f32_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_f32_128x128x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -670,8 +670,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e3m2_f32_void_f32_128x128
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_void_f32_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_void_f32_128x256x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_f32_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_f32_128x256x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -681,8 +681,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e3m2_f32_void_f32_128x256
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_void_f32_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_void_f32_256x128x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_f32_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_f32_256x128x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -692,8 +692,8 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e3m2_f32_void_f32_256x128
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_void_f32_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e3m2_f32_void_f32_256x256x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_f32_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e3m2_f32_void_f32_256x256x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -54,7 +54,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e4m3_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e4m3_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -124,7 +124,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e4m3_f32_f16_f16_128
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e4m3_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e4m3_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -194,7 +194,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e4m3_f32_f16_f16_128
}
// 3.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e4m3_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e4m3_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -264,7 +264,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e4m3_f32_f16_f16_256
}
// 4.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e4m3_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e4m3_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -334,8 +334,8 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e4m3_f32_f16_f16_256
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e4m3_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e4m3_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e4m3_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e4m3_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -345,8 +345,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e4m3_f32_f16_f16_128x128x
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e4m3_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e4m3_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e4m3_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e4m3_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -356,8 +356,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e4m3_f32_f16_f16_128x256x
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e4m3_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e4m3_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e4m3_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e4m3_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -367,8 +367,8 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e4m3_f32_f16_f16_256x128x
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e4m3_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e4m3_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e4m3_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e4m3_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -379,7 +379,7 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e4m3_f32_f16_f16_256x256x
// 1.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e4m3_f32_void_f16_128x128x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e4m3_f32_void_f16_128x128x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -449,7 +449,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e4m3_f32_void_f16_12
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e4m3_f32_void_f16_128x256x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e4m3_f32_void_f16_128x256x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -519,7 +519,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e4m3_f32_void_f16_12
}
// 3.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e4m3_f32_void_f16_256x128x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e4m3_f32_void_f16_256x128x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -589,7 +589,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e4m3_f32_void_f16_25
}
// 4.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e4m3_f32_void_f16_256x256x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e3m2_e4m3_f32_void_f16_256x256x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -659,8 +659,8 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e4m3_f32_void_f16_25
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e4m3_f32_void_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e4m3_f32_void_f16_128x128x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e4m3_f32_void_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e4m3_f32_void_f16_128x128x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -670,8 +670,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e3m2_e4m3_f32_void_f16_128x128
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e4m3_f32_void_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e4m3_f32_void_f16_128x256x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e4m3_f32_void_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e4m3_f32_void_f16_128x256x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -681,8 +681,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e3m2_e4m3_f32_void_f16_128x256
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e4m3_f32_void_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e4m3_f32_void_f16_256x128x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e4m3_f32_void_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e4m3_f32_void_f16_256x128x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -692,8 +692,8 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e3m2_e4m3_f32_void_f16_256x128
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e4m3_f32_void_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e3m2_e4m3_f32_void_f16_256x256x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e3m2_e4m3_f32_void_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e3m2_e4m3_f32_void_f16_256x256x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -54,7 +54,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e2m1_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e2m1_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -124,7 +124,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e2m1_f32_f16_f16_128
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e2m1_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e2m1_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -194,7 +194,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e2m1_f32_f16_f16_128
}
// 3.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e2m1_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e2m1_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -264,7 +264,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e2m1_f32_f16_f16_256
}
// 4.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e2m1_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e2m1_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -334,8 +334,8 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e2m1_f32_f16_f16_256
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e2m1_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e2m1_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e2m1_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e2m1_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -345,8 +345,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e2m1_f32_f16_f16_128x128x
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e2m1_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e2m1_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e2m1_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e2m1_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -356,8 +356,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e2m1_f32_f16_f16_128x256x
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e2m1_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e2m1_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e2m1_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e2m1_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -367,8 +367,8 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e2m1_f32_f16_f16_256x128x
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e2m1_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e2m1_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e2m1_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e2m1_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -379,7 +379,7 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e2m1_f32_f16_f16_256x256x
// 1.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e2m1_f32_void_f16_128x128x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e2m1_f32_void_f16_128x128x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -449,7 +449,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e2m1_f32_void_f16_12
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e2m1_f32_void_f16_128x256x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e2m1_f32_void_f16_128x256x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -519,7 +519,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e2m1_f32_void_f16_12
}
// 3.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e2m1_f32_void_f16_256x128x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e2m1_f32_void_f16_256x128x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -589,7 +589,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e2m1_f32_void_f16_25
}
// 4.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e2m1_f32_void_f16_256x256x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e2m1_f32_void_f16_256x256x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -659,8 +659,8 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e2m1_f32_void_f16_25
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e2m1_f32_void_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e2m1_f32_void_f16_128x128x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e2m1_f32_void_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e2m1_f32_void_f16_128x128x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -670,8 +670,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e2m1_f32_void_f16_128x128
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e2m1_f32_void_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e2m1_f32_void_f16_128x256x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e2m1_f32_void_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e2m1_f32_void_f16_128x256x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -681,8 +681,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e2m1_f32_void_f16_128x256
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e2m1_f32_void_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e2m1_f32_void_f16_256x128x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e2m1_f32_void_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e2m1_f32_void_f16_256x128x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -692,8 +692,8 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e2m1_f32_void_f16_256x128
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e2m1_f32_void_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e2m1_f32_void_f16_256x256x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e2m1_f32_void_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e2m1_f32_void_f16_256x256x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -54,7 +54,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e3m2_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e3m2_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -124,7 +124,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e3m2_f32_f16_f16_128
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e3m2_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e3m2_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -194,7 +194,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e3m2_f32_f16_f16_128
}
// 3.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e3m2_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e3m2_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -264,7 +264,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e3m2_f32_f16_f16_256
}
// 4.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e3m2_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e3m2_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -334,8 +334,8 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e3m2_f32_f16_f16_256
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e3m2_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e3m2_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e3m2_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e3m2_f32_f16_f16_128x128x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -345,8 +345,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e3m2_f32_f16_f16_128x128x
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e3m2_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e3m2_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e3m2_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e3m2_f32_f16_f16_128x256x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -356,8 +356,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e3m2_f32_f16_f16_128x256x
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e3m2_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e3m2_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e3m2_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e3m2_f32_f16_f16_256x128x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -367,8 +367,8 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e3m2_f32_f16_f16_256x128x
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e3m2_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e3m2_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e3m2_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e3m2_f32_f16_f16_256x256x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
test::gemm::device::CheckEquality::RELATIVE,
@ -379,7 +379,7 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e3m2_f32_f16_f16_256x256x
// 1.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e3m2_f32_void_f16_128x128x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e3m2_f32_void_f16_128x128x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -449,7 +449,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e3m2_f32_void_f16_12
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e3m2_f32_void_f16_128x256x256_0_tnt_align32_q_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e3m2_f32_void_f16_128x256x256_0_tnt_align32_q_1sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -519,7 +519,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e3m2_f32_void_f16_12
}
// 3.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e3m2_f32_void_f16_256x128x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e3m2_f32_void_f16_256x128x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -589,7 +589,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e3m2_f32_void_f16_25
}
// 4.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e3m2_f32_void_f16_256x256x256_0_tnt_align32_q_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e3m2_f32_void_f16_256x256x256_0_tnt_align32_q_2sm {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
@ -659,8 +659,8 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e3m2_f32_void_f16_25
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e3m2_f32_void_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e3m2_f32_void_f16_128x128x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e3m2_f32_void_f16_128x128x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e3m2_f32_void_f16_128x128x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -670,8 +670,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e3m2_f32_void_f16_128x128
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e3m2_f32_void_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e3m2_f32_void_f16_128x256x256_0_tnt_align32_q_1sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e3m2_f32_void_f16_128x256x256_0_tnt_align32_q_1sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e3m2_f32_void_f16_128x256x256_0_tnt_align32_q_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -681,8 +681,8 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e3m2_f32_void_f16_128x256
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e3m2_f32_void_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e3m2_f32_void_f16_256x128x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e3m2_f32_void_f16_256x128x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e3m2_f32_void_f16_256x128x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,
@ -692,8 +692,8 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e3m2_f32_void_f16_256x128
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e3m2_f32_void_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e3m2_f32_void_f16_256x256x256_0_tnt_align32_q_2sm;
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e3m2_f32_void_f16_256x256x256_0_tnt_align32_q_2sm, functional) {
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e3m2_f32_void_f16_256x256x256_0_tnt_align32_q_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
test::gemm::device::CheckEquality::RELATIVE,

View File

@ -47,7 +47,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_sptensorop_s128x64x32spgemm_f16_f16_f32_f16_f16_128x64x64_1x1x1_0_tnn_align16_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_128x64x64_1x1x1_0_tnn_align16_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
cutlass::arch::Sm100, cutlass::arch::OpClassSparseTensorOp,
@ -88,7 +88,7 @@ namespace cutlass3x_sm100_sptensorop_s128x64x32spgemm_f16_f16_f32_f16_f16_128x64
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x128x32spgemm_f16_f16_f32_f16_f16_128x128x64_1x1x1_0_tnn_align16_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_128x128x64_1x1x1_0_tnn_align16_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -130,7 +130,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x32spgemm_f16_f16_f32_f16_f16_128x1
}
// 3.
namespace cutlass3x_sm100_sptensorop_s128x192x32spgemm_f16_f16_f32_f16_f16_128x192x64_1x1x1_0_tnn_align16_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_128x192x64_1x1x1_0_tnn_align16_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -172,7 +172,7 @@ namespace cutlass3x_sm100_sptensorop_s128x192x32spgemm_f16_f16_f32_f16_f16_128x1
}
// 4.
namespace cutlass3x_sm100_sptensorop_s128x256x32spgemm_f16_f16_f32_f16_f16_128x256x64_1x1x1_0_tnn_align16_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_128x256x64_1x1x1_0_tnn_align16_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -214,7 +214,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x32spgemm_f16_f16_f32_f16_f16_128x2
}
// 5.
namespace cutlass3x_sm100_sptensorop_s256x64x32spgemm_f16_f16_f32_f16_f16_256x64x64_2x1x1_0_tnn_align16_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_256x64x64_2x1x1_0_tnn_align16_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -256,7 +256,7 @@ namespace cutlass3x_sm100_sptensorop_s256x64x32spgemm_f16_f16_f32_f16_f16_256x64
}
// 6.
namespace cutlass3x_sm100_sptensorop_s256x64x32spgemm_f16_f16_f32_f16_f16_256x64x128_2x1x1_0_tnn_align16_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_256x64x128_2x1x1_0_tnn_align16_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -298,7 +298,7 @@ namespace cutlass3x_sm100_sptensorop_s256x64x32spgemm_f16_f16_f32_f16_f16_256x64
}
// 7.
namespace cutlass3x_sm100_sptensorop_s256x128x32spgemm_f16_f16_f32_f16_f16_256x128x64_2x1x1_0_tnn_align16_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_256x128x64_2x1x1_0_tnn_align16_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -340,7 +340,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x32spgemm_f16_f16_f32_f16_f16_256x1
}
// 8.
namespace cutlass3x_sm100_sptensorop_s256x128x32spgemm_f16_f16_f32_f16_f16_256x128x128_2x1x1_0_tnn_align16_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_256x128x128_2x1x1_0_tnn_align16_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -382,7 +382,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x32spgemm_f16_f16_f32_f16_f16_256x1
}
// 9.
namespace cutlass3x_sm100_sptensorop_s256x192x32spgemm_f16_f16_f32_f16_f16_256x192x64_2x1x1_0_tnn_align16_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_256x192x64_2x1x1_0_tnn_align16_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -424,7 +424,7 @@ namespace cutlass3x_sm100_sptensorop_s256x192x32spgemm_f16_f16_f32_f16_f16_256x1
}
// 10.
namespace cutlass3x_sm100_sptensorop_s256x256x32spgemm_f16_f16_f32_f16_f16_256x256x64_2x1x1_0_tnn_align16_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_256x256x64_2x1x1_0_tnn_align16_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -466,7 +466,7 @@ namespace cutlass3x_sm100_sptensorop_s256x256x32spgemm_f16_f16_f32_f16_f16_256x2
}
// 11.
namespace cutlass3x_sm100_sptensorop_s256x256x32spgemm_f16_f16_f32_f16_f16_256x256x128_2x1x1_0_tnn_align16_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_256x256x128_2x1x1_0_tnn_align16_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -508,9 +508,9 @@ namespace cutlass3x_sm100_sptensorop_s256x256x32spgemm_f16_f16_f32_f16_f16_256x2
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x64x32spgemm_f16_f16_f32_f16_f16_128x64x64_1x1x1_0_tnn_align16_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_128x64x64_1x1x1_0_tnn_align16_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x64x32spgemm_f16_f16_f32_f16_f16_128x64x64_1x1x1_0_tnn_align16_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_128x64x64_1x1x1_0_tnn_align16_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -521,9 +521,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x64x32spgemm_f16_f16_f32_f16_f16_128x64x64_1
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x128x32spgemm_f16_f16_f32_f16_f16_128x128x64_1x1x1_0_tnn_align16_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_128x128x64_1x1x1_0_tnn_align16_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x32spgemm_f16_f16_f32_f16_f16_128x128x64_1x1x1_0_tnn_align16_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_128x128x64_1x1x1_0_tnn_align16_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -534,9 +534,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x32spgemm_f16_f16_f32_f16_f16_128x128x64
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s128x192x32spgemm_f16_f16_f32_f16_f16_128x192x64_1x1x1_0_tnn_align16_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_128x192x64_1x1x1_0_tnn_align16_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x192x32spgemm_f16_f16_f32_f16_f16_128x192x64_1x1x1_0_tnn_align16_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_128x192x64_1x1x1_0_tnn_align16_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -547,9 +547,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x192x32spgemm_f16_f16_f32_f16_f16_128x192x64
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s128x256x32spgemm_f16_f16_f32_f16_f16_128x256x64_1x1x1_0_tnn_align16_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_128x256x64_1x1x1_0_tnn_align16_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x32spgemm_f16_f16_f32_f16_f16_128x256x64_1x1x1_0_tnn_align16_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_128x256x64_1x1x1_0_tnn_align16_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -560,9 +560,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x32spgemm_f16_f16_f32_f16_f16_128x256x64
}
// 5.
TEST(cutlass3x_sm100_sptensorop_s256x64x32spgemm_f16_f16_f32_f16_f16_256x64x64_2x1x1_0_tnn_align16_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_256x64x64_2x1x1_0_tnn_align16_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x64x32spgemm_f16_f16_f32_f16_f16_256x64x64_2x1x1_0_tnn_align16_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_256x64x64_2x1x1_0_tnn_align16_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -573,9 +573,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x64x32spgemm_f16_f16_f32_f16_f16_256x64x64_2
}
//6.
TEST(cutlass3x_sm100_sptensorop_s256x64x32spgemm_f16_f16_f32_f16_f16_256x64x128_2x1x1_0_tnn_align16_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_256x64x128_2x1x1_0_tnn_align16_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x64x32spgemm_f16_f16_f32_f16_f16_256x64x128_2x1x1_0_tnn_align16_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_256x64x128_2x1x1_0_tnn_align16_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -586,9 +586,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x64x32spgemm_f16_f16_f32_f16_f16_256x64x128_
}
// 7.
TEST(cutlass3x_sm100_sptensorop_s256x128x32spgemm_f16_f16_f32_f16_f16_256x128x64_2x1x1_0_tnn_align16_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_256x128x64_2x1x1_0_tnn_align16_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x32spgemm_f16_f16_f32_f16_f16_256x128x64_2x1x1_0_tnn_align16_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_256x128x64_2x1x1_0_tnn_align16_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -599,9 +599,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x32spgemm_f16_f16_f32_f16_f16_256x128x64
}
// 8.
TEST(cutlass3x_sm100_sptensorop_s256x128x32spgemm_f16_f16_f32_f16_f16_256x128x128_2x1x1_0_tnn_align16_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_256x128x128_2x1x1_0_tnn_align16_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x32spgemm_f16_f16_f32_f16_f16_256x128x128_2x1x1_0_tnn_align16_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_256x128x128_2x1x1_0_tnn_align16_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -612,9 +612,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x32spgemm_f16_f16_f32_f16_f16_256x128x12
}
// 9.
TEST(cutlass3x_sm100_sptensorop_s256x192x32spgemm_f16_f16_f32_f16_f16_256x192x64_2x1x1_0_tnn_align16_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_256x192x64_2x1x1_0_tnn_align16_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x192x32spgemm_f16_f16_f32_f16_f16_256x192x64_2x1x1_0_tnn_align16_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_256x192x64_2x1x1_0_tnn_align16_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -625,9 +625,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x192x32spgemm_f16_f16_f32_f16_f16_256x192x64
}
// 10.
TEST(cutlass3x_sm100_sptensorop_s256x256x32spgemm_f16_f16_f32_f16_f16_256x256x64_2x1x1_0_tnn_align16_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_256x256x64_2x1x1_0_tnn_align16_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x32spgemm_f16_f16_f32_f16_f16_256x256x64_2x1x1_0_tnn_align16_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_256x256x64_2x1x1_0_tnn_align16_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -638,9 +638,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x32spgemm_f16_f16_f32_f16_f16_256x256x64
}
// 11.
TEST(cutlass3x_sm100_sptensorop_s256x256x32spgemm_f16_f16_f32_f16_f16_256x256x128_2x1x1_0_tnn_align16_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_256x256x128_2x1x1_0_tnn_align16_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x32spgemm_f16_f16_f32_f16_f16_256x256x128_2x1x1_0_tnn_align16_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_f16_f16_256x256x128_2x1x1_0_tnn_align16_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -651,7 +651,7 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x32spgemm_f16_f16_f32_f16_f16_256x256x12
}
// 1.
namespace cutlass3x_sm100_sptensorop_s128x64x32spgemm_f16_f16_f32_void_f16_128x64x64_1x1x1_0_tnn_align16_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_128x64x64_1x1x1_0_tnn_align16_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -693,7 +693,7 @@ namespace cutlass3x_sm100_sptensorop_s128x64x32spgemm_f16_f16_f32_void_f16_128x6
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x128x32spgemm_f16_f16_f32_void_f16_128x128x64_1x1x1_0_tnn_align16_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_128x128x64_1x1x1_0_tnn_align16_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -735,7 +735,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x32spgemm_f16_f16_f32_void_f16_128x
}
// 3.
namespace cutlass3x_sm100_sptensorop_s128x192x32spgemm_f16_f16_f32_void_f16_128x192x64_1x1x1_0_tnn_align16_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_128x192x64_1x1x1_0_tnn_align16_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -777,7 +777,7 @@ namespace cutlass3x_sm100_sptensorop_s128x192x32spgemm_f16_f16_f32_void_f16_128x
}
// 4.
namespace cutlass3x_sm100_sptensorop_s128x256x32spgemm_f16_f16_f32_void_f16_128x256x64_1x1x1_0_tnn_align16_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_128x256x64_1x1x1_0_tnn_align16_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -819,7 +819,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x32spgemm_f16_f16_f32_void_f16_128x
}
// 5.
namespace cutlass3x_sm100_sptensorop_s256x64x32spgemm_f16_f16_f32_void_f16_256x64x64_2x1x1_0_tnn_align16_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_256x64x64_2x1x1_0_tnn_align16_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -861,7 +861,7 @@ namespace cutlass3x_sm100_sptensorop_s256x64x32spgemm_f16_f16_f32_void_f16_256x6
}
// 6.
namespace cutlass3x_sm100_sptensorop_s256x64x32spgemm_f16_f16_f32_void_f16_256x64x128_2x1x1_0_tnn_align16_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_256x64x128_2x1x1_0_tnn_align16_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -903,7 +903,7 @@ namespace cutlass3x_sm100_sptensorop_s256x64x32spgemm_f16_f16_f32_void_f16_256x6
}
// 7.
namespace cutlass3x_sm100_sptensorop_s256x128x32spgemm_f16_f16_f32_void_f16_256x128x64_2x1x1_0_tnn_align16_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_256x128x64_2x1x1_0_tnn_align16_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -945,7 +945,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x32spgemm_f16_f16_f32_void_f16_256x
}
// 8.
namespace cutlass3x_sm100_sptensorop_s256x128x32spgemm_f16_f16_f32_void_f16_256x128x128_2x1x1_0_tnn_align16_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_256x128x128_2x1x1_0_tnn_align16_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -987,7 +987,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x32spgemm_f16_f16_f32_void_f16_256x
}
// 9.
namespace cutlass3x_sm100_sptensorop_s256x192x32spgemm_f16_f16_f32_void_f16_256x192x64_2x1x1_0_tnn_align16_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_256x192x64_2x1x1_0_tnn_align16_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -1029,7 +1029,7 @@ namespace cutlass3x_sm100_sptensorop_s256x192x32spgemm_f16_f16_f32_void_f16_256x
}
// 10.
namespace cutlass3x_sm100_sptensorop_s256x256x32spgemm_f16_f16_f32_void_f16_256x256x128_2x1x1_0_tnn_align16_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_256x256x128_2x1x1_0_tnn_align16_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -1071,7 +1071,7 @@ namespace cutlass3x_sm100_sptensorop_s256x256x32spgemm_f16_f16_f32_void_f16_256x
}
// 11.
namespace cutlass3x_sm100_sptensorop_s256x256x32spgemm_f16_f16_f32_void_f16_256x256x64_2x1x1_0_tnn_align16_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_256x256x64_2x1x1_0_tnn_align16_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -1113,9 +1113,9 @@ namespace cutlass3x_sm100_sptensorop_s256x256x32spgemm_f16_f16_f32_void_f16_256x
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x64x32spgemm_f16_f16_f32_void_f16_128x64x64_1x1x1_0_tnn_align16_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_128x64x64_1x1x1_0_tnn_align16_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x64x32spgemm_f16_f16_f32_void_f16_128x64x64_1x1x1_0_tnn_align16_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_128x64x64_1x1x1_0_tnn_align16_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1126,9 +1126,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x64x32spgemm_f16_f16_f32_void_f16_128x64x64_
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x128x32spgemm_f16_f16_f32_void_f16_128x128x64_1x1x1_0_tnn_align16_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_128x128x64_1x1x1_0_tnn_align16_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x32spgemm_f16_f16_f32_void_f16_128x128x64_1x1x1_0_tnn_align16_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_128x128x64_1x1x1_0_tnn_align16_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1139,9 +1139,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x32spgemm_f16_f16_f32_void_f16_128x128x6
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s128x192x32spgemm_f16_f16_f32_void_f16_128x192x64_1x1x1_0_tnn_align16_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_128x192x64_1x1x1_0_tnn_align16_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x192x32spgemm_f16_f16_f32_void_f16_128x192x64_1x1x1_0_tnn_align16_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_128x192x64_1x1x1_0_tnn_align16_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1152,9 +1152,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x192x32spgemm_f16_f16_f32_void_f16_128x192x6
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s128x256x32spgemm_f16_f16_f32_void_f16_128x256x64_1x1x1_0_tnn_align16_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_128x256x64_1x1x1_0_tnn_align16_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x32spgemm_f16_f16_f32_void_f16_128x256x64_1x1x1_0_tnn_align16_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_128x256x64_1x1x1_0_tnn_align16_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1165,9 +1165,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x32spgemm_f16_f16_f32_void_f16_128x256x6
}
// 5.
TEST(cutlass3x_sm100_sptensorop_s256x64x32spgemm_f16_f16_f32_void_f16_256x64x64_2x1x1_0_tnn_align16_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_256x64x64_2x1x1_0_tnn_align16_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x64x32spgemm_f16_f16_f32_void_f16_256x64x64_2x1x1_0_tnn_align16_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_256x64x64_2x1x1_0_tnn_align16_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1178,9 +1178,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x64x32spgemm_f16_f16_f32_void_f16_256x64x64_
}
// 6.
TEST(cutlass3x_sm100_sptensorop_s256x64x32spgemm_f16_f16_f32_void_f16_256x64x128_2x1x1_0_tnn_align16_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_256x64x128_2x1x1_0_tnn_align16_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x64x32spgemm_f16_f16_f32_void_f16_256x64x128_2x1x1_0_tnn_align16_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_256x64x128_2x1x1_0_tnn_align16_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1191,9 +1191,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x64x32spgemm_f16_f16_f32_void_f16_256x64x128
}
// 7.
TEST(cutlass3x_sm100_sptensorop_s256x128x32spgemm_f16_f16_f32_void_f16_256x128x64_2x1x1_0_tnn_align16_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_256x128x64_2x1x1_0_tnn_align16_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x32spgemm_f16_f16_f32_void_f16_256x128x64_2x1x1_0_tnn_align16_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_256x128x64_2x1x1_0_tnn_align16_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1204,9 +1204,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x32spgemm_f16_f16_f32_void_f16_256x128x6
}
// 8.
TEST(cutlass3x_sm100_sptensorop_s256x128x32spgemm_f16_f16_f32_void_f16_256x128x128_2x1x1_0_tnn_align16_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_256x128x128_2x1x1_0_tnn_align16_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x32spgemm_f16_f16_f32_void_f16_256x128x128_2x1x1_0_tnn_align16_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_256x128x128_2x1x1_0_tnn_align16_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1217,9 +1217,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x32spgemm_f16_f16_f32_void_f16_256x128x1
}
// 9.
TEST(cutlass3x_sm100_sptensorop_s256x192x32spgemm_f16_f16_f32_void_f16_256x192x64_2x1x1_0_tnn_align16_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_256x192x64_2x1x1_0_tnn_align16_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x192x32spgemm_f16_f16_f32_void_f16_256x192x64_2x1x1_0_tnn_align16_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_256x192x64_2x1x1_0_tnn_align16_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1230,9 +1230,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x192x32spgemm_f16_f16_f32_void_f16_256x192x6
}
// 10.
TEST(cutlass3x_sm100_sptensorop_s256x256x32spgemm_f16_f16_f32_void_f16_256x256x64_2x1x1_0_tnn_align16_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_256x256x64_2x1x1_0_tnn_align16_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x32spgemm_f16_f16_f32_void_f16_256x256x64_2x1x1_0_tnn_align16_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_256x256x64_2x1x1_0_tnn_align16_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1243,9 +1243,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x32spgemm_f16_f16_f32_void_f16_256x256x6
}
// 11.
TEST(cutlass3x_sm100_sptensorop_s256x256x32spgemm_f16_f16_f32_void_f16_256x256x128_2x1x1_0_tnn_align16_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_256x256x128_2x1x1_0_tnn_align16_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x32spgemm_f16_f16_f32_void_f16_256x256x128_2x1x1_0_tnn_align16_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f16_f16_f32_void_f16_256x256x128_2x1x1_0_tnn_align16_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,

View File

@ -47,7 +47,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_sptensorop_s128x64x16spgemm_f32_f32_f32_f32_f32_128x64x32_1x1x1_0_tnn_align8_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_128x64x32_1x1x1_0_tnn_align8_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -89,7 +89,7 @@ namespace cutlass3x_sm100_sptensorop_s128x64x16spgemm_f32_f32_f32_f32_f32_128x64
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x128x16spgemm_f32_f32_f32_f32_f32_128x128x32_1x1x1_0_tnn_align8_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_128x128x32_1x1x1_0_tnn_align8_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -131,7 +131,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x16spgemm_f32_f32_f32_f32_f32_128x1
}
// 3.
namespace cutlass3x_sm100_sptensorop_s128x192x16spgemm_f32_f32_f32_f32_f32_128x192x32_1x1x1_0_tnn_align8_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_128x192x32_1x1x1_0_tnn_align8_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -173,7 +173,7 @@ namespace cutlass3x_sm100_sptensorop_s128x192x16spgemm_f32_f32_f32_f32_f32_128x1
}
// 4.
namespace cutlass3x_sm100_sptensorop_s128x256x16spgemm_f32_f32_f32_f32_f32_128x256x32_1x1x1_0_tnn_align8_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_128x256x32_1x1x1_0_tnn_align8_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -215,7 +215,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x16spgemm_f32_f32_f32_f32_f32_128x2
}
// 5.
namespace cutlass3x_sm100_sptensorop_s256x64x16spgemm_f32_f32_f32_f32_f32_256x64x32_2x1x1_0_tnn_align8_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_256x64x32_2x1x1_0_tnn_align8_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -257,7 +257,7 @@ namespace cutlass3x_sm100_sptensorop_s256x64x16spgemm_f32_f32_f32_f32_f32_256x64
}
// 6.
namespace cutlass3x_sm100_sptensorop_s256x64x16spgemm_f32_f32_f32_f32_f32_256x64x64_2x1x1_0_tnn_align8_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_256x64x64_2x1x1_0_tnn_align8_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -299,7 +299,7 @@ namespace cutlass3x_sm100_sptensorop_s256x64x16spgemm_f32_f32_f32_f32_f32_256x64
}
// 7.
namespace cutlass3x_sm100_sptensorop_s256x128x16spgemm_f32_f32_f32_f32_f32_256x128x32_2x1x1_0_tnn_align8_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_256x128x32_2x1x1_0_tnn_align8_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -341,7 +341,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x16spgemm_f32_f32_f32_f32_f32_256x1
}
// 8.
namespace cutlass3x_sm100_sptensorop_s256x128x16spgemm_f32_f32_f32_f32_f32_256x128x64_2x1x1_0_tnn_align8_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_256x128x64_2x1x1_0_tnn_align8_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -383,7 +383,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x16spgemm_f32_f32_f32_f32_f32_256x1
}
// 9.
namespace cutlass3x_sm100_sptensorop_s256x192x16spgemm_f32_f32_f32_f32_f32_256x192x32_2x1x1_0_tnn_align8_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_256x192x32_2x1x1_0_tnn_align8_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -425,7 +425,7 @@ namespace cutlass3x_sm100_sptensorop_s256x192x16spgemm_f32_f32_f32_f32_f32_256x1
}
// 10.
namespace cutlass3x_sm100_sptensorop_s256x256x16spgemm_f32_f32_f32_f32_f32_256x256x32_2x1x1_0_tnn_align8_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_256x256x32_2x1x1_0_tnn_align8_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -467,7 +467,7 @@ namespace cutlass3x_sm100_sptensorop_s256x256x16spgemm_f32_f32_f32_f32_f32_256x2
}
// 11.
namespace cutlass3x_sm100_sptensorop_s256x256x16spgemm_f32_f32_f32_f32_f32_256x256x64_2x1x1_0_tnn_align8_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_256x256x64_2x1x1_0_tnn_align8_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -509,9 +509,9 @@ namespace cutlass3x_sm100_sptensorop_s256x256x16spgemm_f32_f32_f32_f32_f32_256x2
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x64x16spgemm_f32_f32_f32_f32_f32_128x64x32_1x1x1_0_tnn_align8_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_128x64x32_1x1x1_0_tnn_align8_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x64x16spgemm_f32_f32_f32_f32_f32_128x64x32_1x1x1_0_tnn_align8_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_128x64x32_1x1x1_0_tnn_align8_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -522,9 +522,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x64x16spgemm_f32_f32_f32_f32_f32_128x64x32_1
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x128x16spgemm_f32_f32_f32_f32_f32_128x128x32_1x1x1_0_tnn_align8_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_128x128x32_1x1x1_0_tnn_align8_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x16spgemm_f32_f32_f32_f32_f32_128x128x32_1x1x1_0_tnn_align8_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_128x128x32_1x1x1_0_tnn_align8_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -535,9 +535,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x16spgemm_f32_f32_f32_f32_f32_128x128x32
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s128x192x16spgemm_f32_f32_f32_f32_f32_128x192x32_1x1x1_0_tnn_align8_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_128x192x32_1x1x1_0_tnn_align8_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x192x16spgemm_f32_f32_f32_f32_f32_128x192x32_1x1x1_0_tnn_align8_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_128x192x32_1x1x1_0_tnn_align8_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -548,9 +548,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x192x16spgemm_f32_f32_f32_f32_f32_128x192x32
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s128x256x16spgemm_f32_f32_f32_f32_f32_128x256x32_1x1x1_0_tnn_align8_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_128x256x32_1x1x1_0_tnn_align8_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x16spgemm_f32_f32_f32_f32_f32_128x256x32_1x1x1_0_tnn_align8_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_128x256x32_1x1x1_0_tnn_align8_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -561,9 +561,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x16spgemm_f32_f32_f32_f32_f32_128x256x32
}
// 5.
TEST(cutlass3x_sm100_sptensorop_s256x64x16spgemm_f32_f32_f32_f32_f32_256x64x32_2x1x1_0_tnn_align8_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_256x64x32_2x1x1_0_tnn_align8_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x64x16spgemm_f32_f32_f32_f32_f32_256x64x32_2x1x1_0_tnn_align8_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_256x64x32_2x1x1_0_tnn_align8_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -574,9 +574,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x64x16spgemm_f32_f32_f32_f32_f32_256x64x32_2
}
//6.
TEST(cutlass3x_sm100_sptensorop_s256x64x16spgemm_f32_f32_f32_f32_f32_256x64x64_2x1x1_0_tnn_align8_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_256x64x64_2x1x1_0_tnn_align8_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x64x16spgemm_f32_f32_f32_f32_f32_256x64x64_2x1x1_0_tnn_align8_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_256x64x64_2x1x1_0_tnn_align8_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -587,9 +587,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x64x16spgemm_f32_f32_f32_f32_f32_256x64x64_2
}
// 7.
TEST(cutlass3x_sm100_sptensorop_s256x128x16spgemm_f32_f32_f32_f32_f32_256x128x32_2x1x1_0_tnn_align8_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_256x128x32_2x1x1_0_tnn_align8_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x16spgemm_f32_f32_f32_f32_f32_256x128x32_2x1x1_0_tnn_align8_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_256x128x32_2x1x1_0_tnn_align8_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -600,9 +600,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x16spgemm_f32_f32_f32_f32_f32_256x128x32
}
// 8.
TEST(cutlass3x_sm100_sptensorop_s256x128x16spgemm_f32_f32_f32_f32_f32_256x128x64_2x1x1_0_tnn_align8_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_256x128x64_2x1x1_0_tnn_align8_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x16spgemm_f32_f32_f32_f32_f32_256x128x64_2x1x1_0_tnn_align8_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_256x128x64_2x1x1_0_tnn_align8_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -613,9 +613,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x16spgemm_f32_f32_f32_f32_f32_256x128x64
}
// 9.
TEST(cutlass3x_sm100_sptensorop_s256x192x16spgemm_f32_f32_f32_f32_f32_256x192x32_2x1x1_0_tnn_align8_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_256x192x32_2x1x1_0_tnn_align8_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x192x16spgemm_f32_f32_f32_f32_f32_256x192x32_2x1x1_0_tnn_align8_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_256x192x32_2x1x1_0_tnn_align8_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -626,9 +626,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x192x16spgemm_f32_f32_f32_f32_f32_256x192x32
}
// 10.
TEST(cutlass3x_sm100_sptensorop_s256x256x16spgemm_f32_f32_f32_f32_f32_256x256x32_2x1x1_0_tnn_align8_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_256x256x32_2x1x1_0_tnn_align8_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x16spgemm_f32_f32_f32_f32_f32_256x256x32_2x1x1_0_tnn_align8_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_256x256x32_2x1x1_0_tnn_align8_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -639,9 +639,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x16spgemm_f32_f32_f32_f32_f32_256x256x32
}
// 11.
TEST(cutlass3x_sm100_sptensorop_s256x256x16spgemm_f32_f32_f32_f32_f32_256x256x64_2x1x1_0_tnn_align8_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_256x256x64_2x1x1_0_tnn_align8_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x16spgemm_f32_f32_f32_f32_f32_256x256x64_2x1x1_0_tnn_align8_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_f32_f32_256x256x64_2x1x1_0_tnn_align8_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -652,7 +652,7 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x16spgemm_f32_f32_f32_f32_f32_256x256x64
}
// 1.
namespace cutlass3x_sm100_sptensorop_s128x64x16spgemm_f32_f32_f32_void_f32_128x64x32_1x1x1_0_tnn_align8_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_128x64x32_1x1x1_0_tnn_align8_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -694,7 +694,7 @@ namespace cutlass3x_sm100_sptensorop_s128x64x16spgemm_f32_f32_f32_void_f32_128x6
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x128x16spgemm_f32_f32_f32_void_f32_128x128x32_1x1x1_0_tnn_align8_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_128x128x32_1x1x1_0_tnn_align8_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -736,7 +736,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x16spgemm_f32_f32_f32_void_f32_128x
}
// 3.
namespace cutlass3x_sm100_sptensorop_s128x192x16spgemm_f32_f32_f32_void_f32_128x192x32_1x1x1_0_tnn_align8_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_128x192x32_1x1x1_0_tnn_align8_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -778,7 +778,7 @@ namespace cutlass3x_sm100_sptensorop_s128x192x16spgemm_f32_f32_f32_void_f32_128x
}
// 4.
namespace cutlass3x_sm100_sptensorop_s128x256x16spgemm_f32_f32_f32_void_f32_128x256x32_1x1x1_0_tnn_align8_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_128x256x32_1x1x1_0_tnn_align8_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -820,7 +820,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x16spgemm_f32_f32_f32_void_f32_128x
}
// 5.
namespace cutlass3x_sm100_sptensorop_s256x64x16spgemm_f32_f32_f32_void_f32_256x64x32_2x1x1_0_tnn_align8_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_256x64x32_2x1x1_0_tnn_align8_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -862,7 +862,7 @@ namespace cutlass3x_sm100_sptensorop_s256x64x16spgemm_f32_f32_f32_void_f32_256x6
}
// 6.
namespace cutlass3x_sm100_sptensorop_s256x64x16spgemm_f32_f32_f32_void_f32_256x64x64_2x1x1_0_tnn_align8_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_256x64x64_2x1x1_0_tnn_align8_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -904,7 +904,7 @@ namespace cutlass3x_sm100_sptensorop_s256x64x16spgemm_f32_f32_f32_void_f32_256x6
}
// 7.
namespace cutlass3x_sm100_sptensorop_s256x128x16spgemm_f32_f32_f32_void_f32_256x128x32_2x1x1_0_tnn_align8_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_256x128x32_2x1x1_0_tnn_align8_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -946,7 +946,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x16spgemm_f32_f32_f32_void_f32_256x
}
// 8.
namespace cutlass3x_sm100_sptensorop_s256x128x16spgemm_f32_f32_f32_void_f32_256x128x64_2x1x1_0_tnn_align8_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_256x128x64_2x1x1_0_tnn_align8_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -988,7 +988,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x16spgemm_f32_f32_f32_void_f32_256x
}
// 9.
namespace cutlass3x_sm100_sptensorop_s256x192x16spgemm_f32_f32_f32_void_f32_256x192x32_2x1x1_0_tnn_align8_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_256x192x32_2x1x1_0_tnn_align8_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -1030,7 +1030,7 @@ namespace cutlass3x_sm100_sptensorop_s256x192x16spgemm_f32_f32_f32_void_f32_256x
}
// 10.
namespace cutlass3x_sm100_sptensorop_s256x256x16spgemm_f32_f32_f32_void_f32_256x256x64_2x1x1_0_tnn_align8_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_256x256x64_2x1x1_0_tnn_align8_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -1072,7 +1072,7 @@ namespace cutlass3x_sm100_sptensorop_s256x256x16spgemm_f32_f32_f32_void_f32_256x
}
// 11.
namespace cutlass3x_sm100_sptensorop_s256x256x16spgemm_f32_f32_f32_void_f32_256x256x32_2x1x1_0_tnn_align8_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_256x256x32_2x1x1_0_tnn_align8_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -1114,9 +1114,9 @@ namespace cutlass3x_sm100_sptensorop_s256x256x16spgemm_f32_f32_f32_void_f32_256x
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x64x16spgemm_f32_f32_f32_void_f32_128x64x32_1x1x1_0_tnn_align8_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_128x64x32_1x1x1_0_tnn_align8_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x64x16spgemm_f32_f32_f32_void_f32_128x64x32_1x1x1_0_tnn_align8_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_128x64x32_1x1x1_0_tnn_align8_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1127,9 +1127,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x64x16spgemm_f32_f32_f32_void_f32_128x64x32_
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x128x16spgemm_f32_f32_f32_void_f32_128x128x32_1x1x1_0_tnn_align8_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_128x128x32_1x1x1_0_tnn_align8_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x16spgemm_f32_f32_f32_void_f32_128x128x32_1x1x1_0_tnn_align8_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_128x128x32_1x1x1_0_tnn_align8_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1140,9 +1140,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x16spgemm_f32_f32_f32_void_f32_128x128x3
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s128x192x16spgemm_f32_f32_f32_void_f32_128x192x32_1x1x1_0_tnn_align8_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_128x192x32_1x1x1_0_tnn_align8_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x192x16spgemm_f32_f32_f32_void_f32_128x192x32_1x1x1_0_tnn_align8_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_128x192x32_1x1x1_0_tnn_align8_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1153,9 +1153,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x192x16spgemm_f32_f32_f32_void_f32_128x192x3
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s128x256x16spgemm_f32_f32_f32_void_f32_128x256x32_1x1x1_0_tnn_align8_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_128x256x32_1x1x1_0_tnn_align8_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x16spgemm_f32_f32_f32_void_f32_128x256x32_1x1x1_0_tnn_align8_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_128x256x32_1x1x1_0_tnn_align8_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1166,9 +1166,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x16spgemm_f32_f32_f32_void_f32_128x256x3
}
// 5.
TEST(cutlass3x_sm100_sptensorop_s256x64x16spgemm_f32_f32_f32_void_f32_256x64x32_2x1x1_0_tnn_align8_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_256x64x32_2x1x1_0_tnn_align8_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x64x16spgemm_f32_f32_f32_void_f32_256x64x32_2x1x1_0_tnn_align8_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_256x64x32_2x1x1_0_tnn_align8_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1179,9 +1179,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x64x16spgemm_f32_f32_f32_void_f32_256x64x32_
}
// 6.
TEST(cutlass3x_sm100_sptensorop_s256x64x16spgemm_f32_f32_f32_void_f32_256x64x64_2x1x1_0_tnn_align8_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_256x64x64_2x1x1_0_tnn_align8_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x64x16spgemm_f32_f32_f32_void_f32_256x64x64_2x1x1_0_tnn_align8_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_256x64x64_2x1x1_0_tnn_align8_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1192,9 +1192,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x64x16spgemm_f32_f32_f32_void_f32_256x64x64_
}
// 7.
TEST(cutlass3x_sm100_sptensorop_s256x128x16spgemm_f32_f32_f32_void_f32_256x128x32_2x1x1_0_tnn_align8_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_256x128x32_2x1x1_0_tnn_align8_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x16spgemm_f32_f32_f32_void_f32_256x128x32_2x1x1_0_tnn_align8_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_256x128x32_2x1x1_0_tnn_align8_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1205,9 +1205,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x16spgemm_f32_f32_f32_void_f32_256x128x3
}
// 8.
TEST(cutlass3x_sm100_sptensorop_s256x128x16spgemm_f32_f32_f32_void_f32_256x128x64_2x1x1_0_tnn_align8_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_256x128x64_2x1x1_0_tnn_align8_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x16spgemm_f32_f32_f32_void_f32_256x128x64_2x1x1_0_tnn_align8_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_256x128x64_2x1x1_0_tnn_align8_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1218,9 +1218,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x16spgemm_f32_f32_f32_void_f32_256x128x6
}
// 9.
TEST(cutlass3x_sm100_sptensorop_s256x192x16spgemm_f32_f32_f32_void_f32_256x192x32_2x1x1_0_tnn_align8_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_256x192x32_2x1x1_0_tnn_align8_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x192x16spgemm_f32_f32_f32_void_f32_256x192x32_2x1x1_0_tnn_align8_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_256x192x32_2x1x1_0_tnn_align8_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1231,9 +1231,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x192x16spgemm_f32_f32_f32_void_f32_256x192x3
}
// 10.
TEST(cutlass3x_sm100_sptensorop_s256x256x16spgemm_f32_f32_f32_void_f32_256x256x32_2x1x1_0_tnn_align8_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_256x256x32_2x1x1_0_tnn_align8_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x16spgemm_f32_f32_f32_void_f32_256x256x32_2x1x1_0_tnn_align8_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_256x256x32_2x1x1_0_tnn_align8_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1244,9 +1244,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x16spgemm_f32_f32_f32_void_f32_256x256x3
}
// 11.
TEST(cutlass3x_sm100_sptensorop_s256x256x16spgemm_f32_f32_f32_void_f32_256x256x64_2x1x1_0_tnn_align8_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_256x256x64_2x1x1_0_tnn_align8_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x16spgemm_f32_f32_f32_void_f32_256x256x64_2x1x1_0_tnn_align8_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_f32_f32_f32_void_f32_256x256x64_2x1x1_0_tnn_align8_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,

View File

@ -48,7 +48,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_f16_f16_128x64x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_128x64x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -90,7 +90,7 @@ namespace cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_f16_f16_128x
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_f16_f16_128x128x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_128x128x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_f16_f16_128
}
// 3.
namespace cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_f16_f16_128x192x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_128x192x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -174,7 +174,7 @@ namespace cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_f16_f16_128
}
// 4.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_f16_f16_128x256x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_128x256x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -216,7 +216,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_f16_f16_128
}
// 5.
namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f16_f16_256x64x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_256x64x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -258,7 +258,7 @@ namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f16_f16_256x
}
// 6.
namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f16_f16_256x64x256_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_256x64x256_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -300,7 +300,7 @@ namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f16_f16_256x
}
// 7.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f16_f16_256x128x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_256x128x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -342,7 +342,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f16_f16_256
}
// 8.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f16_f16_256x128x256_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_256x128x256_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -384,7 +384,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f16_f16_256
}
// 9.
namespace cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_f16_f16_256x192x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_256x192x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -426,7 +426,7 @@ namespace cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_f16_f16_256
}
// 10.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f16_f16_256x256x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_256x256x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -468,7 +468,7 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f16_f16_256
}
// 11.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f16_f16_256x256x256_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_256x256x256_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -510,9 +510,9 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f16_f16_256
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_f16_f16_128x64x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_128x64x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_f16_f16_128x64x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_128x64x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -523,9 +523,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_f16_f16_128x64x12
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_f16_f16_128x128x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_128x128x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_f16_f16_128x128x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_128x128x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -536,9 +536,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_f16_f16_128x128x
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_f16_f16_128x192x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_128x192x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_f16_f16_128x192x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_128x192x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -549,9 +549,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_f16_f16_128x192x
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_f16_f16_128x256x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_128x256x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_f16_f16_128x256x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_128x256x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -562,9 +562,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_f16_f16_128x256x
}
// 5.
TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f16_f16_256x64x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_256x64x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f16_f16_256x64x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_256x64x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -575,9 +575,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f16_f16_256x64x12
}
//6.
TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f16_f16_256x64x256_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_256x64x256_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f16_f16_256x64x256_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_256x64x256_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -588,9 +588,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f16_f16_256x64x25
}
// 7.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f16_f16_256x128x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_256x128x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f16_f16_256x128x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_256x128x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -601,9 +601,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f16_f16_256x128x
}
// 8.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f16_f16_256x128x256_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_256x128x256_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f16_f16_256x128x256_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_256x128x256_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -614,9 +614,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f16_f16_256x128x
}
// 9.
TEST(cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_f16_f16_256x192x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_256x192x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_f16_f16_256x192x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_256x192x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -627,9 +627,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_f16_f16_256x192x
}
// 10.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f16_f16_256x256x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_256x256x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f16_f16_256x256x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_256x256x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -640,9 +640,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f16_f16_256x256x
}
// 11.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f16_f16_256x256x256_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_256x256x256_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f16_f16_256x256x256_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_f16_256x256x256_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -653,7 +653,7 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f16_f16_256x256x
}
// 1.
namespace cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_void_f16_128x64x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_128x64x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -695,7 +695,7 @@ namespace cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_void_f16_128
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_void_f16_128x128x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_128x128x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -737,7 +737,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_void_f16_12
}
// 3.
namespace cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_void_f16_128x192x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_128x192x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -779,7 +779,7 @@ namespace cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_void_f16_12
}
// 4.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_void_f16_128x256x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_128x256x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -821,7 +821,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_void_f16_12
}
// 5.
namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_f16_256x64x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_256x64x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -863,7 +863,7 @@ namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_f16_256
}
// 6.
namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_f16_256x64x256_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_256x64x256_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -905,7 +905,7 @@ namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_f16_256
}
// 7.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_f16_256x128x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_256x128x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -947,7 +947,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_f16_25
}
// 8.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_f16_256x128x256_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_256x128x256_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -989,7 +989,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_f16_25
}
// 9.
namespace cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_void_f16_256x192x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_256x192x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -1031,7 +1031,7 @@ namespace cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_void_f16_25
}
// 10.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_f16_256x256x256_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_256x256x256_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -1073,7 +1073,7 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_f16_25
}
// 11.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_f16_256x256x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_256x256x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -1115,9 +1115,9 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_f16_25
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_void_f16_128x64x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_128x64x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_void_f16_128x64x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_128x64x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1128,9 +1128,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_void_f16_128x64x1
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_void_f16_128x128x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_128x128x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_void_f16_128x128x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_128x128x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1141,9 +1141,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_void_f16_128x128
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_void_f16_128x192x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_128x192x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_void_f16_128x192x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_128x192x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1154,9 +1154,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_void_f16_128x192
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_void_f16_128x256x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_128x256x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_void_f16_128x256x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_128x256x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1167,9 +1167,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_void_f16_128x256
}
// 5.
TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_f16_256x64x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_256x64x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_f16_256x64x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_256x64x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1180,9 +1180,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_f16_256x64x1
}
// 6.
TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_f16_256x64x256_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_256x64x256_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_f16_256x64x256_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_256x64x256_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1193,9 +1193,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_f16_256x64x2
}
// 7.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_f16_256x128x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_256x128x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_f16_256x128x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_256x128x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1206,9 +1206,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_f16_256x128
}
// 8.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_f16_256x128x256_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_256x128x256_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_f16_256x128x256_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_256x128x256_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1219,9 +1219,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_f16_256x128
}
// 9.
TEST(cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_void_f16_256x192x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_256x192x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_void_f16_256x192x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_256x192x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1232,9 +1232,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_void_f16_256x192
}
// 10.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_f16_256x256x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_256x256x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_f16_256x256x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_256x256x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1245,9 +1245,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_f16_256x256
}
// 11.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_f16_256x256x256_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_256x256x256_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_f16_256x256x256_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f16_256x256x256_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,

View File

@ -48,7 +48,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_f16_e4m3_128x64x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_128x64x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -90,7 +90,7 @@ namespace cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_f16_e4m3_128
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_f16_e4m3_128x128x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_128x128x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_f16_e4m3_12
}
// 3.
namespace cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_f16_e4m3_128x192x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_128x192x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -174,7 +174,7 @@ namespace cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_f16_e4m3_12
}
// 4.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_f16_e4m3_128x256x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_128x256x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -216,7 +216,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_f16_e4m3_12
}
// 5.
namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x64x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_256x64x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -258,7 +258,7 @@ namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f16_e4m3_256
}
// 6.
namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x64x256_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_256x64x256_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -300,7 +300,7 @@ namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f16_e4m3_256
}
// 7.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x128x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_256x128x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -342,7 +342,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f16_e4m3_25
}
// 8.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x128x256_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_256x128x256_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -384,7 +384,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f16_e4m3_25
}
// 9.
namespace cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x192x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_256x192x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -426,7 +426,7 @@ namespace cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_f16_e4m3_25
}
// 10.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x256x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_256x256x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -468,7 +468,7 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f16_e4m3_25
}
// 11.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x256x256_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_256x256x256_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -510,9 +510,9 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f16_e4m3_25
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_f16_e4m3_128x64x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_128x64x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_f16_e4m3_128x64x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_128x64x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -523,9 +523,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_f16_e4m3_128x64x1
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_f16_e4m3_128x128x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_128x128x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_f16_e4m3_128x128x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_128x128x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -536,9 +536,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_f16_e4m3_128x128
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_f16_e4m3_128x192x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_128x192x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_f16_e4m3_128x192x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_128x192x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -549,9 +549,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_f16_e4m3_128x192
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_f16_e4m3_128x256x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_128x256x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_f16_e4m3_128x256x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_128x256x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -562,9 +562,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_f16_e4m3_128x256
}
// 5.
TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x64x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_256x64x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x64x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_256x64x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -575,9 +575,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x64x1
}
//6.
TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x64x256_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_256x64x256_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x64x256_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_256x64x256_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -588,9 +588,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x64x2
}
// 7.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x128x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_256x128x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x128x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_256x128x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -601,9 +601,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x128
}
// 8.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x128x256_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_256x128x256_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x128x256_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_256x128x256_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -614,9 +614,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x128
}
// 9.
TEST(cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x192x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_256x192x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x192x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_256x192x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -627,9 +627,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x192
}
// 10.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x256x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_256x256x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x256x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_256x256x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -640,9 +640,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x256
}
// 11.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x256x256_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_256x256x256_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x256x256_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f16_e4m3_256x256x256_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -653,7 +653,7 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f16_e4m3_256x256
}
// 1.
namespace cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_void_e4m3_128x64x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_128x64x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -695,7 +695,7 @@ namespace cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_void_e4m3_12
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_void_e4m3_128x128x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_128x128x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -737,7 +737,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_void_e4m3_1
}
// 3.
namespace cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_void_e4m3_128x192x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_128x192x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -779,7 +779,7 @@ namespace cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_void_e4m3_1
}
// 4.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_void_e4m3_128x256x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_128x256x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -821,7 +821,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_void_e4m3_1
}
// 5.
namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_e4m3_256x64x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_256x64x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -863,7 +863,7 @@ namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_e4m3_25
}
// 6.
namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_e4m3_256x64x256_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_256x64x256_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -905,7 +905,7 @@ namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_e4m3_25
}
// 7.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_e4m3_256x128x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_256x128x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -947,7 +947,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_e4m3_2
}
// 8.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_e4m3_256x128x256_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_256x128x256_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -989,7 +989,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_e4m3_2
}
// 9.
namespace cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_void_e4m3_256x192x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_256x192x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -1031,7 +1031,7 @@ namespace cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_void_e4m3_2
}
// 10.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_e4m3_256x256x256_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_256x256x256_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -1073,7 +1073,7 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_e4m3_2
}
// 11.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_e4m3_256x256x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_256x256x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -1115,9 +1115,9 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_e4m3_2
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_void_e4m3_128x64x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_128x64x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_void_e4m3_128x64x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_128x64x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1128,9 +1128,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_void_e4m3_128x64x
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_void_e4m3_128x128x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_128x128x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_void_e4m3_128x128x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_128x128x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1141,9 +1141,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_void_e4m3_128x12
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_void_e4m3_128x192x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_128x192x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_void_e4m3_128x192x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_128x192x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1154,9 +1154,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_void_e4m3_128x19
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_void_e4m3_128x256x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_128x256x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_void_e4m3_128x256x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_128x256x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1167,9 +1167,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_void_e4m3_128x25
}
// 5.
TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_e4m3_256x64x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_256x64x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_e4m3_256x64x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_256x64x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1180,9 +1180,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_e4m3_256x64x
}
// 6.
TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_e4m3_256x64x256_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_256x64x256_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_e4m3_256x64x256_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_256x64x256_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1193,9 +1193,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_e4m3_256x64x
}
// 7.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_e4m3_256x128x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_256x128x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_e4m3_256x128x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_256x128x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1206,9 +1206,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_e4m3_256x12
}
// 8.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_e4m3_256x128x256_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_256x128x256_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_e4m3_256x128x256_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_256x128x256_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1219,9 +1219,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_e4m3_256x12
}
// 9.
TEST(cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_void_e4m3_256x192x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_256x192x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_void_e4m3_256x192x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_256x192x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1232,9 +1232,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_void_e4m3_256x19
}
// 10.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_e4m3_256x256x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_256x256x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_e4m3_256x256x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_256x256x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1245,9 +1245,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_e4m3_256x25
}
// 11.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_e4m3_256x256x256_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_256x256x256_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_e4m3_256x256x256_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_e4m3_256x256x256_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,

View File

@ -48,7 +48,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_f32_f32_128x64x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_128x64x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -90,7 +90,7 @@ namespace cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_f32_f32_128x
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_f32_f32_128x128x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_128x128x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -132,7 +132,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_f32_f32_128
}
// 3.
namespace cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_f32_f32_128x192x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_128x192x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -174,7 +174,7 @@ namespace cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_f32_f32_128
}
// 4.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_f32_f32_128x256x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_128x256x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -216,7 +216,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_f32_f32_128
}
// 5.
namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f32_f32_256x64x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_256x64x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -258,7 +258,7 @@ namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f32_f32_256x
}
// 6.
namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f32_f32_256x64x256_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_256x64x256_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -300,7 +300,7 @@ namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f32_f32_256x
}
// 7.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f32_f32_256x128x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_256x128x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -342,7 +342,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f32_f32_256
}
// 8.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f32_f32_256x128x256_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_256x128x256_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -384,7 +384,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f32_f32_256
}
// 9.
namespace cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_f32_f32_256x192x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_256x192x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -426,7 +426,7 @@ namespace cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_f32_f32_256
}
// 10.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f32_f32_256x256x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_256x256x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -468,7 +468,7 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f32_f32_256
}
// 11.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f32_f32_256x256x256_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_256x256x256_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -510,9 +510,9 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f32_f32_256
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_f32_f32_128x64x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_128x64x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_f32_f32_128x64x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_128x64x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -523,9 +523,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_f32_f32_128x64x12
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_f32_f32_128x128x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_128x128x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_f32_f32_128x128x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_128x128x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -536,9 +536,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_f32_f32_128x128x
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_f32_f32_128x192x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_128x192x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_f32_f32_128x192x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_128x192x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -549,9 +549,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_f32_f32_128x192x
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_f32_f32_128x256x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_128x256x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_f32_f32_128x256x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_128x256x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -561,9 +561,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_f32_f32_128x256x
{512}));
}
// 5.
TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f32_f32_256x64x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_256x64x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f32_f32_256x64x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_256x64x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -574,9 +574,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f32_f32_256x64x12
}
// 6.
TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f32_f32_256x64x256_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_256x64x256_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f32_f32_256x64x256_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_256x64x256_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -587,9 +587,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_f32_f32_256x64x25
}
// 7.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f32_f32_256x128x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_256x128x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f32_f32_256x128x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_256x128x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -600,9 +600,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f32_f32_256x128x
}
// 8.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f32_f32_256x128x256_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_256x128x256_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f32_f32_256x128x256_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_256x128x256_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -612,9 +612,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_f32_f32_256x128x
{512}));
}
// 9.
TEST(cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_f32_f32_256x192x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_256x192x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_f32_f32_256x192x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_256x192x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -625,9 +625,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_f32_f32_256x192x
}
// 10.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f32_f32_256x256x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_256x256x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f32_f32_256x256x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_256x256x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -638,9 +638,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f32_f32_256x256x
}
// 11.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f32_f32_256x256x256_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_256x256x256_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f32_f32_256x256x256_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_f32_f32_256x256x256_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -651,7 +651,7 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_f32_f32_256x256x
}
// 1.
namespace cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_void_f32_128x64x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_128x64x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -693,7 +693,7 @@ namespace cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_void_f32_128
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_void_f32_128x128x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_128x128x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -735,7 +735,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_void_f32_12
}
// 3.
namespace cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_void_f32_128x192x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_128x192x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -777,7 +777,7 @@ namespace cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_void_f32_12
}
// 4.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_void_f32_128x256x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_128x256x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -819,7 +819,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_void_f32_12
}
// 5.
namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_f32_256x64x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_256x64x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -861,7 +861,7 @@ namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_f32_256
}
// 6.
namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_f32_256x64x256_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_256x64x256_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -903,7 +903,7 @@ namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_f32_256
}
// 7.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_f32_256x128x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_256x128x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -945,7 +945,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_f32_25
}
// 8.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_f32_256x128x256_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_256x128x256_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -987,7 +987,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_f32_25
}
// 9.
namespace cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_void_f32_256x192x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_256x192x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -1029,7 +1029,7 @@ namespace cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_void_f32_25
}
// 10.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_f32_256x256x256_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_256x256x256_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -1071,7 +1071,7 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_f32_25
}
// 11.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_f32_256x256x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_256x256x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -1113,9 +1113,9 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_f32_25
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_void_f32_128x64x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_128x64x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_void_f32_128x64x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_128x64x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1126,9 +1126,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x64x64spgemm_e4m3_e4m3_f32_void_f32_128x64x1
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_void_f32_128x128x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_128x128x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_void_f32_128x128x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_128x128x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1138,9 +1138,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_e4m3_e4m3_f32_void_f32_128x128
{512}));
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_void_f32_128x192x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_128x192x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_void_f32_128x192x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_128x192x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1151,9 +1151,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x192x64spgemm_e4m3_e4m3_f32_void_f32_128x192
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_void_f32_128x256x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_128x256x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_void_f32_128x256x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_128x256x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1164,9 +1164,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_e4m3_e4m3_f32_void_f32_128x256
}
// 5.
TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_f32_256x64x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_256x64x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_f32_256x64x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_256x64x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1176,9 +1176,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_f32_256x64x1
{512}));
}
// 6.
TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_f32_256x64x256_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_256x64x256_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_f32_256x64x256_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_256x64x256_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1188,9 +1188,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_e4m3_e4m3_f32_void_f32_256x64x2
{512}));
}
// 7.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_f32_256x128x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_256x128x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_f32_256x128x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_256x128x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1201,9 +1201,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_f32_256x128
}
// 8.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_f32_256x128x256_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_256x128x256_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_f32_256x128x256_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_256x128x256_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1214,9 +1214,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_e4m3_e4m3_f32_void_f32_256x128
}
// 9.
TEST(cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_void_f32_256x192x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_256x192x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_void_f32_256x192x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_256x192x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1227,9 +1227,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x192x64spgemm_e4m3_e4m3_f32_void_f32_256x192
}
// 10.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_f32_256x256x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_256x256x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_f32_256x256x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_256x256x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1239,9 +1239,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_f32_256x256
{512}));
}
// 11.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_f32_256x256x256_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_256x256x256_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_e4m3_e4m3_f32_void_f32_256x256x256_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_e4m3_e4m3_f32_void_f32_256x256x256_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,

View File

@ -47,7 +47,7 @@ using namespace cute;
#if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
// 1.
namespace cutlass3x_sm100_sptensorop_s128x64x64spgemm_s8_s8_s32_s8_s8_128x64x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_128x64x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -89,7 +89,7 @@ namespace cutlass3x_sm100_sptensorop_s128x64x64spgemm_s8_s8_s32_s8_s8_128x64x128
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_s8_s8_s32_s8_s8_128x128x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_128x128x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -131,7 +131,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_s8_s8_s32_s8_s8_128x128x1
}
// 3.
namespace cutlass3x_sm100_sptensorop_s128x192x64spgemm_s8_s8_s32_s8_s8_128x192x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_128x192x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -173,7 +173,7 @@ namespace cutlass3x_sm100_sptensorop_s128x192x64spgemm_s8_s8_s32_s8_s8_128x192x1
}
// 4.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_s8_s8_s32_s8_s8_128x256x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_128x256x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -215,7 +215,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_s8_s8_s32_s8_s8_128x256x1
}
// 5.
namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_s8_s8_s32_s8_s8_256x64x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_256x64x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -257,7 +257,7 @@ namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_s8_s8_s32_s8_s8_256x64x128
}
// 6.
namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_s8_s8_s32_s8_s8_256x64x256_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_256x64x256_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -299,7 +299,7 @@ namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_s8_s8_s32_s8_s8_256x64x256
}
// 7.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_s8_s8_s32_s8_s8_256x128x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_256x128x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -341,7 +341,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_s8_s8_s32_s8_s8_256x128x1
}
// 8.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_s8_s8_s32_s8_s8_256x128x256_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_256x128x256_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -383,7 +383,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_s8_s8_s32_s8_s8_256x128x2
}
// 9.
namespace cutlass3x_sm100_sptensorop_s256x192x64spgemm_s8_s8_s32_s8_s8_256x192x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_256x192x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -425,7 +425,7 @@ namespace cutlass3x_sm100_sptensorop_s256x192x64spgemm_s8_s8_s32_s8_s8_256x192x1
}
// 10.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_s8_s8_s32_s8_s8_256x256x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_256x256x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -467,7 +467,7 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_s8_s8_s32_s8_s8_256x256x1
}
// 11.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_s8_s8_s32_s8_s8_256x256x256_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_256x256x256_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -509,9 +509,9 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_s8_s8_s32_s8_s8_256x256x2
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x64x64spgemm_s8_s8_s32_s8_s8_128x64x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_128x64x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x64x64spgemm_s8_s8_s32_s8_s8_128x64x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_128x64x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -522,9 +522,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x64x64spgemm_s8_s8_s32_s8_s8_128x64x128_1x1x
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_s8_s8_s32_s8_s8_128x128x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_128x128x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_s8_s8_s32_s8_s8_128x128x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_128x128x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -535,9 +535,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_s8_s8_s32_s8_s8_128x128x128_1x
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s128x192x64spgemm_s8_s8_s32_s8_s8_128x192x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_128x192x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x192x64spgemm_s8_s8_s32_s8_s8_128x192x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_128x192x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -548,9 +548,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x192x64spgemm_s8_s8_s32_s8_s8_128x192x128_1x
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_s8_s8_s32_s8_s8_128x256x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_128x256x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_s8_s8_s32_s8_s8_128x256x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_128x256x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -561,9 +561,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_s8_s8_s32_s8_s8_128x256x128_1x
}
// 5.
TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_s8_s8_s32_s8_s8_256x64x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_256x64x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x64x64spgemm_s8_s8_s32_s8_s8_256x64x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_256x64x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -574,9 +574,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_s8_s8_s32_s8_s8_256x64x128_2x1x
}
// 6.
TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_s8_s8_s32_s8_s8_256x64x256_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_256x64x256_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x64x64spgemm_s8_s8_s32_s8_s8_256x64x256_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_256x64x256_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -587,9 +587,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_s8_s8_s32_s8_s8_256x64x256_2x1x
}
// 7.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_s8_s8_s32_s8_s8_256x128x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_256x128x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_s8_s8_s32_s8_s8_256x128x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_256x128x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -600,9 +600,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_s8_s8_s32_s8_s8_256x128x128_2x
}
// 8.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_s8_s8_s32_s8_s8_256x128x256_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_256x128x256_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_s8_s8_s32_s8_s8_256x128x256_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_256x128x256_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -613,9 +613,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_s8_s8_s32_s8_s8_256x128x256_2x
}
// 9.
TEST(cutlass3x_sm100_sptensorop_s256x192x64spgemm_s8_s8_s32_s8_s8_256x192x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_256x192x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x192x64spgemm_s8_s8_s32_s8_s8_256x192x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_256x192x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -626,9 +626,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x192x64spgemm_s8_s8_s32_s8_s8_256x192x128_2x
}
// 10.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_s8_s8_s32_s8_s8_256x256x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_256x256x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_s8_s8_s32_s8_s8_256x256x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_256x256x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -639,9 +639,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_s8_s8_s32_s8_s8_256x256x128_2x
}
// 11.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_s8_s8_s32_s8_s8_256x256x256_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_256x256x256_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_s8_s8_s32_s8_s8_256x256x256_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_s8_s8_256x256x256_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 1,
@ -652,7 +652,7 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_s8_s8_s32_s8_s8_256x256x256_2x
}
// 1.
namespace cutlass3x_sm100_sptensorop_s128x64x64spgemm_s8_s8_s32_void_s8_128x64x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_128x64x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -694,7 +694,7 @@ namespace cutlass3x_sm100_sptensorop_s128x64x64spgemm_s8_s8_s32_void_s8_128x64x1
}
// 2.
namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_s8_s8_s32_void_s8_128x128x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_128x128x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -736,7 +736,7 @@ namespace cutlass3x_sm100_sptensorop_s128x128x64spgemm_s8_s8_s32_void_s8_128x128
}
// 3.
namespace cutlass3x_sm100_sptensorop_s128x192x64spgemm_s8_s8_s32_void_s8_128x192x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_128x192x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -778,7 +778,7 @@ namespace cutlass3x_sm100_sptensorop_s128x192x64spgemm_s8_s8_s32_void_s8_128x192
}
// 4.
namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_s8_s8_s32_void_s8_128x256x128_1x1x1_0_tnn_align32_1sm {
namespace cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_128x256x128_1x1x1_0_tnn_align32_1sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -820,7 +820,7 @@ namespace cutlass3x_sm100_sptensorop_s128x256x64spgemm_s8_s8_s32_void_s8_128x256
}
// 5.
namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_s8_s8_s32_void_s8_256x64x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_256x64x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -862,7 +862,7 @@ namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_s8_s8_s32_void_s8_256x64x1
}
// 6.
namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_s8_s8_s32_void_s8_256x64x256_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_256x64x256_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -904,7 +904,7 @@ namespace cutlass3x_sm100_sptensorop_s256x64x64spgemm_s8_s8_s32_void_s8_256x64x2
}
// 7.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_s8_s8_s32_void_s8_256x128x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_256x128x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -946,7 +946,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_s8_s8_s32_void_s8_256x128
}
// 8.
namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_s8_s8_s32_void_s8_256x128x256_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_256x128x256_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -988,7 +988,7 @@ namespace cutlass3x_sm100_sptensorop_s256x128x64spgemm_s8_s8_s32_void_s8_256x128
}
// 9.
namespace cutlass3x_sm100_sptensorop_s256x192x64spgemm_s8_s8_s32_void_s8_256x192x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_256x192x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -1030,7 +1030,7 @@ namespace cutlass3x_sm100_sptensorop_s256x192x64spgemm_s8_s8_s32_void_s8_256x192
}
// 10.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_s8_s8_s32_void_s8_256x256x256_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_256x256x256_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -1072,7 +1072,7 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_s8_s8_s32_void_s8_256x256
}
// 11.
namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_s8_s8_s32_void_s8_256x256x128_2x1x1_0_tnn_align32_2sm {
namespace cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_256x256x128_2x1x1_0_tnn_align32_2sm {
using CollectiveEpilogue =
typename cutlass::epilogue::collective::CollectiveBuilder<
@ -1114,9 +1114,9 @@ namespace cutlass3x_sm100_sptensorop_s256x256x64spgemm_s8_s8_s32_void_s8_256x256
}
// 1.
TEST(cutlass3x_sm100_sptensorop_s128x64x64spgemm_s8_s8_s32_void_s8_128x64x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_128x64x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x64x64spgemm_s8_s8_s32_void_s8_128x64x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_128x64x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1127,9 +1127,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x64x64spgemm_s8_s8_s32_void_s8_128x64x128_1x
}
// 2.
TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_s8_s8_s32_void_s8_128x128x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_128x128x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x128x64spgemm_s8_s8_s32_void_s8_128x128x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_128x128x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1140,9 +1140,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x128x64spgemm_s8_s8_s32_void_s8_128x128x128_
}
// 3.
TEST(cutlass3x_sm100_sptensorop_s128x192x64spgemm_s8_s8_s32_void_s8_128x192x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_128x192x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x192x64spgemm_s8_s8_s32_void_s8_128x192x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_128x192x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1153,9 +1153,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x192x64spgemm_s8_s8_s32_void_s8_128x192x128_
}
// 4.
TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_s8_s8_s32_void_s8_128x256x128_1x1x1_0_tnn_align32_1sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_128x256x128_1x1x1_0_tnn_align32_1sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s128x256x64spgemm_s8_s8_s32_void_s8_128x256x128_1x1x1_0_tnn_align32_1sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_128x256x128_1x1x1_0_tnn_align32_1sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1166,9 +1166,9 @@ TEST(cutlass3x_sm100_sptensorop_s128x256x64spgemm_s8_s8_s32_void_s8_128x256x128_
}
// 5.
TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_s8_s8_s32_void_s8_256x64x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_256x64x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x64x64spgemm_s8_s8_s32_void_s8_256x64x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_256x64x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1179,9 +1179,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_s8_s8_s32_void_s8_256x64x128_2x
}
// 6.
TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_s8_s8_s32_void_s8_256x64x256_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_256x64x256_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x64x64spgemm_s8_s8_s32_void_s8_256x64x256_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_256x64x256_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1192,9 +1192,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x64x64spgemm_s8_s8_s32_void_s8_256x64x256_2x
}
// 7.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_s8_s8_s32_void_s8_256x128x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_256x128x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_s8_s8_s32_void_s8_256x128x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_256x128x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1205,9 +1205,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_s8_s8_s32_void_s8_256x128x128_
}
// 8.
TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_s8_s8_s32_void_s8_256x128x256_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_256x128x256_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x128x64spgemm_s8_s8_s32_void_s8_256x128x256_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_256x128x256_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1218,9 +1218,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x128x64spgemm_s8_s8_s32_void_s8_256x128x256_
}
// 9.
TEST(cutlass3x_sm100_sptensorop_s256x192x64spgemm_s8_s8_s32_void_s8_256x192x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_256x192x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x192x64spgemm_s8_s8_s32_void_s8_256x192x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_256x192x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1231,9 +1231,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x192x64spgemm_s8_s8_s32_void_s8_256x192x128_
}
// 10.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_s8_s8_s32_void_s8_256x256x128_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_256x256x128_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_s8_s8_s32_void_s8_256x256x128_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_256x256x128_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,
@ -1244,9 +1244,9 @@ TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_s8_s8_s32_void_s8_256x256x128_
}
// 11.
TEST(cutlass3x_sm100_sptensorop_s256x256x64spgemm_s8_s8_s32_void_s8_256x256x256_2x1x1_0_tnn_align32_2sm, func_check)
TEST(cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_256x256x256_2x1x1_0_tnn_align32_2sm, func_check)
{
namespace gemm = cutlass3x_sm100_sptensorop_s256x256x64spgemm_s8_s8_s32_void_s8_256x256x256_2x1x1_0_tnn_align32_2sm;
namespace gemm = cutlass3x_sm100_sptensorop_spgemm_s8_s8_s32_void_s8_256x256x256_2x1x1_0_tnn_align32_2sm;
EXPECT_TRUE(test::gemm::device::TestSmall<gemm::Gemm>(
1, 0,

View File

@ -550,5 +550,77 @@ TEST(SM90_Device_Gemm_e4m3t_e4m3n_f32t_tensor_op_gmma_f32, 64x128x128_tma_epilog
EXPECT_TRUE(test::gemm::device::TestAll<Gemm>());
}
#if defined(CUTE_SM90_EXTENDED_MMA_SHAPES_ENABLED)
TEST(SM90_Device_Gemm_e4m3t_e4m3n_f32t_tensor_op_gmma_f32, 128x56x128_tma_epilogue_fp8_fast_accum) {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
using LayoutC = cutlass::layout::ColumnMajor;
using TileMNK = Shape<_128,_56,_128>;
using EpilogueOp = typename cutlass::epilogue::collective::CollectiveBuilder<
cutlass::arch::Sm90, cutlass::arch::OpClassTensorOp,
TileMNK, Shape<_1,_1,_1>,
cutlass::epilogue::collective::EpilogueTileAuto,
float, float,
void, LayoutC, 4,
cutlass::half_t, LayoutC, 8,
cutlass::epilogue::TmaWarpSpecialized
>::CollectiveOp;
using CollectiveOp = typename cutlass::gemm::collective::CollectiveBuilder<
cutlass::arch::Sm90, cutlass::arch::OpClassTensorOp,
cutlass::float_e4m3_t, LayoutA, 16,
cutlass::float_e4m3_t, LayoutB, 16,
float,
TileMNK, Shape<_1,_1,_1>,
cutlass::gemm::collective::StageCountAutoCarveout<sizeof(typename EpilogueOp::SharedStorage)>,
cutlass::gemm::KernelTmaWarpSpecializedPingpongFP8FastAccum
>::CollectiveOp;
using GemmKernel = cutlass::gemm::kernel::GemmUniversal<
Shape<int,int,int,int>,
CollectiveOp,
EpilogueOp
>;
using Gemm = cutlass::gemm::device::GemmUniversalAdapter<GemmKernel>;
EXPECT_TRUE(test::gemm::device::TestAll<Gemm>());
}
TEST(SM90_Device_Gemm_e4m3t_e4m3n_f32t_tensor_op_gmma_f32, 128x112x128_tma_epilogue_fp8_fast_accum) {
using LayoutA = cutlass::layout::RowMajor;
using LayoutB = cutlass::layout::ColumnMajor;
using LayoutC = cutlass::layout::ColumnMajor;
using TileMNK = Shape<_128,_112,_128>;
using EpilogueOp = typename cutlass::epilogue::collective::CollectiveBuilder<
cutlass::arch::Sm90, cutlass::arch::OpClassTensorOp,
TileMNK, Shape<_1,_1,_1>,
cutlass::epilogue::collective::EpilogueTileAuto,
float, float,
void, LayoutC, 4,
cutlass::half_t, LayoutC, 8,
cutlass::epilogue::TmaWarpSpecialized
>::CollectiveOp;
using CollectiveOp = typename cutlass::gemm::collective::CollectiveBuilder<
cutlass::arch::Sm90, cutlass::arch::OpClassTensorOp,
cutlass::float_e4m3_t, LayoutA, 16,
cutlass::float_e4m3_t, LayoutB, 16,
float,
TileMNK, Shape<_1,_1,_1>,
cutlass::gemm::collective::StageCountAutoCarveout<sizeof(typename EpilogueOp::SharedStorage)>,
cutlass::gemm::KernelTmaWarpSpecializedPingpongFP8FastAccum
>::CollectiveOp;
using GemmKernel = cutlass::gemm::kernel::GemmUniversal<
Shape<int,int,int,int>,
CollectiveOp,
EpilogueOp
>;
using Gemm = cutlass::gemm::device::GemmUniversalAdapter<GemmKernel>;
EXPECT_TRUE(test::gemm::device::TestAll<Gemm>());
}
#endif // defined(CUTE_SM90_EXTENDED_MMA_SHAPES_ENABLED)
#endif // defined(CUTLASS_ARCH_MMA_SM90_SUPPORTED)