Remove sparse GEMM with row broadcasted bias vector (#1302)

This reverts commit d3e72719b4.

Co-authored-by: Aleksandar Samardžić <asamardzic@matf.bg.ac.rs>
This commit is contained in:
Aleksandar Samardžić
2024-01-17 20:06:27 +01:00
committed by GitHub
parent 362abbf274
commit ca37d632c9
7 changed files with 7 additions and 1846 deletions

View File

@@ -37,7 +37,6 @@
#include "../../common/cutlass_unit_test.h"
#include "cutlass/cutlass.h"
#include "cutlass/gemm/device/gemm_sparse.h"
#include "cutlass/gemm/device/gemm_sparse_row_broadcast.h"
#include "cutlass/util/host_tensor.h"
#include "cutlass/util/reference/host/gemm.h"
#include "cutlass/util/reference/host/tensor_compare.h"
@@ -268,24 +267,6 @@ TEST(SM80_Device_Sparse_Gemm_f16n_f16n_f16t_tensor_op_f32, 64x64x128_32x32x128)
EXPECT_TRUE(test::gemm::device::TestAllSparseGemm<Gemm>());
}
TEST(SM80_Device_Sparse_Gemm_Row_Broadcast_f16n_f16n_f16t_tensor_op_f32, 64x64x128_32x32x128) {
using ElementOutput = cutlass::half_t;
using ElementAccumulator = float;
using Gemm = cutlass::gemm::device::SparseGemmRowBroadcast<
cutlass::half_t, cutlass::layout::ColumnMajor, cutlass::half_t,
cutlass::layout::ColumnMajor, ElementOutput, cutlass::layout::RowMajor,
ElementAccumulator, cutlass::arch::OpClassTensorOp, cutlass::arch::Sm80,
cutlass::gemm::GemmShape<64, 64, 128>,
cutlass::gemm::GemmShape<32, 32, 128>, cutlass::gemm::GemmShape<16, 8, 32>,
cutlass::epilogue::thread::LinearCombination<
ElementOutput, 128 / cutlass::sizeof_bits<ElementOutput>::value,
ElementAccumulator, ElementAccumulator>,
cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>, 6>;
EXPECT_TRUE(test::gemm::device::TestAllSparseGemm<Gemm>(true));
}
////////////////////////////////////////////////////////////////////////////////
#endif // #if defined(CUTLASS_ARCH_SPARSE_MMA_SM80_SUPPORTED)

View File

@@ -163,19 +163,14 @@ struct SparseTestbed {
}
/// Initializes data structures
void initialize(cutlass::gemm::GemmCoord problem_size, bool tensor_C_row_broadcast = false) {
void initialize(cutlass::gemm::GemmCoord problem_size) {
//
// Allocate the GEMM workspace
//
tensor_A.resize(cutlass::make_Coord(problem_size.m(), problem_size.k() / kSparse));
tensor_A_uncompressed.resize(problem_size.mk());
tensor_B.resize(problem_size.kn());
if (tensor_C_row_broadcast) {
tensor_C.resize({problem_size.m(), 1});
} else {
tensor_C.resize(problem_size.mn());
}
tensor_C.resize(problem_size.mn());
tensor_D.resize(problem_size.mn());
reference_D.resize(problem_size.mn(), false);
tensor_E.resize(cutlass::make_Coord(
@@ -209,13 +204,7 @@ struct SparseTestbed {
tensor_B.host_view().at({0, 0}) = typename Gemm::ElementB(1);
tensor_C.host_view().at({0, 0}) = typename Gemm::ElementC(1);
if (tensor_C_row_broadcast) {
for (int i = 0; i < problem_size.m(); ++i)
for (int j = 0; j < problem_size.n(); ++j)
reference_D.host_view().at({i, j}) = tensor_C.host_view().at({i, 0});
} else {
cutlass::reference::host::TensorCopy(reference_D.host_view(), tensor_C.host_view());
}
cutlass::reference::host::TensorCopy(reference_D.host_view(), tensor_C.host_view());
tensor_A.sync_device();
tensor_B.sync_device();
@@ -347,8 +336,7 @@ struct SparseTestbed {
cutlass::gemm::GemmCoord problem_size,
int split_k_slices = 1,
ElementCompute alpha = ElementCompute(1),
ElementCompute beta = ElementCompute(0),
bool tensor_C_row_broadcast = false) {
ElementCompute beta = ElementCompute(0)) {
// Waive test if insufficient CUDA device
if (!sufficient()) {
@@ -358,7 +346,7 @@ struct SparseTestbed {
return true;
}
this->initialize(problem_size, tensor_C_row_broadcast);
this->initialize(problem_size);
//
// Initialize the GEMM operator
@@ -413,7 +401,7 @@ struct SparseTestbed {
/////////////////////////////////////////////////////////////////////////////////////////////////
template <typename Gemm>
bool TestAllSparseGemm(bool tensor_C_row_broadcast = false) {
bool TestAllSparseGemm() {
bool passed = true;
int const kMinimumOperandElementSize =
@@ -473,8 +461,7 @@ bool TestAllSparseGemm(bool tensor_C_row_broadcast = false) {
problem_size,
split_k,
cutlass::from_real<ElementCompute>(alpha),
cutlass::from_real<ElementCompute>(beta),
tensor_C_row_broadcast
cutlass::from_real<ElementCompute>(beta)
);
if (!passed) {