Remove sparse GEMM with row broadcasted bias vector (#1302)

This reverts commit d3e72719b4.

Co-authored-by: Aleksandar Samardžić <asamardzic@matf.bg.ac.rs>
This commit is contained in:
Aleksandar Samardžić
2024-01-17 20:06:27 +01:00
committed by GitHub
parent 362abbf274
commit ca37d632c9
7 changed files with 7 additions and 1846 deletions

View File

@@ -37,7 +37,6 @@
#include "../../common/cutlass_unit_test.h"
#include "cutlass/cutlass.h"
#include "cutlass/gemm/device/gemm_sparse.h"
#include "cutlass/gemm/device/gemm_sparse_row_broadcast.h"
#include "cutlass/util/host_tensor.h"
#include "cutlass/util/reference/host/gemm.h"
#include "cutlass/util/reference/host/tensor_compare.h"
@@ -268,24 +267,6 @@ TEST(SM80_Device_Sparse_Gemm_f16n_f16n_f16t_tensor_op_f32, 64x64x128_32x32x128)
EXPECT_TRUE(test::gemm::device::TestAllSparseGemm<Gemm>());
}
TEST(SM80_Device_Sparse_Gemm_Row_Broadcast_f16n_f16n_f16t_tensor_op_f32, 64x64x128_32x32x128) {
using ElementOutput = cutlass::half_t;
using ElementAccumulator = float;
using Gemm = cutlass::gemm::device::SparseGemmRowBroadcast<
cutlass::half_t, cutlass::layout::ColumnMajor, cutlass::half_t,
cutlass::layout::ColumnMajor, ElementOutput, cutlass::layout::RowMajor,
ElementAccumulator, cutlass::arch::OpClassTensorOp, cutlass::arch::Sm80,
cutlass::gemm::GemmShape<64, 64, 128>,
cutlass::gemm::GemmShape<32, 32, 128>, cutlass::gemm::GemmShape<16, 8, 32>,
cutlass::epilogue::thread::LinearCombination<
ElementOutput, 128 / cutlass::sizeof_bits<ElementOutput>::value,
ElementAccumulator, ElementAccumulator>,
cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>, 6>;
EXPECT_TRUE(test::gemm::device::TestAllSparseGemm<Gemm>(true));
}
////////////////////////////////////////////////////////////////////////////////
#endif // #if defined(CUTLASS_ARCH_SPARSE_MMA_SM80_SUPPORTED)

View File

@@ -163,19 +163,14 @@ struct SparseTestbed {
}
/// Initializes data structures
void initialize(cutlass::gemm::GemmCoord problem_size, bool tensor_C_row_broadcast = false) {
void initialize(cutlass::gemm::GemmCoord problem_size) {
//
// Allocate the GEMM workspace
//
tensor_A.resize(cutlass::make_Coord(problem_size.m(), problem_size.k() / kSparse));
tensor_A_uncompressed.resize(problem_size.mk());
tensor_B.resize(problem_size.kn());
if (tensor_C_row_broadcast) {
tensor_C.resize({problem_size.m(), 1});
} else {
tensor_C.resize(problem_size.mn());
}
tensor_C.resize(problem_size.mn());
tensor_D.resize(problem_size.mn());
reference_D.resize(problem_size.mn(), false);
tensor_E.resize(cutlass::make_Coord(
@@ -209,13 +204,7 @@ struct SparseTestbed {
tensor_B.host_view().at({0, 0}) = typename Gemm::ElementB(1);
tensor_C.host_view().at({0, 0}) = typename Gemm::ElementC(1);
if (tensor_C_row_broadcast) {
for (int i = 0; i < problem_size.m(); ++i)
for (int j = 0; j < problem_size.n(); ++j)
reference_D.host_view().at({i, j}) = tensor_C.host_view().at({i, 0});
} else {
cutlass::reference::host::TensorCopy(reference_D.host_view(), tensor_C.host_view());
}
cutlass::reference::host::TensorCopy(reference_D.host_view(), tensor_C.host_view());
tensor_A.sync_device();
tensor_B.sync_device();
@@ -347,8 +336,7 @@ struct SparseTestbed {
cutlass::gemm::GemmCoord problem_size,
int split_k_slices = 1,
ElementCompute alpha = ElementCompute(1),
ElementCompute beta = ElementCompute(0),
bool tensor_C_row_broadcast = false) {
ElementCompute beta = ElementCompute(0)) {
// Waive test if insufficient CUDA device
if (!sufficient()) {
@@ -358,7 +346,7 @@ struct SparseTestbed {
return true;
}
this->initialize(problem_size, tensor_C_row_broadcast);
this->initialize(problem_size);
//
// Initialize the GEMM operator
@@ -413,7 +401,7 @@ struct SparseTestbed {
/////////////////////////////////////////////////////////////////////////////////////////////////
template <typename Gemm>
bool TestAllSparseGemm(bool tensor_C_row_broadcast = false) {
bool TestAllSparseGemm() {
bool passed = true;
int const kMinimumOperandElementSize =
@@ -473,8 +461,7 @@ bool TestAllSparseGemm(bool tensor_C_row_broadcast = false) {
problem_size,
split_k,
cutlass::from_real<ElementCompute>(alpha),
cutlass::from_real<ElementCompute>(beta),
tensor_C_row_broadcast
cutlass::from_real<ElementCompute>(beta)
);
if (!passed) {