CUTLASS 3.0.0 (#786)

* CUTLASS 3.0.0
This commit is contained in:
Vijay Thakkar
2023-01-23 17:55:28 -08:00
committed by GitHub
parent 66d9cddc83
commit 277bd6e537
377 changed files with 76396 additions and 1186 deletions

View File

@ -241,8 +241,6 @@ struct SparseTestbed {
// Determine SMEM requirements and waive if not satisfied
//
int smem_size = int(sizeof(typename Mma::SharedStorage));
cudaDeviceProp properties;
int device_idx;
cudaError_t result = cudaGetDevice(&device_idx);
@ -257,10 +255,6 @@ struct SparseTestbed {
throw std::runtime_error("cudaGetDeviceProperties() failed");
}
if (properties.sharedMemPerBlockOptin < smem_size) {
return false;
}
return true;
}
@ -415,7 +409,12 @@ struct SparseTestbed {
bool passed = cutlass::reference::host::TensorEquals(
matrix_C_computed.host_view(), matrix_C_reference.host_view());
EXPECT_TRUE(passed)
EXPECT_TRUE(passed);
if (!passed && CUTLASS_TEST_UNIT_ENABLE_WARNINGS) {
std::cout
<< __FILE__ << ":" << __LINE__ << " "
<< "A:\n" << matrix_A.host_view() << "\n"
<< "B:\n" << matrix_B.host_view() << "\n"
<< "E:\n" << matrix_E.host_view() << "\n"
@ -423,6 +422,7 @@ struct SparseTestbed {
<< matrix_C_reference.host_view() << "\n"
<< "Computed:\n"
<< matrix_C_computed.host_view() << "\n";
}
EXPECT_GT(cutlass::reference::host::TensorNorm(matrix_C_reference.host_view()), 0);
EXPECT_GT(cutlass::reference::host::TensorNorm(matrix_C_computed.host_view()), 0);

View File

@ -193,11 +193,40 @@ struct Testbed {
matrix_C_reference.reset(cutlass::make_Coord(m, n), false);
}
/// Returns true if the CUDA device is sufficient to execute the kernel.
bool sufficient() const {
//
// Determine SMEM requirements and waive if not satisfied
//
cudaDeviceProp properties;
int device_idx;
cudaError_t result = cudaGetDevice(&device_idx);
if (result != cudaSuccess) {
throw std::runtime_error("cudaGetDevice() API call failed.");
}
result = cudaGetDeviceProperties(&properties, device_idx);
if (result != cudaSuccess) {
throw std::runtime_error("cudaGetDeviceProperties() failed");
}
return true;
}
/// Runs the test
bool run(
dim3 grid, dim3 block,
cutlass::Distribution::Kind init_A = cutlass::Distribution::Uniform,
cutlass::Distribution::Kind init_B = cutlass::Distribution::Uniform) {
if (!sufficient()) {
return true;
}
//
// initialize device memory
//
@ -318,13 +347,18 @@ struct Testbed {
bool passed = cutlass::reference::host::TensorEquals(
matrix_C_computed.host_view(), matrix_C_reference.host_view());
EXPECT_TRUE(passed)
EXPECT_TRUE(passed);
if (!passed && CUTLASS_TEST_UNIT_ENABLE_WARNINGS) {
std::cout
<< __FILE__ << ":" << __LINE__ << " "
<< "A:\n" << matrix_A.host_view() << "\n"
<< "B:\n" << matrix_B.host_view() << "\n"
<< "Reference:\n"
<< matrix_C_reference.host_view() << "\n"
<< "Computed:\n"
<< matrix_C_computed.host_view() << "\n";
}
EXPECT_GT(cutlass::reference::host::TensorNorm(matrix_C_reference.host_view()), 0);
EXPECT_GT(cutlass::reference::host::TensorNorm(matrix_C_computed.host_view()), 0);

View File

@ -217,11 +217,25 @@ struct Testbed {
matrix_C_reference.reset(cutlass::make_Coord(m, n), false);
}
/// Reports whether the CUDA device is adequate for this test.
/// No device capability is inspected; the answer is unconditionally true.
bool sufficient() { return true; }
/// Runs the test
bool run(
dim3 grid, dim3 block,
cutlass::Distribution::Kind init_A = cutlass::Distribution::Uniform,
cutlass::Distribution::Kind init_B = cutlass::Distribution::Uniform) {
// Waive test if insufficient CUDA device
if (!sufficient()) {
if (CUTLASS_TEST_UNIT_ENABLE_WARNINGS) {
std::cerr << "Test waived due to insufficient CUDA device." << std::endl;
}
return true;
}
//
// initialize device memory
//
@ -300,7 +314,7 @@ struct Testbed {
cudaError_t result = cudaDeviceSynchronize();
EXPECT_EQ(result, cudaSuccess)
<< " kernel error: " << cudaGetErrorString(result);
<< " kernel error: " << cudaGetErrorString(result) << " on device " << GetCudaDevice();
matrix_C_computed.sync_host();
@ -316,7 +330,7 @@ struct Testbed {
bool passed = cutlass::reference::host::TensorEquals(
matrix_C_computed.host_view(), matrix_C_reference.host_view());
EXPECT_TRUE(passed);
EXPECT_TRUE(passed) << "Failed on device " << GetCudaDevice();
if (!passed) {
std::ofstream output("mma_pipelined_testbed_errors.txt");