@ -241,8 +241,6 @@ struct SparseTestbed {
|
||||
// Determine SMEM requirements and waive if not satisfied
|
||||
//
|
||||
|
||||
int smem_size = int(sizeof(typename Mma::SharedStorage));
|
||||
|
||||
cudaDeviceProp properties;
|
||||
int device_idx;
|
||||
cudaError_t result = cudaGetDevice(&device_idx);
|
||||
@ -257,10 +255,6 @@ struct SparseTestbed {
|
||||
throw std::runtime_error("cudaGetDeviceProperties() failed");
|
||||
}
|
||||
|
||||
if (properties.sharedMemPerBlockOptin < smem_size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -415,7 +409,12 @@ struct SparseTestbed {
|
||||
bool passed = cutlass::reference::host::TensorEquals(
|
||||
matrix_C_computed.host_view(), matrix_C_reference.host_view());
|
||||
|
||||
EXPECT_TRUE(passed)
|
||||
EXPECT_TRUE(passed);
|
||||
|
||||
if (!passed && CUTLASS_TEST_UNIT_ENABLE_WARNINGS) {
|
||||
|
||||
std::cout
|
||||
<< __FILE__ << ":" << __LINE__ << " "
|
||||
<< "A:\n" << matrix_A.host_view() << "\n"
|
||||
<< "B:\n" << matrix_B.host_view() << "\n"
|
||||
<< "E:\n" << matrix_E.host_view() << "\n"
|
||||
@ -423,6 +422,7 @@ struct SparseTestbed {
|
||||
<< matrix_C_reference.host_view() << "\n"
|
||||
<< "Computed:\n"
|
||||
<< matrix_C_computed.host_view() << "\n";
|
||||
}
|
||||
|
||||
EXPECT_GT(cutlass::reference::host::TensorNorm(matrix_C_reference.host_view()), 0);
|
||||
EXPECT_GT(cutlass::reference::host::TensorNorm(matrix_C_computed.host_view()), 0);
|
||||
|
||||
@ -193,11 +193,40 @@ struct Testbed {
|
||||
matrix_C_reference.reset(cutlass::make_Coord(m, n), false);
|
||||
}
|
||||
|
||||
/// Returns true if the CUDA device is sufficient to execute the kernel.
|
||||
bool sufficient() const {
|
||||
|
||||
//
|
||||
// Determine SMEM requirements and waive if not satisfied
|
||||
//
|
||||
|
||||
cudaDeviceProp properties;
|
||||
int device_idx;
|
||||
cudaError_t result = cudaGetDevice(&device_idx);
|
||||
|
||||
if (result != cudaSuccess) {
|
||||
throw std::runtime_error("cudaGetDevice() API call failed.");
|
||||
}
|
||||
|
||||
result = cudaGetDeviceProperties(&properties, device_idx);
|
||||
|
||||
if (result != cudaSuccess) {
|
||||
throw std::runtime_error("cudaGetDeviceProperties() failed");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Runs the test
|
||||
bool run(
|
||||
dim3 grid, dim3 block,
|
||||
cutlass::Distribution::Kind init_A = cutlass::Distribution::Uniform,
|
||||
cutlass::Distribution::Kind init_B = cutlass::Distribution::Uniform) {
|
||||
|
||||
if (!sufficient()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
//
|
||||
// initialize device memory
|
||||
//
|
||||
@ -318,13 +347,18 @@ struct Testbed {
|
||||
bool passed = cutlass::reference::host::TensorEquals(
|
||||
matrix_C_computed.host_view(), matrix_C_reference.host_view());
|
||||
|
||||
EXPECT_TRUE(passed)
|
||||
EXPECT_TRUE(passed);
|
||||
|
||||
if (!passed && CUTLASS_TEST_UNIT_ENABLE_WARNINGS) {
|
||||
std::cout
|
||||
<< __FILE__ << ":" << __LINE__ << " "
|
||||
<< "A:\n" << matrix_A.host_view() << "\n"
|
||||
<< "B:\n" << matrix_B.host_view() << "\n"
|
||||
<< "Reference:\n"
|
||||
<< matrix_C_reference.host_view() << "\n"
|
||||
<< "Computed:\n"
|
||||
<< matrix_C_computed.host_view() << "\n";
|
||||
}
|
||||
|
||||
EXPECT_GT(cutlass::reference::host::TensorNorm(matrix_C_reference.host_view()), 0);
|
||||
EXPECT_GT(cutlass::reference::host::TensorNorm(matrix_C_computed.host_view()), 0);
|
||||
|
||||
@ -217,11 +217,25 @@ struct Testbed {
|
||||
matrix_C_reference.reset(cutlass::make_Coord(m, n), false);
|
||||
}
|
||||
|
||||
/// Device-capability predicate for this testbed.
///
/// Unconditionally returns true: no device property is inspected here.
bool sufficient() {
  return true;
}
|
||||
|
||||
/// Runs the test
|
||||
bool run(
|
||||
dim3 grid, dim3 block,
|
||||
cutlass::Distribution::Kind init_A = cutlass::Distribution::Uniform,
|
||||
cutlass::Distribution::Kind init_B = cutlass::Distribution::Uniform) {
|
||||
|
||||
// Waive test if insufficient CUDA device
|
||||
if (!sufficient()) {
|
||||
if (CUTLASS_TEST_UNIT_ENABLE_WARNINGS) {
|
||||
std::cerr << "Test waived due to insufficient CUDA device." << std::endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// initialize device memory
|
||||
//
|
||||
@ -300,7 +314,7 @@ struct Testbed {
|
||||
|
||||
cudaError_t result = cudaDeviceSynchronize();
|
||||
EXPECT_EQ(result, cudaSuccess)
|
||||
<< " kernel error: " << cudaGetErrorString(result);
|
||||
<< " kernel error: " << cudaGetErrorString(result) << " on device " << GetCudaDevice();
|
||||
|
||||
matrix_C_computed.sync_host();
|
||||
|
||||
@ -316,7 +330,7 @@ struct Testbed {
|
||||
bool passed = cutlass::reference::host::TensorEquals(
|
||||
matrix_C_computed.host_view(), matrix_C_reference.host_view());
|
||||
|
||||
EXPECT_TRUE(passed);
|
||||
EXPECT_TRUE(passed) << "Failed on device " << GetCudaDevice();
|
||||
|
||||
if (!passed) {
|
||||
std::ofstream output("mma_pipelined_testbed_errors.txt");
|
||||
|
||||
Reference in New Issue
Block a user