CUTLASS 2.1 (#83)
CUTLASS 2.1 contributes: (1) a BLAS-style host-side API added to the CUTLASS Library; (2) Planar Complex GEMM kernels targeting Volta and Turing Tensor Cores; (3) minor enhancements and bug fixes.
This commit is contained in:
@ -29,7 +29,7 @@ provided by CUTLASS using tensor cores; which we run on a NVIDIA Volta GPU.
|
||||
|
||||
Writing a single high-performance matrix multiplication kernel is hard but doable, whereas writing
|
||||
high-performance kernels at scale that work for multiple problem sizes with good abstractions is
|
||||
really hard. CUTLASS solves this problem by providing simplified abstractions (knobs) to compose
|
||||
really hard. CUTLASS solves this problem by providing simplified abstractions to compose
|
||||
multiple sections of a GEMM kernel. When used properly, the kernels can hit the peak performance of the GPU
|
||||
easily.
|
||||
|
||||
@ -189,13 +189,27 @@ using Gemm = cutlass::gemm::device::Gemm<ElementInputA,
|
||||
NumStages>;
|
||||
|
||||
int main() {
|
||||
|
||||
// Volta Tensor Core operations exposed with mma.sync are first available in CUDA 10.1.
|
||||
//
|
||||
// CUTLASS must be compiled with CUDA 10.1 Toolkit to run these examples.
|
||||
if (!(__CUDACC_VER_MAJOR__ > 10 || (__CUDACC_VER_MAJOR__ == 10 && __CUDACC_VER_MINOR__ >= 1))) {
|
||||
std::cerr << "Volta Tensor Core operations must be compiled with CUDA 10.1 Toolkit or later." << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
cudaDeviceProp props;
|
||||
CUDA_CHECK(cudaGetDeviceProperties(&props, 0));
|
||||
|
||||
cudaError_t error = cudaGetDeviceProperties(&props, 0);
|
||||
if (error != cudaSuccess) {
|
||||
std::cerr << "cudaGetDeviceProperties() returned an error: " << cudaGetErrorString(error) << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!(props.major >= 7)) {
|
||||
std::cerr << "Volta Tensor Ops must be run on a machine with compute capability at least 70."
|
||||
<< std::endl;
|
||||
return 0;
|
||||
return -1;
|
||||
}
|
||||
|
||||
const int length_m = 5120;
|
||||
|
||||
Reference in New Issue
Block a user