Updated mma_sm80.h to avoid perf penalty due to reinterpret_cast<>. (#100)
- Updated mma_sm80.h to avoid perf penalty due to reinterpret_cast<>.
- Enhanced the CUTLASS Utility Library's HostTensorPlanarComplex template to support copy-in and copy-out.
- Added test_examples target to build and test all CUTLASS examples.
- Minor edits to documentation to point to the GTC 2020 webinar.
This commit is contained in:
@@ -198,10 +198,12 @@ int run() {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!(props.major >= 7)) {
|
||||
std::cerr << "Volta Tensor Ops must be run on a machine with compute capability at least 70."
|
||||
if (props.major != 7) {
|
||||
std::cerr << "Volta Tensor Ops must be run on a machine with compute capability of 70, 72, or 75."
|
||||
<< std::endl;
|
||||
return -1;
|
||||
|
||||
// Return 0 so tests are considered passing if run on unsupported architectures or CUDA Toolkits.
|
||||
return 0;
|
||||
}
|
||||
|
||||
const int length_m = 5120;
|
||||
|
||||
Reference in New Issue
Block a user