Updated mma_sm80.h to avoid perf penalty due to reinterpret_cast<>. (#100)

- Updated mma_sm80.h to avoid perf penalty due to reinterpret_cast<>. - Enhancement to CUTLASS Utility Library's HostTensorPlanarComplex template to support copy-in and copy-out - Added test_examples target to build and test all CUTLASS examples - Minor edits to documentation to point to GTC 2020 webinar
2020-06-15 10:47:01 -07:00
parent 86931fef85
commit 1ab1027954
11 changed files with 213 additions and 33 deletions
--- a/examples/07_volta_tensorop_gemm/volta_tensorop_gemm.cu
+++ b/examples/07_volta_tensorop_gemm/volta_tensorop_gemm.cu
@ -198,10 +198,12 @@ int run() {
    return -1;
  }

-  if (!(props.major >= 7)) {
-    std::cerr << "Volta Tensor Ops must be run on a machine with compute capability at least 70."
+  if (props.major != 7) {
+    std::cerr << "Volta Tensor Ops must be run on a machine with compute capability of 70, 72, or 75."
              << std::endl;
-    return -1;
+
+    // Return 0 so tests are considered passing if run on unsupported architectures or CUDA Toolkits.
+    return 0;
  }

  const int length_m = 5120;