Updated mma_sm80.h to avoid perf penalty due to reinterpret_cast<>. (#100)

- Updated mma_sm80.h to avoid perf penalty due to reinterpret_cast<>.
- Enhancement to CUTLASS Utility Library's HostTensorPlanarComplex template to support copy-in and copy-out
- Added test_examples target to build and test all CUTLASS examples
- Minor edits to documentation to point to GTC 2020 webinar
2020-06-15 10:47:01 -07:00
parent 86931fef85
commit 1ab1027954
11 changed files with 213 additions and 33 deletions
--- a/examples/03_visualize_layout/CMakeLists.txt
+++ b/examples/03_visualize_layout/CMakeLists.txt
@@ -20,15 +20,9 @@
 # STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-cutlass_add_executable(
+cutlass_example_add_executable(
  03_visualize_layout
  visualize_layout.cpp
  register_layout.cu
  )

-target_link_libraries(
-  03_visualize_layout
-  PRIVATE
-  CUTLASS
-  cutlass_tools_util_includes
-  )
--- a/examples/06_splitK_gemm/splitk_gemm.cu
+++ b/examples/06_splitK_gemm/splitk_gemm.cu
@@ -182,10 +182,12 @@ int run() {
    return -1;
  }

-  if (!(props.major >= 7)) {
-    std::cerr << "Volta Tensor Ops must be run on a machine with compute capability at least 70."
+  if (props.major != 7) {
+    std::cerr << "Volta Tensor Ops must be run on a machine with compute capability of 70, 72, or 75."
              << std::endl;
-    return -1;
+
+    // Return 0 so tests pass if run on unsupported architectures or CUDA Toolkits.
+    return 0;
  }

  //
--- a/examples/07_volta_tensorop_gemm/volta_tensorop_gemm.cu
+++ b/examples/07_volta_tensorop_gemm/volta_tensorop_gemm.cu
@@ -198,10 +198,12 @@ int run() {
    return -1;
  }

-  if (!(props.major >= 7)) {
-    std::cerr << "Volta Tensor Ops must be run on a machine with compute capability at least 70."
+  if (props.major != 7) {
+    std::cerr << "Volta Tensor Ops must be run on a machine with compute capability of 70, 72, or 75."
              << std::endl;
-    return -1;
+
+    // Return 0 so tests are considered passing if run on unsupported architectures or CUDA Toolkits.
+    return 0;
  }

  const int length_m = 5120;
--- a/examples/08_turing_tensorop_gemm/turing_tensorop_gemm.cu
+++ b/examples/08_turing_tensorop_gemm/turing_tensorop_gemm.cu
@@ -208,7 +208,9 @@ int run() {
  if (!((props.major * 10 + props.minor) >= 75)) {
    std::cerr << "Turing Tensor Core operations must be run on a machine with compute capability at least 75."
              << std::endl;
-    return -1;
+
+    // Return 0 so tests are considered passing if run on unsupported platforms.
+    return 0;
  }

  const int length_m = 5120;
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -44,9 +44,18 @@ function(cutlass_example_add_executable NAME)
    ${CUTLASS_EXAMPLES_COMMON_SOURCE_DIR}
    )

+  add_custom_target(
+    test_${NAME}
+    COMMAND
+    ${CUTLASS_TEST_EXECUTION_ENVIRONMENT} $<TARGET_FILE:${NAME}>
+  DEPENDS
+    ${NAME}
+    )
+
 endfunction()

 add_custom_target(cutlass_examples)
+add_custom_target(test_examples)

 foreach(EXAMPLE
  00_basic_gemm
@@ -66,5 +75,6 @@ foreach(EXAMPLE

  add_subdirectory(${EXAMPLE})
  add_dependencies(cutlass_examples ${EXAMPLE})
+  add_dependencies(test_examples test_${EXAMPLE})

 endforeach()