Updated mma_sm80.h to avoid perf penalty due to reinterpret_cast<>. (#100)

- Updated mma_sm80.h to avoid perf penalty due to reinterpret_cast<>.
- Enhancement to CUTLASS Utility Library's HostTensorPlanarComplex template to support copy-in and copy-out
- Added test_examples target to build and test all CUTLASS examples
- Minor edits to documentation to point to GTC 2020 webinar
This commit is contained in:
Andrew Kerr
2020-06-15 10:47:01 -07:00
committed by GitHub
parent 86931fef85
commit 1ab1027954
11 changed files with 213 additions and 33 deletions

View File

@ -20,15 +20,9 @@
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cutlass_add_executable(
cutlass_example_add_executable(
03_visualize_layout
visualize_layout.cpp
register_layout.cu
)
target_link_libraries(
03_visualize_layout
PRIVATE
CUTLASS
cutlass_tools_util_includes
)

View File

@ -182,10 +182,12 @@ int run() {
return -1;
}
if (!(props.major >= 7)) {
std::cerr << "Volta Tensor Ops must be run on a machine with compute capability at least 70."
if (props.major != 7) {
std::cerr << "Volta Tensor Ops must be run on a machine with compute capability of 70, 72, or 75."
<< std::endl;
return -1;
// Return 0 so tests pass if run on unsupported architectures or CUDA Toolkits.
return 0;
}
//

View File

@ -198,10 +198,12 @@ int run() {
return -1;
}
if (!(props.major >= 7)) {
std::cerr << "Volta Tensor Ops must be run on a machine with compute capability at least 70."
if (props.major != 7) {
std::cerr << "Volta Tensor Ops must be run on a machine with compute capability of 70, 72, or 75."
<< std::endl;
return -1;
// Return 0 so tests are considered passing if run on unsupported architectures or CUDA Toolkits.
return 0;
}
const int length_m = 5120;

View File

@ -208,7 +208,9 @@ int run() {
if (!((props.major * 10 + props.minor) >= 75)) {
std::cerr << "Turing Tensor Core operations must be run on a machine with compute capability at least 75."
<< std::endl;
return -1;
// Return 0 so tests are considered passing if run on unsupported platforms.
return 0;
}
const int length_m = 5120;

View File

@ -44,9 +44,18 @@ function(cutlass_example_add_executable NAME)
${CUTLASS_EXAMPLES_COMMON_SOURCE_DIR}
)
# Define a per-example custom target named test_<NAME>. Its command runs the
# executable built for target ${NAME}, prefixed by whatever
# ${CUTLASS_TEST_EXECUTION_ENVIRONMENT} expands to (defined elsewhere; it may
# be empty). DEPENDS on ${NAME} so the example is built before it is run.
add_custom_target(
test_${NAME}
COMMAND
${CUTLASS_TEST_EXECUTION_ENVIRONMENT} $<TARGET_FILE:${NAME}>
DEPENDS
${NAME}
)
endfunction()
add_custom_target(cutlass_examples)
add_custom_target(test_examples)
foreach(EXAMPLE
00_basic_gemm
@ -66,5 +75,6 @@ foreach(EXAMPLE
add_subdirectory(${EXAMPLE})
add_dependencies(cutlass_examples ${EXAMPLE})
add_dependencies(test_examples test_${EXAMPLE})
endforeach()