Updated mma_sm80.h to avoid perf penalty due to reinterpret_cast<>. (#100)
- Updated mma_sm80.h to avoid a perf penalty due to reinterpret_cast<>.
- Enhanced the CUTLASS Utility Library's HostTensorPlanarComplex template to support copy-in and copy-out.
- Added a test_examples target to build and test all CUTLASS examples.
- Made minor edits to the documentation to point to the GTC 2020 webinar.
This commit is contained in:
@ -20,15 +20,9 @@
|
||||
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
cutlass_add_executable(
|
||||
cutlass_example_add_executable(
|
||||
03_visualize_layout
|
||||
visualize_layout.cpp
|
||||
register_layout.cu
|
||||
)
|
||||
|
||||
target_link_libraries(
|
||||
03_visualize_layout
|
||||
PRIVATE
|
||||
CUTLASS
|
||||
cutlass_tools_util_includes
|
||||
)
|
||||
|
||||
@ -182,10 +182,12 @@ int run() {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!(props.major >= 7)) {
|
||||
std::cerr << "Volta Tensor Ops must be run on a machine with compute capability at least 70."
|
||||
if (props.major != 7) {
|
||||
std::cerr << "Volta Tensor Ops must be run on a machine with compute capability of 70, 72, or 75."
|
||||
<< std::endl;
|
||||
return -1;
|
||||
|
||||
// Return 0 so tests pass if run on unsupported architectures or CUDA Toolkits.
|
||||
return 0;
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
@ -198,10 +198,12 @@ int run() {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!(props.major >= 7)) {
|
||||
std::cerr << "Volta Tensor Ops must be run on a machine with compute capability at least 70."
|
||||
if (props.major != 7) {
|
||||
std::cerr << "Volta Tensor Ops must be run on a machine with compute capability of 70, 72, or 75."
|
||||
<< std::endl;
|
||||
return -1;
|
||||
|
||||
// Return 0 so tests are considered passing if run on unsupported architectures or CUDA Toolkits.
|
||||
return 0;
|
||||
}
|
||||
|
||||
const int length_m = 5120;
|
||||
|
||||
@ -208,7 +208,9 @@ int run() {
|
||||
if (!((props.major * 10 + props.minor) >= 75)) {
|
||||
std::cerr << "Turing Tensor Core operations must be run on a machine with compute capability at least 75."
|
||||
<< std::endl;
|
||||
return -1;
|
||||
|
||||
// Return 0 so tests are considered passing if run on unsupported platforms.
|
||||
return 0;
|
||||
}
|
||||
|
||||
const int length_m = 5120;
|
||||
|
||||
@ -44,9 +44,18 @@ function(cutlass_example_add_executable NAME)
|
||||
${CUTLASS_EXAMPLES_COMMON_SOURCE_DIR}
|
||||
)
|
||||
|
||||
add_custom_target(
|
||||
test_${NAME}
|
||||
COMMAND
|
||||
${CUTLASS_TEST_EXECUTION_ENVIRONMENT} $<TARGET_FILE:${NAME}>
|
||||
DEPENDS
|
||||
${NAME}
|
||||
)
|
||||
|
||||
endfunction()
|
||||
|
||||
add_custom_target(cutlass_examples)
|
||||
add_custom_target(test_examples)
|
||||
|
||||
foreach(EXAMPLE
|
||||
00_basic_gemm
|
||||
@ -66,5 +75,6 @@ foreach(EXAMPLE
|
||||
|
||||
add_subdirectory(${EXAMPLE})
|
||||
add_dependencies(cutlass_examples ${EXAMPLE})
|
||||
add_dependencies(test_examples test_${EXAMPLE})
|
||||
|
||||
endforeach()
|
||||
|
||||
Reference in New Issue
Block a user