CUTLASS 2.5

This commit is contained in:
Andrew Kerr
2021-02-26 09:58:26 -05:00
parent ccb697bac7
commit 0e13748649
771 changed files with 15476 additions and 1717 deletions

View File

@ -20,32 +20,73 @@
# STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add_custom_target(
list(SORT CUTLASS_NVCC_ARCHS_ENABLED)
set(CUTLASS_NVCC_ARCHS_ENABLED_REVERSED ${CUTLASS_NVCC_ARCHS_ENABLED})
list(REVERSE CUTLASS_NVCC_ARCHS_ENABLED_REVERSED)
list(GET CUTLASS_NVCC_ARCHS_ENABLED_REVERSED 0 CUTLASS_NVCC_MAX_ARCH)
add_custom_target(
cutlass_test_unit_conv_device
DEPENDS
cutlass_test_unit_conv_device_simt
cutlass_test_unit_conv_device_tensorop_f32_sm70
cutlass_test_unit_conv_device_tensorop_f32_sm75
cutlass_test_unit_conv_device_tensorop_f16_sm80
cutlass_test_unit_conv_device_tensorop_f32_sm80
cutlass_test_unit_conv_device_tensorop_f32_tf32_sm80
cutlass_test_unit_conv_device_tensorop_s32
cutlass_test_unit_conv_device_tensorop_s32_interleaved
)
add_custom_target(
test_unit_conv_device
DEPENDS
test_unit_conv_device_simt
test_unit_conv_device_tensorop_f32_sm70
test_unit_conv_device_tensorop_f32_sm75
test_unit_conv_device_tensorop_f16_sm80
test_unit_conv_device_tensorop_f32_sm80
test_unit_conv_device_tensorop_f32_tf32_sm80
test_unit_conv_device_tensorop_s32
test_unit_conv_device_tensorop_s32_interleaved
)
if (CUTLASS_NVCC_MAX_ARCH GREATER_EQUAL 70)
add_dependencies(
cutlass_test_unit_conv_device
cutlass_test_unit_conv_device_tensorop_f32_sm70
)
add_dependencies(
test_unit_conv_device
test_unit_conv_device_tensorop_f32_sm70
)
endif()
if (CUTLASS_NVCC_MAX_ARCH GREATER_EQUAL 75)
add_dependencies(
cutlass_test_unit_conv_device
cutlass_test_unit_conv_device_tensorop_f32_sm75
cutlass_test_unit_conv_device_tensorop_s32
cutlass_test_unit_conv_device_tensorop_s32_interleaved
)
add_dependencies(
test_unit_conv_device
test_unit_conv_device_tensorop_f32_sm75
test_unit_conv_device_tensorop_s32
test_unit_conv_device_tensorop_s32_interleaved
)
endif()
if (CUTLASS_NVCC_MAX_ARCH GREATER_EQUAL 80)
add_dependencies(
cutlass_test_unit_conv_device
cutlass_test_unit_conv_device_tensorop_f16_sm80
cutlass_test_unit_conv_device_tensorop_f32_sm80
cutlass_test_unit_conv_device_tensorop_f32_tf32_sm80
)
add_dependencies(
test_unit_conv_device
test_unit_conv_device_tensorop_f16_sm80
test_unit_conv_device_tensorop_f32_sm80
test_unit_conv_device_tensorop_f32_tf32_sm80
)
endif()
#
# OpClassSimt (CUDA cores)
#
@ -56,20 +97,27 @@ cutlass_test_unit_add_executable(
# F32
conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm50.cu
conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.cu
conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.cu
conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.cu
# CF32
conv2d_fprop_implicit_gemm_cf32nhwc_cf32nhwc_cf32nhwc_simt_f32_sm50.cu
conv2d_dgrad_implicit_gemm_cf32nhwc_cf32nhwc_cf32nhwc_simt_f32_sm50.cu
conv2d_wgrad_implicit_gemm_cf32nhwc_cf32nhwc_cf32nhwc_simt_f32_sm50.cu
conv2d_fprop_implicit_gemm_cf32nhwc_cf32nhwc_cf32nhwc_simt_f32_sm80.cu
conv2d_dgrad_implicit_gemm_cf32nhwc_cf32nhwc_cf32nhwc_simt_f32_sm80.cu
conv2d_wgrad_implicit_gemm_cf32nhwc_cf32nhwc_cf32nhwc_simt_f32_sm80.cu
)
if (CUTLASS_NVCC_MAX_ARCH GREATER_EQUAL 80)
cutlass_target_sources(
cutlass_test_unit_conv_device_simt
PRIVATE
conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.cu
conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.cu
conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.cu
conv2d_fprop_implicit_gemm_cf32nhwc_cf32nhwc_cf32nhwc_simt_f32_sm80.cu
conv2d_dgrad_implicit_gemm_cf32nhwc_cf32nhwc_cf32nhwc_simt_f32_sm80.cu
conv2d_wgrad_implicit_gemm_cf32nhwc_cf32nhwc_cf32nhwc_simt_f32_sm80.cu
)
endif()
#
# OpClassTensorOp (Tensor cores)
#
@ -92,57 +140,81 @@ cutlass_test_unit_add_executable(
conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm75.cu
)
# Conv2d - F16 input, F16 output, F16 accumulation
cutlass_test_unit_add_executable(
cutlass_test_unit_conv_device_tensorop_f16_sm80
if (CUTLASS_NVCC_MAX_ARCH GREATER_EQUAL 80)
# Conv2d - F16 input, F16 output, F16 accumulation
cutlass_test_unit_add_executable(
cutlass_test_unit_conv_device_tensorop_f16_sm80
conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.cu
conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.cu
conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.cu
)
conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.cu
conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.cu
conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.cu
)
# Conv2d - F16 input, F32 output, F32 accumulation
cutlass_test_unit_add_executable(
cutlass_test_unit_conv_device_tensorop_f32_sm80
conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.cu
conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.cu
conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.cu
conv3d_wgrad_implicit_gemm_f16ndhwc_f16ndhwc_f32ndhwc_tensor_op_f32_sm75.cu
conv3d_wgrad_implicit_gemm_f16ndhwc_f16ndhwc_f32ndhwc_tensor_op_f32_sm80.cu
)
# Conv2d - TF32 input, F32 output, F32 accumulation
cutlass_test_unit_add_executable(
cutlass_test_unit_conv_device_tensorop_f32_tf32_sm80
conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.cu
conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.cu
conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.cu
conv3d_fprop_implicit_gemm_tf32ndhwc_tf32ndhwc_f32ndhwc_tensor_op_f32_sm80.cu
conv3d_dgrad_implicit_gemm_tf32ndhwc_tf32ndhwc_f32ndhwc_tensor_op_f32_sm80.cu
conv3d_wgrad_implicit_gemm_tf32ndhwc_tf32ndhwc_f32ndhwc_tensor_op_f32_sm80.cu
)
# Conv2d - F16 input, F32 output, F32 accumulation
cutlass_test_unit_add_executable(
cutlass_test_unit_conv_device_tensorop_f32_sm80
endif()
if (CUTLASS_NVCC_MAX_ARCH GREATER_EQUAL 75)
conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.cu
conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.cu
conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.cu
# Conv2d - S8 input, S32 output, S32 accumulation
conv3d_wgrad_implicit_gemm_f16ndhwc_f16ndhwc_f32ndhwc_tensor_op_f32_sm75.cu
conv3d_wgrad_implicit_gemm_f16ndhwc_f16ndhwc_f32ndhwc_tensor_op_f32_sm80.cu
)
cutlass_test_unit_add_executable(
cutlass_test_unit_conv_device_tensorop_s32
conv2d_fprop_implicit_gemm_s8nhwc_s8nhwc_s32nhwc_tensor_op_s32_sm75.cu
conv2d_fprop_implicit_gemm_s4nhwc_s4nhwc_s32nhwc_tensor_op_s32_sm75.cu
)
# Conv2d - S8 interleaved input, S8 interleaved output, S32 accumulation
# Conv2d - TF32 input, F32 output, F32 accumulation
cutlass_test_unit_add_executable(
cutlass_test_unit_conv_device_tensorop_f32_tf32_sm80
cutlass_test_unit_add_executable(
cutlass_test_unit_conv_device_tensorop_s32_interleaved
conv2d_fprop_implicit_gemm_s8ncxhwx_s8cxrskx_s8ncxhwx_tensor_op_s32_sm75.cu
conv2d_fprop_implicit_gemm_s4ncxhwx_s4cxrskx_s4ncxhwx_tensor_op_s32_sm75.cu
)
conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.cu
conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.cu
conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.cu
if (CUTLASS_NVCC_MAX_ARCH GREATER_EQUAL 80)
conv3d_fprop_implicit_gemm_tf32ndhwc_tf32ndhwc_f32ndhwc_tensor_op_f32_sm80.cu
conv3d_dgrad_implicit_gemm_tf32ndhwc_tf32ndhwc_f32ndhwc_tensor_op_f32_sm80.cu
conv3d_wgrad_implicit_gemm_tf32ndhwc_tf32ndhwc_f32ndhwc_tensor_op_f32_sm80.cu
)
cutlass_target_sources(
cutlass_test_unit_conv_device_tensorop_s32
PRIVATE
conv2d_fprop_implicit_gemm_s8nhwc_s8nhwc_s32nhwc_tensor_op_s32_sm80.cu
conv2d_fprop_implicit_gemm_s4nhwc_s4nhwc_s32nhwc_tensor_op_s32_sm80.cu
)
# Conv2d - S8 interleaved input, S8 interleaved output, S32 accumulation
cutlass_target_sources(
cutlass_test_unit_conv_device_tensorop_s32_interleaved
PRIVATE
conv2d_fprop_implicit_gemm_s8ncxhwx_s8cxrskx_s8ncxhwx_tensor_op_s32_sm80.cu
conv2d_fprop_implicit_gemm_s4ncxhwx_s4cxrskx_s4ncxhwx_tensor_op_s32_sm80.cu
)
# Conv2d - S8 input, S32 output, S32 accumulation
cutlass_test_unit_add_executable(
cutlass_test_unit_conv_device_tensorop_s32
endif()
conv2d_fprop_implicit_gemm_s8nhwc_s8nhwc_s32nhwc_tensor_op_s32_sm75.cu
conv2d_fprop_implicit_gemm_s4nhwc_s4nhwc_s32nhwc_tensor_op_s32_sm75.cu
conv2d_fprop_implicit_gemm_s8nhwc_s8nhwc_s32nhwc_tensor_op_s32_sm80.cu
conv2d_fprop_implicit_gemm_s4nhwc_s4nhwc_s32nhwc_tensor_op_s32_sm80.cu
)
# Conv2d - S8 interleaved input, S8 interleaved output, S32 accumulation
cutlass_test_unit_add_executable(
cutlass_test_unit_conv_device_tensorop_s32_interleaved
conv2d_fprop_implicit_gemm_s8ncxhwx_s8cxrskx_s8ncxhwx_tensor_op_s32_sm75.cu
conv2d_fprop_implicit_gemm_s4ncxhwx_s4cxrskx_s4ncxhwx_tensor_op_s32_sm75.cu
conv2d_fprop_implicit_gemm_s8ncxhwx_s8cxrskx_s8ncxhwx_tensor_op_s32_sm80.cu
conv2d_fprop_implicit_gemm_s4ncxhwx_s4cxrskx_s4ncxhwx_tensor_op_s32_sm80.cu
)
endif()

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
@ -165,6 +165,22 @@ struct TestbedConv2dProblemSizes {
// C < CTA::K and non-multiples of CTA::K. Typical CTA::K = {32, 64}
////////////////////////////////////////////////////////////////////////////////////////////
conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize(
{1, 1, 1, minimum_channel_size}, // input size (NHWC)
{8, 1, 1, minimum_channel_size}, // filter size (KRSC)
{1, 1, 1, 1}, // padding (pad_h, _, pad_w, _)
{1, 1}, // stride (stride_h, stride_w)
{1, 1} // dilation (dilation_h, dilation_w)
));
conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize(
{1, 1, 8, minimum_channel_size}, // input size (NHWC)
{8, 1, 3, minimum_channel_size}, // filter size (KRSC)
{1, 1, 1, 1}, // padding (pad_h, _, pad_w, _)
{1, 1}, // stride (stride_h, stride_w)
{1, 1} // dilation (dilation_h, dilation_w)
));
conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize(
{1, 8, 8, minimum_channel_size}, // input size (NHWC)
{8, 3, 3, minimum_channel_size}, // filter size (KRSC)
@ -322,7 +338,7 @@ struct TestbedConv2dProblemSizes {
{1, 1}, // dilation (dilation_h, dilation_w)
{4, 1, 1, 328} // output size (NPQK)
));
}

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
@ -204,10 +204,13 @@ public:
ElementCompute alpha = ElementCompute(1),
ElementCompute beta = ElementCompute(0)) {
// Waive test if CUDA device is insufficient
if (!sufficient()) {
return true;
}
// Waive test if insufficient CUDA device
if (!sufficient()) {
if (CUTLASS_TEST_UNIT_ENABLE_WARNINGS) {
std::cerr << "Test waived due to insufficient CUDA device." << std::endl;
}
return true;
}
#if 0 //display conv2d problem size for debugging
std::cout << problem_size << std::endl

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -0,0 +1,120 @@
/***************************************************************************************************
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice, this list of
* conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
**************************************************************************************************/
/*! \file
\brief Tests for device-wide Implicit GEMM interface
*/
#include "../../common/cutlass_unit_test.h"
#include "cutlass/cutlass.h"
#include "cutlass/conv/kernel/default_conv3d_dgrad.h"
#include "cutlass/conv/device/implicit_gemm_convolution.h"
#include "conv3d_testbed.h"
#if defined(CUTLASS_ARCH_MMA_SM80_SUPPORTED)
TEST(SM80_Device_Conv3d_Dgrad_Analytic_ImplicitGemm_f16ndhwc_f16ndhwc_f32ndhwc_tensor_op_f32,
128x128_32x4_64x64x32) {
/// Conv operation element types for the Gemm equivalent (ImplicitGemm)
using ElementA = cutlass::half_t;
using ElementB = cutlass::half_t;
using ElementC = float;
using ElementAccumulator = float;
using ElementCompute = float;
using Conv3dDgradKernel = typename cutlass::conv::kernel::DefaultConv3dDgrad<
ElementA, cutlass::layout::TensorNDHWC,
ElementB, cutlass::layout::TensorNDHWC,
ElementC, cutlass::layout::TensorNDHWC,
ElementAccumulator,
cutlass::arch::OpClassTensorOp,
cutlass::arch::Sm80,
cutlass::gemm::GemmShape<128, 128, 32>,
cutlass::gemm::GemmShape<64, 64, 32>,
cutlass::gemm::GemmShape<16, 8, 16>,
cutlass::epilogue::thread::LinearCombination<
ElementC,
128 / cutlass::sizeof_bits<ElementC>::value,
ElementAccumulator,
ElementCompute
>,
cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>,
4,
cutlass::arch::OpMultiplyAdd
>::Kernel;
using Conv3dDgrad = cutlass::conv::device::ImplicitGemmConvolution<Conv3dDgradKernel>;
/// Run all unit test sizes with device-level Conv3d instance
EXPECT_TRUE(test::conv::device::TestAllConv3d<Conv3dDgrad>());
}
////////////////////////////////////////////////////////////////////////////////
TEST(SM80_Device_Conv3d_Dgrad_Optimized_ImplicitGemm_f16ndhwc_f16ndhwc_f32ndhwc_tensor_op_f32,
128x128_32x4_64x64x32) {
/// Conv operation element types for the Gemm equivalent (ImplicitGemm)
using ElementA = cutlass::half_t;
using ElementB = cutlass::half_t;
using ElementC = float;
using ElementAccumulator = float;
using ElementCompute = float;
using Conv3dDgradKernel = typename cutlass::conv::kernel::DefaultConv3dDgrad<
ElementA, cutlass::layout::TensorNDHWC,
ElementB, cutlass::layout::TensorNDHWC,
ElementC, cutlass::layout::TensorNDHWC,
ElementAccumulator,
cutlass::arch::OpClassTensorOp,
cutlass::arch::Sm80,
cutlass::gemm::GemmShape<128, 128, 32>,
cutlass::gemm::GemmShape<64, 64, 32>,
cutlass::gemm::GemmShape<16, 8, 16>,
cutlass::epilogue::thread::LinearCombination<
ElementC,
128 / cutlass::sizeof_bits<ElementC>::value,
ElementAccumulator,
ElementCompute
>,
cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>,
4,
cutlass::arch::OpMultiplyAdd,
cutlass::conv::IteratorAlgorithm::kOptimized,
cutlass::conv::StrideSupport::kUnity
>::Kernel;
using Conv3dDgrad = cutlass::conv::device::ImplicitGemmConvolution<Conv3dDgradKernel>;
/// Run all unit test sizes with device-level Conv3d instance
EXPECT_TRUE(test::conv::device::TestAllConv3d<Conv3dDgrad>());
}
////////////////////////////////////////////////////////////////////////////////
#endif // CUTLASS_ARCH_MMA_SM75_SUPPORTED

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
@ -76,5 +76,46 @@ TEST(SM80_Device_Conv3d_Dgrad_Analytic_ImplicitGemm_tf32ndhwc_tf32ndhwc_f32ndhwc
EXPECT_TRUE(test::conv::device::TestAllConv3d<Conv3dDgrad>());
}
////////////////////////////////////////////////////////////////////////////////
TEST(SM80_Device_Conv3d_Dgrad_Optimized_ImplicitGemm_tf32ndhwc_tf32ndhwc_f32ndhwc_tensor_op_f32,
128x128_32x3_64x64x32) {
/// Conv operation element types for the Gemm equivalent (ImplicitGemm)
using ElementA = cutlass::tfloat32_t;
using ElementB = cutlass::tfloat32_t;
using ElementC = float;
using ElementAccumulator = float;
using ElementCompute = float;
/// Device-level Conv2d instance
using Conv3dDgradKernel = typename cutlass::conv::kernel::DefaultConv3dDgrad<
ElementA, cutlass::layout::TensorNDHWC,
ElementB, cutlass::layout::TensorNDHWC,
ElementC, cutlass::layout::TensorNDHWC,
ElementAccumulator,
cutlass::arch::OpClassTensorOp,
cutlass::arch::Sm80,
cutlass::gemm::GemmShape<128, 128, 16>,
cutlass::gemm::GemmShape<64, 64, 16>,
cutlass::gemm::GemmShape<16, 8, 8>,
cutlass::epilogue::thread::LinearCombination<
ElementC,
128 / cutlass::sizeof_bits<ElementC>::value,
ElementAccumulator,
ElementCompute
>,
cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>,
3,
cutlass::arch::OpMultiplyAdd,
cutlass::conv::IteratorAlgorithm::kOptimized,
cutlass::conv::StrideSupport::kUnity
>::Kernel;
using Conv3dDgrad = cutlass::conv::device::ImplicitGemmConvolution<Conv3dDgradKernel>;
/// Run all unit test sizes with device-level Conv3d instance
EXPECT_TRUE(test::conv::device::TestAllConv3d<Conv3dDgrad>());
}
////////////////////////////////////////////////////////////////////////////////
#endif // CUTLASS_ARCH_MMA_SM80_SUPPORTED

View File

@ -0,0 +1,80 @@
/***************************************************************************************************
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice, this list of
* conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
**************************************************************************************************/
/*! \file
\brief Tests for device-wide Implicit GEMM interface
*/
#include "../../common/cutlass_unit_test.h"
#include "cutlass/cutlass.h"
#include "cutlass/conv/kernel/default_conv3d_fprop.h"
#include "cutlass/conv/device/implicit_gemm_convolution.h"
#include "conv3d_testbed.h"
#if defined(CUTLASS_ARCH_MMA_SM75_SUPPORTED)
////////////////////////////////////////////////////////////////////////////////
TEST(SM75_Device_Conv3d_Fprop_Analytic_ImplicitGemm_f16ndhwc_f16ndhwc_f32ndhwc_tensor_op_f32,
128x128_32x3_64x64x32) {
/// Conv operation element types for the Gemm equivalent (ImplicitGemm)
using ElementA = cutlass::half_t;
using ElementB = cutlass::half_t;
using ElementC = float;
using ElementAccumulator = float;
using ElementCompute = float;
/// Device-level Conv2d instance
using Conv3dFpropKernel = typename cutlass::conv::kernel::DefaultConv3dFprop<
ElementA, cutlass::layout::TensorNDHWC,
ElementB, cutlass::layout::TensorNDHWC,
ElementC, cutlass::layout::TensorNDHWC,
ElementAccumulator,
cutlass::arch::OpClassTensorOp,
cutlass::arch::Sm75,
cutlass::gemm::GemmShape<128, 128, 16>,
cutlass::gemm::GemmShape<64, 64, 16>,
cutlass::gemm::GemmShape<16, 8, 8>,
cutlass::epilogue::thread::LinearCombination<
ElementC,
128 / cutlass::sizeof_bits<ElementC>::value,
ElementAccumulator,
ElementCompute
>,
cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>,
2,
cutlass::arch::OpMultiplyAdd
>::Kernel;
using Conv3dFprop = cutlass::conv::device::ImplicitGemmConvolution<Conv3dFpropKernel>;
/// Run all unit test sizes with device-level Conv3d instance
EXPECT_TRUE(test::conv::device::TestAllConv3d<Conv3dFprop>());
}
////////////////////////////////////////////////////////////////////////////////
#endif // CUTLASS_ARCH_MMA_SM75_SUPPORTED

View File

@ -0,0 +1,159 @@
/***************************************************************************************************
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice, this list of
* conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
**************************************************************************************************/
/*! \file
\brief Tests for device-wide Implicit GEMM interface
*/
#include "../../common/cutlass_unit_test.h"
#include "cutlass/cutlass.h"
#include "cutlass/conv/kernel/default_conv3d_fprop.h"
#include "cutlass/conv/device/implicit_gemm_convolution.h"
#include "conv3d_testbed.h"
#if defined(CUTLASS_ARCH_MMA_SM80_SUPPORTED)
TEST(SM80_Device_Conv3d_Fprop_Analytic_ImplicitGemm_f16ndhwc_f16ndhwc_f32ndhwc_tensor_op_f32,
128x128_32x4_64x64x32) {
/// Conv operation element types for the Gemm equivalent (ImplicitGemm)
using ElementA = cutlass::half_t;
using ElementB = cutlass::half_t;
using ElementC = float;
using ElementAccumulator = float;
using ElementCompute = float;
using Conv3dFpropKernel = typename cutlass::conv::kernel::DefaultConv3dFprop<
ElementA, cutlass::layout::TensorNDHWC,
ElementB, cutlass::layout::TensorNDHWC,
ElementC, cutlass::layout::TensorNDHWC,
ElementAccumulator,
cutlass::arch::OpClassTensorOp,
cutlass::arch::Sm80,
cutlass::gemm::GemmShape<128, 128, 32>,
cutlass::gemm::GemmShape<64, 64, 32>,
cutlass::gemm::GemmShape<16, 8, 16>,
cutlass::epilogue::thread::LinearCombination<
ElementC,
128 / cutlass::sizeof_bits<ElementC>::value,
ElementAccumulator,
ElementCompute
>,
cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>,
4,
cutlass::arch::OpMultiplyAdd
>::Kernel;
using Conv3dFprop = cutlass::conv::device::ImplicitGemmConvolution<Conv3dFpropKernel>;
/// Run all unit test sizes with device-level Conv3d instance
EXPECT_TRUE(test::conv::device::TestAllConv3d<Conv3dFprop>());
}
////////////////////////////////////////////////////////////////////////////////
TEST(SM80_Device_Conv3d_Fprop_Optimized_ImplicitGemm_f16ndhwc_f16ndhwc_f32ndhwc_tensor_op_f32,
128x128_32x4_64x64x32) {
/// Conv operation element types for the Gemm equivalent (ImplicitGemm)
using ElementA = cutlass::half_t;
using ElementB = cutlass::half_t;
using ElementC = float;
using ElementAccumulator = float;
using ElementCompute = float;
using Conv3dFpropKernel = typename cutlass::conv::kernel::DefaultConv3dFprop<
ElementA, cutlass::layout::TensorNDHWC,
ElementB, cutlass::layout::TensorNDHWC,
ElementC, cutlass::layout::TensorNDHWC,
ElementAccumulator,
cutlass::arch::OpClassTensorOp,
cutlass::arch::Sm80,
cutlass::gemm::GemmShape<128, 128, 32>,
cutlass::gemm::GemmShape<64, 64, 32>,
cutlass::gemm::GemmShape<16, 8, 16>,
cutlass::epilogue::thread::LinearCombination<
ElementC,
128 / cutlass::sizeof_bits<ElementC>::value,
ElementAccumulator,
ElementCompute
>,
cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>,
4,
cutlass::arch::OpMultiplyAdd,
cutlass::conv::IteratorAlgorithm::kOptimized
>::Kernel;
using Conv3dFprop = cutlass::conv::device::ImplicitGemmConvolution<Conv3dFpropKernel>;
/// Run all unit test sizes with device-level Conv3d instance
EXPECT_TRUE(test::conv::device::TestAllConv3d<Conv3dFprop>());
}
////////////////////////////////////////////////////////////////////////////////
TEST(SM80_Device_Conv3d_Fprop_Optimized_ImplicitGemm_f16ndhwc_f16ndhwc_f32ndhwc_tensor_op_f32,
64x256_32x4_64x64x32) {
/// Conv operation element types for the Gemm equivalent (ImplicitGemm)
using ElementA = cutlass::half_t;
using ElementB = cutlass::half_t;
using ElementC = float;
using ElementAccumulator = float;
using ElementCompute = float;
using Conv3dFpropKernel = typename cutlass::conv::kernel::DefaultConv3dFprop<
ElementA, cutlass::layout::TensorNDHWC,
ElementB, cutlass::layout::TensorNDHWC,
ElementC, cutlass::layout::TensorNDHWC,
ElementAccumulator,
cutlass::arch::OpClassTensorOp,
cutlass::arch::Sm80,
cutlass::gemm::GemmShape<64, 256, 32>,
cutlass::gemm::GemmShape<64, 64, 32>,
cutlass::gemm::GemmShape<16, 8, 16>,
cutlass::epilogue::thread::LinearCombination<
ElementC,
128 / cutlass::sizeof_bits<ElementC>::value,
ElementAccumulator,
ElementCompute
>,
cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>,
4,
cutlass::arch::OpMultiplyAdd,
cutlass::conv::IteratorAlgorithm::kOptimized
>::Kernel;
using Conv3dFprop = cutlass::conv::device::ImplicitGemmConvolution<Conv3dFpropKernel>;
/// Run all unit test sizes with device-level Conv3d instance
EXPECT_TRUE(test::conv::device::TestAllConv3d<Conv3dFprop>());
}
////////////////////////////////////////////////////////////////////////////////
#endif // CUTLASS_ARCH_MMA_SM75_SUPPORTED

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
@ -76,5 +76,46 @@ TEST(SM80_Device_Conv3d_Fprop_Analytic_ImplicitGemm_tf32ndhwc_tf32ndhwc_f32ndhwc
EXPECT_TRUE(test::conv::device::TestAllConv3d<Conv3dFprop>());
}
////////////////////////////////////////////////////////////////////////////////
TEST(SM80_Device_Conv3d_Fprop_Optimized_ImplicitGemm_tf32ndhwc_tf32ndhwc_f32ndhwc_tensor_op_f32,
128x128_32x3_64x64x32) {
/// Conv operation element types for the Gemm equivalent (ImplicitGemm)
using ElementA = cutlass::tfloat32_t;
using ElementB = cutlass::tfloat32_t;
using ElementC = float;
using ElementAccumulator = float;
using ElementCompute = float;
/// Device-level Conv2d instance
using Conv3dFpropKernel = typename cutlass::conv::kernel::DefaultConv3dFprop<
ElementA, cutlass::layout::TensorNDHWC,
ElementB, cutlass::layout::TensorNDHWC,
ElementC, cutlass::layout::TensorNDHWC,
ElementAccumulator,
cutlass::arch::OpClassTensorOp,
cutlass::arch::Sm80,
cutlass::gemm::GemmShape<128, 128, 16>,
cutlass::gemm::GemmShape<64, 64, 16>,
cutlass::gemm::GemmShape<16, 8, 8>,
cutlass::epilogue::thread::LinearCombination<
ElementC,
128 / cutlass::sizeof_bits<ElementC>::value,
ElementAccumulator,
ElementCompute
>,
cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>,
3,
cutlass::arch::OpMultiplyAdd,
cutlass::conv::IteratorAlgorithm::kOptimized
>::Kernel;
using Conv3dFprop = cutlass::conv::device::ImplicitGemmConvolution<Conv3dFpropKernel>;
/// Run all unit test sizes with device-level Conv3d instance
EXPECT_TRUE(test::conv::device::TestAllConv3d<Conv3dFprop>());
}
////////////////////////////////////////////////////////////////////////////////
#endif // CUTLASS_ARCH_MMA_SM80_SUPPORTED

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
@ -107,9 +107,25 @@ struct TestbedConv3dProblemSizes {
));
conv3d_default_sizes.push_back(cutlass::conv::Conv3dProblemSize(
{1, 1, 16, 16, minimum_channel_size}, // input size (NDHWC)
{8, 1, 3, 3, minimum_channel_size}, // filter size (KTRSC)
cutlass::Coord<3>({0, 1, 1}), // padding (pad_d, pad_h, pad_w)
{1, 1, 1, 8, minimum_channel_size}, // input size (NDHWC)
{8, 1, 1, 3, minimum_channel_size}, // filter size (KTRSC)
cutlass::Coord<3>({1, 1, 1}), // padding (pad_d, pad_h, pad_w)
cutlass::Coord<3>({1, 1, 1}), // stride (stride_d, stride_h, stride_w)
cutlass::Coord<3>({1, 1, 1}) // dilation (dilation_d, dilation_h, dilation_w)
));
conv3d_default_sizes.push_back(cutlass::conv::Conv3dProblemSize(
{1, 8, 8, 8, minimum_channel_size}, // input size (NDHWC)
{8, 3, 3, 3, minimum_channel_size}, // filter size (KTRSC)
cutlass::Coord<3>({1, 1, 1}), // padding (pad_d, pad_h, pad_w)
cutlass::Coord<3>({1, 1, 1}), // stride (stride_d, stride_h, stride_w)
cutlass::Coord<3>({1, 1, 1}) // dilation (dilation_d, dilation_h, dilation_w)
));
conv3d_default_sizes.push_back(cutlass::conv::Conv3dProblemSize(
{1, 16, 16, 16, minimum_channel_size}, // input size (NDHWC)
{8, 3, 3, 3, minimum_channel_size}, // filter size (KTRSC)
cutlass::Coord<3>({1, 1, 1}), // padding (pad_d, pad_h, pad_w)
cutlass::Coord<3>({1, 1, 1}), // stride (stride_d, stride_h, stride_w)
cutlass::Coord<3>({1, 1, 1}) // dilation (dilation_d, dilation_h, dilation_w)
));
@ -138,6 +154,7 @@ struct TestbedConv3dProblemSizes {
cutlass::Coord<3>({1, 1, 1}) // dilation (dilation_d, dilation_h, dilation_w)
));
conv3d_default_sizes.push_back(cutlass::conv::Conv3dProblemSize(
{1, 11, 15, 19, 64}, // input size (NDHWC)
{32, 4, 3, 6, 64}, // filter size (KTRSC)

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
@ -204,10 +204,14 @@ public:
ElementCompute alpha = ElementCompute(1),
ElementCompute beta = ElementCompute()) {
// Waive test if CUDA device is insufficient.
if (!sufficient()) {
return true;
}
// Waive test if insufficient CUDA device
if (!sufficient()) {
if (CUTLASS_TEST_UNIT_ENABLE_WARNINGS) {
std::cerr << "Test waived due to insufficient CUDA device." << std::endl;
}
return true;
}
#if 0 //display conv2d problem size for debugging
std::cout << problem_size << std::endl
@ -413,11 +417,6 @@ bool TestAllConv3d(
//
TestbedConv3dProblemSizes conv3d_problems(128/cutlass::sizeof_bits<typename ImplicitGemm::ElementA>::value);
//
// Get conv problem sizes to run conv operator
//
//TestbedConv3dProblemSizes conv_problems(128/cutlass::sizeof_bits<typename ImplicitGemm::ElementA>::value);
// Vector of conv3d problem sizes to avoid duplicate runs
Conv3dProblemVector conv_tested_sizes;
@ -443,12 +442,17 @@ bool TestAllConv3d(
// Procedurally disable certain cases
//
// CUTLASS DGRAD's unity stride specialization only support stride {1, 1}
// CUTLASS DGRAD's unity stride specialization only support stride {1, 1, 1}
if ((ImplicitGemm::kConvolutionalOperator ==
cutlass::conv::Operator::kDgrad) &&
(ImplicitGemm::ImplicitGemmKernel::Mma::IteratorA::kStrideSupport ==
cutlass::conv::StrideSupport::kUnity)) {
if (!((conv_problem.stride_h == 1) && (conv_problem.stride_w == 1))) {
((ImplicitGemm::ImplicitGemmKernel::Mma::IteratorA::kStrideSupport ==
cutlass::conv::StrideSupport::kUnity) ||
(ImplicitGemm::ImplicitGemmKernel::Mma::IteratorB::kStrideSupport ==
cutlass::conv::StrideSupport::kUnity))) {
if (!((conv_problem.stride_d == 1) &&
(conv_problem.stride_h == 1) &&
(conv_problem.stride_w == 1))
) {
continue;
}
}

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:

View File

@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met: