CUTLASS 2.5
This commit is contained in:
@ -20,32 +20,73 @@
|
||||
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
add_custom_target(
|
||||
list(SORT CUTLASS_NVCC_ARCHS_ENABLED)
|
||||
set(CUTLASS_NVCC_ARCHS_ENABLED_REVERSED ${CUTLASS_NVCC_ARCHS_ENABLED})
|
||||
list(REVERSE CUTLASS_NVCC_ARCHS_ENABLED_REVERSED)
|
||||
list(GET CUTLASS_NVCC_ARCHS_ENABLED_REVERSED 0 CUTLASS_NVCC_MAX_ARCH)
|
||||
|
||||
add_custom_target(
|
||||
cutlass_test_unit_conv_device
|
||||
DEPENDS
|
||||
cutlass_test_unit_conv_device_simt
|
||||
cutlass_test_unit_conv_device_tensorop_f32_sm70
|
||||
cutlass_test_unit_conv_device_tensorop_f32_sm75
|
||||
cutlass_test_unit_conv_device_tensorop_f16_sm80
|
||||
cutlass_test_unit_conv_device_tensorop_f32_sm80
|
||||
cutlass_test_unit_conv_device_tensorop_f32_tf32_sm80
|
||||
cutlass_test_unit_conv_device_tensorop_s32
|
||||
cutlass_test_unit_conv_device_tensorop_s32_interleaved
|
||||
)
|
||||
|
||||
add_custom_target(
|
||||
test_unit_conv_device
|
||||
DEPENDS
|
||||
test_unit_conv_device_simt
|
||||
test_unit_conv_device_tensorop_f32_sm70
|
||||
test_unit_conv_device_tensorop_f32_sm75
|
||||
test_unit_conv_device_tensorop_f16_sm80
|
||||
test_unit_conv_device_tensorop_f32_sm80
|
||||
test_unit_conv_device_tensorop_f32_tf32_sm80
|
||||
test_unit_conv_device_tensorop_s32
|
||||
test_unit_conv_device_tensorop_s32_interleaved
|
||||
)
|
||||
|
||||
if (CUTLASS_NVCC_MAX_ARCH GREATER_EQUAL 70)
|
||||
|
||||
add_dependencies(
|
||||
cutlass_test_unit_conv_device
|
||||
cutlass_test_unit_conv_device_tensorop_f32_sm70
|
||||
)
|
||||
|
||||
add_dependencies(
|
||||
test_unit_conv_device
|
||||
test_unit_conv_device_tensorop_f32_sm70
|
||||
)
|
||||
|
||||
endif()
|
||||
|
||||
if (CUTLASS_NVCC_MAX_ARCH GREATER_EQUAL 75)
|
||||
|
||||
add_dependencies(
|
||||
cutlass_test_unit_conv_device
|
||||
cutlass_test_unit_conv_device_tensorop_f32_sm75
|
||||
cutlass_test_unit_conv_device_tensorop_s32
|
||||
cutlass_test_unit_conv_device_tensorop_s32_interleaved
|
||||
)
|
||||
|
||||
add_dependencies(
|
||||
test_unit_conv_device
|
||||
test_unit_conv_device_tensorop_f32_sm75
|
||||
test_unit_conv_device_tensorop_s32
|
||||
test_unit_conv_device_tensorop_s32_interleaved
|
||||
)
|
||||
|
||||
endif()
|
||||
|
||||
if (CUTLASS_NVCC_MAX_ARCH GREATER_EQUAL 80)
|
||||
|
||||
add_dependencies(
|
||||
cutlass_test_unit_conv_device
|
||||
cutlass_test_unit_conv_device_tensorop_f16_sm80
|
||||
cutlass_test_unit_conv_device_tensorop_f32_sm80
|
||||
cutlass_test_unit_conv_device_tensorop_f32_tf32_sm80
|
||||
)
|
||||
|
||||
add_dependencies(
|
||||
test_unit_conv_device
|
||||
test_unit_conv_device_tensorop_f16_sm80
|
||||
test_unit_conv_device_tensorop_f32_sm80
|
||||
test_unit_conv_device_tensorop_f32_tf32_sm80
|
||||
)
|
||||
|
||||
endif()
|
||||
|
||||
#
|
||||
# OpClassSimt (CUDA cores)
|
||||
#
|
||||
@ -56,20 +97,27 @@ cutlass_test_unit_add_executable(
|
||||
# F32
|
||||
conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm50.cu
|
||||
|
||||
conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.cu
|
||||
conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.cu
|
||||
conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.cu
|
||||
|
||||
# CF32
|
||||
conv2d_fprop_implicit_gemm_cf32nhwc_cf32nhwc_cf32nhwc_simt_f32_sm50.cu
|
||||
conv2d_dgrad_implicit_gemm_cf32nhwc_cf32nhwc_cf32nhwc_simt_f32_sm50.cu
|
||||
conv2d_wgrad_implicit_gemm_cf32nhwc_cf32nhwc_cf32nhwc_simt_f32_sm50.cu
|
||||
|
||||
conv2d_fprop_implicit_gemm_cf32nhwc_cf32nhwc_cf32nhwc_simt_f32_sm80.cu
|
||||
conv2d_dgrad_implicit_gemm_cf32nhwc_cf32nhwc_cf32nhwc_simt_f32_sm80.cu
|
||||
conv2d_wgrad_implicit_gemm_cf32nhwc_cf32nhwc_cf32nhwc_simt_f32_sm80.cu
|
||||
)
|
||||
|
||||
if (CUTLASS_NVCC_MAX_ARCH GREATER_EQUAL 80)
|
||||
|
||||
cutlass_target_sources(
|
||||
cutlass_test_unit_conv_device_simt
|
||||
PRIVATE
|
||||
conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.cu
|
||||
conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.cu
|
||||
conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.cu
|
||||
conv2d_fprop_implicit_gemm_cf32nhwc_cf32nhwc_cf32nhwc_simt_f32_sm80.cu
|
||||
conv2d_dgrad_implicit_gemm_cf32nhwc_cf32nhwc_cf32nhwc_simt_f32_sm80.cu
|
||||
conv2d_wgrad_implicit_gemm_cf32nhwc_cf32nhwc_cf32nhwc_simt_f32_sm80.cu
|
||||
)
|
||||
|
||||
endif()
|
||||
|
||||
#
|
||||
# OpClassTensorOp (Tensor cores)
|
||||
#
|
||||
@ -92,57 +140,81 @@ cutlass_test_unit_add_executable(
|
||||
conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm75.cu
|
||||
)
|
||||
|
||||
# Conv2d - F16 input, F16 output, F16 accumulation
|
||||
cutlass_test_unit_add_executable(
|
||||
cutlass_test_unit_conv_device_tensorop_f16_sm80
|
||||
if (CUTLASS_NVCC_MAX_ARCH GREATER_EQUAL 80)
|
||||
|
||||
# Conv2d - F16 input, F16 output, F16 accumulation
|
||||
cutlass_test_unit_add_executable(
|
||||
cutlass_test_unit_conv_device_tensorop_f16_sm80
|
||||
|
||||
conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.cu
|
||||
conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.cu
|
||||
conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.cu
|
||||
)
|
||||
|
||||
conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.cu
|
||||
conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.cu
|
||||
conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.cu
|
||||
)
|
||||
# Conv2d - F16 input, F32 output, F32 accumulation
|
||||
|
||||
cutlass_test_unit_add_executable(
|
||||
cutlass_test_unit_conv_device_tensorop_f32_sm80
|
||||
|
||||
conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.cu
|
||||
conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.cu
|
||||
conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.cu
|
||||
|
||||
conv3d_wgrad_implicit_gemm_f16ndhwc_f16ndhwc_f32ndhwc_tensor_op_f32_sm75.cu
|
||||
conv3d_wgrad_implicit_gemm_f16ndhwc_f16ndhwc_f32ndhwc_tensor_op_f32_sm80.cu
|
||||
)
|
||||
|
||||
# Conv2d - TF32 input, F32 output, F32 accumulation
|
||||
|
||||
cutlass_test_unit_add_executable(
|
||||
cutlass_test_unit_conv_device_tensorop_f32_tf32_sm80
|
||||
|
||||
conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.cu
|
||||
conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.cu
|
||||
conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.cu
|
||||
|
||||
conv3d_fprop_implicit_gemm_tf32ndhwc_tf32ndhwc_f32ndhwc_tensor_op_f32_sm80.cu
|
||||
conv3d_dgrad_implicit_gemm_tf32ndhwc_tf32ndhwc_f32ndhwc_tensor_op_f32_sm80.cu
|
||||
conv3d_wgrad_implicit_gemm_tf32ndhwc_tf32ndhwc_f32ndhwc_tensor_op_f32_sm80.cu
|
||||
)
|
||||
|
||||
# Conv2d - F16 input, F32 output, F32 accumulation
|
||||
cutlass_test_unit_add_executable(
|
||||
cutlass_test_unit_conv_device_tensorop_f32_sm80
|
||||
endif()
|
||||
|
||||
if (CUTLASS_NVCC_MAX_ARCH GREATER_EQUAL 75)
|
||||
|
||||
conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.cu
|
||||
conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.cu
|
||||
conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.cu
|
||||
# Conv2d - S8 input, S32 output, S32 accumulation
|
||||
|
||||
conv3d_wgrad_implicit_gemm_f16ndhwc_f16ndhwc_f32ndhwc_tensor_op_f32_sm75.cu
|
||||
conv3d_wgrad_implicit_gemm_f16ndhwc_f16ndhwc_f32ndhwc_tensor_op_f32_sm80.cu
|
||||
)
|
||||
cutlass_test_unit_add_executable(
|
||||
cutlass_test_unit_conv_device_tensorop_s32
|
||||
conv2d_fprop_implicit_gemm_s8nhwc_s8nhwc_s32nhwc_tensor_op_s32_sm75.cu
|
||||
conv2d_fprop_implicit_gemm_s4nhwc_s4nhwc_s32nhwc_tensor_op_s32_sm75.cu
|
||||
)
|
||||
|
||||
# Conv2d - S8 interleaved input, S8 interleaved output, S32 accumulation
|
||||
|
||||
# Conv2d - TF32 input, F32 output, F32 accumulation
|
||||
cutlass_test_unit_add_executable(
|
||||
cutlass_test_unit_conv_device_tensorop_f32_tf32_sm80
|
||||
cutlass_test_unit_add_executable(
|
||||
cutlass_test_unit_conv_device_tensorop_s32_interleaved
|
||||
conv2d_fprop_implicit_gemm_s8ncxhwx_s8cxrskx_s8ncxhwx_tensor_op_s32_sm75.cu
|
||||
conv2d_fprop_implicit_gemm_s4ncxhwx_s4cxrskx_s4ncxhwx_tensor_op_s32_sm75.cu
|
||||
)
|
||||
|
||||
conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.cu
|
||||
conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.cu
|
||||
conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.cu
|
||||
if (CUTLASS_NVCC_MAX_ARCH GREATER_EQUAL 80)
|
||||
|
||||
conv3d_fprop_implicit_gemm_tf32ndhwc_tf32ndhwc_f32ndhwc_tensor_op_f32_sm80.cu
|
||||
conv3d_dgrad_implicit_gemm_tf32ndhwc_tf32ndhwc_f32ndhwc_tensor_op_f32_sm80.cu
|
||||
conv3d_wgrad_implicit_gemm_tf32ndhwc_tf32ndhwc_f32ndhwc_tensor_op_f32_sm80.cu
|
||||
)
|
||||
cutlass_target_sources(
|
||||
cutlass_test_unit_conv_device_tensorop_s32
|
||||
PRIVATE
|
||||
conv2d_fprop_implicit_gemm_s8nhwc_s8nhwc_s32nhwc_tensor_op_s32_sm80.cu
|
||||
conv2d_fprop_implicit_gemm_s4nhwc_s4nhwc_s32nhwc_tensor_op_s32_sm80.cu
|
||||
)
|
||||
|
||||
# Conv2d - S8 interleaved input, S8 interleaved output, S32 accumulation
|
||||
cutlass_target_sources(
|
||||
cutlass_test_unit_conv_device_tensorop_s32_interleaved
|
||||
PRIVATE
|
||||
conv2d_fprop_implicit_gemm_s8ncxhwx_s8cxrskx_s8ncxhwx_tensor_op_s32_sm80.cu
|
||||
conv2d_fprop_implicit_gemm_s4ncxhwx_s4cxrskx_s4ncxhwx_tensor_op_s32_sm80.cu
|
||||
)
|
||||
|
||||
# Conv2d - S8 input, S32 output, S32 accumulation
|
||||
cutlass_test_unit_add_executable(
|
||||
cutlass_test_unit_conv_device_tensorop_s32
|
||||
endif()
|
||||
|
||||
conv2d_fprop_implicit_gemm_s8nhwc_s8nhwc_s32nhwc_tensor_op_s32_sm75.cu
|
||||
conv2d_fprop_implicit_gemm_s4nhwc_s4nhwc_s32nhwc_tensor_op_s32_sm75.cu
|
||||
conv2d_fprop_implicit_gemm_s8nhwc_s8nhwc_s32nhwc_tensor_op_s32_sm80.cu
|
||||
conv2d_fprop_implicit_gemm_s4nhwc_s4nhwc_s32nhwc_tensor_op_s32_sm80.cu
|
||||
)
|
||||
|
||||
# Conv2d - S8 interleaved input, S8 interleaved output, S32 accumulation
|
||||
cutlass_test_unit_add_executable(
|
||||
cutlass_test_unit_conv_device_tensorop_s32_interleaved
|
||||
|
||||
conv2d_fprop_implicit_gemm_s8ncxhwx_s8cxrskx_s8ncxhwx_tensor_op_s32_sm75.cu
|
||||
conv2d_fprop_implicit_gemm_s4ncxhwx_s4cxrskx_s4ncxhwx_tensor_op_s32_sm75.cu
|
||||
conv2d_fprop_implicit_gemm_s8ncxhwx_s8cxrskx_s8ncxhwx_tensor_op_s32_sm80.cu
|
||||
conv2d_fprop_implicit_gemm_s4ncxhwx_s4cxrskx_s4ncxhwx_tensor_op_s32_sm80.cu
|
||||
)
|
||||
endif()
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
@ -165,6 +165,22 @@ struct TestbedConv2dProblemSizes {
|
||||
// C < CTA::K and non-multiples of CTA::K. Typical CTA::K = {32, 64}
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize(
|
||||
{1, 1, 1, minimum_channel_size}, // input size (NHWC)
|
||||
{8, 1, 1, minimum_channel_size}, // filter size (KRSC)
|
||||
{1, 1, 1, 1}, // padding (pad_h, _, pad_w, _)
|
||||
{1, 1}, // stride (stride_h, stride_w)
|
||||
{1, 1} // dilation (dilation_h, dilation_w)
|
||||
));
|
||||
|
||||
conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize(
|
||||
{1, 1, 8, minimum_channel_size}, // input size (NHWC)
|
||||
{8, 1, 3, minimum_channel_size}, // filter size (KRSC)
|
||||
{1, 1, 1, 1}, // padding (pad_h, _, pad_w, _)
|
||||
{1, 1}, // stride (stride_h, stride_w)
|
||||
{1, 1} // dilation (dilation_h, dilation_w)
|
||||
));
|
||||
|
||||
conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize(
|
||||
{1, 8, 8, minimum_channel_size}, // input size (NHWC)
|
||||
{8, 3, 3, minimum_channel_size}, // filter size (KRSC)
|
||||
@ -322,7 +338,7 @@ struct TestbedConv2dProblemSizes {
|
||||
{1, 1}, // dilation (dilation_h, dilation_w)
|
||||
{4, 1, 1, 328} // output size (NPQK)
|
||||
));
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
@ -204,10 +204,13 @@ public:
|
||||
ElementCompute alpha = ElementCompute(1),
|
||||
ElementCompute beta = ElementCompute(0)) {
|
||||
|
||||
// Waive test if CUDA device is insufficient
|
||||
if (!sufficient()) {
|
||||
return true;
|
||||
}
|
||||
// Waive test if insufficient CUDA device
|
||||
if (!sufficient()) {
|
||||
if (CUTLASS_TEST_UNIT_ENABLE_WARNINGS) {
|
||||
std::cerr << "Test waived due to insufficient CUDA device." << std::endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
#if 0 //display conv2d problem size for debugging
|
||||
std::cout << problem_size << std::endl
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -0,0 +1,120 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright notice, this list of
|
||||
* conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright notice, this list of
|
||||
* conditions and the following disclaimer in the documentation and/or other materials
|
||||
* provided with the distribution.
|
||||
* * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
|
||||
* to endorse or promote products derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************************************/
|
||||
/*! \file
|
||||
\brief Tests for device-wide Implicit GEMM interface
|
||||
*/
|
||||
|
||||
#include "../../common/cutlass_unit_test.h"
|
||||
#include "cutlass/cutlass.h"
|
||||
|
||||
#include "cutlass/conv/kernel/default_conv3d_dgrad.h"
|
||||
#include "cutlass/conv/device/implicit_gemm_convolution.h"
|
||||
|
||||
#include "conv3d_testbed.h"
|
||||
|
||||
#if defined(CUTLASS_ARCH_MMA_SM80_SUPPORTED)
|
||||
|
||||
TEST(SM80_Device_Conv3d_Dgrad_Analytic_ImplicitGemm_f16ndhwc_f16ndhwc_f32ndhwc_tensor_op_f32,
|
||||
128x128_32x4_64x64x32) {
|
||||
|
||||
/// Conv operation element types for the Gemm equivalent (ImplicitGemm)
|
||||
using ElementA = cutlass::half_t;
|
||||
using ElementB = cutlass::half_t;
|
||||
using ElementC = float;
|
||||
using ElementAccumulator = float;
|
||||
using ElementCompute = float;
|
||||
|
||||
using Conv3dDgradKernel = typename cutlass::conv::kernel::DefaultConv3dDgrad<
|
||||
ElementA, cutlass::layout::TensorNDHWC,
|
||||
ElementB, cutlass::layout::TensorNDHWC,
|
||||
ElementC, cutlass::layout::TensorNDHWC,
|
||||
ElementAccumulator,
|
||||
cutlass::arch::OpClassTensorOp,
|
||||
cutlass::arch::Sm80,
|
||||
cutlass::gemm::GemmShape<128, 128, 32>,
|
||||
cutlass::gemm::GemmShape<64, 64, 32>,
|
||||
cutlass::gemm::GemmShape<16, 8, 16>,
|
||||
cutlass::epilogue::thread::LinearCombination<
|
||||
ElementC,
|
||||
128 / cutlass::sizeof_bits<ElementC>::value,
|
||||
ElementAccumulator,
|
||||
ElementCompute
|
||||
>,
|
||||
cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>,
|
||||
4,
|
||||
cutlass::arch::OpMultiplyAdd
|
||||
>::Kernel;
|
||||
|
||||
using Conv3dDgrad = cutlass::conv::device::ImplicitGemmConvolution<Conv3dDgradKernel>;
|
||||
|
||||
/// Run all unit test sizes with device-level Conv3d instance
|
||||
EXPECT_TRUE(test::conv::device::TestAllConv3d<Conv3dDgrad>());
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
TEST(SM80_Device_Conv3d_Dgrad_Optimized_ImplicitGemm_f16ndhwc_f16ndhwc_f32ndhwc_tensor_op_f32,
|
||||
128x128_32x4_64x64x32) {
|
||||
|
||||
/// Conv operation element types for the Gemm equivalent (ImplicitGemm)
|
||||
using ElementA = cutlass::half_t;
|
||||
using ElementB = cutlass::half_t;
|
||||
using ElementC = float;
|
||||
using ElementAccumulator = float;
|
||||
using ElementCompute = float;
|
||||
|
||||
using Conv3dDgradKernel = typename cutlass::conv::kernel::DefaultConv3dDgrad<
|
||||
ElementA, cutlass::layout::TensorNDHWC,
|
||||
ElementB, cutlass::layout::TensorNDHWC,
|
||||
ElementC, cutlass::layout::TensorNDHWC,
|
||||
ElementAccumulator,
|
||||
cutlass::arch::OpClassTensorOp,
|
||||
cutlass::arch::Sm80,
|
||||
cutlass::gemm::GemmShape<128, 128, 32>,
|
||||
cutlass::gemm::GemmShape<64, 64, 32>,
|
||||
cutlass::gemm::GemmShape<16, 8, 16>,
|
||||
cutlass::epilogue::thread::LinearCombination<
|
||||
ElementC,
|
||||
128 / cutlass::sizeof_bits<ElementC>::value,
|
||||
ElementAccumulator,
|
||||
ElementCompute
|
||||
>,
|
||||
cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>,
|
||||
4,
|
||||
cutlass::arch::OpMultiplyAdd,
|
||||
cutlass::conv::IteratorAlgorithm::kOptimized,
|
||||
cutlass::conv::StrideSupport::kUnity
|
||||
>::Kernel;
|
||||
|
||||
using Conv3dDgrad = cutlass::conv::device::ImplicitGemmConvolution<Conv3dDgradKernel>;
|
||||
|
||||
/// Run all unit test sizes with device-level Conv3d instance
|
||||
EXPECT_TRUE(test::conv::device::TestAllConv3d<Conv3dDgrad>());
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
#endif // CUTLASS_ARCH_MMA_SM80_SUPPORTED
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
@ -76,5 +76,46 @@ TEST(SM80_Device_Conv3d_Dgrad_Analytic_ImplicitGemm_tf32ndhwc_tf32ndhwc_f32ndhwc
|
||||
EXPECT_TRUE(test::conv::device::TestAllConv3d<Conv3dDgrad>());
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
TEST(SM80_Device_Conv3d_Dgrad_Optimized_ImplicitGemm_tf32ndhwc_tf32ndhwc_f32ndhwc_tensor_op_f32,
|
||||
128x128_32x3_64x64x32) {
|
||||
|
||||
/// Conv operation element types for the Gemm equivalent (ImplicitGemm)
|
||||
using ElementA = cutlass::tfloat32_t;
|
||||
using ElementB = cutlass::tfloat32_t;
|
||||
using ElementC = float;
|
||||
using ElementAccumulator = float;
|
||||
using ElementCompute = float;
|
||||
|
||||
/// Device-level Conv3d instance
|
||||
using Conv3dDgradKernel = typename cutlass::conv::kernel::DefaultConv3dDgrad<
|
||||
ElementA, cutlass::layout::TensorNDHWC,
|
||||
ElementB, cutlass::layout::TensorNDHWC,
|
||||
ElementC, cutlass::layout::TensorNDHWC,
|
||||
ElementAccumulator,
|
||||
cutlass::arch::OpClassTensorOp,
|
||||
cutlass::arch::Sm80,
|
||||
cutlass::gemm::GemmShape<128, 128, 16>,
|
||||
cutlass::gemm::GemmShape<64, 64, 16>,
|
||||
cutlass::gemm::GemmShape<16, 8, 8>,
|
||||
cutlass::epilogue::thread::LinearCombination<
|
||||
ElementC,
|
||||
128 / cutlass::sizeof_bits<ElementC>::value,
|
||||
ElementAccumulator,
|
||||
ElementCompute
|
||||
>,
|
||||
cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>,
|
||||
3,
|
||||
cutlass::arch::OpMultiplyAdd,
|
||||
cutlass::conv::IteratorAlgorithm::kOptimized,
|
||||
cutlass::conv::StrideSupport::kUnity
|
||||
>::Kernel;
|
||||
|
||||
using Conv3dDgrad = cutlass::conv::device::ImplicitGemmConvolution<Conv3dDgradKernel>;
|
||||
|
||||
/// Run all unit test sizes with device-level Conv3d instance
|
||||
EXPECT_TRUE(test::conv::device::TestAllConv3d<Conv3dDgrad>());
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
#endif // CUTLASS_ARCH_MMA_SM80_SUPPORTED
|
||||
|
||||
@ -0,0 +1,80 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright notice, this list of
|
||||
* conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright notice, this list of
|
||||
* conditions and the following disclaimer in the documentation and/or other materials
|
||||
* provided with the distribution.
|
||||
* * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
|
||||
* to endorse or promote products derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************************************/
|
||||
/*! \file
|
||||
\brief Tests for device-wide Implicit GEMM interface
|
||||
*/
|
||||
|
||||
#include "../../common/cutlass_unit_test.h"
|
||||
#include "cutlass/cutlass.h"
|
||||
|
||||
#include "cutlass/conv/kernel/default_conv3d_fprop.h"
|
||||
#include "cutlass/conv/device/implicit_gemm_convolution.h"
|
||||
|
||||
#include "conv3d_testbed.h"
|
||||
|
||||
#if defined(CUTLASS_ARCH_MMA_SM75_SUPPORTED)
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
TEST(SM75_Device_Conv3d_Fprop_Analytic_ImplicitGemm_f16ndhwc_f16ndhwc_f32ndhwc_tensor_op_f32,
|
||||
128x128_32x3_64x64x32) {
|
||||
|
||||
/// Conv operation element types for the Gemm equivalent (ImplicitGemm)
|
||||
using ElementA = cutlass::half_t;
|
||||
using ElementB = cutlass::half_t;
|
||||
using ElementC = float;
|
||||
using ElementAccumulator = float;
|
||||
using ElementCompute = float;
|
||||
|
||||
/// Device-level Conv3d instance
|
||||
using Conv3dFpropKernel = typename cutlass::conv::kernel::DefaultConv3dFprop<
|
||||
ElementA, cutlass::layout::TensorNDHWC,
|
||||
ElementB, cutlass::layout::TensorNDHWC,
|
||||
ElementC, cutlass::layout::TensorNDHWC,
|
||||
ElementAccumulator,
|
||||
cutlass::arch::OpClassTensorOp,
|
||||
cutlass::arch::Sm75,
|
||||
cutlass::gemm::GemmShape<128, 128, 16>,
|
||||
cutlass::gemm::GemmShape<64, 64, 16>,
|
||||
cutlass::gemm::GemmShape<16, 8, 8>,
|
||||
cutlass::epilogue::thread::LinearCombination<
|
||||
ElementC,
|
||||
128 / cutlass::sizeof_bits<ElementC>::value,
|
||||
ElementAccumulator,
|
||||
ElementCompute
|
||||
>,
|
||||
cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>,
|
||||
2,
|
||||
cutlass::arch::OpMultiplyAdd
|
||||
>::Kernel;
|
||||
|
||||
using Conv3dFprop = cutlass::conv::device::ImplicitGemmConvolution<Conv3dFpropKernel>;
|
||||
|
||||
/// Run all unit test sizes with device-level Conv3d instance
|
||||
EXPECT_TRUE(test::conv::device::TestAllConv3d<Conv3dFprop>());
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
#endif // CUTLASS_ARCH_MMA_SM75_SUPPORTED
|
||||
@ -0,0 +1,159 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright notice, this list of
|
||||
* conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright notice, this list of
|
||||
* conditions and the following disclaimer in the documentation and/or other materials
|
||||
* provided with the distribution.
|
||||
* * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
|
||||
* to endorse or promote products derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************************************/
|
||||
/*! \file
|
||||
\brief Tests for device-wide Implicit GEMM interface
|
||||
*/
|
||||
|
||||
#include "../../common/cutlass_unit_test.h"
|
||||
#include "cutlass/cutlass.h"
|
||||
|
||||
#include "cutlass/conv/kernel/default_conv3d_fprop.h"
|
||||
#include "cutlass/conv/device/implicit_gemm_convolution.h"
|
||||
|
||||
#include "conv3d_testbed.h"
|
||||
|
||||
#if defined(CUTLASS_ARCH_MMA_SM80_SUPPORTED)
|
||||
|
||||
TEST(SM80_Device_Conv3d_Fprop_Analytic_ImplicitGemm_f16ndhwc_f16ndhwc_f32ndhwc_tensor_op_f32,
|
||||
128x128_32x4_64x64x32) {
|
||||
|
||||
/// Conv operation element types for the Gemm equivalent (ImplicitGemm)
|
||||
using ElementA = cutlass::half_t;
|
||||
using ElementB = cutlass::half_t;
|
||||
using ElementC = float;
|
||||
using ElementAccumulator = float;
|
||||
using ElementCompute = float;
|
||||
|
||||
using Conv3dFpropKernel = typename cutlass::conv::kernel::DefaultConv3dFprop<
|
||||
ElementA, cutlass::layout::TensorNDHWC,
|
||||
ElementB, cutlass::layout::TensorNDHWC,
|
||||
ElementC, cutlass::layout::TensorNDHWC,
|
||||
ElementAccumulator,
|
||||
cutlass::arch::OpClassTensorOp,
|
||||
cutlass::arch::Sm80,
|
||||
cutlass::gemm::GemmShape<128, 128, 32>,
|
||||
cutlass::gemm::GemmShape<64, 64, 32>,
|
||||
cutlass::gemm::GemmShape<16, 8, 16>,
|
||||
cutlass::epilogue::thread::LinearCombination<
|
||||
ElementC,
|
||||
128 / cutlass::sizeof_bits<ElementC>::value,
|
||||
ElementAccumulator,
|
||||
ElementCompute
|
||||
>,
|
||||
cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>,
|
||||
4,
|
||||
cutlass::arch::OpMultiplyAdd
|
||||
>::Kernel;
|
||||
|
||||
using Conv3dFprop = cutlass::conv::device::ImplicitGemmConvolution<Conv3dFpropKernel>;
|
||||
|
||||
/// Run all unit test sizes with device-level Conv3d instance
|
||||
EXPECT_TRUE(test::conv::device::TestAllConv3d<Conv3dFprop>());
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
TEST(SM80_Device_Conv3d_Fprop_Optimized_ImplicitGemm_f16ndhwc_f16ndhwc_f32ndhwc_tensor_op_f32,
|
||||
128x128_32x4_64x64x32) {
|
||||
|
||||
/// Conv operation element types for the Gemm equivalent (ImplicitGemm)
|
||||
using ElementA = cutlass::half_t;
|
||||
using ElementB = cutlass::half_t;
|
||||
using ElementC = float;
|
||||
using ElementAccumulator = float;
|
||||
using ElementCompute = float;
|
||||
|
||||
using Conv3dFpropKernel = typename cutlass::conv::kernel::DefaultConv3dFprop<
|
||||
ElementA, cutlass::layout::TensorNDHWC,
|
||||
ElementB, cutlass::layout::TensorNDHWC,
|
||||
ElementC, cutlass::layout::TensorNDHWC,
|
||||
ElementAccumulator,
|
||||
cutlass::arch::OpClassTensorOp,
|
||||
cutlass::arch::Sm80,
|
||||
cutlass::gemm::GemmShape<128, 128, 32>,
|
||||
cutlass::gemm::GemmShape<64, 64, 32>,
|
||||
cutlass::gemm::GemmShape<16, 8, 16>,
|
||||
cutlass::epilogue::thread::LinearCombination<
|
||||
ElementC,
|
||||
128 / cutlass::sizeof_bits<ElementC>::value,
|
||||
ElementAccumulator,
|
||||
ElementCompute
|
||||
>,
|
||||
cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>,
|
||||
4,
|
||||
cutlass::arch::OpMultiplyAdd,
|
||||
cutlass::conv::IteratorAlgorithm::kOptimized
|
||||
>::Kernel;
|
||||
|
||||
using Conv3dFprop = cutlass::conv::device::ImplicitGemmConvolution<Conv3dFpropKernel>;
|
||||
|
||||
/// Run all unit test sizes with device-level Conv3d instance
|
||||
EXPECT_TRUE(test::conv::device::TestAllConv3d<Conv3dFprop>());
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
TEST(SM80_Device_Conv3d_Fprop_Optimized_ImplicitGemm_f16ndhwc_f16ndhwc_f32ndhwc_tensor_op_f32,
|
||||
64x256_32x4_64x64x32) {
|
||||
|
||||
/// Conv operation element types for the Gemm equivalent (ImplicitGemm)
|
||||
using ElementA = cutlass::half_t;
|
||||
using ElementB = cutlass::half_t;
|
||||
using ElementC = float;
|
||||
using ElementAccumulator = float;
|
||||
using ElementCompute = float;
|
||||
|
||||
using Conv3dFpropKernel = typename cutlass::conv::kernel::DefaultConv3dFprop<
|
||||
ElementA, cutlass::layout::TensorNDHWC,
|
||||
ElementB, cutlass::layout::TensorNDHWC,
|
||||
ElementC, cutlass::layout::TensorNDHWC,
|
||||
ElementAccumulator,
|
||||
cutlass::arch::OpClassTensorOp,
|
||||
cutlass::arch::Sm80,
|
||||
cutlass::gemm::GemmShape<64, 256, 32>,
|
||||
cutlass::gemm::GemmShape<64, 64, 32>,
|
||||
cutlass::gemm::GemmShape<16, 8, 16>,
|
||||
cutlass::epilogue::thread::LinearCombination<
|
||||
ElementC,
|
||||
128 / cutlass::sizeof_bits<ElementC>::value,
|
||||
ElementAccumulator,
|
||||
ElementCompute
|
||||
>,
|
||||
cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>,
|
||||
4,
|
||||
cutlass::arch::OpMultiplyAdd,
|
||||
cutlass::conv::IteratorAlgorithm::kOptimized
|
||||
>::Kernel;
|
||||
|
||||
using Conv3dFprop = cutlass::conv::device::ImplicitGemmConvolution<Conv3dFpropKernel>;
|
||||
|
||||
/// Run all unit test sizes with device-level Conv3d instance
|
||||
EXPECT_TRUE(test::conv::device::TestAllConv3d<Conv3dFprop>());
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
#endif // CUTLASS_ARCH_MMA_SM80_SUPPORTED
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
@ -76,5 +76,46 @@ TEST(SM80_Device_Conv3d_Fprop_Analytic_ImplicitGemm_tf32ndhwc_tf32ndhwc_f32ndhwc
|
||||
EXPECT_TRUE(test::conv::device::TestAllConv3d<Conv3dFprop>());
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
TEST(SM80_Device_Conv3d_Fprop_Optimized_ImplicitGemm_tf32ndhwc_tf32ndhwc_f32ndhwc_tensor_op_f32,
|
||||
128x128_32x3_64x64x32) {
|
||||
|
||||
/// Conv operation element types for the Gemm equivalent (ImplicitGemm)
|
||||
using ElementA = cutlass::tfloat32_t;
|
||||
using ElementB = cutlass::tfloat32_t;
|
||||
using ElementC = float;
|
||||
using ElementAccumulator = float;
|
||||
using ElementCompute = float;
|
||||
|
||||
/// Device-level Conv3d instance
|
||||
using Conv3dFpropKernel = typename cutlass::conv::kernel::DefaultConv3dFprop<
|
||||
ElementA, cutlass::layout::TensorNDHWC,
|
||||
ElementB, cutlass::layout::TensorNDHWC,
|
||||
ElementC, cutlass::layout::TensorNDHWC,
|
||||
ElementAccumulator,
|
||||
cutlass::arch::OpClassTensorOp,
|
||||
cutlass::arch::Sm80,
|
||||
cutlass::gemm::GemmShape<128, 128, 16>,
|
||||
cutlass::gemm::GemmShape<64, 64, 16>,
|
||||
cutlass::gemm::GemmShape<16, 8, 8>,
|
||||
cutlass::epilogue::thread::LinearCombination<
|
||||
ElementC,
|
||||
128 / cutlass::sizeof_bits<ElementC>::value,
|
||||
ElementAccumulator,
|
||||
ElementCompute
|
||||
>,
|
||||
cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>,
|
||||
3,
|
||||
cutlass::arch::OpMultiplyAdd,
|
||||
cutlass::conv::IteratorAlgorithm::kOptimized
|
||||
>::Kernel;
|
||||
|
||||
using Conv3dFprop = cutlass::conv::device::ImplicitGemmConvolution<Conv3dFpropKernel>;
|
||||
|
||||
/// Run all unit test sizes with device-level Conv3d instance
|
||||
EXPECT_TRUE(test::conv::device::TestAllConv3d<Conv3dFprop>());
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
#endif // CUTLASS_ARCH_MMA_SM80_SUPPORTED
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
@ -107,9 +107,25 @@ struct TestbedConv3dProblemSizes {
|
||||
));
|
||||
|
||||
conv3d_default_sizes.push_back(cutlass::conv::Conv3dProblemSize(
|
||||
{1, 1, 16, 16, minimum_channel_size}, // input size (NDHWC)
|
||||
{8, 1, 3, 3, minimum_channel_size}, // filter size (KTRSC)
|
||||
cutlass::Coord<3>({0, 1, 1}), // padding (pad_d, pad_h, pad_w)
|
||||
{1, 1, 1, 8, minimum_channel_size}, // input size (NDHWC)
|
||||
{8, 1, 1, 3, minimum_channel_size}, // filter size (KTRSC)
|
||||
cutlass::Coord<3>({1, 1, 1}), // padding (pad_d, pad_h, pad_w)
|
||||
cutlass::Coord<3>({1, 1, 1}), // stride (stride_d, stride_h, stride_w)
|
||||
cutlass::Coord<3>({1, 1, 1}) // dilation (dilation_d, dilation_h, dilation_w)
|
||||
));
|
||||
|
||||
conv3d_default_sizes.push_back(cutlass::conv::Conv3dProblemSize(
|
||||
{1, 8, 8, 8, minimum_channel_size}, // input size (NDHWC)
|
||||
{8, 3, 3, 3, minimum_channel_size}, // filter size (KTRSC)
|
||||
cutlass::Coord<3>({1, 1, 1}), // padding (pad_d, pad_h, pad_w)
|
||||
cutlass::Coord<3>({1, 1, 1}), // stride (stride_d, stride_h, stride_w)
|
||||
cutlass::Coord<3>({1, 1, 1}) // dilation (dilation_d, dilation_h, dilation_w)
|
||||
));
|
||||
|
||||
conv3d_default_sizes.push_back(cutlass::conv::Conv3dProblemSize(
|
||||
{1, 16, 16, 16, minimum_channel_size}, // input size (NDHWC)
|
||||
{8, 3, 3, 3, minimum_channel_size}, // filter size (KTRSC)
|
||||
cutlass::Coord<3>({1, 1, 1}), // padding (pad_d, pad_h, pad_w)
|
||||
cutlass::Coord<3>({1, 1, 1}), // stride (stride_d, stride_h, stride_w)
|
||||
cutlass::Coord<3>({1, 1, 1}) // dilation (dilation_d, dilation_h, dilation_w)
|
||||
));
|
||||
@ -138,6 +154,7 @@ struct TestbedConv3dProblemSizes {
|
||||
cutlass::Coord<3>({1, 1, 1}) // dilation (dilation_d, dilation_h, dilation_w)
|
||||
));
|
||||
|
||||
|
||||
conv3d_default_sizes.push_back(cutlass::conv::Conv3dProblemSize(
|
||||
{1, 11, 15, 19, 64}, // input size (NDHWC)
|
||||
{32, 4, 3, 6, 64}, // filter size (KTRSC)
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
@ -204,10 +204,14 @@ public:
|
||||
ElementCompute alpha = ElementCompute(1),
|
||||
ElementCompute beta = ElementCompute()) {
|
||||
|
||||
// Waive test if CUDA device is insufficient.
|
||||
if (!sufficient()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Waive test if insufficient CUDA device
|
||||
if (!sufficient()) {
|
||||
if (CUTLASS_TEST_UNIT_ENABLE_WARNINGS) {
|
||||
std::cerr << "Test waived due to insufficient CUDA device." << std::endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
#if 0 //display conv2d problem size for debugging
|
||||
std::cout << problem_size << std::endl
|
||||
@ -413,11 +417,6 @@ bool TestAllConv3d(
|
||||
//
|
||||
TestbedConv3dProblemSizes conv3d_problems(128/cutlass::sizeof_bits<typename ImplicitGemm::ElementA>::value);
|
||||
|
||||
//
|
||||
// Get conv problem sizes to run conv operator
|
||||
//
|
||||
//TestbedConv3dProblemSizes conv_problems(128/cutlass::sizeof_bits<typename ImplicitGemm::ElementA>::value);
|
||||
|
||||
// Vector of conv3d problem sizes to avoid duplicate runs
|
||||
Conv3dProblemVector conv_tested_sizes;
|
||||
|
||||
@ -443,12 +442,17 @@ bool TestAllConv3d(
|
||||
// Procedurally disable certain cases
|
||||
//
|
||||
|
||||
// CUTLASS DGRAD's unity stride specialization only support stride {1, 1}
|
||||
// CUTLASS DGRAD's unity stride specialization only support stride {1, 1, 1}
|
||||
if ((ImplicitGemm::kConvolutionalOperator ==
|
||||
cutlass::conv::Operator::kDgrad) &&
|
||||
(ImplicitGemm::ImplicitGemmKernel::Mma::IteratorA::kStrideSupport ==
|
||||
cutlass::conv::StrideSupport::kUnity)) {
|
||||
if (!((conv_problem.stride_h == 1) && (conv_problem.stride_w == 1))) {
|
||||
((ImplicitGemm::ImplicitGemmKernel::Mma::IteratorA::kStrideSupport ==
|
||||
cutlass::conv::StrideSupport::kUnity) ||
|
||||
(ImplicitGemm::ImplicitGemmKernel::Mma::IteratorB::kStrideSupport ==
|
||||
cutlass::conv::StrideSupport::kUnity))) {
|
||||
if (!((conv_problem.stride_d == 1) &&
|
||||
(conv_problem.stride_h == 1) &&
|
||||
(conv_problem.stride_w == 1))
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
|
||||
Reference in New Issue
Block a user