@ -243,3 +243,4 @@ if (CUTLASS_NVCC_MAX_ARCH GREATER_EQUAL 75)
|
||||
endif()
|
||||
|
||||
endif()
|
||||
|
||||
|
||||
@ -35,8 +35,6 @@
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "../../common/cutlass_unit_test.h"
|
||||
|
||||
#include "cutlass/cutlass.h"
|
||||
#include "cutlass/layout/matrix.h"
|
||||
#include "cutlass/conv/convolution.h"
|
||||
|
||||
@ -573,7 +573,7 @@ bool TestSpecificConv2d(
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// TestAllConv: Runs cutlass::conv::device::ImplicitGemmConvolution operator and compares it with reference
|
||||
// TestAllConv runs conv operator on default conv problem sizes from test::conv::device::TestbedConv2dProblemSizes
|
||||
// Additionally, each conv2d test can provide conv problem sizes (conv_test_sizes) and blacklist of sizes
|
||||
// Additionally, each conv2d test can provide conv problem sizes (conv_test_sizes) and blacklist of sizes
|
||||
// (conv_blacklist_sizes)
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template <typename ImplicitGemm>
|
||||
|
||||
@ -410,6 +410,7 @@ public:
|
||||
LayoutC,
|
||||
ElementCompute,
|
||||
ElementAccumulator,
|
||||
ElementC,
|
||||
cutlass::NumericConverterClamp<ElementC, ElementCompute>
|
||||
>(
|
||||
kConvolutionalOperator,
|
||||
@ -517,7 +518,7 @@ public:
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// TestAllConv: Runs cutlass::conv::device::ImplicitGemmConvolution operator and compares it with reference
|
||||
// TestAllConv runs conv operator on default conv problem sizes from test::conv::device::TestbedConv2dProblemSizes
|
||||
// Additionally, each conv2d test can provide conv problem sizes (conv_test_sizes) and blacklist of sizes
|
||||
// Additionally, each conv2d test can provide conv problem sizes (conv_test_sizes) and blacklist of sizes
|
||||
// (conv_blacklist_sizes)
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template <typename ImplicitGemm, int InterleavedK>
|
||||
|
||||
@ -502,7 +502,7 @@ public:
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// TestAllConv: Runs cutlass::conv::device::ImplicitGemmConvolution operator and compares it with reference
|
||||
// TestAllConv runs conv operator on default conv problem sizes from test::conv::device::TestbedConv2dProblemSizes
|
||||
// Additionally, each conv2d test can provide conv problem sizes (conv_test_sizes) and blacklist of sizes
|
||||
// Additionally, each conv2d test can provide conv problem sizes (conv_test_sizes) and blacklist of sizes
|
||||
// (conv_blacklist_sizes)
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template <typename ImplicitGemm,
|
||||
|
||||
@ -464,7 +464,7 @@ public:
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// TestAllConv: Runs cutlass::conv::device::ImplicitGemmConvolution operator and compares it with reference
|
||||
// TestAllConv runs conv operator on default conv problem sizes from test::conv::device::TestbedConv2dProblemSizes
|
||||
// Additionally, each conv2d test can provide conv problem sizes (conv_test_sizes) and blacklist of sizes
|
||||
// Additionally, each conv2d test can provide conv problem sizes (conv_test_sizes) and blacklist of sizes
|
||||
// (conv_blacklist_sizes)
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template <typename ImplicitGemm>
|
||||
|
||||
@ -522,7 +522,7 @@ public:
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// TestAllConv: Runs cutlass::conv::device::ImplicitGemmConvolution operator and compares it with reference
|
||||
// TestAllConv runs conv operator on default conv problem sizes from test::conv::device::TestbedConv2dProblemSizes
|
||||
// Additionally, each conv3d test can provide conv problem sizes (conv_test_sizes) and blacklist of sizes
|
||||
// Additionally, each conv3d test can provide conv problem sizes (conv_test_sizes) and blacklist of sizes
|
||||
// (conv_blacklist_sizes)
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
@ -241,6 +241,106 @@ TEST(SM80_Device_Conv2d_Group_Fprop_Analytic_ImplicitGemm_f16nhwc_f16nhwc_f16nhw
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Analytic 2 stage SingleGroup kernel
|
||||
// Builds a device-level ImplicitGemmConvolution from a DefaultConv2dGroupFprop
// kernel configured with GroupMode::kSingleGroup and IteratorAlgorithm::kAnalytic,
// then expects TestSpecificConv2d to return true for
// problem_sizes.default_single_group_sizes.
TEST(SM80_Device_Conv2d_Group_Fprop_Analytic_ImplicitGemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32,
|
||||
SingleGroupPerCTA_128x128_64x2_64x64x64) {
|
||||
|
||||
/// Conv operation element types for the Gemm equivalent (ImplicitGemm)
|
||||
// A, B and C are half precision; accumulation and epilogue compute use float.
using ElementA = cutlass::half_t;
|
||||
using ElementB = cutlass::half_t;
|
||||
using ElementC = cutlass::half_t;
|
||||
using ElementAccumulator = float;
|
||||
using ElementCompute = float;
|
||||
// Tile shapes: 128x128x64 threadblock, 64x64x64 warp, 16x8x16 instruction
// (the threadblock and warp shapes are echoed in the test name).
using ThreadblockShape = cutlass::gemm::GemmShape<128, 128, 64>;
|
||||
using WarpShape = cutlass::gemm::GemmShape<64, 64, 64>;
|
||||
using InstructionShape = cutlass::gemm::GemmShape<16, 8, 16>;
|
||||
|
||||
/// Device-level Conv2d instance
|
||||
using Conv2dGroupFpropKernel = typename cutlass::conv::kernel::DefaultConv2dGroupFprop<
|
||||
ElementA, cutlass::layout::TensorNHWC,
|
||||
ElementB, cutlass::layout::TensorNHWC,
|
||||
ElementC, cutlass::layout::TensorNHWC,
|
||||
ElementAccumulator,
|
||||
cutlass::arch::OpClassTensorOp,
|
||||
cutlass::arch::Sm80,
|
||||
ThreadblockShape,
|
||||
|
||||
WarpShape,
|
||||
InstructionShape,
|
||||
cutlass::epilogue::thread::LinearCombination<
|
||||
ElementC,
|
||||
// Number of ElementC values that fit in 128 bits.
128 / cutlass::sizeof_bits<ElementC>::value,
|
||||
ElementAccumulator,
|
||||
ElementCompute
|
||||
>,
|
||||
cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>,
|
||||
// Presumably the pipeline stage count (cf. "64x2" in the test name) -- confirm
// against the DefaultConv2dGroupFprop parameter list.
2,
|
||||
cutlass::arch::OpMultiplyAdd,
|
||||
cutlass::conv::GroupMode::kSingleGroup,
|
||||
cutlass::conv::IteratorAlgorithm::kAnalytic
|
||||
>::Kernel;
|
||||
|
||||
using Conv2dGroupFprop = cutlass::conv::device::ImplicitGemmConvolution<Conv2dGroupFpropKernel>;
|
||||
|
||||
/// Run group conv unit test sizes with device-level Conv2d instance
|
||||
// Problem sizes are built from the threadblock N and K extents and the number
// of ElementA values that fit in 128 bits.
test::conv::device::TestbedGroupConv2dProblemSizes problem_sizes(
|
||||
ThreadblockShape::kN, ThreadblockShape::kK,
|
||||
128/cutlass::sizeof_bits<ElementA>::value
|
||||
);
|
||||
// Only the single-group problem-size list is exercised by this test.
EXPECT_TRUE(test::conv::device::TestSpecificConv2d<Conv2dGroupFprop>(problem_sizes.default_single_group_sizes));
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Analytic 2 stage MultipleGroup kernel
|
||||
// Builds a device-level ImplicitGemmConvolution from a DefaultConv2dGroupFprop
// kernel configured with GroupMode::kMultipleGroup and IteratorAlgorithm::kAnalytic,
// then expects TestSpecificConv2d to return true for
// problem_sizes.default_multiple_group_sizes.
TEST(SM80_Device_Conv2d_Group_Fprop_Analytic_ImplicitGemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32,
|
||||
MutipleGroupPerCTA_64x64_64x2_32x32x64) {
|
||||
|
||||
/// Conv operation element types for the Gemm equivalent (ImplicitGemm)
|
||||
// A, B and C are half precision; accumulation and epilogue compute use float.
using ElementA = cutlass::half_t;
|
||||
using ElementB = cutlass::half_t;
|
||||
using ElementC = cutlass::half_t;
|
||||
using ElementAccumulator = float;
|
||||
using ElementCompute = float;
|
||||
// Tile shapes: 64x64x64 threadblock, 32x32x64 warp, 16x8x16 instruction
// (the threadblock and warp shapes are echoed in the test name).
using ThreadblockShape = cutlass::gemm::GemmShape<64, 64, 64>;
|
||||
using WarpShape = cutlass::gemm::GemmShape<32, 32, 64>;
|
||||
using InstructionShape = cutlass::gemm::GemmShape<16, 8, 16>;
|
||||
|
||||
/// Device-level Conv2d instance
|
||||
using Conv2dGroupFpropKernel = typename cutlass::conv::kernel::DefaultConv2dGroupFprop<
|
||||
ElementA, cutlass::layout::TensorNHWC,
|
||||
ElementB, cutlass::layout::TensorNHWC,
|
||||
ElementC, cutlass::layout::TensorNHWC,
|
||||
ElementAccumulator,
|
||||
cutlass::arch::OpClassTensorOp,
|
||||
cutlass::arch::Sm80,
|
||||
ThreadblockShape,
|
||||
|
||||
WarpShape,
|
||||
InstructionShape,
|
||||
cutlass::epilogue::thread::LinearCombination<
|
||||
ElementC,
|
||||
// Number of ElementC values that fit in 128 bits.
128 / cutlass::sizeof_bits<ElementC>::value,
|
||||
ElementAccumulator,
|
||||
ElementCompute
|
||||
>,
|
||||
cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>,
|
||||
// Presumably the pipeline stage count (cf. "64x2" in the test name) -- confirm
// against the DefaultConv2dGroupFprop parameter list.
2,
|
||||
cutlass::arch::OpMultiplyAdd,
|
||||
cutlass::conv::GroupMode::kMultipleGroup,
|
||||
cutlass::conv::IteratorAlgorithm::kAnalytic
|
||||
>::Kernel;
|
||||
|
||||
using Conv2dGroupFprop = cutlass::conv::device::ImplicitGemmConvolution<Conv2dGroupFpropKernel>;
|
||||
|
||||
/// Run group conv unit test sizes with device-level Conv2d instance
|
||||
// Problem sizes are built from the threadblock N and K extents and the number
// of ElementA values that fit in 128 bits.
test::conv::device::TestbedGroupConv2dProblemSizes problem_sizes(
|
||||
ThreadblockShape::kN, ThreadblockShape::kK,
|
||||
128/cutlass::sizeof_bits<ElementA>::value
|
||||
);
|
||||
// Only the multiple-group problem-size list is exercised by this test.
EXPECT_TRUE(test::conv::device::TestSpecificConv2d<Conv2dGroupFprop>(problem_sizes.default_multiple_group_sizes));
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
TEST(SM80_Device_Conv2d_Group_Fprop_Optimized_ImplicitGemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32,
|
||||
SingleGroupPerCTA_128x128_64x3_64x64x64) {
|
||||
|
||||
@ -340,14 +440,14 @@ TEST(SM80_Device_Conv2d_Group_Fprop_Optimized_ImplicitGemm_f16nhwc_f16nhwc_f16nh
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Optimized 2 stage singleGroup kernel
|
||||
// Optimized 2 stage SingleGroup kernel
|
||||
TEST(SM80_Device_Conv2d_Group_Fprop_Optimized_ImplicitGemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32,
|
||||
SingleGroupPerCTA_64x64_64x2_32x32x64) {
|
||||
|
||||
/// Conv operation element types for the Gemm equivalent (ImplicitGemm)
|
||||
using ElementA = cutlass::half_t;
|
||||
using ElementB = cutlass::half_t;
|
||||
using ElementC = float;
|
||||
using ElementC = cutlass::half_t;
|
||||
using ElementAccumulator = float;
|
||||
using ElementCompute = float;
|
||||
using ThreadblockShape = cutlass::gemm::GemmShape<64, 64, 64>;
|
||||
|
||||
Reference in New Issue
Block a user