address comments

2022-03-17 16:13:29 +09:00
parent 247e3f2d22
commit 1550963c20
4 changed files with 3 additions and 5 deletions
--- a/examples/30_wgrad_split_k/30_wgrad_split_k.cu
+++ b/examples/30_wgrad_split_k/30_wgrad_split_k.cu
@@ -68,7 +68,6 @@ using ElementC = ElementOutput;
 using ElementCompute = ElementComputeEpilogue;
 using LayoutInputA = cutlass::layout::TensorNHWC;
 using LayoutInputB = cutlass::layout::TensorNHWC;
-using LayoutInputScaleBias = cutlass::layout::RowMajor;
 using LayoutOutput = cutlass::layout::TensorNHWC;

 // This code section describes whether you want to use tensor cores or regular SIMT cores on GPU SM
@@ -113,7 +112,7 @@ using EpilogueOpGEMM = cutlass::epilogue::thread::LinearCombination<
 // The epilogue functor for reduction. This is the one that is actually used.
 using EpilogueOpReduction = cutlass::epilogue::thread::LinearCombination<
    ElementOutput,                                     // Data type of output matrix.
-    128 / cutlass::sizeof_bits<ElementOutput>::value,  // The number of elements per vectorized.
+    128 / cutlass::sizeof_bits<ElementAccumulator>::value,  // The number of elements per vectorized.
    // memory access. This becomes the vector width of
    // math instructions in the epilogue too.
    ElementAccumulator,                                // Data type of accumulator
--- a/examples/30_wgrad_split_k/CMakeLists.txt
+++ b/examples/30_wgrad_split_k/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (c) 2017-2021, NVIDIA CORPORATION.  All rights reserved.
+# Copyright (c) 2017-2022, NVIDIA CORPORATION.  All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without modification, are permitted
 # provided that the following conditions are met:
--- a/examples/31_transposed_conv2d/31_transposed_conv2d.cu
+++ b/examples/31_transposed_conv2d/31_transposed_conv2d.cu
@@ -68,7 +68,6 @@ using ElementC = ElementOutput;
 using ElementCompute = ElementComputeEpilogue;
 using LayoutInputA = TensorNHWC;
 using LayoutInputB = TensorNHWC;
-using LayoutInputScaleBias = cutlass::layout::RowMajor;
 using LayoutOutput = TensorNHWC;

 // This code section describes whether you want to use tensor cores or regular SIMT cores on GPU SM
--- a/examples/31_transposed_conv2d/CMakeLists.txt
+++ b/examples/31_transposed_conv2d/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (c) 2017-2021, NVIDIA CORPORATION.  All rights reserved.
+# Copyright (c) 2017-2022, NVIDIA CORPORATION.  All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without modification, are permitted
 # provided that the following conditions are met: