Fix typos 2 (#842)

Co-authored-by: Haicheng Wu <57973641+hwu36@users.noreply.github.com>
2023-03-09 20:22:56 -08:00
parent c4f6b8c6bc
commit 7e370c9637
161 changed files with 310 additions and 309 deletions
--- a/tools/util/include/cutlass/util/command_line.h
+++ b/tools/util/include/cutlass/util/command_line.h
@@ -188,7 +188,7 @@ struct CommandLine {
      for (int i = 0; i < keys.size(); ++i) {
        if (keys[i] == string(arg_name)) {
          string val_string(values[i]);
-          seperate_string(val_string, vals, sep);
+          separate_string(val_string, vals, sep);
        }
      }
    }
@@ -225,7 +225,7 @@ struct CommandLine {
      range != ranges.end(); ++range) {

      std::vector<std::string> range_vals;
-      seperate_string(*range, range_vals, sep);
+      separate_string(*range, range_vals, sep);
      vals.push_back(range_vals);
    }
  }
@@ -283,7 +283,7 @@ struct CommandLine {
  }

  template <typename value_t>
-  static void seperate_string(std::string const& str,
+  static void separate_string(std::string const& str,
                              std::vector<value_t>& vals,
                              char sep = ',') {
    std::istringstream str_stream(str);
--- a/tools/util/include/cutlass/util/device_groupnorm.h
+++ b/tools/util/include/cutlass/util/device_groupnorm.h
@@ -314,7 +314,7 @@ __global__ void groupnorm_twopass_multiple_load(T*          output,
 }

 //ref_input & ref_output should be [N, H, W, C]
-//ref_gamma & ref_beta shoud be [1, 1, 1, C]
+//ref_gamma & ref_beta should be [1, 1, 1, C]
 template <typename T>
 void groupnorm(cutlass::Tensor4DCoord input_size,
               const int num_groups,
--- a/tools/util/include/cutlass/util/device_nhwc_padding.h
+++ b/tools/util/include/cutlass/util/device_nhwc_padding.h
@@ -109,9 +109,9 @@ __global__ void nhwc_padding_channel_3To4_kernel(const int32_t n,
  shm[threadIdx.x] = tidx >= max_input_element ? zero_io : input[tidx];  
  __syncthreads();
  
-  const int ouput_offset = blockIdx.x * 256;
-  const int lower_bound = max_output_element < ouput_offset + 256 ? max_output_element : ouput_offset + 256;
-  for (int i = ouput_offset + threadidx, j = threadidx ; i < lower_bound ; i+=192, j+=192)
+  const int output_offset = blockIdx.x * 256;
+  const int lower_bound = max_output_element < output_offset + 256 ? max_output_element : output_offset + 256;
+  for (int i = output_offset + threadidx, j = threadidx ; i < lower_bound ; i+=192, j+=192)
  {
    const Telement* shm_element = (const Telement*)shm + j*3*element_in_Tio/4;
    Telement array[element_in_Tio];
@@ -140,9 +140,9 @@ __global__ void nhwc_padding_channel_3To8_kernel(const int32_t n,
  shm[threadIdx.x] = tidx >= max_input_element ? zero_io : input[tidx];  
  __syncthreads();
  
-  const int ouput_offset = blockIdx.x * 512;
-  const int lower_bound = max_output_element < ouput_offset + 512 ? max_output_element : ouput_offset + 512;
-  for (int i = ouput_offset + threadidx, j = threadidx ; i < lower_bound ; i+=192, j+=192)
+  const int output_offset = blockIdx.x * 512;
+  const int lower_bound = max_output_element < output_offset + 512 ? max_output_element : output_offset + 512;
+  for (int i = output_offset + threadidx, j = threadidx ; i < lower_bound ; i+=192, j+=192)
  {
    const Telement* shm_element = (const Telement*)shm + (element_in_Tio == 4 ? j/2 : j)*3;
    Telement array[element_in_Tio];
--- a/tools/util/include/cutlass/util/helper_cuda.hpp
+++ b/tools/util/include/cutlass/util/helper_cuda.hpp
@@ -74,7 +74,7 @@ _ConvertSMVer2Cores(int major, int minor)
  // Defines for GPU Architecture types (using the SM version to determine
  // the # of cores per SM
  typedef struct {
-    int SM;  // 0xMm (hexidecimal notation), M = SM Major version,
+    int SM;  // 0xMm (hexadecimal notation), M = SM Major version,
    // and m = SM minor version
    int Cores;
  } sSMtoCores;
--- a/tools/util/include/cutlass/util/reference/device/gemm.h
+++ b/tools/util/include/cutlass/util/reference/device/gemm.h
@@ -248,7 +248,7 @@ struct Gemm<ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType,

 ////////////////////////////////////////////////////////////////////////////////////////////////////

-/// Parital specialization for XOR-popc
+/// Partial specialization for XOR-popc
 template <typename ElementA, typename LayoutA, typename ElementB,
          typename LayoutB, typename ElementC, typename LayoutC,
          typename ScalarType, typename AccumulatorType>
--- a/tools/util/include/cutlass/util/reference/device/kernel/tensor_foreach.h
+++ b/tools/util/include/cutlass/util/reference/device/kernel/tensor_foreach.h
@@ -72,7 +72,7 @@ struct TensorForEachHelper {
 template <typename Func, int Rank>
 struct TensorForEachHelper<Func, Rank, 0> {

-  /// Constructor for fastest chaning rank
+  /// Constructor for fastest changing rank
  __inline__ __device__
  TensorForEachHelper(Func &func, Coord<Rank> const &size, Coord<Rank> &coord, int64_t index) {

--- a/tools/util/include/cutlass/util/reference/device/tensor_fill.h
+++ b/tools/util/include/cutlass/util/reference/device/tensor_fill.h
@@ -1308,7 +1308,7 @@ void TensorFill(

 ///////////////////////////////////////////////////////////////////////////////////////////////////

-/// Fills a tensor's digonal with 1 and 0 everywhere else.
+/// Fills a tensor's diagonal with 1 and 0 everywhere else.
 template <
  typename Element,               ///< Element type
  typename Layout>                ///< Layout function
--- a/tools/util/include/cutlass/util/reference/device/tensor_foreach.h
+++ b/tools/util/include/cutlass/util/reference/device/tensor_foreach.h
@@ -133,4 +133,4 @@ struct BlockForEach {

 } // namespace device
 } // namespace reference
-} // namesace cutlass
+} // namespace cutlass
--- a/tools/util/include/cutlass/util/reference/host/gemm.h
+++ b/tools/util/include/cutlass/util/reference/host/gemm.h
@@ -335,7 +335,7 @@ struct Gemm<ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType,

 ////////////////////////////////////////////////////////////////////////////////////////////////////

-/// Parital specialization for XOR-popc
+/// Partial specialization for XOR-popc
 template <typename ElementA, typename LayoutA, typename ElementB,
          typename LayoutB, typename ElementC, typename LayoutC,
          typename ScalarType, typename ComputeType>
--- a/tools/util/include/cutlass/util/reference/host/tensor_fill.h
+++ b/tools/util/include/cutlass/util/reference/host/tensor_fill.h
@@ -992,7 +992,7 @@ void TensorFillDiagonal(
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////////////////////////

-/// Helper to fill a tensor's digonal with 1 and 0 everywhere else.
+/// Helper to fill a tensor's diagonal with 1 and 0 everywhere else.
 template <
  typename Element,               ///< Element type
  typename Layout>                ///< Layout function
--- a/tools/util/include/cutlass/util/reference/host/tensor_foreach.h
+++ b/tools/util/include/cutlass/util/reference/host/tensor_foreach.h
@@ -69,7 +69,7 @@ struct TensorForEachHelper<Func, Rank, 0> {
  /// Index of the active rank
  static int const kActiveRank = Rank - 1;

-  /// Constructor for fastest chaning rank
+  /// Constructor for fastest changing rank
  TensorForEachHelper(
    Func &func,
    Coord<Rank> const &extent,