Fix several typos (#1169)

Co-authored-by: isaacw <isaacw@nvidia.com>
2023-11-03 11:54:46 +08:00
parent c008b4aea8
commit 557be3ab0e
21 changed files with 30 additions and 30 deletions
--- a/include/cute/algorithm/tensor_algorithms.hpp
+++ b/include/cute/algorithm/tensor_algorithms.hpp
@ -123,7 +123,7 @@ transform(Tensor<EngineIn,LayoutIn>&& tensor_in, Tensor<EngineOut,LayoutOut>&& t

 // Similar to std::transform with a binary operation
 // Takes two tensors as input and one tensor as output. 
-// Applies the binary_op to tensor_in1 and and tensor_in2 and
+// Applies the binary_op to tensor_in1 and tensor_in2 and
 // assigns it to tensor_out
 template <class EngineIn1, class LayoutIn1,
          class EngineIn2, class LayoutIn2,
--- a/include/cute/layout.hpp
+++ b/include/cute/layout.hpp
@ -576,7 +576,7 @@ depth(Layout<Shape,Stride> const& layout)

 // Return the codomain shape of a mode
 // @post size(coshape(@a a)) == cosize(@a a)
-// @return C Coordinate with smallest elements such that that
+// @return C Coordinate with smallest elements such that
 //           @a elem_less(sub_layout(c), C) for all c < size(@a sub_layout)
 //           where sub_layout = get<Is...>(layout).
 template <int... Is, class Shape, class Stride>
--- a/include/cutlass/gemm/kernel/sm90_gemm_tma_warpspecialized_cooperative.hpp
+++ b/include/cutlass/gemm/kernel/sm90_gemm_tma_warpspecialized_cooperative.hpp
@ -527,7 +527,7 @@ public:
        auto blk_coord = make_coord(m_coord, n_coord, _, l_coord);
        auto work_k_tile_count = TileScheduler::get_work_k_tile_count(work_tile_info, problem_shape_MNKL, blk_shape);

-        // Allocate the the accumulators for the (M,N) blk_shape
+        // Allocate the accumulators for the (M,N) blk_shape
        //
        // MSVC CTAD breaks if we say "Tensor" here, so we use "auto" instead.
        auto accumulators = partition_fragment_C(tiled_mma, take<0,2>(blk_shape));               // (MMA,MMA_M,MMA_N)
--- a/include/cutlass/gemm/kernel/sm90_gemm_tma_warpspecialized_pingpong.hpp
+++ b/include/cutlass/gemm/kernel/sm90_gemm_tma_warpspecialized_pingpong.hpp
@ -540,7 +540,7 @@ public:
        auto l_coord = idx2crd(work_tile_info.L_idx, shape<4>(gB_nkl));
        auto blk_coord = make_coord(m_coord, n_coord, _, l_coord);

-        // Allocate the the accumulators for the (M,N) blk_shape
+        // Allocate the accumulators for the (M,N) blk_shape
        Tensor accumulators = partition_fragment_C(tiled_mma, take<0,2>(blk_shape));               // (MMA,MMA_M,MMA_N)

        // Order two Math WG's MMA one after the other, helps hide Epilogue
--- a/include/cutlass/gemm/kernel/sm90_tile_scheduler_stream_k.hpp
+++ b/include/cutlass/gemm/kernel/sm90_tile_scheduler_stream_k.hpp
@ -347,7 +347,7 @@ public:
    // The number of tiles for which reduction is required is either:
    //   (a) the total number of output tiles (in the case of split-K)
    //   (b) the number of stream-K tiles
-    // To calculate the the total number of output tiles in the split-K case, we
+    // To calculate the total number of output tiles in the split-K case, we
    // note that, in the split-K case, the units_per_problem_ member of Params will be
    // the total number of output tiles.
    auto reduction_tiles = params.splits_ > 1 ? params.units_per_problem_ : params.sk_tiles_;
--- a/include/cutlass/transform/collective/sm90_wgmma_transpose.hpp
+++ b/include/cutlass/transform/collective/sm90_wgmma_transpose.hpp
@ -556,7 +556,7 @@ public:
    constexpr auto WarpThreadLayout           = make_layout(make_shape(Int<WarpThreadShapeN>{}, Int<WarpThreadShapeK>{}));
    //////////////////////////////////////////////////////////////////////////////////////////////////////////////
    /// A warp group uses 8 steps to transpose the whole WarpgroupTileSize x WarpgroupTileSize.
-    ///  Divide a warp_group_tile into 8x8 warp_tiles to futher reduce the reg usage.
+    ///  Divide a warp_group_tile into 8x8 warp_tiles to further reduce the reg usage.
    ///  Step 0:                   Step 1:                   Step 2:                   Step 3:
    ///  W0 W1 W2 W3 -- -- -- --   -- -- -- -- -- -- -- --   -- -- -- -- -- -- -- --   -- -- -- -- -- -- -- --
    ///  W1 W0 -- -- -- -- -- --   -- -- W3 W2 -- -- -- --   -- -- -- -- -- -- -- --   -- -- -- -- -- -- -- --
--- a/include/cutlass/wmma_array.h
+++ b/include/cutlass/wmma_array.h
@ -47,7 +47,7 @@ namespace cutlass {

 ////////////////////////////////////////////////////////////////////////////////////////////////////

-/// Wmma array type (WmmaFragmentArray holds elements of of type nvcuda::wmma::fragment)
+/// Wmma array type (WmmaFragmentArray holds elements of type nvcuda::wmma::fragment)
 template <
  /// Element type
  typename T,