Fix typos 2 (#842)

Co-authored-by: Haicheng Wu <57973641+hwu36@users.noreply.github.com>
Author: Alexander Pivovarov
Date: 2023-03-09 20:22:56 -08:00 (committed by GitHub)
Parent: c4f6b8c6bc
Commit: 7e370c9637

161 changed files with 310 additions and 309 deletions

View File

@@ -188,7 +188,7 @@ struct CommandLine {
       for (int i = 0; i < keys.size(); ++i) {
         if (keys[i] == string(arg_name)) {
           string val_string(values[i]);
-          seperate_string(val_string, vals, sep);
+          separate_string(val_string, vals, sep);
         }
       }
     }
@@ -225,7 +225,7 @@ struct CommandLine {
          range != ranges.end(); ++range) {
       std::vector<std::string> range_vals;
-      seperate_string(*range, range_vals, sep);
+      separate_string(*range, range_vals, sep);
       vals.push_back(range_vals);
     }
   }
@@ -283,7 +283,7 @@ struct CommandLine {
   }
   template <typename value_t>
-  static void seperate_string(std::string const& str,
+  static void separate_string(std::string const& str,
                               std::vector<value_t>& vals,
                               char sep = ',') {
     std::istringstream str_stream(str);
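For context on the renamed helper: given the default `sep = ','` and the `std::istringstream` it opens, `separate_string` evidently tokenizes the argument string on the separator and converts each token to `value_t`. A minimal standalone sketch of that behavior, not the CUTLASS source (the `std::getline`-based loop and stream conversion are assumptions):

#include <sstream>
#include <string>
#include <vector>

// Sketch: split `str` on `sep`, converting each token to value_t via a stream.
template <typename value_t>
void separate_string(std::string const& str,
                     std::vector<value_t>& vals,
                     char sep = ',') {
  std::istringstream str_stream(str);
  std::string token;
  while (std::getline(str_stream, token, sep)) {
    std::istringstream token_stream(token);
    value_t val;
    if (token_stream >> val) {
      vals.push_back(val);
    }
  }
}

With this sketch, `separate_string("128,256,512", vals)` with `value_t = int` yields {128, 256, 512}.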

View File

@@ -314,7 +314,7 @@ __global__ void groupnorm_twopass_multiple_load(T* output,
 }
 //ref_input & ref_output should be [N, H, W, C]
-//ref_gamma & ref_beta shoud be [1, 1, 1, C]
+//ref_gamma & ref_beta should be [1, 1, 1, C]
 template <typename T>
 void groupnorm(cutlass::Tensor4DCoord input_size,
                const int num_groups,
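The corrected shape comment pins down the reference semantics: statistics per (sample, group) over H*W positions and the group's channels, then a per-channel affine with the broadcast [1, 1, 1, C] gamma/beta. A host-side sketch of that computation, matching the "two-pass" structure the kernel name suggests (the function name, `eps` value, and loop structure are our assumptions, not the CUTLASS example code):

#include <cmath>
#include <cstddef>
#include <vector>

// Two-pass group norm on NHWC data: pass 1 computes mean/variance per
// (sample, group); pass 2 normalizes and applies the per-channel affine.
void groupnorm_ref(std::vector<float>& out, std::vector<float> const& in,
                   std::vector<float> const& gamma,   // [1, 1, 1, C]
                   std::vector<float> const& beta,    // [1, 1, 1, C]
                   int N, int H, int W, int C, int num_groups,
                   float eps = 1e-5f) {
  int const cpg = C / num_groups;  // channels per group
  for (int n = 0; n < N; ++n) {
    for (int g = 0; g < num_groups; ++g) {
      double sum = 0.0, sq = 0.0;
      for (int hw = 0; hw < H * W; ++hw) {
        for (int c = g * cpg; c < (g + 1) * cpg; ++c) {
          double v = in[(std::size_t(n) * H * W + hw) * C + c];
          sum += v;
          sq += v * v;
        }
      }
      double const count = double(H) * W * cpg;
      double const mean = sum / count;
      double const inv_std = 1.0 / std::sqrt(sq / count - mean * mean + eps);
      for (int hw = 0; hw < H * W; ++hw) {
        for (int c = g * cpg; c < (g + 1) * cpg; ++c) {
          std::size_t const idx = (std::size_t(n) * H * W + hw) * C + c;
          out[idx] = float((in[idx] - mean) * inv_std) * gamma[c] + beta[c];
        }
      }
    }
  }
}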

View File

@@ -109,9 +109,9 @@ __global__ void nhwc_padding_channel_3To4_kernel(const int32_t n,
   shm[threadIdx.x] = tidx >= max_input_element ? zero_io : input[tidx];
   __syncthreads();
-  const int ouput_offset = blockIdx.x * 256;
-  const int lower_bound = max_output_element < ouput_offset + 256 ? max_output_element : ouput_offset + 256;
-  for (int i = ouput_offset + threadidx, j = threadidx ; i < lower_bound ; i+=192, j+=192)
+  const int output_offset = blockIdx.x * 256;
+  const int lower_bound = max_output_element < output_offset + 256 ? max_output_element : output_offset + 256;
+  for (int i = output_offset + threadidx, j = threadidx ; i < lower_bound ; i+=192, j+=192)
   {
     const Telement* shm_element = (const Telement*)shm + j*3*element_in_Tio/4;
     Telement array[element_in_Tio];
@@ -140,9 +140,9 @@ __global__ void nhwc_padding_channel_3To8_kernel(const int32_t n,
   shm[threadIdx.x] = tidx >= max_input_element ? zero_io : input[tidx];
   __syncthreads();
-  const int ouput_offset = blockIdx.x * 512;
-  const int lower_bound = max_output_element < ouput_offset + 512 ? max_output_element : ouput_offset + 512;
-  for (int i = ouput_offset + threadidx, j = threadidx ; i < lower_bound ; i+=192, j+=192)
+  const int output_offset = blockIdx.x * 512;
+  const int lower_bound = max_output_element < output_offset + 512 ? max_output_element : output_offset + 512;
+  for (int i = output_offset + threadidx, j = threadidx ; i < lower_bound ; i+=192, j+=192)
   {
     const Telement* shm_element = (const Telement*)shm + (element_in_Tio == 4 ? j/2 : j)*3;
     Telement array[element_in_Tio];
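Independent of the renamed variable, what these kernels compute is NHWC channel padding: each pixel's 3 input channels are copied forward and the extra output channels are zero-filled. A plain CPU equivalent for the 3-to-4 case (a sketch under that assumption, not the optimized shared-memory kernel):

#include <cstddef>
#include <vector>

// Copy each 3-channel NHWC pixel into a 4-channel buffer; channel 3 stays 0.
std::vector<float> pad_nhwc_3to4(std::vector<float> const& in,
                                 int n, int h, int w) {
  std::vector<float> out(std::size_t(n) * h * w * 4, 0.0f);
  for (std::size_t p = 0; p < std::size_t(n) * h * w; ++p) {
    for (int c = 0; c < 3; ++c) {
      out[p * 4 + c] = in[p * 3 + c];
    }
  }
  return out;
}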

View File

@@ -74,7 +74,7 @@ _ConvertSMVer2Cores(int major, int minor)
 // Defines for GPU Architecture types (using the SM version to determine
 // the # of cores per SM
 typedef struct {
-  int SM;  // 0xMm (hexidecimal notation), M = SM Major version,
+  int SM;  // 0xMm (hexadecimal notation), M = SM Major version,
            // and m = SM minor version
   int Cores;
 } sSMtoCores;
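The corrected comment describes the key encoding: major and minor SM versions packed into one byte as 0xMm, i.e. (major << 4) + minor. A sketch of how such a table is typically consulted (the entries below are illustrative, following the CUDA samples convention; `cores_per_sm` is our name):

// Look up cores per SM by packing (major, minor) as 0xMm.
typedef struct {
  int SM;     // 0xMm, M = SM Major version, m = SM minor version
  int Cores;
} sSMtoCores;

int cores_per_sm(int major, int minor) {
  sSMtoCores table[] = {{0x70, 64}, {0x75, 64}, {0x80, 64}, {0x86, 128}, {-1, -1}};
  for (int i = 0; table[i].SM != -1; ++i) {
    if (table[i].SM == ((major << 4) + minor)) {
      return table[i].Cores;
    }
  }
  return -1;  // unknown architecture
}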

View File

@@ -248,7 +248,7 @@ struct Gemm<ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType,
 ////////////////////////////////////////////////////////////////////////////////////////////////////
-/// Parital specialization for XOR-popc
+/// Partial specialization for XOR-popc
 template <typename ElementA, typename LayoutA, typename ElementB,
           typename LayoutB, typename ElementC, typename LayoutC,
           typename ScalarType, typename AccumulatorType>
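For readers hitting the fixed comment cold: in a binary GEMM specialized for XOR-popc, the multiply-add over 32 packed 1-bit operands is an XOR followed by a population count. A one-line sketch of that inner step (C++20 `std::popcount`; the function name is ours):

#include <bit>      // std::popcount (C++20)
#include <cstdint>

// acc += number of bit positions where a and b differ (XOR-popc multiply-add).
inline int32_t xor_popc_mac(int32_t acc, uint32_t a, uint32_t b) {
  return acc + std::popcount(a ^ b);
}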

View File

@@ -72,7 +72,7 @@ struct TensorForEachHelper {
 template <typename Func, int Rank>
 struct TensorForEachHelper<Func, Rank, 0> {
-  /// Constructor for fastest chaning rank
+  /// Constructor for fastest changing rank
   __inline__ __device__
   TensorForEachHelper(Func &func, Coord<Rank> const &size, Coord<Rank> &coord, int64_t index) {
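Context for the fixed comment: the helper recurses over ranks, and the `<Func, Rank, 0>` specialization owns the last, fastest changing rank, so elements are visited in layout order. A host-side sketch of the same recursion pattern (hypothetical names; simplified from the CUTLASS class, which also threads a linear index):

#include <array>

// General case: iterate one slower changing rank, then recurse inward.
template <typename Func, int Rank, int RanksRemaining>
struct ForEachSketch {
  static int const kActiveRank = Rank - RanksRemaining - 1;
  ForEachSketch(Func &func, std::array<int, Rank> const &size,
                std::array<int, Rank> &coord) {
    for (coord[kActiveRank] = 0; coord[kActiveRank] < size[kActiveRank];
         ++coord[kActiveRank]) {
      ForEachSketch<Func, Rank, RanksRemaining - 1>(func, size, coord);
    }
  }
};

// Base case: the fastest changing rank (Rank - 1) invokes the functor.
template <typename Func, int Rank>
struct ForEachSketch<Func, Rank, 0> {
  static int const kActiveRank = Rank - 1;
  ForEachSketch(Func &func, std::array<int, Rank> const &size,
                std::array<int, Rank> &coord) {
    for (coord[kActiveRank] = 0; coord[kActiveRank] < size[kActiveRank];
         ++coord[kActiveRank]) {
      func(coord);
    }
  }
};

// Usage: ForEachSketch<decltype(f), 2, 1>(f, size, coord) visits a 2-D extent
// with the last coordinate as the innermost loop (start RanksRemaining = Rank - 1).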

View File

@@ -1308,7 +1308,7 @@ void TensorFill(
 ///////////////////////////////////////////////////////////////////////////////////////////////////
-/// Fills a tensor's digonal with 1 and 0 everywhere else.
+/// Fills a tensor's diagonal with 1 and 0 everywhere else.
 template <
   typename Element,               ///< Element type
   typename Layout>                ///< Layout function
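The fixed doc comment states the semantics exactly: 1 on the diagonal, 0 everywhere else, i.e. an identity fill. A 2-D sketch of those semantics (plain row-major and a name of our choosing, unlike the layout-generic CUTLASS version):

// Write 1 where row == column and 0 everywhere else (identity matrix fill).
void fill_identity(float *data, int n) {
  for (int r = 0; r < n; ++r) {
    for (int c = 0; c < n; ++c) {
      data[r * n + c] = (r == c) ? 1.0f : 0.0f;
    }
  }
}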

View File

@@ -133,4 +133,4 @@ struct BlockForEach {
 } // namespace device
 } // namespace reference
-} // namesace cutlass
+} // namespace cutlass

View File

@@ -335,7 +335,7 @@ struct Gemm<ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType,
 ////////////////////////////////////////////////////////////////////////////////////////////////////
-/// Parital specialization for XOR-popc
+/// Partial specialization for XOR-popc
 template <typename ElementA, typename LayoutA, typename ElementB,
           typename LayoutB, typename ElementC, typename LayoutC,
           typename ScalarType, typename ComputeType>

View File

@@ -992,7 +992,7 @@ void TensorFillDiagonal(
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////////////////////////
-/// Helper to fill a tensor's digonal with 1 and 0 everywhere else.
+/// Helper to fill a tensor's diagonal with 1 and 0 everywhere else.
 template <
   typename Element,               ///< Element type
   typename Layout>                ///< Layout function

View File

@@ -69,7 +69,7 @@ struct TensorForEachHelper<Func, Rank, 0> {
   /// Index of the active rank
   static int const kActiveRank = Rank - 1;
-  /// Constructor for fastest chaning rank
+  /// Constructor for fastest changing rank
   TensorForEachHelper(
     Func &func,
     Coord<Rank> const &extent,