Fix typos 2 (#842)
Co-authored-by: Haicheng Wu <57973641+hwu36@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
c4f6b8c6bc
commit
7e370c9637
@ -188,7 +188,7 @@ struct CommandLine {
|
||||
for (int i = 0; i < keys.size(); ++i) {
|
||||
if (keys[i] == string(arg_name)) {
|
||||
string val_string(values[i]);
|
||||
seperate_string(val_string, vals, sep);
|
||||
separate_string(val_string, vals, sep);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -225,7 +225,7 @@ struct CommandLine {
|
||||
range != ranges.end(); ++range) {
|
||||
|
||||
std::vector<std::string> range_vals;
|
||||
seperate_string(*range, range_vals, sep);
|
||||
separate_string(*range, range_vals, sep);
|
||||
vals.push_back(range_vals);
|
||||
}
|
||||
}
|
||||
@ -283,7 +283,7 @@ struct CommandLine {
|
||||
}
|
||||
|
||||
template <typename value_t>
|
||||
static void seperate_string(std::string const& str,
|
||||
static void separate_string(std::string const& str,
|
||||
std::vector<value_t>& vals,
|
||||
char sep = ',') {
|
||||
std::istringstream str_stream(str);
|
||||
|
||||
@ -314,7 +314,7 @@ __global__ void groupnorm_twopass_multiple_load(T* output,
|
||||
}
|
||||
|
||||
//ref_input & ref_output should be [N, H, W, C]
|
||||
//ref_gamma & ref_beta shoud be [1, 1, 1, C]
|
||||
//ref_gamma & ref_beta should be [1, 1, 1, C]
|
||||
template <typename T>
|
||||
void groupnorm(cutlass::Tensor4DCoord input_size,
|
||||
const int num_groups,
|
||||
|
||||
@ -109,9 +109,9 @@ __global__ void nhwc_padding_channel_3To4_kernel(const int32_t n,
|
||||
shm[threadIdx.x] = tidx >= max_input_element ? zero_io : input[tidx];
|
||||
__syncthreads();
|
||||
|
||||
const int ouput_offset = blockIdx.x * 256;
|
||||
const int lower_bound = max_output_element < ouput_offset + 256 ? max_output_element : ouput_offset + 256;
|
||||
for (int i = ouput_offset + threadidx, j = threadidx ; i < lower_bound ; i+=192, j+=192)
|
||||
const int output_offset = blockIdx.x * 256;
|
||||
const int lower_bound = max_output_element < output_offset + 256 ? max_output_element : output_offset + 256;
|
||||
for (int i = output_offset + threadidx, j = threadidx ; i < lower_bound ; i+=192, j+=192)
|
||||
{
|
||||
const Telement* shm_element = (const Telement*)shm + j*3*element_in_Tio/4;
|
||||
Telement array[element_in_Tio];
|
||||
@ -140,9 +140,9 @@ __global__ void nhwc_padding_channel_3To8_kernel(const int32_t n,
|
||||
shm[threadIdx.x] = tidx >= max_input_element ? zero_io : input[tidx];
|
||||
__syncthreads();
|
||||
|
||||
const int ouput_offset = blockIdx.x * 512;
|
||||
const int lower_bound = max_output_element < ouput_offset + 512 ? max_output_element : ouput_offset + 512;
|
||||
for (int i = ouput_offset + threadidx, j = threadidx ; i < lower_bound ; i+=192, j+=192)
|
||||
const int output_offset = blockIdx.x * 512;
|
||||
const int lower_bound = max_output_element < output_offset + 512 ? max_output_element : output_offset + 512;
|
||||
for (int i = output_offset + threadidx, j = threadidx ; i < lower_bound ; i+=192, j+=192)
|
||||
{
|
||||
const Telement* shm_element = (const Telement*)shm + (element_in_Tio == 4 ? j/2 : j)*3;
|
||||
Telement array[element_in_Tio];
|
||||
|
||||
@ -74,7 +74,7 @@ _ConvertSMVer2Cores(int major, int minor)
|
||||
// Defines for GPU Architecture types (using the SM version to determine
|
||||
// the # of cores per SM
|
||||
typedef struct {
|
||||
int SM; // 0xMm (hexidecimal notation), M = SM Major version,
|
||||
int SM; // 0xMm (hexadecimal notation), M = SM Major version,
|
||||
// and m = SM minor version
|
||||
int Cores;
|
||||
} sSMtoCores;
|
||||
|
||||
@ -248,7 +248,7 @@ struct Gemm<ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType,
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/// Parital specialization for XOR-popc
|
||||
/// Partial specialization for XOR-popc
|
||||
template <typename ElementA, typename LayoutA, typename ElementB,
|
||||
typename LayoutB, typename ElementC, typename LayoutC,
|
||||
typename ScalarType, typename AccumulatorType>
|
||||
|
||||
@ -72,7 +72,7 @@ struct TensorForEachHelper {
|
||||
template <typename Func, int Rank>
|
||||
struct TensorForEachHelper<Func, Rank, 0> {
|
||||
|
||||
/// Constructor for fastest chaning rank
|
||||
/// Constructor for fastest changing rank
|
||||
__inline__ __device__
|
||||
TensorForEachHelper(Func &func, Coord<Rank> const &size, Coord<Rank> &coord, int64_t index) {
|
||||
|
||||
|
||||
@ -1308,7 +1308,7 @@ void TensorFill(
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/// Fills a tensor's digonal with 1 and 0 everywhere else.
|
||||
/// Fills a tensor's diagonal with 1 and 0 everywhere else.
|
||||
template <
|
||||
typename Element, ///< Element type
|
||||
typename Layout> ///< Layout function
|
||||
|
||||
@ -133,4 +133,4 @@ struct BlockForEach {
|
||||
|
||||
} // namespace device
|
||||
} // namespace reference
|
||||
} // namesace cutlass
|
||||
} // namespace cutlass
|
||||
|
||||
@ -335,7 +335,7 @@ struct Gemm<ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType,
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/// Parital specialization for XOR-popc
|
||||
/// Partial specialization for XOR-popc
|
||||
template <typename ElementA, typename LayoutA, typename ElementB,
|
||||
typename LayoutB, typename ElementC, typename LayoutC,
|
||||
typename ScalarType, typename ComputeType>
|
||||
|
||||
@ -992,7 +992,7 @@ void TensorFillDiagonal(
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/// Helper to fill a tensor's digonal with 1 and 0 everywhere else.
|
||||
/// Helper to fill a tensor's diagonal with 1 and 0 everywhere else.
|
||||
template <
|
||||
typename Element, ///< Element type
|
||||
typename Layout> ///< Layout function
|
||||
|
||||
@ -69,7 +69,7 @@ struct TensorForEachHelper<Func, Rank, 0> {
|
||||
/// Index of the active rank
|
||||
static int const kActiveRank = Rank - 1;
|
||||
|
||||
/// Constructor for fastest chaning rank
|
||||
/// Constructor for fastest changing rank
|
||||
TensorForEachHelper(
|
||||
Func &func,
|
||||
Coord<Rank> const &extent,
|
||||
|
||||
Reference in New Issue
Block a user