CUTLASS 2.6 (#298)

CUTLASS 2.6
This commit is contained in:
Manish Gupta
2021-07-22 21:40:53 -07:00
committed by GitHub
parent 6c29fe20ba
commit e5d51840e8
308 changed files with 32408 additions and 4722 deletions

View File

@ -90,11 +90,19 @@ struct CommandLine {
/**
* Returns number of naked (non-flag and non-key-value) commandline parameters
*/
template <typename value_t>
int num_naked_args() const {
size_t num_naked_args() const {
return args.size();
}
/**
 * Print naked (non-flag and non-key-value) commandline parameters
 *
 * \param out  output stream the arguments are written to, one per line
 */
void print_naked_args(std::ostream &out) const {
  // Iterate by const reference: the original `auto arg` copied each
  // argument string on every iteration for no benefit.
  for (const auto &arg : args) {
    out << " " << arg << "\n";
  }
}
/**
* Returns the commandline parameter for a given index (not including flags)
*/

View File

@ -325,12 +325,12 @@ public:
}
/// Returns the layout object's stride in a given physical dimension
Index stride(int dim) const {
LongIndex stride(int dim) const {
return layout_.stride().at(dim);
}
/// Returns the layout object's stride in a given physical dimension
Index & stride(int dim) {
LongIndex & stride(int dim) {
return layout_.stride().at(dim);
}

View File

@ -0,0 +1,52 @@
/***************************************************************************************************
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice, this list of
* conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
**************************************************************************************************/
#pragma once
#include <utility>
#include "cutlass/cutlass.h"
/**
* \file
* \brief C++11 version of index_sequence.
*/
namespace cutlass {

/// Compile-time sequence of indices — a C++11 stand-in for std::index_sequence.
/// Previously this was only forward-declared; defining it (with size()) makes
/// instances usable as complete types, mirroring the std interface.
template <size_t... Seq>
struct index_sequence {
  /// Number of indices in the sequence
  static constexpr size_t size() { return sizeof...(Seq); }
};

/// Recursive helper: peels N down toward zero while accumulating the
/// generated indices in Next...
template <size_t N, size_t... Next>
struct index_sequence_helper : index_sequence_helper<N - 1, N - 1, Next...> {};

/// Terminal case once the recursion has produced index 0 followed by the rest
template <size_t... Next>
struct index_sequence_helper<0, 0, Next...> {
  using type = index_sequence<0, Next...>;
};

/// Terminal case for the empty sequence. Without this specialization,
/// make_index_sequence<0> selected the primary template and recursed with
/// N - 1 == SIZE_MAX (size_t wraparound), failing to compile.
template <>
struct index_sequence_helper<0> {
  using type = index_sequence<>;
};

/// Yields index_sequence<0, 1, ..., N-1>
template <size_t N>
using make_index_sequence = typename index_sequence_helper<N>::type;

} // namespace cutlass

View File

@ -65,8 +65,8 @@ __global__ void Gemm(
// Map each thread to a unique tile of the output matrix
MatrixCoord output_coord(
(threadIdx.x + blockIdx.x * blockDim.x) * OutputTile::kRow,
(threadIdx.y + blockIdx.y * blockDim.y) * OutputTile::kColumn
MatrixCoord::Index((threadIdx.x + blockIdx.x * blockDim.x) * OutputTile::kRow),
MatrixCoord::Index((threadIdx.y + blockIdx.y * blockDim.y) * OutputTile::kColumn)
);
// Compute the general matrix product

View File

@ -39,6 +39,7 @@
#include "cutlass/conv/convolution.h"
#include "cutlass/conv/conv2d_problem_size.h"
#include "cutlass/conv/conv3d_problem_size.h"
#include <iostream>
namespace cutlass {
namespace reference {
@ -243,7 +244,21 @@ void Conv2dDgrad(
p = p / problem_size.stride_h;
q = q / problem_size.stride_w;
#if 0
std::cout << "row:"
<< n * problem_size.H * problem_size.W +
h * problem_size.W +
w << " "
<< "n, p, q: ("
<< n << ", "
<< p << ", "
<< q << ") * "
<< "r, s: ("
<< r << ", "
<< s << ") ["
<< ((p < problem_size.P && q < problem_size.Q) ? "true":"false") << "]"
<< std::endl;
#endif
if (p < problem_size.P && q < problem_size.Q) {
ElementA a = tensor_dy.at(cutlass::make_Coord(n, p, q, k));

View File

@ -0,0 +1,60 @@
/***************************************************************************************************
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice, this list of
* conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
**************************************************************************************************/
#pragma once
#include <cmath>
#include "cutlass/cutlass.h"
#include "cutlass/complex.h"
#include "cutlass/util/reference/host/tensor_reduce.h"
#include "cutlass/core_io.h"
namespace cutlass {
namespace reference {
namespace host {
/// Computes the relative error metric ||A_computed - B_reference|| / ||B_reference||
/// for a computed tensor with respect to a reference tensor.
template <
  typename Element,
  typename Layout,
  typename ComputeType = double
>
ComputeType TensorRelativeErrorMetric(
  TensorView<Element, Layout> view_A_computed,
  TensorView<Element, Layout> view_B_reference,
  ComputeType identity = ComputeType()
) {
  // Numerator: norm of the element-wise difference between the two tensors
  ComputeType norm_of_difference =
    cutlass::reference::host::TensorNormDiff(view_A_computed, view_B_reference, identity);

  // Denominator: norm of the reference tensor
  ComputeType norm_of_reference =
    cutlass::reference::host::TensorNorm(view_B_reference, identity);

  return norm_of_difference / norm_of_reference;
}
///////////////////////////////////////////////////////////////////////////////////////////////////
} // namespace host
} // namespace reference
} // namespace cutlass

View File

@ -36,6 +36,7 @@
// Cutlass includes
#include "cutlass/cutlass.h"
#include "cutlass/complex.h"
#include "cutlass/quaternion.h"
#include "cutlass/array.h"
#include "cutlass/numeric_types.h"
#include "cutlass/subbyte_reference.h"
@ -219,6 +220,56 @@ struct RandomGaussianFunc<complex<Element> > {
}
};
/// Partial specialization for initializing a Quaternion value with a Gaussian distribution.
template <typename Element>
struct RandomGaussianFunc<Quaternion<Element> > {
uint64_t seed;      ///< seed used to initialize std::srand in the constructor
double mean;        ///< mean of the Gaussian distribution
double stddev;      ///< standard deviation of the Gaussian distribution
int int_scale;      ///< if non-negative, values are quantized to this many fractional bits
double pi;          ///< cached value of pi, computed once as acos(-1)
//
// Methods
//
/// Constructs the functor and seeds the C standard library RNG with `seed_`
RandomGaussianFunc(
uint64_t seed_ = 0,
double mean_ = 0,
double stddev_ = 1,
int int_scale_ = -1
):
seed(seed_), mean(mean_), stddev(stddev_), int_scale(int_scale_), pi(std::acos(-1)) {
std::srand((unsigned)seed);
}
/// Compute random value and update RNG state
Quaternion<Element> operator()() const {
// One Gaussian sample per quaternion component (w, x, y, z)
Element reals[4];
for (int i = 0; i < 4; ++i) {
// Box-Muller transform to generate random numbers with Normal distribution
// NOTE(review): u1 == 0 (probability ~1/RAND_MAX) yields log(0) = -inf — consider guarding
double u1 = double(std::rand()) / double(RAND_MAX);
double u2 = double(std::rand()) / double(RAND_MAX);
// Compute Gaussian random value
double rnd = std::sqrt(-2 * std::log(u1)) * std::cos(2 * pi * u2);
rnd = mean + stddev * rnd;
// If int_scale is non-negative, quantize to a power-of-two grid
// (multiply, truncate to int, divide) to facilitate error testing
if (int_scale >= 0) {
rnd = double(int(rnd * double(1 << int_scale)));
reals[i] = from_real<Element>(rnd / double(1 << int_scale));
}
else {
reals[i] = from_real<Element>(rnd);
}
}
return Quaternion<Element>(reals[0], reals[1], reals[2], reals[3]);
}
};
/// Computes a random Gaussian distribution
template <
typename Element, ///< Element type
@ -429,6 +480,58 @@ struct RandomUniformFunc<complex<Element> > {
}
};
/// Partial specialization for initializing a Quaternion value with a uniform distribution.
template <typename Element>
struct RandomUniformFunc<Quaternion<Element> > {

  using Real = typename RealType<Element>::Type;

  uint64_t seed;    ///< seed used to initialize std::srand in the constructor
  double range;     ///< width of the distribution (max - min)
  double min;       ///< lower bound of the distribution
  int int_scale;    ///< if non-negative, values are quantized to this many fractional bits

  //
  // Methods
  //

  /// Constructs the functor and seeds the C standard library RNG
  RandomUniformFunc(
    uint64_t seed_ = 0,
    double max = 1,
    double min_ = 0,
    int int_scale_ = -1
  ):
    seed(seed_), range(max - min_), min(min_), int_scale(int_scale_) {
    std::srand((unsigned)seed);
  }

  /// Compute random value and update RNG state
  Quaternion<Element> operator()() const {
    // One uniform sample per quaternion component (w, x, y, z)
    Element components[4];
    for (int idx = 0; idx < 4; ++idx) {
      // Map std::rand() into [min, min + range)
      double sample = double(std::rand()) / double(RAND_MAX);
      sample = min + range * sample;
      // Random values are cast to integer after scaling by a power of two
      // to facilitate error testing
      if (int_scale >= 0) {
        double const grid = double(1 << int_scale);
        sample = double(int(sample * grid));
        components[idx] = from_real<Element>(Real(sample / grid));
      }
      else {
        components[idx] = from_real<Element>(Real(sample));
      }
    }
    return make_Quaternion(components[0], components[1], components[2], components[3]);
  }
};
/// Computes a random Gaussian distribution
template <
typename Element, ///< Element type
@ -510,6 +613,32 @@ void TensorFillRandomUniform(
TensorFillRandomUniform(dst.view_imag(), ~seed, max, min, bits);
}
/// Fills a tensor of quaternions with random values drawn from a uniform distribution.
template <
  typename Element,                           ///< Element type
  typename Layout>                            ///< Layout function
void TensorFillRandomUniform(
  TensorView<Quaternion<Element>, Layout> dst,    ///< destination tensor
  uint64_t seed,                                  ///< seed for RNG
  double max = 1,                                 ///< upper bound of distribution
  double min = 0,                                 ///< lower bound for distribution
  int bits = -1) {                                ///< If non-negative, specifies number of fractional bits that
                                                  /// are not truncated to zero. Permits reducing precision of
                                                  /// data.

  // Build the per-element RNG, wrap it in a fill functor, and apply it
  // across the tensor's full extent.
  detail::RandomUniformFunc<Quaternion<Element>> rng(seed, max, min, bits);

  detail::TensorFillRandomUniformFunc<Quaternion<Element>, Layout> fill_op(dst, rng);

  TensorForEach(dst.extent(), fill_op);
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/// Fills a tensor with random values with a uniform random distribution.
template <