@ -90,11 +90,19 @@ struct CommandLine {
|
||||
/**
|
||||
* Returns number of naked (non-flag and non-key-value) commandline parameters
|
||||
*/
|
||||
template <typename value_t>
|
||||
int num_naked_args() const {
|
||||
size_t num_naked_args() const {
|
||||
return args.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Print naked (non-flag and non-key-value) commandline parameters
|
||||
*/
|
||||
void print_naked_args(std::ostream &out) const {
|
||||
for (auto arg : args) {
|
||||
out << " " << arg <<"\n";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the commandline parameter for a given index (not including flags)
|
||||
*/
|
||||
|
||||
@ -325,12 +325,12 @@ public:
|
||||
}
|
||||
|
||||
/// Returns the layout object's stride in a given physical dimension
|
||||
Index stride(int dim) const {
|
||||
LongIndex stride(int dim) const {
|
||||
return layout_.stride().at(dim);
|
||||
}
|
||||
|
||||
/// Returns the layout object's stride in a given physical dimension
|
||||
Index & stride(int dim) {
|
||||
LongIndex & stride(int dim) {
|
||||
return layout_.stride().at(dim);
|
||||
}
|
||||
|
||||
|
||||
52
tools/util/include/cutlass/util/index_sequence.h
Normal file
52
tools/util/include/cutlass/util/index_sequence.h
Normal file
@ -0,0 +1,52 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright notice, this list of
|
||||
* conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright notice, this list of
|
||||
* conditions and the following disclaimer in the documentation and/or other materials
|
||||
* provided with the distribution.
|
||||
* * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
|
||||
* to endorse or promote products derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <utility>
|
||||
#include "cutlass/cutlass.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++11 version of index_sequence.
|
||||
*/
|
||||
|
||||
namespace cutlass {
|
||||
|
||||
/// Compile-time list of indices. Only declared in this header (no definition is given here),
/// so it is usable as a type-level tag, e.g. for pattern-matching in partial specializations.
template <size_t... Seq>
struct index_sequence;

/// Recursive builder: each step decrements N and prepends N - 1 to the accumulated pack,
/// so index_sequence_helper<N> eventually derives from index_sequence_helper<0, 0, 1, ..., N - 1>.
template <size_t N, size_t... Next>
struct index_sequence_helper : index_sequence_helper<N - 1, N - 1, Next...> {};

/// Terminal case: the counter has reached zero and Next... holds the finished sequence.
/// Matching on <0, Next...> (rather than <0, 0, Next...>) also covers N == 0 with an empty
/// pack, which would otherwise fall into the primary template and recurse without end.
template <size_t... Next>
struct index_sequence_helper<0, Next...> {
  using type = index_sequence<Next...>;
};

/// Alias for index_sequence<0, 1, ..., N - 1>; yields index_sequence<> when N is zero.
template <size_t N>
using make_index_sequence = typename index_sequence_helper<N>::type;
|
||||
|
||||
} // namespace cutlass
|
||||
@ -65,8 +65,8 @@ __global__ void Gemm(
|
||||
|
||||
// Map each thread to a unique tile of the output matrix
|
||||
MatrixCoord output_coord(
|
||||
(threadIdx.x + blockIdx.x * blockDim.x) * OutputTile::kRow,
|
||||
(threadIdx.y + blockIdx.y * blockDim.y) * OutputTile::kColumn
|
||||
MatrixCoord::Index((threadIdx.x + blockIdx.x * blockDim.x) * OutputTile::kRow),
|
||||
MatrixCoord::Index((threadIdx.y + blockIdx.y * blockDim.y) * OutputTile::kColumn)
|
||||
);
|
||||
|
||||
// Compute the general matrix product
|
||||
|
||||
@ -39,6 +39,7 @@
|
||||
#include "cutlass/conv/convolution.h"
|
||||
#include "cutlass/conv/conv2d_problem_size.h"
|
||||
#include "cutlass/conv/conv3d_problem_size.h"
|
||||
#include <iostream>
|
||||
|
||||
namespace cutlass {
|
||||
namespace reference {
|
||||
@ -243,7 +244,21 @@ void Conv2dDgrad(
|
||||
|
||||
p = p / problem_size.stride_h;
|
||||
q = q / problem_size.stride_w;
|
||||
|
||||
#if 0
|
||||
std::cout << "row:"
|
||||
<< n * problem_size.H * problem_size.W +
|
||||
h * problem_size.W +
|
||||
w << " "
|
||||
<< "n, p, q: ("
|
||||
<< n << ", "
|
||||
<< p << ", "
|
||||
<< q << ") * "
|
||||
<< "r, s: ("
|
||||
<< r << ", "
|
||||
<< s << ") ["
|
||||
<< ((p < problem_size.P && q < problem_size.Q) ? "true":"false") << "]"
|
||||
<< std::endl;
|
||||
#endif
|
||||
if (p < problem_size.P && q < problem_size.Q) {
|
||||
|
||||
ElementA a = tensor_dy.at(cutlass::make_Coord(n, p, q, k));
|
||||
|
||||
@ -0,0 +1,60 @@
|
||||
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright notice, this list of
|
||||
* conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright notice, this list of
|
||||
* conditions and the following disclaimer in the documentation and/or other materials
|
||||
* provided with the distribution.
|
||||
* * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
|
||||
* to endorse or promote products derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************************************/
|
||||
#pragma once
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include "cutlass/cutlass.h"
|
||||
#include "cutlass/complex.h"
|
||||
#include "cutlass/util/reference/host/tensor_reduce.h"
|
||||
#include "cutlass/core_io.h"
|
||||
|
||||
namespace cutlass {
|
||||
namespace reference {
|
||||
namespace host {
|
||||
|
||||
/// Helper to compute the relative error metric for tensor A_computed w.r.t. to tensor A_reference
|
||||
template <
|
||||
typename Element,
|
||||
typename Layout,
|
||||
typename ComputeType = double
|
||||
>
|
||||
ComputeType TensorRelativeErrorMetric(
|
||||
TensorView<Element, Layout> view_A_computed,
|
||||
TensorView<Element, Layout> view_B_reference,
|
||||
ComputeType identity = ComputeType()
|
||||
) {
|
||||
|
||||
return cutlass::reference::host::TensorNormDiff(view_A_computed, view_B_reference, identity) /
|
||||
cutlass::reference::host::TensorNorm(view_B_reference, identity);
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
} // namespace host
|
||||
} // namespace reference
|
||||
} // namespace cutlass
|
||||
@ -36,6 +36,7 @@
|
||||
// Cutlass includes
|
||||
#include "cutlass/cutlass.h"
|
||||
#include "cutlass/complex.h"
|
||||
#include "cutlass/quaternion.h"
|
||||
#include "cutlass/array.h"
|
||||
#include "cutlass/numeric_types.h"
|
||||
#include "cutlass/subbyte_reference.h"
|
||||
@ -219,6 +220,56 @@ struct RandomGaussianFunc<complex<Element> > {
|
||||
}
|
||||
};
|
||||
|
||||
/// Partial specialization for initializing a complex value.
|
||||
template <typename Element>
|
||||
struct RandomGaussianFunc<Quaternion<Element> > {
|
||||
|
||||
uint64_t seed;
|
||||
double mean;
|
||||
double stddev;
|
||||
int int_scale;
|
||||
double pi;
|
||||
|
||||
//
|
||||
// Methods
|
||||
//
|
||||
RandomGaussianFunc(
|
||||
uint64_t seed_ = 0,
|
||||
double mean_ = 0,
|
||||
double stddev_ = 1,
|
||||
int int_scale_ = -1
|
||||
):
|
||||
seed(seed_), mean(mean_), stddev(stddev_), int_scale(int_scale_), pi(std::acos(-1)) {
|
||||
std::srand((unsigned)seed);
|
||||
}
|
||||
|
||||
/// Compute random value and update RNG state
|
||||
Quaternion<Element> operator()() const {
|
||||
|
||||
Element reals[4];
|
||||
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
// Box-Muller transform to generate random numbers with Normal distribution
|
||||
double u1 = double(std::rand()) / double(RAND_MAX);
|
||||
double u2 = double(std::rand()) / double(RAND_MAX);
|
||||
|
||||
// Compute Gaussian random value
|
||||
double rnd = std::sqrt(-2 * std::log(u1)) * std::cos(2 * pi * u2);
|
||||
rnd = mean + stddev * rnd;
|
||||
|
||||
if (int_scale >= 0) {
|
||||
rnd = double(int(rnd * double(1 << int_scale)));
|
||||
reals[i] = from_real<Element>(rnd / double(1 << int_scale));
|
||||
}
|
||||
else {
|
||||
reals[i] = from_real<Element>(rnd);
|
||||
}
|
||||
}
|
||||
|
||||
return Quaternion<Element>(reals[0], reals[1], reals[2], reals[3]);
|
||||
}
|
||||
};
|
||||
|
||||
/// Computes a random Gaussian distribution
|
||||
template <
|
||||
typename Element, ///< Element type
|
||||
@ -429,6 +480,58 @@ struct RandomUniformFunc<complex<Element> > {
|
||||
}
|
||||
};
|
||||
|
||||
/// Partial specialization for initializing a Quaternion value.
|
||||
template <typename Element>
|
||||
struct RandomUniformFunc<Quaternion<Element> > {
|
||||
|
||||
using Real = typename RealType<Element>::Type;
|
||||
|
||||
uint64_t seed;
|
||||
double range;
|
||||
double min;
|
||||
int int_scale;
|
||||
|
||||
//
|
||||
// Methods
|
||||
//
|
||||
|
||||
RandomUniformFunc(
|
||||
uint64_t seed_ = 0,
|
||||
double max = 1,
|
||||
double min_ = 0,
|
||||
int int_scale_ = -1
|
||||
):
|
||||
seed(seed_), range(max - min_), min(min_), int_scale(int_scale_) {
|
||||
std::srand((unsigned)seed);
|
||||
}
|
||||
|
||||
|
||||
/// Compute random value and update RNG state
|
||||
Quaternion<Element> operator()() const {
|
||||
|
||||
Element reals[4];
|
||||
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
double rnd = double(std::rand()) / double(RAND_MAX);
|
||||
|
||||
rnd = min + range * rnd;
|
||||
|
||||
// Random values are cast to integer after scaling by a power of two to facilitate error
|
||||
// testing
|
||||
|
||||
if (int_scale >= 0) {
|
||||
rnd = double(int(rnd * double(1 << int_scale)));
|
||||
reals[i] = from_real<Element>(Real(rnd / double(1 << int_scale)));
|
||||
}
|
||||
else {
|
||||
reals[i] = from_real<Element>(Real(rnd));
|
||||
}
|
||||
}
|
||||
|
||||
return make_Quaternion(reals[0], reals[1], reals[2], reals[3]);
|
||||
}
|
||||
};
|
||||
|
||||
/// Computes a random Gaussian distribution
|
||||
template <
|
||||
typename Element, ///< Element type
|
||||
@ -510,6 +613,32 @@ void TensorFillRandomUniform(
|
||||
TensorFillRandomUniform(dst.view_imag(), ~seed, max, min, bits);
|
||||
}
|
||||
|
||||
|
||||
/// Fills a tensor with random values with a uniform random distribution.
|
||||
template <
|
||||
typename Element, ///< Element type
|
||||
typename Layout> ///< Layout function
|
||||
void TensorFillRandomUniform(
|
||||
TensorView<Quaternion<Element>, Layout> dst, ///< destination tensor
|
||||
uint64_t seed, ///< seed for RNG
|
||||
double max = 1, ///< upper bound of distribution
|
||||
double min = 0, ///< lower bound for distribution
|
||||
int bits = -1) { ///< If non-negative, specifies number of fractional bits that
|
||||
/// are not truncated to zero. Permits reducing precision of
|
||||
/// data.
|
||||
detail::RandomUniformFunc<Quaternion<Element>> random_func(seed, max, min, bits);
|
||||
|
||||
detail::TensorFillRandomUniformFunc<Quaternion<Element>, Layout> func(
|
||||
dst,
|
||||
random_func
|
||||
);
|
||||
|
||||
TensorForEach(
|
||||
dst.extent(),
|
||||
func
|
||||
);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/// Fills a tensor with random values with a uniform random distribution.
|
||||
template <
|
||||
|
||||
Reference in New Issue
Block a user