CUTLASS 2.1 (#83)

CUTLASS 2.1 contributes:
- BLAS-style host-side API added to CUTLASS Library
- Planar Complex GEMM kernels targeting Volta and Turing Tensor Cores
- Minor enhancements and bug fixes
This commit is contained in:
Andrew Kerr
2020-04-07 13:51:25 -07:00
committed by GitHub
parent 7c0cd26d13
commit 96dab34ad9
196 changed files with 20653 additions and 1995 deletions

View File

@ -25,7 +25,11 @@
#pragma once
#include <cuComplex.h>
#if defined(__CUDACC_RTC__)
#include <cuda/std/cstdint>
#else
#include <cstdint>
#endif
#include "cutlass/cutlass.h"
#include "cutlass/half.h"
@ -351,6 +355,16 @@ CUTLASS_HOST_DEVICE R norm_accumulate(complex<T> const &z, R const &accumulator)
static_cast<R>(imag(z)) * static_cast<R>(imag(z));
}
/// Conjugate of a real-valued single-precision scalar. A real number is its
/// own conjugate, so the argument is handed back unchanged.
CUTLASS_HOST_DEVICE
float conj(const float &z) {
  return z;
}
/// Conjugate of a real-valued double-precision scalar. A real number is its
/// own conjugate, so the argument is handed back unchanged.
CUTLASS_HOST_DEVICE
double conj(const double &z) {
  return z;
}
/// Returns the complex conjugate
template <typename T>
CUTLASS_HOST_DEVICE complex<T> conj(complex<T> const &z) {
@ -414,6 +428,10 @@ CUTLASS_HOST_DEVICE complex<T> sin(complex<T> const &z) {
/// Partial specialization of RealType for complex<T>.
///
/// Exposes the underlying real-valued element type as `Type` and a helper that
/// builds a complex<T> from a real value.
template <typename T>
struct RealType< complex<T> > {
  /// Real-valued element type underlying complex<T>.
  using Type = T;

  /// Converts the real value `x` to T and constructs a complex<T> from that
  /// single argument.
  /// NOTE(review): the imaginary part is presumably zero-initialized by the
  /// single-argument complex<T> constructor -- confirm against its definition.
  ///
  /// Annotated with CUTLASS_HOST_DEVICE, like the free functions in this
  /// header, so the helper is not restricted to host-only code when compiled
  /// with CUDA.
  CUTLASS_HOST_DEVICE
  static complex<T> from_real(double x) {
    return complex<T>(static_cast<T>(x));
  }
};
/////////////////////////////////////////////////////////////////////////////////////////////////
@ -438,5 +456,18 @@ cutlass::complex<double> from_real<cutlass::complex<double> >(double r) {
//////////////////////////////////////////////////////////////////////////////////////////////////
/// Type trait reporting whether T is a complex<> type. This primary template
/// covers every type not matched by a specialization and yields false.
template <typename T>
struct is_complex {
  static const bool value = false;
};
/// Specialization of is_complex selected when the queried type is complex<T>;
/// yields true.
template <typename T>
struct is_complex< complex<T> > {
  static const bool value = true;
};
//////////////////////////////////////////////////////////////////////////////////////////////////
} // namespace cutlass
//////////////////////////////////////////////////////////////////////////////////////////////////