CUTLASS 3.2.1 (#1113)
* Updates for 3.2.1 release.
* Minor fix in gemm op profiler for raster order.
* Add scheduler mapping for raster order in the kernels.
This commit is contained in:
@ -73,11 +73,26 @@ abs(T const& t) {
|
||||
CUTE_GCC_UNREACHABLE;
|
||||
}
|
||||
|
||||
// Returns 1 if x > 0, -1 if x < 0, and 0 if x is zero.
|
||||
template <class T,
|
||||
__CUTE_REQUIRES(is_arithmetic<T>::value)>
|
||||
CUTE_HOST_DEVICE constexpr
|
||||
int
|
||||
signum(T const& x) {
|
||||
if constexpr (is_signed<T>::value) {
|
||||
return (T(0) < x) - (x < T(0));
|
||||
} else {
|
||||
return T(0) < x;
|
||||
}
|
||||
|
||||
CUTE_GCC_UNREACHABLE;
|
||||
}
|
||||
|
||||
//
|
||||
// C++17 <numeric> operations
|
||||
//
|
||||
|
||||
// Greatest common divisor of two integers
|
||||
// Greatest common divisor of two positive integers
|
||||
template <class T, class U,
|
||||
__CUTE_REQUIRES(is_std_integral<T>::value &&
|
||||
is_std_integral<U>::value)>
|
||||
@ -92,7 +107,7 @@ gcd(T t, U u) {
|
||||
}
|
||||
}
|
||||
|
||||
// Least common multiple of two integers
|
||||
// Least common multiple of two positive integers
|
||||
template <class T, class U,
|
||||
__CUTE_REQUIRES(is_std_integral<T>::value &&
|
||||
is_std_integral<U>::value)>
|
||||
@ -280,23 +295,6 @@ shiftr(T x, int s) {
|
||||
return s >= 0 ? (x >> s) : (x << -s);
|
||||
}
|
||||
|
||||
// Returns 1 if x > 0, -1 if x < 0, and 0 if x is zero.
|
||||
template <class T,
|
||||
__CUTE_REQUIRES(is_unsigned<T>::value)>
|
||||
CUTE_HOST_DEVICE constexpr
|
||||
int
|
||||
signum(T const& x) {
|
||||
return T(0) < x;
|
||||
}
|
||||
|
||||
template <class T,
|
||||
__CUTE_REQUIRES(not is_unsigned<T>::value)>
|
||||
CUTE_HOST_DEVICE constexpr
|
||||
int
|
||||
signum(T const& x) {
|
||||
return (T(0) < x) - (x < T(0));
|
||||
}
|
||||
|
||||
// Safe divide
|
||||
// @pre t % u == 0
|
||||
// @result t / u
|
||||
|
||||
Reference in New Issue
Block a user