CUTLASS 3.2.1 (#1113)

* Updates for 3.2.1 release.

* Minor fix in gemm op profiler for raster order.

* Add scheduler mapping for raster order in the kernels.
This commit is contained in:
ANIKET SHIVAM
2023-09-26 14:24:26 -07:00
committed by GitHub
parent e0aaa3c3b3
commit 90d3b0fb18
428 changed files with 22253 additions and 21762 deletions

View File

@ -73,11 +73,26 @@ abs(T const& t) {
CUTE_GCC_UNREACHABLE;
}
// Sign of x as an int: 1 when x is positive, -1 when x is negative, 0 otherwise.
// Participates in overload resolution only when is_arithmetic<T>::value is true.
// When is_signed<T>::value is false the "x < 0" test is omitted, so the result
// is either 0 or 1.
template <class T,
          __CUTE_REQUIRES(is_arithmetic<T>::value)>
CUTE_HOST_DEVICE constexpr
int
signum(T const& x) {
  if constexpr (not is_signed<T>::value) {
    // No negative values to detect: only the "greater than zero" test matters.
    return T(0) < x;
  } else {
    int const above_zero = T(0) < x;
    int const below_zero = x < T(0);
    return above_zero - below_zero;
  }
  CUTE_GCC_UNREACHABLE;
}
//
// C++17 <numeric> operations
//
// Greatest common divisor of two integers
// Greatest common divisor of two positive integers
template <class T, class U,
__CUTE_REQUIRES(is_std_integral<T>::value &&
is_std_integral<U>::value)>
@ -92,7 +107,7 @@ gcd(T t, U u) {
}
}
// Least common multiple of two integers
// Least common multiple of two positive integers
template <class T, class U,
__CUTE_REQUIRES(is_std_integral<T>::value &&
is_std_integral<U>::value)>
@ -280,23 +295,6 @@ shiftr(T x, int s) {
return s >= 0 ? (x >> s) : (x << -s);
}
// Sign of x for types where is_unsigned<T>::value is true:
// yields 1 when T(0) < x and 0 otherwise (a negative result is never produced).
template <class T,
          __CUTE_REQUIRES(is_unsigned<T>::value)>
CUTE_HOST_DEVICE constexpr
int
signum(T const& x) {
  int const above_zero = T(0) < x;
  return above_zero;
}
// Sign of x for types where is_unsigned<T>::value is false:
// yields 1 when T(0) < x, -1 when x < T(0), and 0 when neither comparison holds.
template <class T,
          __CUTE_REQUIRES(not is_unsigned<T>::value)>
CUTE_HOST_DEVICE constexpr
int
signum(T const& x) {
  auto const above_zero = T(0) < x;
  auto const below_zero = x < T(0);
  return above_zero - below_zero;
}
// Safe divide
// @pre t % u == 0
// @result t / u