CUTLASS 3.2.1 (#1113)

* Updates for 3.2.1 release.

* Minor fix in gemm op profiler for raster order.

* Add scheduler mapping for raster order in the kernels.
This commit is contained in:
ANIKET SHIVAM
2023-09-26 14:24:26 -07:00
committed by GitHub
parent e0aaa3c3b3
commit 90d3b0fb18
428 changed files with 22253 additions and 21762 deletions

View File

@ -31,87 +31,31 @@
#include "cutlass_unit_test.h"
// C<uint32_t(something)>::value_type is not uint32_t for GCC 7.5.0.
// This test is thus disabled for GCC < 8.
#if defined(__GNUC__) && (__GNUC__ < 8)
#include <cutlass/trace.h>
#include <cute/swizzle.hpp>
namespace { // (anonymous)
// This function exists to work around a Clang 14 issue, in which
// the compiler tries to instantiate code that lives inside the
// "else" branch of an "if constexpr," even when the "else" branch
// is false. That triggers a spurious static_assert in MixedBits.
// The work-around is to make the body of the "else" branch a
// function, rather than leaving it in line.
//
// Some compilers strangely deduce the first two terms of
// make_integer_sequence<uint32_t, 8> as C<false> and C<true>, and
// the remaining terms as C<2>, C<3>, etc. Making this function take
// cute::integral_constant<uint32_t, S0_value>, etc. doesn't work
// with those compilers.
template<class S0_type, S0_type S0_value,
class F0_type, F0_type F0_value,
class S1_type, S1_type S1_value,
class F1_type, F1_type F1_value>
void clang14_workaround(cute::integral_constant<S0_type, S0_value>,
cute::integral_constant<F0_type, F0_value>,
cute::integral_constant<S1_type, S1_value>,
cute::integral_constant<F1_type, F1_value>)
{
constexpr cute::C<static_cast<uint32_t>(S0_value)> S0{};
constexpr cute::C<static_cast<uint32_t>(F0_value)> F0{};
constexpr cute::C<static_cast<uint32_t>(S1_value)> S1{};
constexpr cute::C<static_cast<uint32_t>(F1_value)> F1{};
for (uint32_t d0 = 0; d0 < 8; ++d0) {
if ((d0 & F0) != d0) { continue; } // Skip repeats
for (uint32_t d1 = 0; d1 < 8; ++d1) {
if ((d1 & F1) != d1) { continue; } // Skip repeats
auto m0 = make_mixed_bits(S0, d0, F0);
auto m1 = make_mixed_bits(S1, d1, F1);
//print(m0); print(" & "); print(m1); print(" = "); print(m0 & m1); print("\n");
EXPECT_EQ(uint32_t(m0 & m1), uint32_t(m0) & uint32_t(m1));
//print(m0); print(" | "); print(m1); print(" = "); print(m0 | m1); print("\n");
EXPECT_EQ(uint32_t(m0 | m1), uint32_t(m0) | uint32_t(m1));
//print(m0); print(" ^ "); print(m1); print(" = "); print(m0 ^ m1); print("\n");
EXPECT_EQ(uint32_t(m0 ^ m1), uint32_t(m0) ^ uint32_t(m1));
}
}
}
} // namespace (anonymous)
TEST(CuTe_core, MixedBits) {
TEST(CuTe_core, MixedBits)
{
using namespace cute;
auto uzero = cute::integral_constant<uint32_t, 0>{};
for_each(make_integer_sequence<uint32_t, 8>{}, [&](auto S0) {
for_each(make_integer_sequence<uint32_t, 8>{}, [&](auto F0) {
for_each(make_integer_sequence<uint32_t, 8>{}, [&](auto S1) {
for_each(make_integer_sequence<uint32_t, 8>{}, [&](auto F1) {
if constexpr (decltype(S0 == uzero || S1 == uzero)::value) {
return;
} else if constexpr (decltype((S0 & F0) != uzero || (S1 & F1) != uzero)::value) {
return;
} else {
clang14_workaround(S0, F0, S1, F1);
for_each(make_int_sequence<8>{}, [&](auto S0) {
for_each(make_int_sequence<8>{}, [&](auto F0) {
for_each(make_int_sequence<8>{}, [&](auto S1) {
for_each(make_int_sequence<8>{}, [&](auto F1) {
for (uint32_t d0 = 0; d0 < 8; ++d0) {
for (uint32_t d1 = 0; d1 < 8; ++d1) {
auto m0 = make_mixed_bits(S0, d0, F0);
auto m1 = make_mixed_bits(S1, d1, F1);
//print(m0); print(" & "); print(m1); print(" = "); print(m0 & m1); print("\n");
EXPECT_EQ(uint32_t(m0 & m1), uint32_t(m0) & uint32_t(m1));
//print(m0); print(" | "); print(m1); print(" = "); print(m0 | m1); print("\n");
EXPECT_EQ(uint32_t(m0 | m1), uint32_t(m0) | uint32_t(m1));
//print(m0); print(" ^ "); print(m1); print(" = "); print(m0 ^ m1); print("\n");
EXPECT_EQ(uint32_t(m0 ^ m1), uint32_t(m0) ^ uint32_t(m1));
}
}
});
});
});
});
}
TEST(CuTe_core, MakeIntegerSequence) {
cute::for_each(cute::make_integer_sequence<uint32_t, 8>{}, [](auto c) {
using c_type = decltype(c);
constexpr auto c_value = c_type::value;
using expected_type = cute::integral_constant<uint32_t, c_value>;
static_assert(cute::is_same_v<c_type, expected_type>);
});
}
#endif // defined(__GNUC__) && (__GNUC__ < 8)