CUTLASS 3.5.1 (#1623)

* CUTLASS 3.5.1

* updates, optimizations, fixes
This commit is contained in:
Vijay Thakkar
2024-07-29 08:46:24 -04:00
committed by GitHub
parent 56b46e2d13
commit be60a0b272
312 changed files with 19793 additions and 6775 deletions

View File

@@ -51,7 +51,7 @@ TEST(CuTe_core, ArraySubbyte)
for (size_t i = 0; i < array1.size(); ++i) {
array0[i+5] = array1[i];
}
EXPECT_EQ(int4_t(array0.back()), int4_t(1));
for (size_t i = 0; i < array1.size(); ++i) {
@@ -137,7 +137,7 @@ TEST(CuTe_core, Subbyte_iterator)
{
array_subbyte<uint8_t, 15> a{};
-auto tensor = make_tensor(subbyte_iterator<uint8_t>(a.raw_data()), make_shape(15));
+auto tensor = make_tensor(a.begin(), make_shape(15));
fill(a, uint8_t(13));
for (int i = 0; i < int(a.size()); ++i) {
@@ -150,7 +150,7 @@ TEST(CuTe_core, Subbyte_iterator)
{
array_subbyte<int4_t, 15> a{};
-auto tensor = make_tensor(subbyte_iterator<int4_t>(a.raw_data()), make_shape(15));
+auto tensor = make_tensor(a.begin(), make_shape(15));
fill(a, int4_t(-5));
for (int i = 0; i < int(a.size()); ++i) {
@@ -163,7 +163,7 @@ TEST(CuTe_core, Subbyte_iterator)
{
array_subbyte<uint2_t, 15> a{};
-auto tensor = make_tensor(subbyte_iterator<uint2_t>(a.raw_data()), make_shape(15));
+auto tensor = make_tensor(a.begin(), make_shape(15));
fill(a, uint2_t(-5));
for (int i = 0; i < int(a.size()); ++i) {
@@ -176,7 +176,7 @@ TEST(CuTe_core, Subbyte_iterator)
{
array_subbyte<bool, 15> a{};
-auto tensor = make_tensor(subbyte_iterator<bool>(a.raw_data()), make_shape(15));
+auto tensor = make_tensor(a.begin(), make_shape(15));
fill(a, bool(1));
for (int i = 0; i < int(a.size()); ++i) {
@@ -193,7 +193,7 @@ TEST(CuTe_core, Const_subbyte_iterator)
{
array_subbyte<uint8_t, 15> a{};
-auto tensor = make_tensor(subbyte_iterator<uint8_t const>(a.raw_data()), make_shape(15));
+auto tensor = make_tensor(a.begin(), make_shape(15));
fill(a, uint8_t(13));
for (int i = 0; i < int(a.size()); ++i) {
@@ -206,7 +206,7 @@ TEST(CuTe_core, Const_subbyte_iterator)
{
array_subbyte<int4_t, 15> a{};
-auto tensor = make_tensor(subbyte_iterator<int4_t const>(a.raw_data()), make_shape(15));
+auto tensor = make_tensor(a.begin(), make_shape(15));
fill(a, int4_t(-5));
for (int i = 0; i < int(a.size()); ++i) {
@@ -219,7 +219,7 @@ TEST(CuTe_core, Const_subbyte_iterator)
{
array_subbyte<uint2_t, 15> a{};
-auto tensor = make_tensor(subbyte_iterator<uint2_t const>(a.raw_data()), make_shape(15));
+auto tensor = make_tensor(a.begin(), make_shape(15));
fill(a, uint2_t(-5));
for (int i = 0; i < int(a.size()); ++i) {
@@ -232,7 +232,7 @@ TEST(CuTe_core, Const_subbyte_iterator)
{
array_subbyte<bool, 15> a{};
-auto tensor = make_tensor(subbyte_iterator<bool const>(a.raw_data()), make_shape(15));
+auto tensor = make_tensor(a.begin(), make_shape(15));
fill(a, bool(1));
for (int i = 0; i < int(a.size()); ++i) {