CUTLASS 3.6.0 (#1850)

* v3.6

* update changelog

* update readme

* fix typo

* fixing typos

* hopper gemm with weight prefetch

---------

Co-authored-by: yuzhai <yuzhai@nvidia.com>
Co-authored-by: Haicheng Wu <haichengw@nvidia.com>
This commit is contained in:
Yujia Zhai
2024-10-09 12:33:27 -07:00
committed by GitHub
parent 0837a2a00a
commit cc3c29a81a
354 changed files with 105943 additions and 8203 deletions

View File

@ -54,17 +54,17 @@ is_byte_aligned(void const* const ptr)
# define CUTE_ALIGNAS(n) alignas(n)
#endif
// aligned_struct: an empty struct template selected by an alignment value.
// The primary template carries no alignment attribute; the specializations
// below, for 1, 2, 4, ..., 256, each apply CUTE_ALIGNAS(n) to an empty struct.
// NOTE(review): this span is a diff rendering with removed and added lines
// interleaved — the one-parameter forms (`template <size_t Alignment>` and the
// `template <>` specializations) appear to be the old version, and the
// two-parameter (Alignment, Child) forms their replacement. Confirm against
// the actual header before relying on either set.
template <size_t Alignment>
template <size_t Alignment, class Child = void>
struct aligned_struct {};
// Full specializations on the alignment value alone.
template <> struct CUTE_ALIGNAS( 1) aligned_struct< 1> {};
template <> struct CUTE_ALIGNAS( 2) aligned_struct< 2> {};
template <> struct CUTE_ALIGNAS( 4) aligned_struct< 4> {};
template <> struct CUTE_ALIGNAS( 8) aligned_struct< 8> {};
template <> struct CUTE_ALIGNAS( 16) aligned_struct< 16> {};
template <> struct CUTE_ALIGNAS( 32) aligned_struct< 32> {};
template <> struct CUTE_ALIGNAS( 64) aligned_struct< 64> {};
template <> struct CUTE_ALIGNAS(128) aligned_struct<128> {};
template <> struct CUTE_ALIGNAS(256) aligned_struct<256> {};
// Partial specializations: same alignment values, for any Child type.
// Child is not used inside the (empty) struct bodies; it only takes part in
// the type's identity. Child defaults to void in the primary declaration.
template <class Child> struct CUTE_ALIGNAS( 1) aligned_struct< 1, Child> {};
template <class Child> struct CUTE_ALIGNAS( 2) aligned_struct< 2, Child> {};
template <class Child> struct CUTE_ALIGNAS( 4) aligned_struct< 4, Child> {};
template <class Child> struct CUTE_ALIGNAS( 8) aligned_struct< 8, Child> {};
template <class Child> struct CUTE_ALIGNAS( 16) aligned_struct< 16, Child> {};
template <class Child> struct CUTE_ALIGNAS( 32) aligned_struct< 32, Child> {};
template <class Child> struct CUTE_ALIGNAS( 64) aligned_struct< 64, Child> {};
template <class Child> struct CUTE_ALIGNAS(128) aligned_struct<128, Child> {};
template <class Child> struct CUTE_ALIGNAS(256) aligned_struct<256, Child> {};
} // end namespace cute

View File

@ -30,8 +30,8 @@
**************************************************************************************************/
#pragma once
#include <cute/container/array.hpp>
#include <cute/container/alignment.hpp>
#include <cute/container/alignment.hpp> // CUTE_ALIGNAS
#include <cute/container/array.hpp> // cute::array
namespace cute
{

View File

@ -181,6 +181,20 @@ public:
}
};
// Prints what a subbyte_reference refers to: the reference is taken by value,
// its get() result is obtained, and that result is forwarded to cute::print.
template <class T>
CUTE_HOST_DEVICE
void
print(subbyte_reference<T> ref) {
cute::print(ref.get());
}
// pretty_print counterpart of the subbyte_reference print overload: takes the
// reference by value and forwards the result of ref.get() to cute::pretty_print.
template <class T>
CUTE_HOST_DEVICE
void
pretty_print(subbyte_reference<T> ref) {
cute::pretty_print(ref.get());
}
//
// subbyte_iterator
// Random-access iterator over subbyte references

View File

@ -35,9 +35,9 @@
#pragma once
#include <cute/config.hpp>
#include <cute/config.hpp> // CUTE_HOST_DEVICE
#include <cute/numeric/numeric_types.hpp> // uint_bit_t
#include <cute/util/type_traits.hpp> // cute::is_same
namespace cute
{

View File

@ -30,12 +30,8 @@
**************************************************************************************************/
#pragma once
#include <vector_types.h>
#include <cute/config.hpp>
#include <cute/util/type_traits.hpp>
#include <cute/numeric/integral_constant.hpp>
#include <cute/config.hpp> // CUTE_HOST_DEVICE, CUTE_GCC_UNREACHABLE
#include <cute/numeric/integral_constant.hpp> // cute::integral_constant
namespace cute
{

View File

@ -634,14 +634,23 @@ template <class Tuple, size_t... Is>
CUTE_HOST_DEVICE void print_tuple(Tuple const& t, index_sequence<Is...>, char s = '(', char e = ')')
{
// Prints the elements get<Is>(t)... separated by ',' and enclosed between
// the opening character `s` and the closing character `e`
// (defaults '(' and ')').
using cute::print;
// NOTE(review): this span is a diff rendering. The single-line statement
// below appears to be the removed version (it passes '\0' to print ahead of
// the element at index 0), and the if/else after it the replacement.
// Confirm against the actual header.
print(s); ((void(print(Is == 0 ? '\0' : ',')), void(print(get<Is>(t)))), ...); print(e);
// Empty index pack: only the opening character is printed in this branch.
if (sizeof...(Is) == 0) {
print(s);
} else {
// Fold over Is: the element at index 0 is preceded by `s`, every other
// element by ','.
((void(print(Is == 0 ? s : ',')), void(print(get<Is>(t)))), ...);
}
// The closing character is printed in both cases.
print(e);
}
#if !defined(__CUDACC_RTC__)
// Streams the elements get<Is>(t)... to `os`, separated by ',' and enclosed
// between `s` and `e` (defaults '(' and ')'). Returns the stream that results
// from writing `e`.
template <class Tuple, std::size_t... Is>
CUTE_HOST std::ostream& print_tuple_os(std::ostream& os, Tuple const& t, index_sequence<Is...>, char s = '(', char e = ')')
{
// NOTE(review): this span is a diff rendering. The single-line statement
// below appears to be the removed version (it streams '\0' ahead of the
// element at index 0), and the if/else after it the replacement.
// Confirm against the actual header.
os << s; (void(os << (Is == 0 ? '\0' : ',') << get<Is>(t)), ...);
// Empty index pack: only the opening character is written in this branch.
if (sizeof...(Is) == 0) {
os << s;
} else {
// Fold over Is: the element at index 0 is preceded by `s`, every other
// element by ','.
(void(os << (Is == 0 ? s : ',') << get<Is>(t)), ...);
}
return os << e;
}
#endif // !defined(__CUDACC_RTC__)

View File

@ -30,8 +30,7 @@
**************************************************************************************************/
#pragma once
#include <cute/config.hpp>
#include <cute/util/type_traits.hpp>
#include <cute/config.hpp> // CUTE_HOST_DEVICE, CUTE_STL_NAMESPACE
namespace cute
{