CUTLASS 3.6.0 (#1850)
* v3.6
* update changelog
* update readme
* fix typo
* fixing typos
* hopper gemm with weight prefetch

---------

Co-authored-by: yuzhai <yuzhai@nvidia.com>
Co-authored-by: Haicheng Wu <haichengw@nvidia.com>
This commit is contained in:
@ -54,17 +54,17 @@ is_byte_aligned(void const* const ptr)
|
||||
# define CUTE_ALIGNAS(n) alignas(n)
|
||||
#endif
|
||||
|
||||
template <size_t Alignment>
|
||||
template <size_t Alignment, class Child = void>
|
||||
struct aligned_struct {};
|
||||
|
||||
template <> struct CUTE_ALIGNAS( 1) aligned_struct< 1> {};
|
||||
template <> struct CUTE_ALIGNAS( 2) aligned_struct< 2> {};
|
||||
template <> struct CUTE_ALIGNAS( 4) aligned_struct< 4> {};
|
||||
template <> struct CUTE_ALIGNAS( 8) aligned_struct< 8> {};
|
||||
template <> struct CUTE_ALIGNAS( 16) aligned_struct< 16> {};
|
||||
template <> struct CUTE_ALIGNAS( 32) aligned_struct< 32> {};
|
||||
template <> struct CUTE_ALIGNAS( 64) aligned_struct< 64> {};
|
||||
template <> struct CUTE_ALIGNAS(128) aligned_struct<128> {};
|
||||
template <> struct CUTE_ALIGNAS(256) aligned_struct<256> {};
|
||||
template <class Child> struct CUTE_ALIGNAS( 1) aligned_struct< 1, Child> {};
|
||||
template <class Child> struct CUTE_ALIGNAS( 2) aligned_struct< 2, Child> {};
|
||||
template <class Child> struct CUTE_ALIGNAS( 4) aligned_struct< 4, Child> {};
|
||||
template <class Child> struct CUTE_ALIGNAS( 8) aligned_struct< 8, Child> {};
|
||||
template <class Child> struct CUTE_ALIGNAS( 16) aligned_struct< 16, Child> {};
|
||||
template <class Child> struct CUTE_ALIGNAS( 32) aligned_struct< 32, Child> {};
|
||||
template <class Child> struct CUTE_ALIGNAS( 64) aligned_struct< 64, Child> {};
|
||||
template <class Child> struct CUTE_ALIGNAS(128) aligned_struct<128, Child> {};
|
||||
template <class Child> struct CUTE_ALIGNAS(256) aligned_struct<256, Child> {};
|
||||
|
||||
} // end namespace cute
|
||||
|
||||
@ -30,8 +30,8 @@
|
||||
**************************************************************************************************/
|
||||
#pragma once
|
||||
|
||||
#include <cute/container/array.hpp>
|
||||
#include <cute/container/alignment.hpp>
|
||||
#include <cute/container/alignment.hpp> // CUTE_ALIGNAS
|
||||
#include <cute/container/array.hpp> // cute::array
|
||||
|
||||
namespace cute
|
||||
{
|
||||
|
||||
@ -181,6 +181,20 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
CUTE_HOST_DEVICE
|
||||
void
|
||||
print(subbyte_reference<T> ref) {
|
||||
cute::print(ref.get());
|
||||
}
|
||||
|
||||
template <class T>
|
||||
CUTE_HOST_DEVICE
|
||||
void
|
||||
pretty_print(subbyte_reference<T> ref) {
|
||||
cute::pretty_print(ref.get());
|
||||
}
|
||||
|
||||
//
|
||||
// subbyte_iterator
|
||||
// Random-access iterator over subbyte references
|
||||
|
||||
@ -35,9 +35,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cute/config.hpp>
|
||||
|
||||
#include <cute/config.hpp> // CUTE_HOST_DEVICE
|
||||
#include <cute/numeric/numeric_types.hpp> // uint_bit_t
|
||||
#include <cute/util/type_traits.hpp> // cute::is_same
|
||||
|
||||
namespace cute
|
||||
{
|
||||
|
||||
@ -30,12 +30,8 @@
|
||||
**************************************************************************************************/
|
||||
#pragma once
|
||||
|
||||
#include <vector_types.h>
|
||||
|
||||
#include <cute/config.hpp>
|
||||
|
||||
#include <cute/util/type_traits.hpp>
|
||||
#include <cute/numeric/integral_constant.hpp>
|
||||
#include <cute/config.hpp> // CUTE_HOST_DEVICE, CUTE_GCC_UNREACHABLE
|
||||
#include <cute/numeric/integral_constant.hpp> // cute::integral_constant
|
||||
|
||||
namespace cute
|
||||
{
|
||||
|
||||
@ -634,14 +634,23 @@ template <class Tuple, size_t... Is>
|
||||
// Prints the elements of t selected by Is..., enclosed by the delimiters s and
// e and separated by ','.
//
//   t   tuple-like object whose elements are read with get<Is>(t)
//   s   opening delimiter (default '(')
//   e   closing delimiter (default ')')
//
// Output is written exactly once: for an empty index pack only s and e are
// printed; otherwise s is emitted in front of element 0 and ',' in front of
// every later element, so no placeholder character is ever printed before the
// first element.
CUTE_HOST_DEVICE void print_tuple(Tuple const& t, index_sequence<Is...>, char s = '(', char e = ')')
{
  // Make cute::print visible to the unqualified calls below, alongside any
  // overloads found through argument-dependent lookup.
  using cute::print;
  if (sizeof...(Is) == 0) {
    print(s);
  } else {
    // Comma fold: per index, print the prefix (s or ',') and then the element.
    ((void(print(Is == 0 ? s : ',')), void(print(get<Is>(t)))), ...);
  }
  print(e);
}
|
||||
|
||||
#if !defined(__CUDACC_RTC__)
|
||||
template <class Tuple, std::size_t... Is>
|
||||
CUTE_HOST std::ostream& print_tuple_os(std::ostream& os, Tuple const& t, index_sequence<Is...>, char s = '(', char e = ')')
|
||||
{
|
||||
os << s; (void(os << (Is == 0 ? '\0' : ',') << get<Is>(t)), ...);
|
||||
if (sizeof...(Is) == 0) {
|
||||
os << s;
|
||||
} else {
|
||||
(void(os << (Is == 0 ? s : ',') << get<Is>(t)), ...);
|
||||
}
|
||||
return os << e;
|
||||
}
|
||||
#endif // !defined(__CUDACC_RTC__)
|
||||
|
||||
@ -30,8 +30,7 @@
|
||||
**************************************************************************************************/
|
||||
#pragma once
|
||||
|
||||
#include <cute/config.hpp>
|
||||
#include <cute/util/type_traits.hpp>
|
||||
#include <cute/config.hpp> // CUTE_HOST_DEVICE, CUTE_STL_NAMESPACE
|
||||
|
||||
namespace cute
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user