CUTLASS 3.3.0 (#1167)

* Release 3.3.0

Adds support for mixed precision GEMMs on Hopper and Ampere
Adds support for < 16B aligned GEMMs on Hopper
Enhancements to EVT
Enhancements to Python interface
Enhancements to Sub-byte type handling in CuTe
Several other bug-fixes and performance improvements.

* minor doc update
This commit is contained in:
Pradeep Ramani
2023-11-02 08:09:05 -07:00
committed by GitHub
parent 922fb5108b
commit c008b4aea8
263 changed files with 16214 additions and 5008 deletions

View File

@@ -170,6 +170,76 @@ CUTE_NAMED_BINARY_OP(min_fn, cute::min);
#undef CUTE_BINARY_OP
#undef CUTE_NAMED_BINARY_OP
/**********/
/** Fold **/
/**********/
// CUTE_FOLD_OP(STEM, OPER) emits four stateless function objects, each
// wrapping one form of C++17 fold expression over the operator OPER:
//
//   STEM_unary_rfold  : f(xs...)        ->  (xs OPER ...)
//   STEM_unary_lfold  : f(xs...)        ->  (... OPER xs)
//   STEM_binary_rfold : f(seed, xs...)  ->  (xs OPER ... OPER seed)
//   STEM_binary_lfold : f(seed, xs...)  ->  (seed OPER ... OPER xs)
//
// For both binary forms the seed is the FIRST call argument, even though the
// right fold places it at the innermost (right-most) position.
// Every call operator has a plain `auto` return type, so the result is
// returned by value. Arguments are used as lvalues (they are not forwarded).
#define CUTE_FOLD_OP(STEM, OPER)                                           \
  struct STEM##_unary_rfold {                                              \
    template <class... Xs>                                                 \
    CUTE_HOST_DEVICE constexpr                                             \
    auto operator()(Xs&&... xs) const { return (xs OPER ...); }            \
  };                                                                       \
  struct STEM##_unary_lfold {                                              \
    template <class... Xs>                                                 \
    CUTE_HOST_DEVICE constexpr                                             \
    auto operator()(Xs&&... xs) const { return (... OPER xs); }            \
  };                                                                       \
  struct STEM##_binary_rfold {                                             \
    template <class Seed, class... Xs>                                     \
    CUTE_HOST_DEVICE constexpr                                             \
    auto operator()(Seed&& seed, Xs&&... xs) const {                       \
      return (xs OPER ... OPER seed);                                      \
    }                                                                      \
  };                                                                       \
  struct STEM##_binary_lfold {                                             \
    template <class Seed, class... Xs>                                     \
    CUTE_HOST_DEVICE constexpr                                             \
    auto operator()(Seed&& seed, Xs&&... xs) const {                       \
      return (seed OPER ... OPER xs);                                      \
    }                                                                      \
  }
// Instantiate the fold functors. Each line below defines four structs for the
// given operator: <name>_unary_rfold, <name>_unary_lfold, <name>_binary_rfold
// and <name>_binary_lfold.

// Arithmetic operators.
CUTE_FOLD_OP(plus, +);
CUTE_FOLD_OP(minus, -);
CUTE_FOLD_OP(multiplies, *);
CUTE_FOLD_OP(divides, /);
CUTE_FOLD_OP(modulus, %);
// Arithmetic compound-assignment operators.
CUTE_FOLD_OP(plus_assign, +=);
CUTE_FOLD_OP(minus_assign, -=);
CUTE_FOLD_OP(multiplies_assign, *=);
CUTE_FOLD_OP(divides_assign, /=);
CUTE_FOLD_OP(modulus_assign, %=);
// Bitwise and shift operators.
CUTE_FOLD_OP(bit_and, &);
CUTE_FOLD_OP(bit_or, |);
CUTE_FOLD_OP(bit_xor, ^);
CUTE_FOLD_OP(left_shift, <<);
CUTE_FOLD_OP(right_shift, >>);
// Bitwise and shift compound-assignment operators.
CUTE_FOLD_OP(bit_and_assign, &=);
CUTE_FOLD_OP(bit_or_assign, |=);
CUTE_FOLD_OP(bit_xor_assign, ^=);
CUTE_FOLD_OP(left_shift_assign, <<=);
CUTE_FOLD_OP(right_shift_assign, >>=);
// Logical operators. For the built-in && and ||, these are the only operators
// in this list whose unary fold is well-formed over an empty argument pack.
CUTE_FOLD_OP(logical_and, &&);
CUTE_FOLD_OP(logical_or, ||);
// Comparison operators. NOTE: a fold chains the operator, e.g. a left fold of
// `<` over (a, b, c) evaluates ((a < b) < c); it does not test
// (a < b) && (b < c).
CUTE_FOLD_OP(equal_to, ==);
CUTE_FOLD_OP(not_equal_to, !=);
CUTE_FOLD_OP(greater, >);
CUTE_FOLD_OP(less, <);
CUTE_FOLD_OP(greater_equal, >=);
CUTE_FOLD_OP(less_equal, <=);
// The generator macro is only needed for the list above; remove it so it does
// not leak to code that includes this header.
#undef CUTE_FOLD_OP
/**********/
/** Meta **/
/**********/