3.6.0 update (#2005)

* 3.6.0 update

* doc and swap stuff

---------

Co-authored-by: yuzhai <yuzhai@nvidia.com>
Co-authored-by: Haicheng Wu <haichengw@nvidia.com>
This commit is contained in:
Yujia Zhai
2024-12-24 22:34:40 -08:00
committed by GitHub
parent e1cd8c7866
commit 3d261a5974
258 changed files with 10863 additions and 3883 deletions

View File

@ -84,6 +84,8 @@ struct ArrayEngine
};
// Specialization for sparse_elem<S,T> tensor allocation/iteration
// NOTE: This can and should be used for allocation of SMEM as well!
// TODO: Consider fusing these two ArrayEngines into one?
template <int S, class T, size_t N>
struct ArrayEngine<sparse_elem<S,T>, N>
{
@ -858,6 +860,17 @@ max_common_layout(Tensor<SrcEngine,SrcLayout> const& a,
CUTE_GCC_UNREACHABLE;
}
/* Compute the largest alignment, in bits, that is statically known for a Tensor.
 *
 * The result is the gcd of two terms:
 *   - max_alignment of the tensor's data handle,  t.data()
 *   - max_alignment of the tensor's layout, t.layout(), multiplied by
 *     sizeof_bits of the engine's value_type
 */
template <class Engine, class Layout>
CUTE_HOST_DEVICE constexpr
auto
max_alignment(Tensor<Engine,Layout> const& t)
{
  using ValueType = typename Engine::value_type;

  // Term contributed by the underlying data handle
  auto const data_alignment = max_alignment(t.data());

  // Term contributed by the layout, scaled by the element width in bits
  auto const layout_alignment = max_alignment(t.layout()) * static_value<sizeof_bits<ValueType>>();

  return gcd(data_alignment, layout_alignment);
}
//
// Key algebraic operations -- Composition, Divide, and Product
//