diff --git a/include/cutlass/epilogue/collective/builders/sm90_builder.inl b/include/cutlass/epilogue/collective/builders/sm90_builder.inl index 50a5420b..9cb03fdc 100644 --- a/include/cutlass/epilogue/collective/builders/sm90_builder.inl +++ b/include/cutlass/epilogue/collective/builders/sm90_builder.inl @@ -116,13 +116,13 @@ sm90_compute_tile_shape_or_override() { auto epi_tile = [&] () { if constexpr (detail::sm90_is_cooperative_v) { auto tile_m = cute::min(_128{}, size<0>(TileShape_MNK{})); - auto tile_n = cute::min(_32{}, size<1>(TileShape_MNK{})); + auto tile_n = cute::gcd(cute::min(_32{}, size<1>(TileShape_MNK{})), size<1>(TileShape_MNK{})); return make_shape(tile_m, tile_n); } else if constexpr (detail::sm90_is_warp_specialized_v) { constexpr int N_perf = sizeof_bits_v == 8 ? 64 : 32; auto tile_m = cute::min(_64{}, size<0>(TileShape_MNK{})); - auto tile_n = cute::min(Int{}, size<1>(TileShape_MNK{})); + auto tile_n = cute::gcd(cute::min(Int{}, size<1>(TileShape_MNK{})), size<1>(TileShape_MNK{})); return make_shape(tile_m, tile_n); } else {