v4.2 release. (#2587)

* Set the default cluster callback values to 1 to avoid a profiler failure when these values are not set on the command line.

* v4.2 release.
This commit is contained in:
Junkai-Wu
2025-08-23 06:11:24 +08:00
committed by GitHub
parent 11cad1f67b
commit a49a78ffef
351 changed files with 28182 additions and 2032 deletions

View File

@ -407,7 +407,7 @@ def generate_tile_descriptions_sm90(math_instructions, is_aligned: bool, level:
def is_tile_desc_compatible_with_cooperative(tile_description):
  """Return True when the tile's CTA-M extent can be used by cooperative kernels.

  Args:
    tile_description: object exposing ``threadblock_shape``, an indexable
      whose first element is the CTA-M extent.

  Returns:
    bool: True if CTA-M is a multiple of 128, False otherwise.
  """
  # Cooperative kernels require CTA-M to be a multiple of 128. A plain
  # ">= 128" check would also accept extents such as 192, which this
  # divisibility test rejects.
  return tile_description.threadblock_shape[0] % 128 == 0
def can_tile_desc_use_shmem_in_epilogue(tile_description, data_types):