fix: examples/cute/tutorial/blackwell/04_mma_tma_2sm_sm100.cu GridDim miscalculated (#2492)
* fix: examples/cute/tutorial/blackwell/04_mma_tma_2sm_sm100.cu Launch dimGrid error
* feat: add cta tiler
* Update examples/cute/tutorial/blackwell/04_mma_tma_2sm_sm100.cu

  use cluster_layout_vmnk instead of cta_tiler

  Co-authored-by: Junkai-Wu <junkaiw@nvidia.com>
* feat: remove cta_tiler

---------

Co-authored-by: qinghongzeng <qinghongzeng@deeproute.ai>
Co-authored-by: Junkai-Wu <junkaiw@nvidia.com>
This commit is contained in:
@@ -573,8 +573,8 @@ void gemm_host_f16xf16_f32_f32_tnt(TypeA const* device_ptr_A, LayoutA layout_A,
|
||||
|
||||
dim3 dimBlock(128);
|
||||
dim3 dimCluster(size<0>(cluster_shape), size<1>(cluster_shape), size<2>(cluster_shape));
|
||||
dim3 dimGrid(round_up(size(ceil_div(Gemm_M, bM)), dimCluster.x),
|
||||
round_up(size(ceil_div(Gemm_N, bN)), dimCluster.y));
|
||||
dim3 dimGrid(size(ceil_div(Gemm_M, bM * size<1>(cluster_layout_vmnk))) * dimCluster.x,
|
||||
size(ceil_div(Gemm_N, bN * size<2>(cluster_layout_vmnk))) * dimCluster.y);
|
||||
int smemBytes = sizeof(SMEMStorage);
|
||||
|
||||
auto* kernel_ptr = &gemm_device<SMEMStorage,
|
||||
|
||||
Reference in New Issue
Block a user