v4.2 release. (#2587)
* Fix default cluster callback values to 1 to avoid profiler failure when these values are not set on the command line.
* v4.2 release.
This commit is contained in:
@@ -452,8 +452,8 @@ void gemm_host_f16xf16_f32_f32_tnt(TypeA const* device_ptr_A, LayoutA layout_A,
|
||||
|
||||
dim3 dimBlock(128);
|
||||
dim3 dimCluster(size<0>(cluster_shape), size<1>(cluster_shape), size<2>(cluster_shape));
|
||||
dim3 dimGrid(round_up(size(ceil_div(Gemm_M, bM)), dimCluster.x),
|
||||
round_up(size(ceil_div(Gemm_N, bN)), dimCluster.y));
|
||||
dim3 dimGrid(size(ceil_div(Gemm_M, bM * size<1>(cluster_layout_vmnk))) * dimCluster.x,
|
||||
size(ceil_div(Gemm_N, bN * size<2>(cluster_layout_vmnk))) * dimCluster.y);
|
||||
int smemBytes = sizeof(SMEMStorage);
|
||||
|
||||
auto* kernel_ptr = &gemm_device<SMEMStorage,
|
||||
|
||||
@@ -528,8 +528,8 @@ void gemm_host_f16xf16_f32_f32_tnt(TypeA const* device_ptr_A, LayoutA layout_A,
|
||||
|
||||
dim3 dimBlock(128);
|
||||
dim3 dimCluster(size<0>(cluster_shape), size<1>(cluster_shape), size<2>(cluster_shape));
|
||||
dim3 dimGrid(round_up(size(ceil_div(Gemm_M, bM)), dimCluster.x),
|
||||
round_up(size(ceil_div(Gemm_N, bN)), dimCluster.y));
|
||||
dim3 dimGrid(size(ceil_div(Gemm_M, bM * size<1>(cluster_layout_vmnk))) * dimCluster.x,
|
||||
size(ceil_div(Gemm_N, bN * size<2>(cluster_layout_vmnk))) * dimCluster.y);
|
||||
int smemBytes = sizeof(SMEMStorage);
|
||||
|
||||
auto* kernel_ptr = &gemm_device<SMEMStorage,
|
||||
|
||||
@@ -567,8 +567,8 @@ void gemm_host_f16xf16_f32_f32_tnt(TypeA const* device_ptr_A, LayoutA layout_A,
|
||||
|
||||
dim3 dimBlock(128);
|
||||
dim3 dimCluster(size<0>(cluster_shape), size<1>(cluster_shape), size<2>(cluster_shape));
|
||||
dim3 dimGrid(round_up(size(ceil_div(Gemm_M, bM)), dimCluster.x),
|
||||
round_up(size(ceil_div(Gemm_N, bN)), dimCluster.y));
|
||||
dim3 dimGrid(size(ceil_div(Gemm_M, bM * size<1>(cluster_layout_vmnk))) * dimCluster.x,
|
||||
size(ceil_div(Gemm_N, bN * size<2>(cluster_layout_vmnk))) * dimCluster.y);
|
||||
int smemBytes = sizeof(SMEMStorage);
|
||||
|
||||
auto* kernel_ptr = &gemm_device<SMEMStorage,
|
||||
|
||||
@@ -575,6 +575,7 @@ void gemm_host_f16xf16_f32_f32_tnt(TypeA const* device_ptr_A, LayoutA layout_A,
|
||||
dim3 dimCluster(size<0>(cluster_shape), size<1>(cluster_shape), size<2>(cluster_shape));
|
||||
dim3 dimGrid(size(ceil_div(Gemm_M, bM * size<1>(cluster_layout_vmnk))) * dimCluster.x,
|
||||
size(ceil_div(Gemm_N, bN * size<2>(cluster_layout_vmnk))) * dimCluster.y);
|
||||
|
||||
int smemBytes = sizeof(SMEMStorage);
|
||||
|
||||
auto* kernel_ptr = &gemm_device<SMEMStorage,
|
||||
|
||||
@@ -681,8 +681,8 @@ void gemm_host_f16xf16_f32_f32_tnt(TypeA const* device_ptr_A, LayoutA layout_A,
|
||||
|
||||
dim3 dimBlock(128);
|
||||
dim3 dimCluster(size<0>(cluster_shape), size<1>(cluster_shape), size<2>(cluster_shape));
|
||||
dim3 dimGrid(round_up(size(ceil_div(Gemm_M, bM)), dimCluster.x),
|
||||
round_up(size(ceil_div(Gemm_N, bN)), dimCluster.y));
|
||||
dim3 dimGrid(size(ceil_div(Gemm_M, bM * size<1>(cluster_layout_vmnk))) * dimCluster.x,
|
||||
size(ceil_div(Gemm_N, bN * size<2>(cluster_layout_vmnk))) * dimCluster.y);
|
||||
int smemBytes = sizeof(SMEMStorage);
|
||||
|
||||
auto* kernel_ptr = &gemm_device<SMEMStorage,
|
||||
|
||||
Reference in New Issue
Block a user