Collection of changes to fix clang build. (#1200)
* Remove unused variables
* Qualify calls to make_fragment_? from templated base class. Fixes clang build error.
* Add missing `#include <cstdio>`
* Various changes to fix clang compile errors.
* More changes to fix clang build. Remaining issues:
  - `params` initializer of `CollectiveEpilogue`.
  - `ops` initializer of `Sm90VisitorImplBase`.
  - `__usAtomicCAS` needs to be added to clang upstream.
* Fix remaining clang build issues.
* Qualify `cute::rank()` calls.
* Qualify some more calls that are otherwise ambiguous between the `cute` and `std` namespaces.
* Double-escape special registers in inline asm.
* small change

---------

Co-authored-by: Haicheng Wu <haichengw@nvidia.com>
This commit is contained in:
@ -86,9 +86,9 @@ CUTE_DEVICE dim3 cluster_grid_dims()
|
||||
{
|
||||
#if defined(CUTE_ARCH_CLUSTER_SM90_ENABLED)
|
||||
uint32_t x, y, z;
|
||||
asm volatile("mov.u32 %0, %nclusterid.x;\n" : "=r"(x) : );
|
||||
asm volatile("mov.u32 %0, %nclusterid.y;\n" : "=r"(y) : );
|
||||
asm volatile("mov.u32 %0, %nclusterid.z;\n" : "=r"(z) : );
|
||||
asm volatile("mov.u32 %0, %%nclusterid.x;\n" : "=r"(x) : );
|
||||
asm volatile("mov.u32 %0, %%nclusterid.y;\n" : "=r"(y) : );
|
||||
asm volatile("mov.u32 %0, %%nclusterid.z;\n" : "=r"(z) : );
|
||||
return {x, y, z};
|
||||
#elif defined(__CUDA_ARCH__)
|
||||
// MSVC requires protecting use of gridDim with __CUDA_ARCH__.
|
||||
@ -105,9 +105,9 @@ CUTE_DEVICE dim3 cluster_id_in_grid()
|
||||
{
|
||||
#if defined(CUTE_ARCH_CLUSTER_SM90_ENABLED)
|
||||
uint32_t x, y, z;
|
||||
asm volatile("mov.u32 %0, %clusterid.x;\n" : "=r"(x) : );
|
||||
asm volatile("mov.u32 %0, %clusterid.y;\n" : "=r"(y) : );
|
||||
asm volatile("mov.u32 %0, %clusterid.z;\n" : "=r"(z) : );
|
||||
asm volatile("mov.u32 %0, %%clusterid.x;\n" : "=r"(x) : );
|
||||
asm volatile("mov.u32 %0, %%clusterid.y;\n" : "=r"(y) : );
|
||||
asm volatile("mov.u32 %0, %%clusterid.z;\n" : "=r"(z) : );
|
||||
return {x, y, z};
|
||||
#elif defined(__CUDA_ARCH__)
|
||||
// MSVC requires protecting use of blockIdx with __CUDA_ARCH__.
|
||||
@ -124,9 +124,9 @@ CUTE_DEVICE dim3 block_id_in_cluster()
|
||||
{
|
||||
#if defined(CUTE_ARCH_CLUSTER_SM90_ENABLED)
|
||||
uint32_t x, y, z;
|
||||
asm volatile("mov.u32 %0, %cluster_ctaid.x;\n" : "=r"(x) : );
|
||||
asm volatile("mov.u32 %0, %cluster_ctaid.y;\n" : "=r"(y) : );
|
||||
asm volatile("mov.u32 %0, %cluster_ctaid.z;\n" : "=r"(z) : );
|
||||
asm volatile("mov.u32 %0, %%cluster_ctaid.x;\n" : "=r"(x) : );
|
||||
asm volatile("mov.u32 %0, %%cluster_ctaid.y;\n" : "=r"(y) : );
|
||||
asm volatile("mov.u32 %0, %%cluster_ctaid.z;\n" : "=r"(z) : );
|
||||
return {x, y, z};
|
||||
#else
|
||||
return {0,0,0};
|
||||
@ -138,9 +138,9 @@ CUTE_DEVICE dim3 cluster_shape()
|
||||
{
|
||||
#if defined(CUTE_ARCH_CLUSTER_SM90_ENABLED)
|
||||
uint32_t x, y, z;
|
||||
asm volatile("mov.u32 %0, %cluster_nctaid.x;\n" : "=r"(x) : );
|
||||
asm volatile("mov.u32 %0, %cluster_nctaid.y;\n" : "=r"(y) : );
|
||||
asm volatile("mov.u32 %0, %cluster_nctaid.z;\n" : "=r"(z) : );
|
||||
asm volatile("mov.u32 %0, %%cluster_nctaid.x;\n" : "=r"(x) : );
|
||||
asm volatile("mov.u32 %0, %%cluster_nctaid.y;\n" : "=r"(y) : );
|
||||
asm volatile("mov.u32 %0, %%cluster_nctaid.z;\n" : "=r"(z) : );
|
||||
return {x, y, z};
|
||||
#else
|
||||
return {1,1,1};
|
||||
@ -152,7 +152,7 @@ CUTLASS_DEVICE uint32_t block_rank_in_cluster()
|
||||
{
|
||||
#if defined(CUTE_ARCH_CLUSTER_SM90_ENABLED)
|
||||
uint32_t rank;
|
||||
asm volatile("mov.u32 %0, %cluster_ctarank;\n" : "=r"(rank) :);
|
||||
asm volatile("mov.u32 %0, %%cluster_ctarank;\n" : "=r"(rank) :);
|
||||
return rank;
|
||||
#else
|
||||
return 0;
|
||||
|
||||
Reference in New Issue
Block a user