3.6.0 update (#2005)

* 3.6.0 update

* doc and swap stuff

---------

Co-authored-by: yuzhai <yuzhai@nvidia.com>
Co-authored-by: Haicheng Wu <haichengw@nvidia.com>
This commit is contained in:
Yujia Zhai
2024-12-24 22:34:40 -08:00
committed by GitHub
parent e1cd8c7866
commit 3d261a5974
258 changed files with 10863 additions and 3883 deletions

View File

@ -39,11 +39,7 @@
#pragma once
#if defined(__CUDACC_RTC__)
#include <cuda/std/cassert>
#else
#include <assert.h>
#endif
#include "cutlass/cutlass.h"
#include "cutlass/numeric_types.h"
@ -53,12 +49,9 @@
#include "cutlass/tensor_coord.h"
#include "cutlass/aligned_buffer.h"
#include "cutlass/functional.h"
#include "cutlass/gemm/gemm.h"
#include "cutlass/transform/pitch_linear_thread_map.h"
#include "cutlass/transform/threadblock/regular_tile_iterator.h"
#include "cutlass/epilogue/threadblock/epilogue_base.h"
#include "cutlass/epilogue/threadblock/predicated_tile_iterator.h"

View File

@ -43,7 +43,7 @@ class gen_test:
def gen_cpp_sample(self):
code = "/* Auto Generated code - Do not edit.*/\n"
code += "#include <stdio.h> \n"
code += "#include <cstdio> \n"
code += "#include \"cutlass/gemm/device/gemm_batched.h\" \n"
code += "#include \"cutlass/cutlass.h\" \n"

View File

@ -380,7 +380,7 @@ class gen_one_API:
def gen_CUTLASS_irrelevant_API(self):
code = ""
code += "#include <cuda_runtime.h>\n"
code += "#include <assert.h>\n"
code += "#include <cassert>\n"
param_name = "Fused" + str(self.b2b_num) + "xGemm_"
for i in range(self.b2b_num):