3.6.0 update (#2005)
* 3.6.0 update
* doc and swap stuff

---------

Co-authored-by: yuzhai <yuzhai@nvidia.com>
Co-authored-by: Haicheng Wu <haichengw@nvidia.com>
This commit is contained in:
@@ -39,11 +39,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(__CUDACC_RTC__)
|
||||
#include <cuda/std/cassert>
|
||||
#else
|
||||
#include <assert.h>
|
||||
#endif
|
||||
|
||||
#include "cutlass/cutlass.h"
|
||||
#include "cutlass/numeric_types.h"
|
||||
@@ -53,12 +49,9 @@
|
||||
#include "cutlass/tensor_coord.h"
|
||||
#include "cutlass/aligned_buffer.h"
|
||||
#include "cutlass/functional.h"
|
||||
|
||||
#include "cutlass/gemm/gemm.h"
|
||||
|
||||
#include "cutlass/transform/pitch_linear_thread_map.h"
|
||||
#include "cutlass/transform/threadblock/regular_tile_iterator.h"
|
||||
|
||||
#include "cutlass/epilogue/threadblock/epilogue_base.h"
|
||||
#include "cutlass/epilogue/threadblock/predicated_tile_iterator.h"
|
||||
|
||||
|
||||
@@ -43,7 +43,7 @@ class gen_test:
|
||||
|
||||
def gen_cpp_sample(self):
|
||||
code = "/* Auto Generated code - Do not edit.*/\n"
|
||||
code += "#include <stdio.h> \n"
|
||||
code += "#include <cstdio> \n"
|
||||
|
||||
code += "#include \"cutlass/gemm/device/gemm_batched.h\" \n"
|
||||
code += "#include \"cutlass/cutlass.h\" \n"
|
||||
|
||||
@@ -380,7 +380,7 @@ class gen_one_API:
|
||||
def gen_CUTLASS_irrelevant_API(self):
|
||||
code = ""
|
||||
code += "#include <cuda_runtime.h>\n"
|
||||
code += "#include <assert.h>\n"
|
||||
code += "#include <cassert>\n"
|
||||
|
||||
param_name = "Fused" + str(self.b2b_num) + "xGemm_"
|
||||
for i in range(self.b2b_num):
|
||||
|
||||
Reference in New Issue
Block a user