CUTLASS 3.2.1 (#1113)

* Updates for 3.2.1 release.

* Minor fix in gemm op profiler for raster order.

* Add scheduler mapping for raster order in the kernels.
This commit is contained in:
ANIKET SHIVAM
2023-09-26 14:24:26 -07:00
committed by GitHub
parent e0aaa3c3b3
commit 90d3b0fb18
428 changed files with 22253 additions and 21762 deletions

View File

@ -42,7 +42,6 @@
#include <cute/numeric/half.hpp>
#include <cute/numeric/complex.hpp>
#include <cutlass/layout/layout.h>
// The computed infinity norm does not include
@ -222,8 +221,10 @@ auto host_matrix_to_const_cute_tensor(CutlassHostTensorType& X)
};
// Returns EXIT_SUCCESS if the 2-norm relative error is exactly zero, else returns EXIT_FAILURE.
// The result can therefore be used directly as the return value of main().
template <typename T1, typename T2>
double
int
print_relative_error(
std::size_t n,
T1 const& data,
@ -285,5 +286,5 @@ print_relative_error(
if (print_error)
printf("Maximum relative error: [%.5e]\n", max_ind_rel_err);
return tot_rel_err;
return (tot_rel_err == 0.0) ? EXIT_SUCCESS : EXIT_FAILURE;
}

View File

@ -368,8 +368,8 @@ template <class TensorType>
/// Builds a layout with three modes from `tensor`: modes 0 and 1 are taken
/// from the tensor's own shape and stride, and a third mode of static
/// extent 1 is appended.
auto make_layout_rank3(const TensorType& tensor) {
// Append a batch mode of size 1 if we do not have tensors that are rank 3.
// The appended mode's stride is cosize(tensor.layout()), converted to int64_t.
// NOTE(review): the unqualified get/Int lines and the cute::-qualified lines
// below look like the before/after sides of this diff hunk rather than four
// live statements -- confirm against the actual file.
return make_layout(
make_shape(get<0>(tensor.shape()), get<1>(tensor.shape()), Int<1>{}),
make_stride(get<0>(tensor.stride()), get<1>(tensor.stride()), int64_t(cosize(tensor.layout()))));
make_shape(cute::get<0>(tensor.shape()), cute::get<1>(tensor.shape()), cute::Int<1>{}),
make_stride(cute::get<0>(tensor.stride()), cute::get<1>(tensor.stride()), int64_t(cosize(tensor.layout()))));
}
/// GEMM - General Matrix-Matrix contraction without conjugation options