CUTLASS 3.2.1 (#1113)
* Updates for 3.2.1 release.
* Minor fix in gemm op profiler for raster order.
* Add scheduler mapping for raster order in the kernels.
This commit is contained in:
@ -42,7 +42,6 @@
|
||||
|
||||
#include <cute/numeric/half.hpp>
|
||||
#include <cute/numeric/complex.hpp>
|
||||
|
||||
#include <cutlass/layout/layout.h>
|
||||
|
||||
// The computed infinity norm does not include
|
||||
@ -222,8 +221,10 @@ auto host_matrix_to_const_cute_tensor(CutlassHostTensorType& X)
|
||||
};
|
||||
|
||||
|
||||
// Returns EXIT_SUCCESS if the 2-norm relative error is exactly zero, else returns EXIT_FAILURE.
|
||||
// This makes the return value suitable as the return value of main().
|
||||
template <typename T1, typename T2>
|
||||
double
|
||||
int
|
||||
print_relative_error(
|
||||
std::size_t n,
|
||||
T1 const& data,
|
||||
@ -285,5 +286,5 @@ print_relative_error(
|
||||
if (print_error)
|
||||
printf("Maximum relative error: [%.5e]\n", max_ind_rel_err);
|
||||
|
||||
return tot_rel_err;
|
||||
return (tot_rel_err == 0.0) ? EXIT_SUCCESS : EXIT_FAILURE;
|
||||
}
|
||||
|
||||
@ -368,8 +368,8 @@ template <class TensorType>
|
||||
auto make_layout_rank3(const TensorType& tensor) {
|
||||
// append a batch mode of size 1 if we do not have tensors that are rank 3
|
||||
return make_layout(
|
||||
make_shape(get<0>(tensor.shape()), get<1>(tensor.shape()), Int<1>{}),
|
||||
make_stride(get<0>(tensor.stride()), get<1>(tensor.stride()), int64_t(cosize(tensor.layout()))));
|
||||
make_shape(cute::get<0>(tensor.shape()), cute::get<1>(tensor.shape()), cute::Int<1>{}),
|
||||
make_stride(cute::get<0>(tensor.stride()), cute::get<1>(tensor.stride()), int64_t(cosize(tensor.layout()))));
|
||||
}
|
||||
|
||||
/// GEMM - General Matrix-Matrix contraction without conjugation options
|
||||
|
||||
Reference in New Issue
Block a user