Srinath Kailasa
2025-07-31 03:14:54 +01:00
committed by GitHub
parent 6fb5e667c1
commit 3b054767b3
3 changed files with 12 additions and 12 deletions


@@ -86,7 +86,7 @@ and destroying the `Tensor` doesn't deallocate the array of elements.
 This has implications for developers of generic `Tensor` algorithms.
 For example, input `Tensor` parameters of a function
-should be passed by referece or const reference,
+should be passed by reference or const reference,
 because passing a `Tensor` by value
 may or may not make a deep copy of the `Tensor`'s elements.
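The guidance being corrected above is worth illustrating. Below is a minimal sketch using a toy non-owning view (not CUTLASS's actual `Tensor` type) of why const reference is the safe default for generic tensor parameters:

// A toy non-owning view: destroying it does not free the elements,
// mirroring the `Tensor` semantics described in the doc above.
#include <cassert>

template <typename Element>
struct ToyTensorView {
  Element* data;  // borrowed pointer to storage owned elsewhere
  int size;
};

// Recommended style: take the view by const reference. Pass-by-value would
// copy only the view here, but other Tensor types might deep-copy the
// elements, so generic code should not rely on either behavior.
template <typename Element>
Element sum(ToyTensorView<Element> const& view) {
  Element total{};
  for (int i = 0; i < view.size; ++i) {
    total += view.data[i];
  }
  return total;
}

int main() {
  float storage[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  ToyTensorView<float> view{storage, 4};
  assert(sum(view) == 10.0f);
  return 0;
}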


@@ -72,8 +72,8 @@ namespace layout {
   // test capacity
   auto capacity = tensor_nhwc.capacity(cutlass::Tensor4DCoord(n_size, h_size, w_size, c_size));
-  decltype(capacity) referece_capacity = ldh * n_size;
-  EXPECT_EQ(capacity, referece_capacity);
+  decltype(capacity) reference_capacity = ldh * n_size;
+  EXPECT_EQ(capacity, reference_capacity);
   // test packed
   auto packed_tensor_layout = tensor_nhwc.packed(cutlass::Tensor4DCoord(n_size, h_size, w_size, c_size));
@@ -115,8 +115,8 @@ namespace layout {
   // test capacity
   auto capacity = tensor_nchw.capacity(cutlass::Tensor4DCoord(n_size, h_size, w_size, c_size));
-  decltype(capacity) referece_capacity = ldc * n_size;
-  EXPECT_EQ(capacity, referece_capacity);
+  decltype(capacity) reference_capacity = ldc * n_size;
+  EXPECT_EQ(capacity, reference_capacity);
   // test packed
   auto packed_tensor_layout = tensor_nchw.packed(cutlass::Tensor4DCoord(n_size, h_size, w_size, c_size));
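Both test hunks assert the same relationship between strides and capacity. A short sketch of the arithmetic behind `reference_capacity = ldh * n_size` (plain C++, not the CUTLASS implementation, using the padded NHWC strides from the test below):

// For an NHWC layout with strides {ldc, ldw, ldh}, the linear offset of
// coordinate (n, h, w, c) is n * ldh + h * ldw + w * ldc + c, so the
// outermost stride times the outermost extent bounds every offset. That
// bound is the capacity the tests expect.
#include <cassert>

int main() {
  int n_size = 2, h_size = 3, w_size = 5, c_size = 7;
  int ldc = c_size + 1;          // elements between consecutive w positions (padded)
  int ldw = ldc * (w_size + 2);  // elements between consecutive h positions (padded)
  int ldh = ldw * (h_size + 3);  // elements between consecutive n positions (padded)

  int capacity = ldh * n_size;   // value the tests compare against capacity()

  // The last addressable element must fall inside that capacity.
  int last_offset = (n_size - 1) * ldh + (h_size - 1) * ldw
                  + (w_size - 1) * ldc + (c_size - 1);
  assert(last_offset < capacity);
  return 0;
}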


@@ -34,19 +34,19 @@
 #include "../common/cutlass_unit_test.h"
 #include "cutlass/layout/tensor.h"
-#include "cutlass/util/device_memory.h"
+#include "cutlass/util/device_memory.h"
 /////////////////////////////////////////////////////////////////////////////////////////////////
 namespace test {
 namespace layout {
 void test_nhwc_layout(int n_size, int h_size, int w_size, int c_size) {
   int ldc = c_size + 1;
   int ldw = ldc * (w_size + 2);
   int ldh = ldw * (h_size + 3);
   typedef cutlass::layout::TensorNHWC Tensor;
   Tensor::Stride tensor_stride({ ldc, ldw, ldh });
   Tensor tensor_nhw_packed_c(tensor_stride);
@@ -74,8 +74,8 @@ namespace layout {
   // test capacity
   auto capacity = tensor_nhw_packed_c.capacity(
     cutlass::Tensor4DCoord(n_size, h_size, w_size, c_size));
-  decltype(capacity) referece_capacity = ldh * n_size;
-  EXPECT_EQ(capacity, referece_capacity);
+  decltype(capacity) reference_capacity = ldh * n_size;
+  EXPECT_EQ(capacity, reference_capacity);
 }
@@ -86,7 +86,7 @@ namespace layout {
   int ldh = ldw * h_size;
   typedef cutlass::layout::TensorNHWC Tensor;
   Tensor::Stride tensor_stride({ ldc, ldw, ldh });
   Tensor tensor_nhw_packed_c(tensor_stride);
@@ -130,7 +130,7 @@ namespace layout {
   dim3 grid(1,1);
   dim3 block(c_size, 1, 1);
-  test::layout::test_nhwc_inverse<<< grid, block >>>(output.get(),
+  test::layout::test_nhwc_inverse<<< grid, block >>>(output.get(),
     n_size, h_size, w_size, c_size);
   cudaError_t result = cudaDeviceSynchronize();
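The last hunk shows the launch-and-synchronize pattern the test relies on to surface kernel errors. A minimal standalone sketch of that pattern (hypothetical `dummy_kernel`; the real test launches `test::layout::test_nhwc_inverse`):

#include <cstdio>
#include <cuda_runtime.h>

// Hypothetical stand-in for the test kernel.
__global__ void dummy_kernel(int* out) {
  out[threadIdx.x] = static_cast<int>(threadIdx.x);
}

int main() {
  int* out = nullptr;
  cudaMalloc(&out, 32 * sizeof(int));

  dim3 grid(1, 1);
  dim3 block(32, 1, 1);
  dummy_kernel<<< grid, block >>>(out);

  // Kernel launches are asynchronous: synchronizing and checking the
  // returned cudaError_t is what catches launch or execution failures,
  // as the test above does.
  cudaError_t result = cudaDeviceSynchronize();
  if (result != cudaSuccess) {
    std::printf("CUDA error: %s\n", cudaGetErrorString(result));
  }

  cudaFree(out);
  return result == cudaSuccess ? 0 : 1;
}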