CUTLASS 2.10 updates (#622)

Co-authored-by: Aniket Shivam <ashivam@nvidia.com>
This commit is contained in:
ANIKET SHIVAM
2022-09-12 18:26:30 -07:00
committed by GitHub
parent beae168f90
commit e773429f7e
96 changed files with 8365 additions and 1667 deletions

View File

@ -189,3 +189,35 @@ TEST(NumericConversion, f16x8_to_f32x8_rn) {
}
/////////////////////////////////////////////////////////////////////////////////////////////////
TEST(NumericConversion, f32x8_to_s8x8_rn) {
int const kN = 8;
using Source = float;
using Destination = int8_t;
dim3 grid(1, 1);
dim3 block(1, 1);
cutlass::HostTensor<Destination, cutlass::layout::RowMajor> destination({1, kN});
cutlass::HostTensor<Source, cutlass::layout::RowMajor> source({1, kN});
for (int i = 0; i < kN; ++i) {
source.host_data()[i] = float(i);
}
source.sync_device();
test::core::kernel::convert<Destination, Source, kN><<< grid, block >>>(
reinterpret_cast<cutlass::Array<Destination, kN> *>(destination.device_data()),
reinterpret_cast<cutlass::Array<Source, kN> const *>(source.device_data())
);
destination.sync_host();
for (int i = 0; i < kN; ++i) {
EXPECT_TRUE(float(destination.host_data()[i]) == source.host_data()[i]);
}
}
/////////////////////////////////////////////////////////////////////////////////////////////////