releaase 2.11 (#703)

This commit is contained in:
Aditya Atluri
2022-11-19 06:02:15 -08:00
committed by GitHub
parent 3c90f6aea6
commit c975e2ccbb
329 changed files with 47332 additions and 10607 deletions

View File

@ -47,10 +47,10 @@ namespace kernel {
/////////////////////////////////////////////////////////////////////////////////////////////////
/// Conversion template
/// Simple conversion function
template <typename Destination, typename Source, int Count>
__global__ void convert(
cutlass::Array<Destination, Count> *destination,
cutlass::Array<Destination, Count> *destination,
cutlass::Array<Source, Count> const *source) {
cutlass::NumericArrayConverter<Destination, Source, Count> convert;
@ -60,47 +60,9 @@ __global__ void convert(
/////////////////////////////////////////////////////////////////////////////////////////////////
} // namespace kernel
} // namespace core
} // namespace test
/////////////////////////////////////////////////////////////////////////////////////////////////
TEST(NumericConversion, f32_to_f16_rn) {
int const kN = 1;
using Source = float;
using Destination = cutlass::half_t;
dim3 grid(1, 1);
dim3 block(1, 1);
cutlass::HostTensor<cutlass::half_t, cutlass::layout::RowMajor> destination({1, kN});
cutlass::HostTensor<float, cutlass::layout::RowMajor> source({1, kN});
for (int i = 0; i < kN; ++i) {
source.host_data()[i] = float(i);
}
source.sync_device();
test::core::kernel::convert<Destination, Source, 1><<< grid, block >>>(
reinterpret_cast<cutlass::Array<Destination, 1> *>(destination.device_data()),
reinterpret_cast<cutlass::Array<Source, 1> const *>(source.device_data())
);
destination.sync_host();
for (int i = 0; i < kN; ++i) {
EXPECT_TRUE(float(destination.host_data()[i]) == source.host_data()[i]);
}
}
TEST(NumericConversion, f32x8_to_f16x8_rn) {
int const kN = 8;
using Source = float;
using Destination = cutlass::half_t;
template <typename Destination, typename Source, int Count>
void run_test() {
const int kN = Count;
dim3 grid(1, 1);
dim3 block(1, 1);
@ -109,12 +71,12 @@ TEST(NumericConversion, f32x8_to_f16x8_rn) {
cutlass::HostTensor<Source, cutlass::layout::RowMajor> source({1, kN});
for (int i = 0; i < kN; ++i) {
source.host_data()[i] = float(i);
source.host_data()[i] = Source(i % 4);
}
source.sync_device();
test::core::kernel::convert<Destination, Source, kN><<< grid, block >>>(
convert<Destination, Source, kN><<< grid, block >>>(
reinterpret_cast<cutlass::Array<Destination, kN> *>(destination.device_data()),
reinterpret_cast<cutlass::Array<Source, kN> const *>(source.device_data())
);
@ -122,70 +84,247 @@ TEST(NumericConversion, f32x8_to_f16x8_rn) {
destination.sync_host();
for (int i = 0; i < kN; ++i) {
EXPECT_TRUE(float(destination.host_data()[i]) == source.host_data()[i]);
EXPECT_TRUE(float(destination.host_data()[i]) == float(source.host_data()[i]));
}
}
} // namespace kernel
} // namespace core
} // namespace test
/////////////////////////////////////////////////////////////////////////////////////////////////
TEST(NumericConversion, f32_to_f16_rn) {
int const kN = 1;
using Source = float;
using Destination = cutlass::half_t;
test::core::kernel::run_test<Destination, Source, kN>();
}
TEST(NumericConversion, f32x8_to_f16x8_rn) {
int const kN = 8;
using Source = float;
using Destination = cutlass::half_t;
test::core::kernel::run_test<Destination, Source, kN>();
}
/////////////////////////////////////////////////////////////////////////////////////////////////
TEST(NumericConversion, f16_to_f32_rn) {
TEST(NumericConversion, f16_to_f32_rn) {
int const kN = 1;
using Source = cutlass::half_t;
using Destination = float;
dim3 grid(1, 1);
dim3 block(1, 1);
cutlass::HostTensor<float, cutlass::layout::RowMajor> destination({1, kN});
cutlass::HostTensor<cutlass::half_t, cutlass::layout::RowMajor> source({1, kN});
for (int i = 0; i < kN; ++i) {
source.host_data()[i] = Source(i);
}
source.sync_device();
test::core::kernel::convert<Destination, Source, kN><<< grid, block >>>(
reinterpret_cast<cutlass::Array<Destination, kN> *>(destination.device_data()),
reinterpret_cast<cutlass::Array<Source, kN> const *>(source.device_data())
);
destination.sync_host();
for (int i = 0; i < kN; ++i) {
EXPECT_TRUE(float(destination.host_data()[i]) == float(source.host_data()[i]));
}
test::core::kernel::run_test<Destination, Source, kN>();
}
TEST(NumericConversion, f16x8_to_f32x8_rn) {
int const kN = 8;
using Source = cutlass::half_t;
using Destination = float;
test::core::kernel::run_test<Destination, Source, kN>();
}
dim3 grid(1, 1);
dim3 block(1, 1);
/////////////////////////////////////////////////////////////////////////////////////////////////
cutlass::HostTensor<float, cutlass::layout::RowMajor> destination({1, kN});
cutlass::HostTensor<cutlass::half_t, cutlass::layout::RowMajor> source({1, kN});
TEST(NumericConversion, f32_to_fe4m3_rn) {
int const kN = 1;
using Source = float;
using Destination = cutlass::float_e4m3_t;
test::core::kernel::run_test<Destination, Source, kN>();
}
for (int i = 0; i < kN; ++i) {
source.host_data()[i] = float(i);
}
TEST(NumericConversion, f32_to_fe4m3_rn_array) {
int const kN = 27;
using Source = float;
using Destination = cutlass::float_e4m3_t;
source.sync_device();
test::core::kernel::run_test<Destination, Source, kN>();
}
test::core::kernel::convert<Destination, Source, kN><<< grid, block >>>(
reinterpret_cast<cutlass::Array<Destination, kN> *>(destination.device_data()),
reinterpret_cast<cutlass::Array<Source, kN> const *>(source.device_data())
);
TEST(NumericConversion, f32_to_fe5m2_rn) {
int const kN = 1;
using Source = float;
using Destination = cutlass::float_e5m2_t;
test::core::kernel::run_test<Destination, Source, kN>();
}
destination.sync_host();
TEST(NumericConversion, f32_to_fe5m2_rn_array) {
int const kN = 27;
using Source = float;
using Destination = cutlass::float_e5m2_t;
for (int i = 0; i < kN; ++i) {
EXPECT_TRUE(float(destination.host_data()[i]) == float(source.host_data()[i]));
}
test::core::kernel::run_test<Destination, Source, kN>();
}
TEST(NumericConversion, f16_to_fe4m3_rn) {
int const kN = 1;
using Source = cutlass::half_t;
using Destination = cutlass::float_e4m3_t;
test::core::kernel::run_test<Destination, Source, kN>();
}
TEST(NumericConversion, f16_to_fe4m3_rn_array) {
int const kN = 27;
using Source = cutlass::half_t;
using Destination = cutlass::float_e4m3_t;
test::core::kernel::run_test<Destination, Source, kN>();
}
TEST(NumericConversion, f16_to_fe5m2_rn) {
int const kN = 1;
using Source = cutlass::half_t;
using Destination = cutlass::float_e5m2_t;
test::core::kernel::run_test<Destination, Source, kN>();
}
TEST(NumericConversion, f16_to_fe5m2_rn_array) {
int const kN = 27;
using Source = cutlass::half_t;
using Destination = cutlass::float_e5m2_t;
test::core::kernel::run_test<Destination, Source, kN>();
}
TEST(NumericConversion, bf16_to_fe4m3_rn) {
int const kN = 1;
using Source = cutlass::bfloat16_t;
using Destination = cutlass::float_e4m3_t;
test::core::kernel::run_test<Destination, Source, kN>();
}
TEST(NumericConversion, bf16_to_fe4m3_rn_array) {
int const kN = 27;
using Source = cutlass::bfloat16_t;
using Destination = cutlass::float_e4m3_t;
test::core::kernel::run_test<Destination, Source, kN>();
}
TEST(NumericConversion, bf16_to_fe5m2_rn) {
int const kN = 1;
using Source = cutlass::bfloat16_t;
using Destination = cutlass::float_e5m2_t;
test::core::kernel::run_test<Destination, Source, kN>();
}
TEST(NumericConversion, bf16_to_fe5m2_rn_array) {
int const kN = 27;
using Source = cutlass::bfloat16_t;
using Destination = cutlass::float_e5m2_t;
test::core::kernel::run_test<Destination, Source, kN>();
}
/////////////////////////////////////////////////////////////////////////////////////////////////
TEST(NumericConversion, fe4m3_to_fe5m2_rn) {
int const kN = 1;
using Source = cutlass::float_e4m3_t;
using Destination = cutlass::float_e5m2_t;
test::core::kernel::run_test<Destination, Source, kN>();
}
TEST(NumericConversion, fe4m3_to_fe5m2_array) {
int const kN = 27;
using Source = cutlass::float_e4m3_t;
using Destination = cutlass::float_e5m2_t;
test::core::kernel::run_test<Destination, Source, kN>();
}
TEST(NumericConversion, fe5m2_to_fe4m3_rn) {
int const kN = 1;
using Source = cutlass::float_e5m2_t;
using Destination = cutlass::float_e4m3_t;
test::core::kernel::run_test<Destination, Source, kN>();
}
TEST(NumericConversion, fe5m2_to_fe4m3_array) {
int const kN = 27;
using Source = cutlass::float_e5m2_t;
using Destination = cutlass::float_e4m3_t;
test::core::kernel::run_test<Destination, Source, kN>();
}
TEST(NumericConversion, fe4m3_to_f32_rn) {
int const kN = 1;
using Source = cutlass::float_e4m3_t;
using Destination = float;
test::core::kernel::run_test<Destination, Source, kN>();
}
TEST(NumericConversion, fe4m3_to_f32_array) {
int const kN = 27;
using Source = cutlass::float_e4m3_t;
using Destination = float;
test::core::kernel::run_test<Destination, Source, kN>();
}
TEST(NumericConversion, fe5m2_to_f32_rn) {
int const kN = 1;
using Source = cutlass::float_e5m2_t;
using Destination = float;
test::core::kernel::run_test<Destination, Source, kN>();
}
TEST(NumericConversion, fe5m2_to_f32_array) {
int const kN = 27;
using Source = cutlass::float_e5m2_t;
using Destination = float;
test::core::kernel::run_test<Destination, Source, kN>();
}
TEST(NumericConversion, fe4m3_to_f16_rn) {
int const kN = 1;
using Source = cutlass::float_e4m3_t;
using Destination = cutlass::half_t;
test::core::kernel::run_test<Destination, Source, kN>();
}
TEST(NumericConversion, fe4m3_to_f16_array) {
int const kN = 27;
using Source = cutlass::float_e4m3_t;
using Destination = cutlass::half_t;
test::core::kernel::run_test<Destination, Source, kN>();
}
TEST(NumericConversion, fe5m2_to_f16_rn) {
int const kN = 1;
using Source = cutlass::float_e5m2_t;
using Destination = cutlass::half_t;
test::core::kernel::run_test<Destination, Source, kN>();
}
TEST(NumericConversion, fe5m2_to_f16_array) {
int const kN = 27;
using Source = cutlass::float_e5m2_t;
using Destination = cutlass::half_t;
test::core::kernel::run_test<Destination, Source, kN>();
}
TEST(NumericConversion, fe4m3_to_bf16_rn) {
int const kN = 1;
using Source = cutlass::float_e4m3_t;
using Destination = cutlass::bfloat16_t;
test::core::kernel::run_test<Destination, Source, kN>();
}
TEST(NumericConversion, fe4m3_to_bf16_array) {
int const kN = 27;
using Source = cutlass::float_e4m3_t;
using Destination = cutlass::bfloat16_t;
test::core::kernel::run_test<Destination, Source, kN>();
}
TEST(NumericConversion, fe5m2_to_bf16_rn) {
int const kN = 1;
using Source = cutlass::float_e5m2_t;
using Destination = cutlass::bfloat16_t;
test::core::kernel::run_test<Destination, Source, kN>();
}
TEST(NumericConversion, fe5m2_to_bf16_array) {
int const kN = 27;
using Source = cutlass::float_e5m2_t;
using Destination = cutlass::bfloat16_t;
test::core::kernel::run_test<Destination, Source, kN>();
}
/////////////////////////////////////////////////////////////////////////////////////////////////