releaase 2.11 (#703)

2022-11-19 06:02:15 -08:00
parent 3c90f6aea6
commit c975e2ccbb
329 changed files with 47332 additions and 10607 deletions
--- a/test/unit/core/numeric_conversion.cu
+++ b/test/unit/core/numeric_conversion.cu
@ -47,10 +47,10 @@ namespace kernel {

 /////////////////////////////////////////////////////////////////////////////////////////////////

-/// Conversion template
+/// Simple conversion function
 template <typename Destination, typename Source, int Count>
 __global__ void convert(
-  cutlass::Array<Destination, Count> *destination, 
+  cutlass::Array<Destination, Count> *destination,
  cutlass::Array<Source, Count> const *source) {

  cutlass::NumericArrayConverter<Destination, Source, Count> convert;
@ -60,47 +60,9 @@ __global__ void convert(

 /////////////////////////////////////////////////////////////////////////////////////////////////

-} // namespace kernel
-} // namespace core
-} // namespace test
-
-/////////////////////////////////////////////////////////////////////////////////////////////////
-
-TEST(NumericConversion, f32_to_f16_rn) {
-
-  int const kN = 1;
-  using Source = float;
-  using Destination = cutlass::half_t;
-
-  dim3 grid(1, 1);
-  dim3 block(1, 1);
-
-  cutlass::HostTensor<cutlass::half_t, cutlass::layout::RowMajor> destination({1, kN});
-  cutlass::HostTensor<float, cutlass::layout::RowMajor> source({1, kN});
-
-  for (int i = 0; i < kN; ++i) {
-    source.host_data()[i] = float(i);
-  }
-
-  source.sync_device();
-
-  test::core::kernel::convert<Destination, Source, 1><<< grid, block >>>(
-    reinterpret_cast<cutlass::Array<Destination, 1> *>(destination.device_data()),
-    reinterpret_cast<cutlass::Array<Source, 1> const *>(source.device_data())
-  );
-
-  destination.sync_host();
-
-  for (int i = 0; i < kN; ++i) {
-    EXPECT_TRUE(float(destination.host_data()[i]) == source.host_data()[i]);
-  }
-}
-
-TEST(NumericConversion, f32x8_to_f16x8_rn) {
-
-  int const kN = 8;
-  using Source = float;
-  using Destination = cutlass::half_t;
+template <typename Destination, typename Source, int Count>
+void run_test() {
+  const int kN = Count;

  dim3 grid(1, 1);
  dim3 block(1, 1);
@ -109,12 +71,12 @@ TEST(NumericConversion, f32x8_to_f16x8_rn) {
  cutlass::HostTensor<Source, cutlass::layout::RowMajor> source({1, kN});

  for (int i = 0; i < kN; ++i) {
-    source.host_data()[i] = float(i);
+    source.host_data()[i] = Source(i % 4);
  }

  source.sync_device();

-  test::core::kernel::convert<Destination, Source, kN><<< grid, block >>>(
+  convert<Destination, Source, kN><<< grid, block >>>(
    reinterpret_cast<cutlass::Array<Destination, kN> *>(destination.device_data()),
    reinterpret_cast<cutlass::Array<Source, kN> const *>(source.device_data())
  );
@ -122,70 +84,247 @@ TEST(NumericConversion, f32x8_to_f16x8_rn) {
  destination.sync_host();

  for (int i = 0; i < kN; ++i) {
-    EXPECT_TRUE(float(destination.host_data()[i]) == source.host_data()[i]);
+    EXPECT_TRUE(float(destination.host_data()[i]) == float(source.host_data()[i]));
  }
 }

+} // namespace kernel
+} // namespace core
+} // namespace test
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+
+TEST(NumericConversion, f32_to_f16_rn) {
+  int const kN = 1;
+  using Source = float;
+  using Destination = cutlass::half_t;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}
+
+TEST(NumericConversion, f32x8_to_f16x8_rn) {
+  int const kN = 8;
+  using Source = float;
+  using Destination = cutlass::half_t;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}
+
 /////////////////////////////////////////////////////////////////////////////////////////////////

-TEST(NumericConversion, f16_to_f32_rn) {
-  
+TEST(NumericConversion, f16_to_f32_rn) {  
  int const kN = 1;
  using Source = cutlass::half_t;
  using Destination = float;
-
-  dim3 grid(1, 1);
-  dim3 block(1, 1);
-
-  cutlass::HostTensor<float, cutlass::layout::RowMajor> destination({1, kN});
-  cutlass::HostTensor<cutlass::half_t, cutlass::layout::RowMajor> source({1, kN});
-
-  for (int i = 0; i < kN; ++i) {
-    source.host_data()[i] = Source(i);
-  }
-
-  source.sync_device();
-
-  test::core::kernel::convert<Destination, Source, kN><<< grid, block >>>(
-    reinterpret_cast<cutlass::Array<Destination, kN> *>(destination.device_data()),
-    reinterpret_cast<cutlass::Array<Source, kN> const *>(source.device_data())
-  );
-
-  destination.sync_host();
-
-  for (int i = 0; i < kN; ++i) {
-    EXPECT_TRUE(float(destination.host_data()[i]) == float(source.host_data()[i]));
-  }
+  test::core::kernel::run_test<Destination, Source, kN>();
 }

 TEST(NumericConversion, f16x8_to_f32x8_rn) {
-
  int const kN = 8;
  using Source = cutlass::half_t;
  using Destination = float;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}

-  dim3 grid(1, 1);
-  dim3 block(1, 1);
+/////////////////////////////////////////////////////////////////////////////////////////////////

-  cutlass::HostTensor<float, cutlass::layout::RowMajor> destination({1, kN});
-  cutlass::HostTensor<cutlass::half_t, cutlass::layout::RowMajor> source({1, kN});
+TEST(NumericConversion, f32_to_fe4m3_rn) {
+  int const kN = 1;
+  using Source = float;
+  using Destination = cutlass::float_e4m3_t;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}

-  for (int i = 0; i < kN; ++i) {
-    source.host_data()[i] = float(i);
-  }
+TEST(NumericConversion, f32_to_fe4m3_rn_array) {
+  int const kN = 27;
+  using Source = float;
+  using Destination = cutlass::float_e4m3_t;

-  source.sync_device();
+  test::core::kernel::run_test<Destination, Source, kN>();
+}

-  test::core::kernel::convert<Destination, Source, kN><<< grid, block >>>(
-    reinterpret_cast<cutlass::Array<Destination, kN> *>(destination.device_data()),
-    reinterpret_cast<cutlass::Array<Source, kN> const *>(source.device_data())
-  );
+TEST(NumericConversion, f32_to_fe5m2_rn) {
+  int const kN = 1;
+  using Source = float;
+  using Destination = cutlass::float_e5m2_t;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}

-  destination.sync_host();
+TEST(NumericConversion, f32_to_fe5m2_rn_array) {
+  int const kN = 27;
+  using Source = float;
+  using Destination = cutlass::float_e5m2_t;

-  for (int i = 0; i < kN; ++i) {
-    EXPECT_TRUE(float(destination.host_data()[i]) == float(source.host_data()[i]));
-  }
+  test::core::kernel::run_test<Destination, Source, kN>();
+
+}
+
+TEST(NumericConversion, f16_to_fe4m3_rn) {
+  int const kN = 1;
+  using Source = cutlass::half_t;
+  using Destination = cutlass::float_e4m3_t;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}
+
+TEST(NumericConversion, f16_to_fe4m3_rn_array) {
+  int const kN = 27;
+  using Source = cutlass::half_t;
+  using Destination = cutlass::float_e4m3_t;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}
+
+TEST(NumericConversion, f16_to_fe5m2_rn) {
+  int const kN = 1;
+  using Source = cutlass::half_t;
+  using Destination = cutlass::float_e5m2_t;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}
+
+TEST(NumericConversion, f16_to_fe5m2_rn_array) {
+  int const kN = 27;
+  using Source = cutlass::half_t;
+  using Destination = cutlass::float_e5m2_t;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}
+
+TEST(NumericConversion, bf16_to_fe4m3_rn) {
+  int const kN = 1;
+  using Source = cutlass::bfloat16_t;
+  using Destination = cutlass::float_e4m3_t;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}
+
+TEST(NumericConversion, bf16_to_fe4m3_rn_array) {
+  int const kN = 27;
+  using Source = cutlass::bfloat16_t;
+  using Destination = cutlass::float_e4m3_t;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}
+
+TEST(NumericConversion, bf16_to_fe5m2_rn) {
+  int const kN = 1;
+  using Source = cutlass::bfloat16_t;
+  using Destination = cutlass::float_e5m2_t;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}
+
+TEST(NumericConversion, bf16_to_fe5m2_rn_array) {
+  int const kN = 27;
+  using Source = cutlass::bfloat16_t;
+  using Destination = cutlass::float_e5m2_t;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+
+TEST(NumericConversion, fe4m3_to_fe5m2_rn) {
+  int const kN = 1;
+  using Source = cutlass::float_e4m3_t;
+  using Destination = cutlass::float_e5m2_t;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}
+
+TEST(NumericConversion, fe4m3_to_fe5m2_array) {
+  int const kN = 27;
+  using Source = cutlass::float_e4m3_t;
+  using Destination = cutlass::float_e5m2_t;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}
+
+TEST(NumericConversion, fe5m2_to_fe4m3_rn) {
+  int const kN = 1;
+  using Source = cutlass::float_e5m2_t;
+  using Destination = cutlass::float_e4m3_t;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}
+
+TEST(NumericConversion, fe5m2_to_fe4m3_array) {
+  int const kN = 27;
+  using Source = cutlass::float_e5m2_t;
+  using Destination = cutlass::float_e4m3_t;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}
+
+TEST(NumericConversion, fe4m3_to_f32_rn) {
+  int const kN = 1;
+  using Source = cutlass::float_e4m3_t;
+  using Destination = float;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}
+
+TEST(NumericConversion, fe4m3_to_f32_array) {
+  int const kN = 27;
+  using Source = cutlass::float_e4m3_t;
+  using Destination = float;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}
+
+TEST(NumericConversion, fe5m2_to_f32_rn) {
+  int const kN = 1;
+  using Source = cutlass::float_e5m2_t;
+  using Destination = float;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}
+
+TEST(NumericConversion, fe5m2_to_f32_array) {
+  int const kN = 27;
+  using Source = cutlass::float_e5m2_t;
+  using Destination = float;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}
+
+TEST(NumericConversion, fe4m3_to_f16_rn) {
+  int const kN = 1;
+  using Source = cutlass::float_e4m3_t;
+  using Destination = cutlass::half_t;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}
+
+TEST(NumericConversion, fe4m3_to_f16_array) {
+  int const kN = 27;
+  using Source = cutlass::float_e4m3_t;
+  using Destination = cutlass::half_t;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}
+
+TEST(NumericConversion, fe5m2_to_f16_rn) {
+  int const kN = 1;
+  using Source = cutlass::float_e5m2_t;
+  using Destination = cutlass::half_t;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}
+
+TEST(NumericConversion, fe5m2_to_f16_array) {
+  int const kN = 27;
+  using Source = cutlass::float_e5m2_t;
+  using Destination = cutlass::half_t;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}
+
+TEST(NumericConversion, fe4m3_to_bf16_rn) {
+  int const kN = 1;
+  using Source = cutlass::float_e4m3_t;
+  using Destination = cutlass::bfloat16_t;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}
+
+TEST(NumericConversion, fe4m3_to_bf16_array) {
+  int const kN = 27;
+  using Source = cutlass::float_e4m3_t;
+  using Destination = cutlass::bfloat16_t;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}
+
+TEST(NumericConversion, fe5m2_to_bf16_rn) {
+  int const kN = 1;
+  using Source = cutlass::float_e5m2_t;
+  using Destination = cutlass::bfloat16_t;
+  test::core::kernel::run_test<Destination, Source, kN>();
+}
+
+TEST(NumericConversion, fe5m2_to_bf16_array) {
+  int const kN = 27;
+  using Source = cutlass::float_e5m2_t;
+  using Destination = cutlass::bfloat16_t;
+  test::core::kernel::run_test<Destination, Source, kN>();
 }

 /////////////////////////////////////////////////////////////////////////////////////////////////