releaase 2.11 (#703)
This commit is contained in:
@ -47,10 +47,10 @@ namespace kernel {
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/// Conversion template
|
||||
/// Simple conversion function
|
||||
template <typename Destination, typename Source, int Count>
|
||||
__global__ void convert(
|
||||
cutlass::Array<Destination, Count> *destination,
|
||||
cutlass::Array<Destination, Count> *destination,
|
||||
cutlass::Array<Source, Count> const *source) {
|
||||
|
||||
cutlass::NumericArrayConverter<Destination, Source, Count> convert;
|
||||
@ -60,47 +60,9 @@ __global__ void convert(
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
} // namespace kernel
|
||||
} // namespace core
|
||||
} // namespace test
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
TEST(NumericConversion, f32_to_f16_rn) {
|
||||
|
||||
int const kN = 1;
|
||||
using Source = float;
|
||||
using Destination = cutlass::half_t;
|
||||
|
||||
dim3 grid(1, 1);
|
||||
dim3 block(1, 1);
|
||||
|
||||
cutlass::HostTensor<cutlass::half_t, cutlass::layout::RowMajor> destination({1, kN});
|
||||
cutlass::HostTensor<float, cutlass::layout::RowMajor> source({1, kN});
|
||||
|
||||
for (int i = 0; i < kN; ++i) {
|
||||
source.host_data()[i] = float(i);
|
||||
}
|
||||
|
||||
source.sync_device();
|
||||
|
||||
test::core::kernel::convert<Destination, Source, 1><<< grid, block >>>(
|
||||
reinterpret_cast<cutlass::Array<Destination, 1> *>(destination.device_data()),
|
||||
reinterpret_cast<cutlass::Array<Source, 1> const *>(source.device_data())
|
||||
);
|
||||
|
||||
destination.sync_host();
|
||||
|
||||
for (int i = 0; i < kN; ++i) {
|
||||
EXPECT_TRUE(float(destination.host_data()[i]) == source.host_data()[i]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(NumericConversion, f32x8_to_f16x8_rn) {
|
||||
|
||||
int const kN = 8;
|
||||
using Source = float;
|
||||
using Destination = cutlass::half_t;
|
||||
template <typename Destination, typename Source, int Count>
|
||||
void run_test() {
|
||||
const int kN = Count;
|
||||
|
||||
dim3 grid(1, 1);
|
||||
dim3 block(1, 1);
|
||||
@ -109,12 +71,12 @@ TEST(NumericConversion, f32x8_to_f16x8_rn) {
|
||||
cutlass::HostTensor<Source, cutlass::layout::RowMajor> source({1, kN});
|
||||
|
||||
for (int i = 0; i < kN; ++i) {
|
||||
source.host_data()[i] = float(i);
|
||||
source.host_data()[i] = Source(i % 4);
|
||||
}
|
||||
|
||||
source.sync_device();
|
||||
|
||||
test::core::kernel::convert<Destination, Source, kN><<< grid, block >>>(
|
||||
convert<Destination, Source, kN><<< grid, block >>>(
|
||||
reinterpret_cast<cutlass::Array<Destination, kN> *>(destination.device_data()),
|
||||
reinterpret_cast<cutlass::Array<Source, kN> const *>(source.device_data())
|
||||
);
|
||||
@ -122,70 +84,247 @@ TEST(NumericConversion, f32x8_to_f16x8_rn) {
|
||||
destination.sync_host();
|
||||
|
||||
for (int i = 0; i < kN; ++i) {
|
||||
EXPECT_TRUE(float(destination.host_data()[i]) == source.host_data()[i]);
|
||||
EXPECT_TRUE(float(destination.host_data()[i]) == float(source.host_data()[i]));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace kernel
|
||||
} // namespace core
|
||||
} // namespace test
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
TEST(NumericConversion, f32_to_f16_rn) {
|
||||
int const kN = 1;
|
||||
using Source = float;
|
||||
using Destination = cutlass::half_t;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
TEST(NumericConversion, f32x8_to_f16x8_rn) {
|
||||
int const kN = 8;
|
||||
using Source = float;
|
||||
using Destination = cutlass::half_t;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
TEST(NumericConversion, f16_to_f32_rn) {
|
||||
|
||||
TEST(NumericConversion, f16_to_f32_rn) {
|
||||
int const kN = 1;
|
||||
using Source = cutlass::half_t;
|
||||
using Destination = float;
|
||||
|
||||
dim3 grid(1, 1);
|
||||
dim3 block(1, 1);
|
||||
|
||||
cutlass::HostTensor<float, cutlass::layout::RowMajor> destination({1, kN});
|
||||
cutlass::HostTensor<cutlass::half_t, cutlass::layout::RowMajor> source({1, kN});
|
||||
|
||||
for (int i = 0; i < kN; ++i) {
|
||||
source.host_data()[i] = Source(i);
|
||||
}
|
||||
|
||||
source.sync_device();
|
||||
|
||||
test::core::kernel::convert<Destination, Source, kN><<< grid, block >>>(
|
||||
reinterpret_cast<cutlass::Array<Destination, kN> *>(destination.device_data()),
|
||||
reinterpret_cast<cutlass::Array<Source, kN> const *>(source.device_data())
|
||||
);
|
||||
|
||||
destination.sync_host();
|
||||
|
||||
for (int i = 0; i < kN; ++i) {
|
||||
EXPECT_TRUE(float(destination.host_data()[i]) == float(source.host_data()[i]));
|
||||
}
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
TEST(NumericConversion, f16x8_to_f32x8_rn) {
|
||||
|
||||
int const kN = 8;
|
||||
using Source = cutlass::half_t;
|
||||
using Destination = float;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
dim3 grid(1, 1);
|
||||
dim3 block(1, 1);
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
cutlass::HostTensor<float, cutlass::layout::RowMajor> destination({1, kN});
|
||||
cutlass::HostTensor<cutlass::half_t, cutlass::layout::RowMajor> source({1, kN});
|
||||
TEST(NumericConversion, f32_to_fe4m3_rn) {
|
||||
int const kN = 1;
|
||||
using Source = float;
|
||||
using Destination = cutlass::float_e4m3_t;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
for (int i = 0; i < kN; ++i) {
|
||||
source.host_data()[i] = float(i);
|
||||
}
|
||||
TEST(NumericConversion, f32_to_fe4m3_rn_array) {
|
||||
int const kN = 27;
|
||||
using Source = float;
|
||||
using Destination = cutlass::float_e4m3_t;
|
||||
|
||||
source.sync_device();
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
test::core::kernel::convert<Destination, Source, kN><<< grid, block >>>(
|
||||
reinterpret_cast<cutlass::Array<Destination, kN> *>(destination.device_data()),
|
||||
reinterpret_cast<cutlass::Array<Source, kN> const *>(source.device_data())
|
||||
);
|
||||
TEST(NumericConversion, f32_to_fe5m2_rn) {
|
||||
int const kN = 1;
|
||||
using Source = float;
|
||||
using Destination = cutlass::float_e5m2_t;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
destination.sync_host();
|
||||
TEST(NumericConversion, f32_to_fe5m2_rn_array) {
|
||||
int const kN = 27;
|
||||
using Source = float;
|
||||
using Destination = cutlass::float_e5m2_t;
|
||||
|
||||
for (int i = 0; i < kN; ++i) {
|
||||
EXPECT_TRUE(float(destination.host_data()[i]) == float(source.host_data()[i]));
|
||||
}
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
|
||||
}
|
||||
|
||||
TEST(NumericConversion, f16_to_fe4m3_rn) {
|
||||
int const kN = 1;
|
||||
using Source = cutlass::half_t;
|
||||
using Destination = cutlass::float_e4m3_t;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
TEST(NumericConversion, f16_to_fe4m3_rn_array) {
|
||||
int const kN = 27;
|
||||
using Source = cutlass::half_t;
|
||||
using Destination = cutlass::float_e4m3_t;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
TEST(NumericConversion, f16_to_fe5m2_rn) {
|
||||
int const kN = 1;
|
||||
using Source = cutlass::half_t;
|
||||
using Destination = cutlass::float_e5m2_t;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
TEST(NumericConversion, f16_to_fe5m2_rn_array) {
|
||||
int const kN = 27;
|
||||
using Source = cutlass::half_t;
|
||||
using Destination = cutlass::float_e5m2_t;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
TEST(NumericConversion, bf16_to_fe4m3_rn) {
|
||||
int const kN = 1;
|
||||
using Source = cutlass::bfloat16_t;
|
||||
using Destination = cutlass::float_e4m3_t;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
TEST(NumericConversion, bf16_to_fe4m3_rn_array) {
|
||||
int const kN = 27;
|
||||
using Source = cutlass::bfloat16_t;
|
||||
using Destination = cutlass::float_e4m3_t;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
TEST(NumericConversion, bf16_to_fe5m2_rn) {
|
||||
int const kN = 1;
|
||||
using Source = cutlass::bfloat16_t;
|
||||
using Destination = cutlass::float_e5m2_t;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
TEST(NumericConversion, bf16_to_fe5m2_rn_array) {
|
||||
int const kN = 27;
|
||||
using Source = cutlass::bfloat16_t;
|
||||
using Destination = cutlass::float_e5m2_t;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
TEST(NumericConversion, fe4m3_to_fe5m2_rn) {
|
||||
int const kN = 1;
|
||||
using Source = cutlass::float_e4m3_t;
|
||||
using Destination = cutlass::float_e5m2_t;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
TEST(NumericConversion, fe4m3_to_fe5m2_array) {
|
||||
int const kN = 27;
|
||||
using Source = cutlass::float_e4m3_t;
|
||||
using Destination = cutlass::float_e5m2_t;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
TEST(NumericConversion, fe5m2_to_fe4m3_rn) {
|
||||
int const kN = 1;
|
||||
using Source = cutlass::float_e5m2_t;
|
||||
using Destination = cutlass::float_e4m3_t;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
TEST(NumericConversion, fe5m2_to_fe4m3_array) {
|
||||
int const kN = 27;
|
||||
using Source = cutlass::float_e5m2_t;
|
||||
using Destination = cutlass::float_e4m3_t;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
TEST(NumericConversion, fe4m3_to_f32_rn) {
|
||||
int const kN = 1;
|
||||
using Source = cutlass::float_e4m3_t;
|
||||
using Destination = float;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
TEST(NumericConversion, fe4m3_to_f32_array) {
|
||||
int const kN = 27;
|
||||
using Source = cutlass::float_e4m3_t;
|
||||
using Destination = float;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
TEST(NumericConversion, fe5m2_to_f32_rn) {
|
||||
int const kN = 1;
|
||||
using Source = cutlass::float_e5m2_t;
|
||||
using Destination = float;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
TEST(NumericConversion, fe5m2_to_f32_array) {
|
||||
int const kN = 27;
|
||||
using Source = cutlass::float_e5m2_t;
|
||||
using Destination = float;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
TEST(NumericConversion, fe4m3_to_f16_rn) {
|
||||
int const kN = 1;
|
||||
using Source = cutlass::float_e4m3_t;
|
||||
using Destination = cutlass::half_t;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
TEST(NumericConversion, fe4m3_to_f16_array) {
|
||||
int const kN = 27;
|
||||
using Source = cutlass::float_e4m3_t;
|
||||
using Destination = cutlass::half_t;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
TEST(NumericConversion, fe5m2_to_f16_rn) {
|
||||
int const kN = 1;
|
||||
using Source = cutlass::float_e5m2_t;
|
||||
using Destination = cutlass::half_t;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
TEST(NumericConversion, fe5m2_to_f16_array) {
|
||||
int const kN = 27;
|
||||
using Source = cutlass::float_e5m2_t;
|
||||
using Destination = cutlass::half_t;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
TEST(NumericConversion, fe4m3_to_bf16_rn) {
|
||||
int const kN = 1;
|
||||
using Source = cutlass::float_e4m3_t;
|
||||
using Destination = cutlass::bfloat16_t;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
TEST(NumericConversion, fe4m3_to_bf16_array) {
|
||||
int const kN = 27;
|
||||
using Source = cutlass::float_e4m3_t;
|
||||
using Destination = cutlass::bfloat16_t;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
TEST(NumericConversion, fe5m2_to_bf16_rn) {
|
||||
int const kN = 1;
|
||||
using Source = cutlass::float_e5m2_t;
|
||||
using Destination = cutlass::bfloat16_t;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
TEST(NumericConversion, fe5m2_to_bf16_array) {
|
||||
int const kN = 27;
|
||||
using Source = cutlass::float_e5m2_t;
|
||||
using Destination = cutlass::bfloat16_t;
|
||||
test::core::kernel::run_test<Destination, Source, kN>();
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
Reference in New Issue
Block a user