Updates for 3.4 release. (#1305)
This commit is contained in:
@ -38,9 +38,10 @@
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
|
||||
#include <cute/tensor.hpp>
|
||||
#include <cute/container/bit_field.hpp>
|
||||
|
||||
#include <cute/algorithm/tuple_algorithms.hpp>
|
||||
|
||||
using namespace cute;
|
||||
|
||||
TEST(CuTe_core, Bitfield)
|
||||
|
||||
@ -43,26 +43,30 @@ test_complement(Layout const& layout, CoSizeHi const& cosize_hi)
|
||||
|
||||
auto result = complement(layout, cosize_hi);
|
||||
|
||||
CUTLASS_TRACE_HOST("complement( " << layout << ", " << cosize_hi << ") => " << result);
|
||||
CUTLASS_TRACE_HOST("complement(" << layout << ", " << cosize_hi << ") => " << result);
|
||||
|
||||
// Post-condition on the domain size of the complement (1)
|
||||
EXPECT_GE( size(result), cosize_hi / size(filter(layout)));
|
||||
// Post-condition on the codomain size of the complement (2)
|
||||
EXPECT_LE(cosize(result), cute::ceil_div(cosize_hi, cosize(layout)) * cosize(layout));
|
||||
auto completed = make_layout(layout, result);
|
||||
|
||||
// Lower-bound on the codomain size of the layout ++ complement (1)
|
||||
EXPECT_GE(cosize(completed), cosize_hi);
|
||||
// Upper-bound on the codomain size of the complement (2)
|
||||
EXPECT_LE(cosize(result), cute::round_up(cosize_hi, cosize(layout)));
|
||||
|
||||
// Post-condition on the codomain of the complement
|
||||
for (int i = 1; i < size(result); ++i) {
|
||||
EXPECT_LT(result(i-1), result(i)); // Ordered (3)
|
||||
for (int j = 0; j < size(layout); ++j) {
|
||||
EXPECT_NE(result(i), layout(j)); // Complemented (4)
|
||||
EXPECT_NE(result(i), layout(j)); // Disjoint (4)
|
||||
}
|
||||
}
|
||||
|
||||
// Other observations
|
||||
EXPECT_LE(size(result),cosize(result)); // As a result of the ordered condition (3)
|
||||
EXPECT_GE(cosize(result), cosize_hi / size(filter(layout))); // As a result of (1) (2) and (5)
|
||||
if constexpr (is_static<decltype(stride(make_layout(layout,result)))>::value) { // If we can apply complement again
|
||||
EXPECT_EQ(size(complement(make_layout(layout,result))), 1); // There's no more codomain left over
|
||||
EXPECT_LE(size(result), cosize(result)); // As a result of the ordered condition (3)
|
||||
EXPECT_GE(size(result), cosize_hi / size(filter(layout)));
|
||||
EXPECT_LE(cosize(completed), cosize(result) + cosize(layout));
|
||||
EXPECT_GE(cosize(result), cosize_hi / size(filter(layout)));
|
||||
if constexpr (is_static<decltype(stride(completed))>::value) { // If we can apply complement again
|
||||
EXPECT_EQ(size(complement(completed)), 1); // There's no more codomain left over
|
||||
}
|
||||
}
|
||||
|
||||
@ -125,6 +129,7 @@ TEST(CuTe_core, Complement)
|
||||
test_complement(layout, Int<1>{});
|
||||
test_complement(layout);
|
||||
test_complement(layout, Int<16>{});
|
||||
test_complement(layout, Int<19>{});
|
||||
}
|
||||
|
||||
{
|
||||
@ -153,6 +158,12 @@ TEST(CuTe_core, Complement)
|
||||
test_complement(layout);
|
||||
}
|
||||
|
||||
{
|
||||
auto layout = Layout<Shape<_2,_4>, Stride<_1,_6>>{};
|
||||
|
||||
test_complement(layout);
|
||||
}
|
||||
|
||||
{
|
||||
auto layout = Layout<Shape<_2,_4,_8>, Stride<_8,_1,_64>>{};
|
||||
|
||||
@ -167,26 +178,34 @@ TEST(CuTe_core, Complement)
|
||||
}
|
||||
|
||||
{
|
||||
auto layout = make_layout(Shape<Shape<_2,_2>,Shape<_2, _2>>{},
|
||||
auto layout = make_layout(Shape <Shape <_2,_2>,Shape <_2, _2>>{},
|
||||
Stride<Stride<_1,_4>,Stride<_8,_32>>{});
|
||||
|
||||
test_complement(layout);
|
||||
}
|
||||
|
||||
{
|
||||
auto layout = make_layout(Shape<Shape<_2,_2>,Shape<_2, _2>>{},
|
||||
auto layout = make_layout(Shape <Shape <_2, _2>,Shape <_2,_2>>{},
|
||||
Stride<Stride<_1,_32>,Stride<_8,_4>>{});
|
||||
|
||||
test_complement(layout);
|
||||
}
|
||||
|
||||
// Fails due to non-injective input
|
||||
//{
|
||||
//auto layout = make_layout(Shape<Shape<_2,_2>,Shape<_2, _2>>{},
|
||||
// Fails due to non-injective layout
|
||||
// {
|
||||
// auto layout = make_layout(Shape<Shape<_2,_2>,Shape<_2, _2>>{},
|
||||
// Stride<Stride<_1,_8>,Stride<_8,_4>>{});
|
||||
|
||||
//test_complement(layout);
|
||||
//}
|
||||
// test_complement(layout);
|
||||
// }
|
||||
|
||||
// Fails due to non-injective layout
|
||||
// {
|
||||
// auto layout = Layout<Shape<_2,_2>, Stride<_2,_3>>{};
|
||||
|
||||
// test_complement(layout);
|
||||
// test_complement(layout, Int<19>{});
|
||||
// }
|
||||
|
||||
{
|
||||
auto layout = Layout<Shape<_4,_6>, Stride<_1,_6>>{};
|
||||
|
||||
@ -42,8 +42,8 @@ using namespace cute;
|
||||
|
||||
template <class LayoutA, class LayoutB>
|
||||
void
|
||||
test_composition(const LayoutA& layoutA,
|
||||
const LayoutB& layoutB)
|
||||
test_composition(LayoutA const& layoutA,
|
||||
LayoutB const& layoutB)
|
||||
{
|
||||
auto layoutR = composition(layoutA, layoutB);
|
||||
|
||||
@ -52,14 +52,12 @@ test_composition(const LayoutA& layoutA,
|
||||
CUTLASS_TRACE_HOST(" => ");
|
||||
CUTLASS_TRACE_HOST(layoutR);
|
||||
|
||||
// Test that layout R is compatible with layout B
|
||||
// Test that layout B is compatible with layout R
|
||||
EXPECT_TRUE(compatible(layoutB, layoutR));
|
||||
|
||||
// True post-condition: Every coordinate c of layoutB with L1D(c) < size(layoutR) is a coordinate of layoutR.
|
||||
|
||||
// Test that R(c) = A(B(c)) for all coordinates c in layoutR
|
||||
for (int i = 0; i < size(layoutR); ++i) {
|
||||
EXPECT_EQ(layoutR(i), layoutA(layoutB(i)));
|
||||
// Test that R(c) = A(B(c)) for all coordinates c in layoutB
|
||||
for (int c = 0; c < size(layoutB); ++c) {
|
||||
EXPECT_EQ(layoutR(c), layoutA(layoutB(c)));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -45,10 +45,10 @@ test_logical_divide(LayoutA const& layoutA,
|
||||
auto layoutR = logical_divide(layoutA, layoutB);
|
||||
|
||||
CUTLASS_TRACE_HOST("test_logical_divide()");
|
||||
CUTLASS_TRACE_HOST(shape(layoutA) << " / " << shape(layoutB) << " => " << shape(layoutR) );
|
||||
CUTLASS_TRACE_HOST( shape(layoutA) << " / " << shape(layoutB) << " => " << shape(layoutR));
|
||||
CUTLASS_TRACE_HOST(stride(layoutA) << " " << stride(layoutB) << " => " << stride(layoutR));
|
||||
|
||||
// Test that layout R is compatible with layout B
|
||||
// Test that layout B is compatible with layout R_0
|
||||
ASSERT_EQ(rank(layoutR), 2);
|
||||
ASSERT_TRUE(compatible(layoutB, layout<0>(layoutR)));
|
||||
}
|
||||
@ -186,10 +186,10 @@ TEST(CuTe_core, Logical_divide)
|
||||
|
||||
// Enforcement for dynamic cases
|
||||
auto result = logical_divide(layout, tile);
|
||||
static_assert(decltype(shape<0>(result) == Int<32>{})::value);
|
||||
static_assert(decltype(stride<0>(result) == Int<1>{})::value);
|
||||
assert(shape<1>(result) == 1);
|
||||
static_assert(decltype(stride<1>(result) == Int<32>{})::value);
|
||||
ASSERT_TRUE(decltype(shape<0>(result) == Int<32>{})::value);
|
||||
ASSERT_TRUE(decltype(stride<0>(result) == Int<1>{})::value);
|
||||
ASSERT_TRUE(shape<1>(result) == 1);
|
||||
ASSERT_TRUE(decltype(stride<1>(result) == Int<32>{})::value);
|
||||
}
|
||||
|
||||
{
|
||||
@ -200,10 +200,10 @@ TEST(CuTe_core, Logical_divide)
|
||||
|
||||
// Enforcement for dynamic cases
|
||||
auto result = logical_divide(layout, tile);
|
||||
static_assert(decltype(shape<0>(result) == Int<32>{})::value);
|
||||
static_assert(decltype(stride<0>(result) == Int<1>{})::value);
|
||||
assert(shape<1>(result) == 2);
|
||||
static_assert(decltype(stride<1>(result) == Int<32>{})::value);
|
||||
ASSERT_TRUE(decltype(shape<0>(result) == Int<32>{})::value);
|
||||
ASSERT_TRUE(decltype(stride<0>(result) == Int<1>{})::value);
|
||||
ASSERT_TRUE(shape<1>(result) == 2);
|
||||
ASSERT_TRUE(decltype(stride<1>(result) == Int<32>{})::value);
|
||||
}
|
||||
|
||||
{
|
||||
@ -221,10 +221,10 @@ TEST(CuTe_core, Logical_divide)
|
||||
|
||||
// Enforcement for dynamic cases
|
||||
auto result = logical_divide(layout, tile);
|
||||
static_assert(decltype(shape<0>(result) == Int<48>{})::value);
|
||||
static_assert(decltype(stride<0>(result) == Int<1>{})::value);
|
||||
assert(shape<1>(result) == 1);
|
||||
static_assert(decltype(stride<1>(result) == Int<48>{})::value);
|
||||
ASSERT_TRUE(decltype(shape<0>(result) == Int<48>{})::value);
|
||||
ASSERT_TRUE(decltype(stride<0>(result) == Int<1>{})::value);
|
||||
ASSERT_TRUE(shape<1>(result) == 1);
|
||||
ASSERT_TRUE(decltype(stride<1>(result) == Int<48>{})::value);
|
||||
}
|
||||
|
||||
// DISALLOWED
|
||||
|
||||
@ -46,13 +46,9 @@ test_logical_product(LayoutA const& layoutA,
|
||||
CUTLASS_TRACE_HOST(shape(layoutA) << " x " << shape(layoutB) << " => " << shape(layoutR) );
|
||||
CUTLASS_TRACE_HOST(stride(layoutA) << " " << stride(layoutB) << " => " << stride(layoutR));
|
||||
|
||||
// Test that layout R is compatible with layout B
|
||||
ASSERT_EQ(rank(layoutR), 2);
|
||||
//assert(compatible(layoutB, layout<0>(layoutR)));
|
||||
//assert(consistent(layoutA, layout<1>(layoutR)));
|
||||
|
||||
// True post-condition:
|
||||
|
||||
ASSERT_TRUE(layoutA == layout<0>(layoutR));
|
||||
ASSERT_TRUE(compatible(layoutB, layout<1>(layoutR)));
|
||||
}
|
||||
|
||||
TEST(CuTe_core, Logical_product)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -58,7 +58,7 @@ template <
|
||||
class HostEVTNodeBase {
|
||||
public:
|
||||
using Gemm = Gemm_;
|
||||
using TestBedImpl = typename detail::TestbedImpl<Gemm>;
|
||||
using TestBedImpl = typename detail::TestbedImpl<Gemm, cutlass::epilogue::thread::Identity, true>;
|
||||
using Kernel = typename Gemm::GemmKernel;
|
||||
using Epilogue = typename Kernel::CollectiveEpilogue;
|
||||
using ElementCompute = typename TestBedImpl::ElementCompute;
|
||||
@ -238,9 +238,9 @@ public:
|
||||
_bias.resize(cutlass::Coord<1>(_N));
|
||||
|
||||
EXPECT_TRUE(
|
||||
impl_.initialize_tensor(
|
||||
detail::initialize_tensor(
|
||||
_bias.host_view(), cutlass::Distribution::Uniform,
|
||||
impl_.seed + 2023
|
||||
impl_.collective_mma_inputs.seed + 2023
|
||||
)
|
||||
);
|
||||
_bias.sync_device();
|
||||
@ -306,9 +306,9 @@ public:
|
||||
_bias.resize(cutlass::Coord<1>(_M));
|
||||
|
||||
EXPECT_TRUE(
|
||||
impl_.initialize_tensor(
|
||||
detail::initialize_tensor(
|
||||
_bias.host_view(), cutlass::Distribution::Uniform,
|
||||
impl_.seed + 2023
|
||||
impl_.collective_mma_inputs.seed + 2023
|
||||
)
|
||||
);
|
||||
_bias.sync_device();
|
||||
@ -393,10 +393,10 @@ public:
|
||||
)
|
||||
);
|
||||
EXPECT_TRUE(
|
||||
impl_.initialize_tensor(
|
||||
detail::initialize_tensor(
|
||||
_tensor_aux_load.host_view(),
|
||||
cutlass::Distribution::Uniform,
|
||||
impl_.seed + 2023
|
||||
impl_.collective_mma_inputs.seed + 2023
|
||||
)
|
||||
);
|
||||
_tensor_aux_load.sync_device();
|
||||
@ -1154,7 +1154,7 @@ public:
|
||||
// The EVT Module to test
|
||||
using EVTModule = typename EVT::EVTModule;
|
||||
|
||||
using TestBedImpl = typename detail::TestbedImpl<Gemm>;
|
||||
using TestBedImpl = typename detail::TestbedImpl<Gemm, cutlass::epilogue::thread::Identity, true>;
|
||||
using Kernel = typename Gemm::GemmKernel;
|
||||
using Epilogue = typename Gemm::GemmKernel::CollectiveEpilogue;
|
||||
using ElementAccumulator = typename Kernel::ElementAccumulator;
|
||||
@ -1178,7 +1178,9 @@ public:
|
||||
cutlass::Distribution::Kind init_C_ = cutlass::Distribution::Uniform,
|
||||
uint64_t seed_ = TestBedImpl::kDefaultSeed
|
||||
) :
|
||||
impl_(init_A_, init_B_, init_C_, seed_), check_relative_equality(check_relative_equality_) { }
|
||||
impl_((check_relative_equality_ ? CheckEquality::RELATIVE : CheckEquality::EXACT), ScalarLoc::ON_DEVICE, VectorBeta::ENABLED,
|
||||
init_A_, init_B_, init_C_, cutlass::Distribution::Uniform, cutlass::Distribution::Uniform, seed_),
|
||||
check_relative_equality(check_relative_equality_) { }
|
||||
|
||||
Testbed3xEVT(
|
||||
cutlass::Distribution::Kind init_A_ = cutlass::Distribution::Uniform,
|
||||
@ -1186,7 +1188,9 @@ public:
|
||||
cutlass::Distribution::Kind init_C_ = cutlass::Distribution::Uniform,
|
||||
uint64_t seed_ = TestBedImpl::kDefaultSeed
|
||||
) :
|
||||
impl_(init_A_, init_B_, init_C_, seed_), check_relative_equality(false) { }
|
||||
impl_(CheckEquality::EXACT, ScalarLoc::ON_DEVICE, VectorBeta::ENABLED,
|
||||
init_A_, init_B_, init_C_, cutlass::Distribution::Uniform, cutlass::Distribution::Uniform, seed_),
|
||||
check_relative_equality(false) { }
|
||||
|
||||
Testbed3xEVT(
|
||||
typename LayoutTagA::Stride stride_factor_A_,
|
||||
@ -1198,15 +1202,10 @@ public:
|
||||
cutlass::Distribution::Kind init_C_ = cutlass::Distribution::Uniform,
|
||||
uint64_t seed_ = TestBedImpl::kDefaultSeed
|
||||
) :
|
||||
impl_(stride_factor_A_,
|
||||
stride_factor_B_,
|
||||
stride_factor_C_,
|
||||
stride_factor_D_,
|
||||
init_A_,
|
||||
init_B_,
|
||||
init_C_,
|
||||
seed_),
|
||||
check_relative_equality(false) { }
|
||||
impl_(stride_factor_A_, stride_factor_B_, stride_factor_C_, stride_factor_D_,
|
||||
CheckEquality::EXACT, ScalarLoc::ON_DEVICE, VectorBeta::ENABLED,
|
||||
init_A_, init_B_, init_C_, cutlass::Distribution::Uniform, cutlass::Distribution::Uniform, seed_),
|
||||
check_relative_equality(false) { }
|
||||
|
||||
/// Initializes data structures
|
||||
void initialize(ProblemShapeType problem_size) {
|
||||
@ -1229,11 +1228,11 @@ public:
|
||||
auto K = cute::get<2>(problem_shape_MNKL);
|
||||
auto L = cute::get<3>(problem_shape_MNKL);
|
||||
|
||||
auto A = cute::make_tensor(impl_.tensor_A.host_data(),
|
||||
cute::make_layout(cute::make_shape(M, K, L), impl_.stride_a));
|
||||
auto B = cute::make_tensor(impl_.tensor_B.host_data(),
|
||||
cute::make_layout(cute::make_shape(N, K, L), impl_.stride_b));
|
||||
auto LayoutD = cute::make_layout(cute::make_shape(M, N, L), impl_.stride_d);
|
||||
auto A = cute::make_tensor(impl_.collective_mma_inputs.tensor_A.host_data(),
|
||||
cute::make_layout(cute::make_shape(M, K, L), impl_.collective_mma_inputs.stride_a));
|
||||
auto B = cute::make_tensor(impl_.collective_mma_inputs.tensor_B.host_data(),
|
||||
cute::make_layout(cute::make_shape(N, K, L), impl_.collective_mma_inputs.stride_b));
|
||||
auto LayoutD = cute::make_layout(cute::make_shape(M, N, L), impl_.collective_epilogue.stride_d);
|
||||
|
||||
cutlass::reference::host::GettMainloopParams<ElementAccumulator, decltype(A), decltype(B)> mainloop_params{A, B};
|
||||
|
||||
@ -1277,9 +1276,9 @@ public:
|
||||
<< ", Batch count = " << L << "\n\n";
|
||||
|
||||
file
|
||||
<< "A =\n" << impl_.tensor_A.host_view()
|
||||
<< "\nB =\n" << impl_.tensor_B.host_view()
|
||||
<< "\nC =\n" << impl_.tensor_C.host_view() << "\n\n";
|
||||
<< "A =\n" << impl_.collective_mma_inputs.tensor_A.host_view()
|
||||
<< "\nB =\n" << impl_.collective_mma_inputs.tensor_B.host_view()
|
||||
<< "\nC =\n" << impl_.collective_epilogue.tensor_C.host_view() << "\n\n";
|
||||
|
||||
file << error_ss.str();
|
||||
}
|
||||
@ -1329,15 +1328,15 @@ public:
|
||||
cutlass::gemm::GemmUniversalMode::kGemm,
|
||||
problem_size,
|
||||
{
|
||||
impl_.tensor_A.device_data(), impl_.stride_a,
|
||||
impl_.tensor_B.device_data(), impl_.stride_b
|
||||
impl_.collective_mma_inputs.tensor_A.device_data(), impl_.collective_mma_inputs.stride_a,
|
||||
impl_.collective_mma_inputs.tensor_B.device_data(), impl_.collective_mma_inputs.stride_b
|
||||
},
|
||||
{ // Epilogue arguments
|
||||
{}, // thread
|
||||
static_cast<ElementC*>(host_reference.get_tensor_C_ptr()),
|
||||
impl_.stride_c,
|
||||
impl_.collective_epilogue.stride_c,
|
||||
static_cast<ElementD*>(host_reference.get_tensor_D_ptr()),
|
||||
impl_.stride_d
|
||||
impl_.collective_epilogue.stride_d
|
||||
}, // Epilogue arguments end
|
||||
hw_info,
|
||||
scheduler_args
|
||||
|
||||
@ -101,7 +101,8 @@ struct Testbed3xTensorBroadcast {
|
||||
cutlass::Distribution::Kind init_C_ = cutlass::Distribution::Uniform,
|
||||
uint64_t seed_ = TestBedImpl::kDefaultSeed
|
||||
) :
|
||||
impl_(init_A_, init_B_, init_C_, seed_) { }
|
||||
impl_(CheckEquality::EXACT, ScalarLoc::ON_DEVICE, VectorBeta::ENABLED,
|
||||
init_A_, init_B_, init_C_, cutlass::Distribution::Uniform, cutlass::Distribution::Uniform, seed_) { }
|
||||
|
||||
Testbed3xTensorBroadcast(
|
||||
typename LayoutTagA::Stride stride_factor_A_,
|
||||
@ -117,9 +118,12 @@ struct Testbed3xTensorBroadcast {
|
||||
stride_factor_B_,
|
||||
stride_factor_C_,
|
||||
stride_factor_D_,
|
||||
CheckEquality::EXACT, ScalarLoc::ON_HOST, VectorBeta::ENABLED,
|
||||
init_A_,
|
||||
init_B_,
|
||||
init_C_,
|
||||
cutlass::Distribution::Uniform,
|
||||
cutlass::Distribution::Uniform,
|
||||
seed_) { }
|
||||
|
||||
/// Initializes data structures
|
||||
@ -135,7 +139,7 @@ struct Testbed3xTensorBroadcast {
|
||||
auto bias_size = PerColBias ? cute::get<1>(problem_shape_MNKL) : cute::get<0>(problem_shape_MNKL);
|
||||
bias.resize(cutlass::Coord<1>(bias_size));
|
||||
|
||||
EXPECT_TRUE(impl_.initialize_tensor(bias.host_view(), cutlass::Distribution::Uniform, impl_.seed + 2023));
|
||||
EXPECT_TRUE(detail::initialize_tensor(bias.host_view(), cutlass::Distribution::Uniform, impl_.collective_mma_inputs.seed + 2023));
|
||||
bias.sync_device();
|
||||
}
|
||||
|
||||
@ -147,8 +151,8 @@ struct Testbed3xTensorBroadcast {
|
||||
|
||||
auto c_coord = cutlass::make_Coord(M * L, N);
|
||||
|
||||
tensor_C1.resize(c_coord, cutlass::layout::Affine2Layout_Factory<LayoutTagD>::layout_factory(c_coord, impl_.stride_factor_C));
|
||||
EXPECT_TRUE(impl_.initialize_tensor(tensor_C1.host_view(), cutlass::Distribution::Uniform, impl_.seed + 2024));
|
||||
tensor_C1.resize(c_coord, cutlass::layout::Affine2Layout_Factory<LayoutTagD>::layout_factory(c_coord, impl_.collective_epilogue.stride_factor_C));
|
||||
EXPECT_TRUE(detail::initialize_tensor(tensor_C1.host_view(), cutlass::Distribution::Uniform, impl_.collective_mma_inputs.seed + 2024));
|
||||
tensor_C1.sync_device();
|
||||
}
|
||||
|
||||
@ -161,19 +165,19 @@ struct Testbed3xTensorBroadcast {
|
||||
{
|
||||
auto [M, N, K, L] = problem_shape_MNKL;
|
||||
|
||||
impl_.tensor_D.sync_host();
|
||||
EXPECT_GT(cutlass::reference::host::TensorNorm(impl_.tensor_A.host_view()), 0);
|
||||
EXPECT_GT(cutlass::reference::host::TensorNorm(impl_.tensor_B.host_view()), 0);
|
||||
impl_.collective_epilogue.tensor_D.sync_host();
|
||||
EXPECT_GT(cutlass::reference::host::TensorNorm(impl_.collective_mma_inputs.tensor_A.host_view()), 0);
|
||||
EXPECT_GT(cutlass::reference::host::TensorNorm(impl_.collective_mma_inputs.tensor_B.host_view()), 0);
|
||||
|
||||
if (impl_.tensor_D.size() > 1) {
|
||||
EXPECT_GT(cutlass::reference::host::TensorNorm(impl_.tensor_D.host_view()), 0);
|
||||
if (impl_.collective_epilogue.tensor_D.size() > 1) {
|
||||
EXPECT_GT(cutlass::reference::host::TensorNorm(impl_.collective_epilogue.tensor_D.host_view()), 0);
|
||||
}
|
||||
|
||||
if (impl_.reference_D.size() > 1) {
|
||||
EXPECT_GT(cutlass::reference::host::TensorNorm(impl_.reference_D.host_view()), 0);
|
||||
if (impl_.collective_epilogue.reference_D.size() > 1) {
|
||||
EXPECT_GT(cutlass::reference::host::TensorNorm(impl_.collective_epilogue.reference_D.host_view()), 0);
|
||||
}
|
||||
|
||||
bool passed = cutlass::reference::host::TensorEquals(impl_.reference_D.host_view(), impl_.tensor_D.host_view());
|
||||
bool passed = cutlass::reference::host::TensorEquals(impl_.collective_epilogue.reference_D.host_view(), impl_.collective_epilogue.tensor_D.host_view());
|
||||
|
||||
EXPECT_TRUE(passed);
|
||||
|
||||
@ -196,12 +200,12 @@ struct Testbed3xTensorBroadcast {
|
||||
}
|
||||
|
||||
file
|
||||
<< "A =\n" << impl_.tensor_A.host_view()
|
||||
<< "\nB =\n" << impl_.tensor_B.host_view()
|
||||
<< "\nC0 =\n" << impl_.tensor_C.host_view()
|
||||
<< "A =\n" << impl_.collective_mma_inputs.tensor_A.host_view()
|
||||
<< "\nB =\n" << impl_.collective_mma_inputs.tensor_B.host_view()
|
||||
<< "\nC0 =\n" << impl_.collective_epilogue.tensor_C.host_view()
|
||||
<< "\nC1 =\n" << tensor_C1.host_view()
|
||||
<< "\n\nReference =\n" << impl_.reference_D.host_view()
|
||||
<< "\n\nComputed =\n" <<impl_.tensor_D.host_view();
|
||||
<< "\n\nReference =\n" << impl_.collective_epilogue.reference_D.host_view()
|
||||
<< "\n\nComputed =\n" <<impl_.collective_epilogue.tensor_D.host_view();
|
||||
}
|
||||
|
||||
return passed;
|
||||
@ -221,40 +225,39 @@ struct Testbed3xTensorBroadcast {
|
||||
auto K = cute::get<2>(problem_shape_MNKL);
|
||||
auto L = cute::get<3>(problem_shape_MNKL);
|
||||
|
||||
auto A = cute::make_tensor(impl_.tensor_A.host_data(),
|
||||
cute::make_layout(cute::make_shape(M, K, L), impl_.stride_a));
|
||||
auto B = cute::make_tensor(impl_.tensor_B.host_data(),
|
||||
cute::make_layout(cute::make_shape(N, K, L), impl_.stride_b));
|
||||
auto D = cute::make_tensor(impl_.reference_D.host_data(),
|
||||
cute::make_layout(cute::make_shape(M, N, L), impl_.stride_d));
|
||||
auto A = cute::make_tensor(impl_.collective_mma_inputs.tensor_A.host_data(),
|
||||
cute::make_layout(cute::make_shape(M, K, L), impl_.collective_mma_inputs.stride_a));
|
||||
auto B = cute::make_tensor(impl_.collective_mma_inputs.tensor_B.host_data(),
|
||||
cute::make_layout(cute::make_shape(N, K, L), impl_.collective_mma_inputs.stride_b));
|
||||
auto D = cute::make_tensor(impl_.collective_epilogue.reference_D.host_data(),
|
||||
cute::make_layout(cute::make_shape(M, N, L), impl_.collective_epilogue.stride_d));
|
||||
auto Bias = cute::make_tensor(static_cast<ElementBias*>(use_bias ? bias.host_data() : nullptr),
|
||||
cute::make_layout(PerColBias ? cute::make_shape(1, N) : cute::make_shape(M, 1)));
|
||||
auto C0 = cute::make_tensor(impl_.tensor_C.host_data(),
|
||||
cute::make_layout(cute::make_shape(M, N, L), impl_.stride_c));
|
||||
auto C0 = cute::make_tensor(impl_.collective_epilogue.tensor_C.host_data(),
|
||||
cute::make_layout(cute::make_shape(M, N, L), impl_.collective_epilogue.stride_c));
|
||||
auto C1 = cute::make_tensor(tensor_C1.host_data(),
|
||||
cute::make_layout(cute::make_shape(M, N, L), impl_.stride_c));
|
||||
cute::make_layout(cute::make_shape(M, N, L), impl_.collective_epilogue.stride_c));
|
||||
|
||||
// Create host workspace for output of testbed. This computes a portion of the epilogue:
|
||||
// ref_compute_out = Activation(alpha * (A @ B) + bias)
|
||||
cutlass::HostTensor<ElementCompute, LayoutTagC> ref_compute_out;
|
||||
auto c_coord = cutlass::make_Coord(M * L, N);
|
||||
ref_compute_out.resize(c_coord, cutlass::layout::Affine2Layout_Factory<LayoutTagD>::layout_factory(c_coord, impl_.stride_factor_C), false);
|
||||
ref_compute_out.resize(c_coord, cutlass::layout::Affine2Layout_Factory<LayoutTagD>::layout_factory(c_coord, impl_.collective_epilogue.stride_factor_C), false);
|
||||
auto RefComputeOut = cute::make_tensor(ref_compute_out.host_data(),
|
||||
cute::make_layout(cute::make_shape(M, N, L), impl_.stride_c));
|
||||
cute::make_layout(cute::make_shape(M, N, L), impl_.collective_epilogue.stride_c));
|
||||
|
||||
cutlass::reference::host::GettMainloopParams<ElementAccumulator, decltype(A), decltype(B)> mainloop_params{A, B};
|
||||
|
||||
// Use a dummy null tensor for operand C because the epilogue overrides C.
|
||||
auto dummy_C = cute::make_tensor(static_cast<ElementC*>(nullptr),
|
||||
cute::make_layout(cute::make_shape(M, N, L), impl_.stride_c));
|
||||
cute::make_layout(cute::make_shape(M, N, L), impl_.collective_epilogue.stride_c));
|
||||
ElementCompute dummy_beta(0);
|
||||
auto dummy_Aux = cute::make_tensor(static_cast<ElementD*>(nullptr),
|
||||
cute::make_layout(cute::make_shape(M, N, L), impl_.stride_d));
|
||||
cute::make_layout(cute::make_shape(M, N, L), impl_.collective_epilogue.stride_d));
|
||||
auto dummy_Valpha = cute::make_tensor(static_cast<ElementCompute*>(nullptr),
|
||||
cute::make_layout(cute::make_shape(M, 1)));
|
||||
auto dummy_Vbeta = cute::make_tensor(static_cast<ElementCompute*>(nullptr),
|
||||
cute::make_layout(cute::make_shape(M, 1)));
|
||||
|
||||
cutlass::reference::host::GettEpilogueParams<
|
||||
ElementScalar,
|
||||
ElementScalar,
|
||||
@ -361,17 +364,17 @@ struct Testbed3xTensorBroadcast {
|
||||
arguments = typename Gemm::Arguments{
|
||||
cutlass::gemm::GemmUniversalMode::kGemm,
|
||||
problem_size,
|
||||
{ impl_.tensor_A.device_data(), impl_.stride_a,
|
||||
impl_.tensor_B.device_data(), impl_.stride_b,
|
||||
{ impl_.collective_mma_inputs.tensor_A.device_data(), impl_.collective_mma_inputs.stride_a,
|
||||
impl_.collective_mma_inputs.tensor_B.device_data(), impl_.collective_mma_inputs.stride_b,
|
||||
impl_.mma_promotion_interval
|
||||
},
|
||||
{ // Epilogue arguments
|
||||
{ alpha, beta }, // ThreadOp arguments
|
||||
impl_.stride_c,
|
||||
impl_.tensor_D.device_data(),
|
||||
impl_.stride_d,
|
||||
impl_.collective_epilogue.stride_c,
|
||||
impl_.collective_epilogue.tensor_D.device_data(),
|
||||
impl_.collective_epilogue.stride_d,
|
||||
use_bias ? bias.device_data() : nullptr,
|
||||
impl_.tensor_C.device_data(),
|
||||
impl_.collective_epilogue.tensor_C.device_data(),
|
||||
tensor_C1.device_data()
|
||||
}, // Epilogue arguments end
|
||||
hw_info
|
||||
|
||||
@ -112,6 +112,8 @@ TEST(SM80_Device_Gemm_tf32t_tf32n_f32n_tensor_op_f32, 128x128x32_64x64x64) {
|
||||
EXPECT_TRUE(test::gemm::device::TestAll<Gemm>());
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
TEST(SM80_Device_Gemm_tf32t_tf32t_f32n_tensor_op_f32, 128x128x32_64x64x64) {
|
||||
using Config = cutlass::gemm::device::DefaultGemmConfigurationToCutlass3Types<
|
||||
cutlass::arch::OpClassTensorOp, cutlass::arch::Sm80,
|
||||
@ -132,4 +134,24 @@ TEST(SM80_Device_Gemm_tf32t_tf32t_f32n_tensor_op_f32, 128x128x32_64x64x64) {
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
TEST(SM80_Device_Gemm_tf32t_tf32n_f32n_tensor_op_f32, 128x128x32_64x64x64_profiling) {
|
||||
using Config = cutlass::gemm::device::DefaultGemmConfigurationToCutlass3Types<
|
||||
cutlass::arch::OpClassTensorOp, cutlass::arch::Sm80,
|
||||
cutlass::tfloat32_t, cutlass::layout::RowMajor,
|
||||
cutlass::tfloat32_t, cutlass::layout::ColumnMajor,
|
||||
float, cutlass::layout::RowMajor,
|
||||
float>;
|
||||
|
||||
using GemmKernel = cutlass::gemm::kernel::GemmUniversal<
|
||||
Shape<int,int,int,int>,
|
||||
Config::CollectiveMainloop,
|
||||
Config::CollectiveEpilogue
|
||||
>;
|
||||
|
||||
using Gemm = cutlass::gemm::device::GemmUniversalAdapter<GemmKernel>;
|
||||
EXPECT_TRUE(test::gemm::device::TestGemmPerf3x<Gemm>());
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
//#endif // #if defined(CUTLASS_ARCH_MMA_SM80_SUPPORTED)
|
||||
|
||||
@ -97,9 +97,7 @@ TEST(SM90_Device_Gemm_f16t_f16n_f32t_tensor_op_gmma_f32_cooperative_epilogue, 25
|
||||
>;
|
||||
|
||||
using Gemm = cutlass::gemm::device::GemmUniversalAdapter<GemmKernel>;
|
||||
|
||||
test::gemm::device::Testbed3x<Gemm, cutlass::epilogue::thread::ReLu> testbed;
|
||||
bool passed = test::gemm::device::TestAll<Gemm>(1, 1, testbed);
|
||||
bool passed = test::gemm::device::TestAll<Gemm, cutlass::epilogue::thread::ReLu>(1, 1);
|
||||
EXPECT_TRUE(passed);
|
||||
}
|
||||
|
||||
@ -156,6 +154,7 @@ TEST(SM90_Device_Gemm_f16t_f16n_f32t_tensor_op_gmma_f32_cooperative_epilogue, 25
|
||||
#pragma GCC diagnostic pop // Re-enable deprecation warnings
|
||||
}
|
||||
|
||||
|
||||
TEST(SM90_Device_Gemm_f16t_f16n_f32t_tensor_op_gmma_f32_cooperative_epilogue, 256x128x64_2x2x1_BiasF32_ReLU) {
|
||||
using LayoutA = cutlass::layout::RowMajor;
|
||||
using LayoutB = cutlass::layout::ColumnMajor;
|
||||
@ -239,9 +238,8 @@ TEST(SM90_Device_Gemm_f16t_f16n_f32t_tensor_op_gmma_f32_cooperative_epilogue, 25
|
||||
>;
|
||||
|
||||
using Gemm = cutlass::gemm::device::GemmUniversalAdapter<GemmKernel>;
|
||||
|
||||
bool check_relative_equality = true;
|
||||
bool passed = test::gemm::device::TestAllBiasElementwise<Gemm>(1, 1, check_relative_equality);
|
||||
using namespace test::gemm::device;
|
||||
bool passed = TestAllBiasElementwise<Gemm>(1, 1, CheckEquality::RELATIVE);
|
||||
EXPECT_TRUE(passed);
|
||||
}
|
||||
|
||||
@ -600,8 +598,8 @@ TEST(SM90_Device_Gemm_f16t_f16n_f32t_tensor_op_gmma_f32_cooperative_epilogue, 25
|
||||
>;
|
||||
|
||||
using Gemm = cutlass::gemm::device::GemmUniversalAdapter<GemmKernel>;
|
||||
|
||||
bool passed = test::gemm::device::TestAllBiasElementwise<Gemm>(1.0, 0.0, /*check_relative_equality=*/true);
|
||||
using namespace test::gemm::device;
|
||||
bool passed = TestAllBiasElementwise<Gemm>(1.0, 0.0, CheckEquality::RELATIVE);
|
||||
EXPECT_TRUE(passed);
|
||||
}
|
||||
|
||||
|
||||
@ -97,8 +97,7 @@ TEST(SM90_Device_Gemm_f16t_f16n_f32t_tensor_op_gmma_f32_persistent_epilogue, 128
|
||||
>;
|
||||
|
||||
using Gemm = cutlass::gemm::device::GemmUniversalAdapter<GemmKernel>;
|
||||
test::gemm::device::Testbed3x<Gemm, cutlass::epilogue::thread::ReLu> testbed;
|
||||
bool passed = test::gemm::device::TestAll<Gemm>(1, 1, testbed);
|
||||
bool passed = test::gemm::device::TestAll<Gemm, cutlass::epilogue::thread::ReLu>(1, 1);
|
||||
EXPECT_TRUE(passed);
|
||||
}
|
||||
|
||||
@ -186,8 +185,8 @@ TEST(SM90_Device_Gemm_f16t_f16n_f32t_tensor_op_gmma_f32_persistent_epilogue, 128
|
||||
|
||||
using Gemm = cutlass::gemm::device::GemmUniversalAdapter<GemmKernel>;
|
||||
|
||||
bool check_relative_equality = true;
|
||||
bool passed = test::gemm::device::TestAllBiasElementwise<Gemm>(1, 1, check_relative_equality);
|
||||
using namespace test::gemm::device;
|
||||
bool passed = TestAllBiasElementwise<Gemm>(1, 1, CheckEquality::RELATIVE);
|
||||
EXPECT_TRUE(passed);
|
||||
}
|
||||
|
||||
|
||||
@ -1,24 +1,30 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright notice, this list of
|
||||
* conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright notice, this list of
|
||||
* conditions and the following disclaimer in the documentation and/or other materials
|
||||
* provided with the distribution.
|
||||
* * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
|
||||
* to endorse or promote products derived from this software without specific prior written
|
||||
* permission.
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************************************/
|
||||
|
||||
@ -1,24 +1,30 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright notice, this list of
|
||||
* conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright notice, this list of
|
||||
* conditions and the following disclaimer in the documentation and/or other materials
|
||||
* provided with the distribution.
|
||||
* * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
|
||||
* to endorse or promote products derived from this software without specific prior written
|
||||
* permission.
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************************************/
|
||||
|
||||
@ -1,24 +1,30 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
* provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright notice, this list of
|
||||
* conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright notice, this list of
|
||||
* conditions and the following disclaimer in the documentation and/or other materials
|
||||
* provided with the distribution.
|
||||
* * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
|
||||
* to endorse or promote products derived from this software without specific prior written
|
||||
* permission.
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************************************/
|
||||
|
||||
@ -50,6 +50,7 @@
|
||||
#if defined(CUTLASS_ARCH_MMA_SM80_SUPPORTED)
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#if (!((__CUDACC_VER_MAJOR__ == 11) && (__CUDACC_VER_MINOR__ == 8)))
|
||||
|
||||
TEST(SM80_Device_Syr2k_cf32n_cf32t_l_tensor_op_f32, 64x64x16_32x32x16) {
|
||||
|
||||
@ -145,6 +146,7 @@ TEST(SM80_Device_Syr2k_cf32n_cf32t_u_tensor_op_f32, 64x64x16_32x32x16) {
|
||||
EXPECT_TRUE(test::gemm::device::TestAllRank2KUniversal<Rank2K>());
|
||||
}
|
||||
|
||||
#endif
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#endif // #if defined(CUTLASS_ARCH_MMA_SM80_SUPPORTED)
|
||||
|
||||
@ -50,6 +50,7 @@
|
||||
#if defined(CUTLASS_ARCH_MMA_SM80_SUPPORTED)
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#if (!((__CUDACC_VER_MAJOR__ == 11) && (__CUDACC_VER_MINOR__ == 8)))
|
||||
|
||||
TEST(SM80_Device_Syr2k_cf32n_cf32t_l_tensor_op_fast_f32, 64x64x16_32x32x16) {
|
||||
|
||||
@ -145,6 +146,7 @@ TEST(SM80_Device_Syr2k_cf32n_cf32t_u_tensor_op_fast_f32, 64x64x16_32x32x16) {
|
||||
EXPECT_TRUE(test::gemm::device::TestAllRank2KUniversal<Rank2K>());
|
||||
}
|
||||
|
||||
#endif
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#endif // #if defined(CUTLASS_ARCH_MMA_SM80_SUPPORTED)
|
||||
|
||||
@ -50,6 +50,7 @@
|
||||
#if defined(CUTLASS_ARCH_MMA_SM80_SUPPORTED)
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#if (!((__CUDACC_VER_MAJOR__ == 11) && (__CUDACC_VER_MINOR__ == 8)))
|
||||
|
||||
TEST(SM80_Device_Syr2k_cf64n_cf64n_l_tensor_op_f64, 32x32x16_16x16x16) {
|
||||
|
||||
@ -145,6 +146,7 @@ TEST(SM80_Device_Syr2k_cf64n_cf64n_u_tensor_op_f64, 32x32x16_16x16x16) {
|
||||
EXPECT_TRUE(test::gemm::device::TestAllRank2KUniversal<Rank2K>());
|
||||
}
|
||||
|
||||
#endif
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#endif // #if defined(CUTLASS_ARCH_MMA_SM80_SUPPORTED)
|
||||
|
||||
@ -0,0 +1,30 @@
|
||||
/***************************************************************************************************
|
||||
* Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************************************/
|
||||
|
||||
Reference in New Issue
Block a user