#################################################################################################
#
# Copyright (c) 2023 - 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#################################################################################################

"""
Util Functions for Conv2d Test
"""
import torch
import cutlass
import unittest
import cutlass_bindings
from cutlass.utils.datatypes import binding_type, binding_opclass
from cutlass.backend.test.conv2d_testbed import Conv2dLauncher, getTensorRef, getTensorView
from cutlass.backend.utils.device import device_cc
from cutlass.backend.test.utils import get_name_conv2d
import numpy as np
def conv2d_few_channel_problemsizes(channels):
    """Return Conv2dProblemSize cases whose channel count is ``channels``.

    All cases share padding (1, 1, 1, 1), dilation (1, 1), cross-correlation
    mode, and split counts of 1; they differ only in input extent, filter
    extent, and stride.
    """
    # (N, H, W), (K, R, S), (stride_h, stride_w) for each case.
    case_specs = [
        ((1, 8, 8), (16, 3, 3), (2, 2)),
        ((1, 16, 16), (16, 3, 3), (2, 2)),
        ((1, 16, 16), (16, 7, 7), (1, 1)),
        ((1, 224, 224), (32, 7, 7), (1, 1)),
        ((1, 224, 224), (64, 7, 7), (2, 2)),
        ((1, 224, 224), (64, 5, 5), (1, 1)),
        ((1, 224, 224), (64, 5, 5), (2, 2)),
    ]

    return [
        cutlass_bindings.conv.Conv2dProblemSize(
            cutlass_bindings.Tensor4DCoord(n, h, w, channels),
            cutlass_bindings.Tensor4DCoord(k, r, s, channels),
            cutlass_bindings.Tensor4DCoord(1, 1, 1, 1),
            cutlass_bindings.MatrixCoord(stride_h, stride_w),
            cutlass_bindings.MatrixCoord(1, 1),
            cutlass_bindings.conv.Mode.cross_correlation,
            1, 1
        )
        for (n, h, w), (k, r, s), (stride_h, stride_w) in case_specs
    ]
# Shared table mapping each supported cutlass element type to its torch and
# numpy equivalents; the two lookup dicts below are projected from it.
_DTYPE_TABLE = (
    (cutlass.DataType.f16, torch.float16, np.float16),
    (cutlass.DataType.f32, torch.float32, np.float32),
    (cutlass.DataType.f64, torch.float64, np.float64),
)

# cutlass element type -> torch dtype
torch_dtype = {cutlass_type: torch_type for cutlass_type, torch_type, _ in _DTYPE_TABLE}

# cutlass element type -> numpy dtype
numpy_dtype = {cutlass_type: numpy_type for cutlass_type, _, numpy_type in _DTYPE_TABLE}
def validate_problem_size(ps, conv_kind, split_k_slices):
    """Check that a conv2d problem size is self-consistent and supported.

    Returns False when the stored output extents (P, Q) disagree with the
    extents implied by the input/filter/padding/stride/dilation, or when the
    configuration requests split-K for a strided dgrad (unsupported).
    """
    def output_extent(extent, pad, dilation, filter_extent, stride):
        # Standard cross-correlation output-size formula.
        return (extent + 2 * pad - dilation * (filter_extent - 1) - 1) // stride + 1

    expected_p = output_extent(ps.H, ps.pad_h, ps.dilation_h, ps.R, ps.stride_h)
    expected_q = output_extent(ps.W, ps.pad_w, ps.dilation_w, ps.S, ps.stride_w)
    if (expected_p, expected_q) != (ps.P, ps.Q):
        return False

    # Split-K (serial or parallel) is not supported for strided dgrad
    is_strided = ps.stride_h > 1 or ps.stride_w > 1
    if conv_kind == "dgrad" and split_k_slices > 1 and is_strided:
        return False
    return True
# Override the backend launcher
class Conv2dLauncherFrontend(Conv2dLauncher):
    """Launcher that drives a frontend ``cutlass.Conv2d`` plan in tests.

    Overrides the backend ``Conv2dLauncher`` so tensors can be created and
    compared with either numpy or torch.  With ``backend == "numpy"`` the
    reference result comes from the cutlass host conv2d (with an on-disk
    result cache); with ``backend == "torch"`` it comes from torch's own
    conv2d / conv2d_input / conv2d_weight ops on CUDA tensors.
    """

    def __init__(self, plan: cutlass.Conv2d, seed: int = 80, backend: str = "numpy"):
        """Capture the plan and derive the launch configuration.

        :param plan: frontend ``cutlass.Conv2d`` plan to execute
        :param seed: RNG seed used by :meth:`set_seed` for tensor init
        :param backend: "numpy" or "torch"; selects the tensor library used
            for initialization, reference computation, and comparison
        """
        self.operation = plan
        self.conv_kind = plan.conv_kind
        self.seed = seed
        self.backend = backend

        # Operand element types are read from the plan's private fields.
        self.dtype_A = plan._element_a
        self.dtype_B = plan._element_b
        self.dtype_C = plan._element_c
        self.dtype_acc = plan._element_accumulator

        # All operands use the NHWC layout.
        self.layout_A = cutlass_bindings.TensorNHWC
        self.layout_B = cutlass_bindings.TensorNHWC
        self.layout_C = cutlass_bindings.TensorNHWC
        self.layout_D = cutlass_bindings.TensorNHWC

        # Epilogue scalars (alpha/beta) are computed in fp32; host reference
        # results are cached on disk (see host_reference).
        self.element_compute = cutlass_bindings.float32
        self.enable_cached_results = True

        # Get randomization_max: tensors are filled with integers in
        # [-randomization_max, randomization_max].  Smaller ranges are used
        # for narrow input/accumulator types — presumably to keep the device
        # result bit-exact against the reference (comparison in equal() is
        # exact, not approximate) — TODO confirm.
        if self.dtype_A in [cutlass.DataType.f16, cutlass.DataType.bf16]:
            if self.dtype_acc in [cutlass.DataType.f16, cutlass.DataType.bf16]:
                self.randomization_max = 2
            else:
                self.randomization_max = 3
        else:
            self.randomization_max = 7

        # Activation functor configured on the plan (applied in reference()).
        self.activation = plan.activation

        # Host (CPU) conv2d reference implementation from the bindings.
        self.host_conv2d = cutlass_bindings.test.conv.host.conv2d

    def set_seed(self):
        """Seed the RNG of the active backend with ``self.seed``."""
        if self.backend == "numpy":
            np.random.seed(self.seed)
        else:
            torch.manual_seed(self.seed)

    def uniform_init(self, size, dtype):
        """Return a tensor of shape ``size`` filled with small random integers.

        numpy backend: delegates to the base launcher with the matching
        numpy dtype.  torch backend: draws uniform values in
        [-randomization_max - 0.5, randomization_max - 0.5) and ceils them,
        which yields integers in [-randomization_max, randomization_max];
        the result is a CUDA tensor in channels-last memory format.
        """
        if self.backend == "numpy":
            return super().uniform_init(size, numpy_dtype[dtype])
        else:
            tensor = torch.ceil(
                torch.empty(size=size, dtype=torch_dtype[dtype], device="cuda").uniform_(-self.randomization_max - 0.5, self.randomization_max - 0.5)
            ).to(memory_format=torch.channels_last)
            return tensor

    def zeros_like(self, tensor):
        """Return an all-zero tensor with the same shape/dtype as ``tensor``."""
        if self.backend == "numpy":
            return np.zeros_like(tensor)
        else:
            # Keep the channels-last memory format used by uniform_init.
            return torch.zeros_like(tensor).to(memory_format=torch.channels_last)

    def reference(self, ps, A, B, C, alpha, beta, activation):
        """Compute the reference D = activation(alpha * conv(A, B) + beta * C).

        numpy backend: defers to :meth:`host_reference` (cached host conv2d).
        torch backend: uses torch's conv ops per conv kind — for dgrad/wgrad,
        A is the incoming gradient and B is the weight/activation operand.

        NOTE(review): the dgrad/wgrad torch calls do not forward dilation —
        they appear to assume dilation (1, 1); confirm against the problem
        sizes used.
        """
        if self.backend == "numpy":
            numpy_result = self.host_reference(ps, A, B, C, alpha, beta, activation)
            return numpy_result
        else:
            if self.conv_kind == cutlass_bindings.conv.Operator.fprop:
                torch_result = alpha * torch.ops.aten.conv2d(
                    A,
                    B,
                    stride=(ps.stride_h, ps.stride_w),
                    padding=(ps.pad_h, ps.pad_w),
                    dilation=(ps.dilation_h, ps.dilation_w)
                ) + beta * C
            elif self.conv_kind == cutlass_bindings.conv.Operator.dgrad:
                torch_result = alpha * torch.nn.grad.conv2d_input(
                    (ps.N, ps.C, ps.H, ps.W),
                    B,
                    A,
                    padding=(ps.pad_h, ps.pad_w),
                    stride=(ps.stride_h, ps.stride_w)
                ) + beta * C
            elif self.conv_kind == cutlass_bindings.conv.Operator.wgrad:
                torch_result = alpha * torch.nn.grad.conv2d_weight(
                    B,
                    (ps.K, ps.C, ps.R, ps.S),
                    A,
                    padding=(ps.pad_h, ps.pad_w),
                    stride=(ps.stride_h, ps.stride_w)
                ) + beta * C
            else:
                raise Exception(f"Conv kind {self.conv_kind} is currently unsupported.")

            # Apply the configured epilogue activation to the linear result.
            if activation == cutlass.backend.epilogue.relu:
                torch_result = torch.nn.functional.relu(torch_result)
            elif activation == cutlass.backend.epilogue.leaky_relu:
                torch_result = torch.nn.functional.leaky_relu(torch_result, 0.5)

            return torch_result

    def host_reference(self, problem_size, tensor_A, tensor_B, tensor_C, alpha, beta, activation):
        """Compute the reference result on the host via the bindings.

        When ``self.enable_cached_results`` is set, results are looked up in
        (and appended to) an on-disk cache keyed by conv kind, problem size,
        scalars, operand contents, and activation; in that mode the return
        value is a hash of the D tensor, not the tensor itself.  With caching
        disabled the raw reference tensor is returned.
        """
        # Convert alpha/beta to the epilogue compute type.
        if self.element_compute == cutlass_bindings.float16:
            alpha = cutlass_bindings.float16(alpha)
            beta = cutlass_bindings.float16(beta)
        elif self.element_compute == cutlass_bindings.int32:
            alpha = int(alpha)
            beta = int(beta)
        else:
            alpha = alpha
            beta = beta

        # If cached result is loaded
        cached_result_loaded = False

        if self.enable_cached_results:
            # Get problem key
            cached_test_key = cutlass_bindings.test.conv.host.CreateCachedConv2dTestKey(
                self.conv_kind,
                problem_size,
                alpha,
                beta,
                getTensorView(
                    tensor_A, self.layout_A, self.conv_kind, problem_size, "a"
                ),
                getTensorView(
                    tensor_B, self.layout_B, self.conv_kind, problem_size, "b"
                ),
                getTensorView(
                    tensor_C, self.layout_C, self.conv_kind, problem_size, "c"
                ),
            )

            # The activation is not part of the binding's key, so append its
            # short tag to keep entries with different activations distinct.
            cached_test_key.problem = cached_test_key.problem + f"_{activation.tag.split('::')[-1]}"

            cached_test_result = cutlass_bindings.test.conv.host.CachedTestResult()

            # One cache file per (SM architecture, seed) pair.
            conv2d_result_cache_name = "cached_results_SM%d_%d.txt" % (
                self.operation.arch,
                self.seed,
            )

            cached_results = cutlass_bindings.test.conv.host.CachedTestResultListing(
                conv2d_result_cache_name
            )
            # CachedTestResultListing cached_results(conv2d_result_cache_name);
            cached = cached_results.find(cached_test_key)
            cached_result_loaded = cached[0]
            if cached_result_loaded:
                cached_test_result = cached[1]

        if not cached_result_loaded:
            # Compute the conv2d on host
            # NOTE(review): D is seeded with ones, not zeros — presumably
            # host_conv2d overwrites every element; confirm.
            tensor_D_ref = np.ones_like(tensor_C)
            tensor_ref_A = getTensorRef(
                tensor_A, self.layout_A, self.conv_kind, problem_size, "a"
            )
            tensor_ref_B = getTensorRef(
                tensor_B, self.layout_B, self.conv_kind, problem_size, "b"
            )
            tensor_ref_C = getTensorRef(
                tensor_C, self.layout_C, self.conv_kind, problem_size, "c"
            )
            tensor_ref_D_ref = getTensorRef(
                tensor_D_ref, self.layout_D, self.conv_kind, problem_size, "d"
            )

            # D = alpha * conv(A, B) + beta * C, written through tensor_ref_D_ref.
            self.host_conv2d(
                self.conv_kind,
                problem_size,
                tensor_ref_A,
                tensor_ref_B,
                tensor_ref_C,
                tensor_ref_D_ref,
                alpha,
                beta,
            )

            # Apply the activation on the host result; leaky_relu takes the
            # same 0.5 slope used by the torch reference path.
            if activation == cutlass.backend.epilogue.leaky_relu:
                tensor_D_ref = activation.numpy(tensor_D_ref, 0.5)
            else:
                tensor_D_ref = activation.numpy(tensor_D_ref)

            tensor_view_D_ref = getTensorView(
                tensor_D_ref, self.layout_D, self.conv_kind, problem_size, "d"
            )

            if self.enable_cached_results:
                # Store only a hash of D and persist it to the cache file.
                cached_test_result.D = cutlass_bindings.test.conv.host.TensorHash(
                    tensor_view_D_ref
                )
                cached_results = (
                    cutlass_bindings.test.conv.host.CachedTestResultListing(
                        conv2d_result_cache_name
                    )
                )
                cached_results.append(cached_test_key, cached_test_result)
                cached_results.write(conv2d_result_cache_name)
            else:
                return tensor_D_ref

        return cached_test_result.D

    def equal(self, tensor_D, tensor_D_ref, problem_size):
        """Return True when the device result exactly matches the reference."""
        if self.backend == "numpy":
            return super().equal(tensor_D, tensor_D_ref, problem_size)
        else:
            # Make sure the kernel has finished writing D before comparing.
            torch.cuda.synchronize()
            return torch.equal(tensor_D, tensor_D_ref)

    def run(self, ps, split_k_mode=cutlass_bindings.conv.SplitKMode.Serial, split_k_slices=1, alpha=1.0, beta=0.0):
        """Run the plan on problem size ``ps`` and verify against the reference.

        Initializes A/B/C (and a zeroed D) with shapes chosen per conv kind —
        NCHW-shaped for the torch backend (channels-last memory format),
        NHWC for numpy — executes the plan, then returns True iff the device
        result equals the reference.
        """
        #
        # Initialize input and output tensors
        #
        if self.conv_kind == cutlass_bindings.conv.Operator.fprop:
            if self.backend == "torch":
                tensor_A_size = (ps.N, ps.C, ps.H, ps.W)
                tensor_B_size = (ps.K, ps.C, ps.R, ps.S)
                tensor_C_size = (ps.N, ps.K, ps.P, ps.Q)
            else:
                tensor_A_size = (ps.N, ps.H, ps.W, ps.C)
                tensor_B_size = (ps.K, ps.R, ps.S, ps.C)
                tensor_C_size = (ps.N, ps.P, ps.Q, ps.K)
        elif self.conv_kind == cutlass_bindings.conv.Operator.dgrad:
            if self.backend == "torch":
                tensor_A_size = (ps.N, ps.K, ps.P, ps.Q)
                tensor_B_size = (ps.K, ps.C, ps.R, ps.S)
                tensor_C_size = (ps.N, ps.C, ps.H, ps.W)
            else:
                tensor_A_size = (ps.N, ps.P, ps.Q, ps.K)
                tensor_B_size = (ps.K, ps.R, ps.S, ps.C)
                tensor_C_size = (ps.N, ps.H, ps.W, ps.C)
        elif self.conv_kind == cutlass_bindings.conv.Operator.wgrad:
            if self.backend == "torch":
                tensor_A_size = (ps.N, ps.K, ps.P, ps.Q)
                tensor_B_size = (ps.N, ps.C, ps.H, ps.W)
                tensor_C_size = (ps.K, ps.C, ps.R, ps.S)
            else:
                tensor_A_size = (ps.N, ps.P, ps.Q, ps.K)
                tensor_B_size = (ps.N, ps.H, ps.W, ps.C)
                tensor_C_size = (ps.K, ps.R, ps.S, ps.C)
        else:
            raise Exception(f"Conv kind {self.conv_kind} is not supported")

        self.set_seed()

        tensor_A = self.uniform_init(size=tensor_A_size, dtype=self.dtype_A)
        tensor_B = self.uniform_init(size=tensor_B_size, dtype=self.dtype_B)
        tensor_C = self.uniform_init(size=tensor_C_size, dtype=self.dtype_C)
        tensor_D = self.zeros_like(tensor_C)

        # Launch the cutlass kernel through the frontend plan.
        self.operation.run(tensor_A, tensor_B, tensor_C, tensor_D,
                           stride=(ps.stride_h, ps.stride_w),
                           padding=(ps.pad_h, ps.pad_w),
                           dilation=(ps.dilation_h, ps.dilation_w),
                           alpha=alpha, beta=beta,
                           split_k=(split_k_mode, split_k_slices))

        tensor_D_ref = self.reference(
            ps, tensor_A, tensor_B, tensor_C, alpha, beta, self.activation
        )

        return self.equal(tensor_D, tensor_D_ref, ps)
def add_test(
    cls,
    cc,
    conv_kind,
    problem_sizes,
    element,
    element_accumulator,
    element_output,
    opclass,
    threadblock_shape,
    warp_count,
    instruction_shape,
    stages,
    iterator_algorithm=None,
    swizzle=None,
    split_k_mode="serial",
    split_k_slices=1,
    activation="identity"
):
    """Create a test-running function with the given specification.

    Builds a unittest-style ``run(self)`` method that constructs a
    ``cutlass.Conv2d`` plan from the given parameters and runs it (via
    ``Conv2dLauncherFrontend``) over every valid entry of ``problem_sizes``
    with alpha=1.0, beta=0.5, asserting each comparison against the
    reference.  The method is attached to ``cls`` under a name derived from
    the parameters, and also returned.

    :param cls: unittest.TestCase subclass to attach the generated test to
    :param cc: compute capability (used only for the generated test name)
    :param conv_kind: convolution kind ("fprop"/"dgrad"/"wgrad" style value,
        forwarded to cutlass.Conv2d and validate_problem_size)
    :param problem_sizes: iterable of Conv2dProblemSize to exercise
    :param activation: epilogue activation name; "leaky_relu" gets a fixed
        0.5 slope
    """
    test_name = get_name_conv2d(
        cc, conv_kind, element, element_accumulator,
        element_output, opclass, threadblock_shape, warp_count, instruction_shape, stages,
        iterator_algorithm, swizzle, split_k_mode, split_k_slices, activation)

    def run(self):
        # Create the plan
        plan = cutlass.Conv2d(
            kind=conv_kind,
            element=element,
            element_accumulator=element_accumulator,
            element_C=element_output,
            element_D=element_output
        )

        # Set the opclass
        plan.opclass = opclass
        # Set the tile description
        td = {
            "threadblock_shape": threadblock_shape,
            "warp_count": warp_count,
            "stages": stages,
            "instruction_shape": instruction_shape,
        }

        plan.tile_description = td
        # Set iterator algorithm
        if iterator_algorithm is not None:
            plan.iterator_algorithm = iterator_algorithm
        # Set swizzling functor
        if swizzle is not None:
            plan.swizzling_stride = swizzle

        if activation != "identity":
            # leaky_relu needs its slope parameter; other activations are
            # looked up by name on cutlass.epilogue.
            if activation == "leaky_relu":
                plan.activation = (cutlass.epilogue.leaky_relu, 0.5)
            else:
                plan.activation = getattr(cutlass.epilogue, activation)

        # NOTE(review): the backend is fixed to "numpy" here even though the
        # launcher also supports "torch" — confirm this is intentional.
        conv2d_launcher = Conv2dLauncherFrontend(plan, 80, backend="numpy")

        for ps in problem_sizes:
            # Skip inconsistent or unsupported problem/split-K combinations.
            if not validate_problem_size(ps, conv_kind, split_k_slices): continue

            self.assertTrue(
                conv2d_launcher.run(ps, split_k_mode, split_k_slices, 1.0, 0.5)
            )

    setattr(cls, test_name, run)

    return run
def get_conv_problems():
    """Return the default conv2d problem sizes plus extra alignment cases."""
    # 64: minimum channel size
    conv_problems = list(cutlass_bindings.test.conv.TestbedConv2dProblemSizes(64).conv2d_default_sizes)

    # Insert alignment 4 & 2 tests: (input NHWC, filter KRSC, padding); all
    # three use stride (3, 3), dilation (1, 1), and cross-correlation mode.
    extra_case_specs = [
        ((1, 4, 4, 12), (8, 3, 3, 12), (0, 0, 0, 0)),
        ((1, 4, 4, 14), (8, 3, 3, 14), (0, 0, 0, 0)),
        ((1, 23, 56, 98), (128, 3, 3, 98), (4, 0, 5, 0)),
    ]
    for input_size, filter_size, padding in extra_case_specs:
        conv_problems.append(
            cutlass_bindings.conv.Conv2dProblemSize(
                cutlass_bindings.Tensor4DCoord(*input_size),
                cutlass_bindings.Tensor4DCoord(*filter_size),
                cutlass_bindings.Tensor4DCoord(*padding),
                cutlass_bindings.MatrixCoord(3, 3),
                cutlass_bindings.MatrixCoord(1, 1),
                cutlass_bindings.conv.Mode.cross_correlation,
                1, 1
            )
        )

    return conv_problems