v4.2 release. (#2587)
* Fix default cluster callback values to 1 to avoid profiler failure when these values are not set in command line. * v4.2 release.
This commit is contained in:
@ -62,7 +62,7 @@
|
||||
"import torch\n",
|
||||
"import random\n",
|
||||
"\n",
|
||||
"import cutlass\n",
|
||||
"import cutlass_cppgen\n",
|
||||
"\n",
|
||||
"# This controls whether the C++ GEMM declaration will be printed at each step. \n",
|
||||
"# Set to `false` to omit this information.\n",
|
||||
@ -80,7 +80,7 @@
|
||||
"dilation = (1, 1)\n",
|
||||
"\n",
|
||||
"# Compute the output size [N, P, Q, K]\n",
|
||||
"N, P, Q, K = cutlass.Conv2d.output_size((N, H, W, C), (K, R, S, C), padding, stride, dilation)\n",
|
||||
"N, P, Q, K = cutlass_cppgen.Conv2d.output_size((N, H, W, C), (K, R, S, C), padding, stride, dilation)\n",
|
||||
"\n",
|
||||
"dtype = torch.float16\n",
|
||||
"type_A = torch.float16\n",
|
||||
@ -111,7 +111,7 @@
|
||||
"source": [
|
||||
"## Declaring and running a Conv2d Fprop\n",
|
||||
"\n",
|
||||
"We first show you how to run a Conv2d in the forward propagation. To get started, one only needs to provide the tensors declared above to the `cutlass.op.Conv2dFprop` call. This sets up a default Conv2d fprop operation for the given device on which you are running. \n",
|
||||
"We first show you how to run a Conv2d in the forward propagation. To get started, one only needs to provide the tensors declared above to the `cutlass_cppgen.op.Conv2dFprop` call. This sets up a default Conv2d fprop operation for the given device on which you are running. \n",
|
||||
"\n",
|
||||
"Assuming that we are running on SM80, the default is a Conv2d that leverages FP16 Tensor Core operations.\n",
|
||||
"\n",
|
||||
@ -125,7 +125,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Specifying `element_accumulator` is not required if it is the same as `element`\n",
|
||||
"plan = cutlass.Conv2dFprop(element=dtype, element_accumulator=torch.float32)\n",
|
||||
"plan = cutlass_cppgen.Conv2dFprop(element=dtype, element_accumulator=torch.float32)\n",
|
||||
"plan.run(input, weight, tensor_C, output, stride, padding, dilation, alpha, beta, print_module=print_module)"
|
||||
]
|
||||
},
|
||||
@ -133,7 +133,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"There are many other ways to construct a plan from `cutlass.op.Conv2dFprop` (e.g., by specifying the types of each operand, by providing representative tensors as input). For more details on these, see the documentation in the `cutlass.op.Conv2dFprop` constructor.\n",
|
||||
"There are many other ways to construct a plan from `cutlass_cppgen.op.Conv2dFprop` (e.g., by specifying the types of each operand, by providing representative tensors as input). For more details on these, see the documentation in the `cutlass_cppgen.op.Conv2dFprop` constructor.\n",
|
||||
"\n",
|
||||
"We then compare the output to running the Conv2d using PyTorch. PyTorch uses the NCHW layout by default, so permutations are required."
|
||||
]
|
||||
@ -200,7 +200,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"plan_dgrad = cutlass.Conv2dDgrad(element=dtype, element_accumulator=torch.float32)\n",
|
||||
"plan_dgrad = cutlass_cppgen.Conv2dDgrad(element=dtype, element_accumulator=torch.float32)\n",
|
||||
"plan_dgrad.run(grad_output, weight, tensor_C_dgrad, grad_input, stride, padding, dilation, alpha, beta, print_module=print_module)\n",
|
||||
"\n",
|
||||
"grad_input_torch = alpha * torch.nn.grad.conv2d_input(\n",
|
||||
@ -225,7 +225,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"plan_wgrad = cutlass.Conv2dWgrad(element=dtype, element_accumulator=torch.float32)\n",
|
||||
"plan_wgrad = cutlass_cppgen.Conv2dWgrad(element=dtype, element_accumulator=torch.float32)\n",
|
||||
"plan_wgrad.run(grad_output, input, tensor_C_wgrad, grad_weight, stride, padding, dilation, alpha, beta, print_module=print_module)\n",
|
||||
"\n",
|
||||
"grad_weight_torch = alpha * torch.nn.grad.conv2d_weight(\n",
|
||||
|
||||
Reference in New Issue
Block a user