@ -8,7 +8,7 @@
|
||||
"source": [
|
||||
"# Exporting a CUTLASS grouped GEMM kernel to a PyTorch CUDA extension\n",
|
||||
"This notebook walks through a basic example of using the CUTLASS Python interface to declare\n",
|
||||
"a grouped GEMM kernel and export it as a PyTorch CUDA extension.\n",
|
||||
"a grouped GEMM kernel and export it as a PyTorch CUDA extension. Note that GEMM and Conv2d kernels can also be exported as PyTorch CUDA extensions.\n",
|
||||
"\n",
|
||||
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NVIDIA/cutlass/tree/master/examples/00_basic_gemm.ipynb)\n",
|
||||
"\n",
|
||||
@ -230,14 +230,6 @@
|
||||
"print('Non-Grouped: {:.3f} us'.format(nongrouped * 1e6/num_profile))\n",
|
||||
"print('Speedup: {:.3f}'.format(nongrouped / grouped))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f22fc696",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
Reference in New Issue
Block a user