CUTLASS 3.2 (#1024)

* CUTLASS 3.2
This commit is contained in:
ANIKET SHIVAM
2023-08-07 14:50:32 -10:00
committed by GitHub
parent a0d787b746
commit 4575443d44
392 changed files with 47559 additions and 7940 deletions

View File

@ -8,7 +8,7 @@
"source": [
"# Exporting a CUTLASS grouped GEMM kernel to a PyTorch CUDA extension\n",
"This notebook walks through a basic example of using the CUTLASS Python interface to declare\n",
"a grouped GEMM kernel and export it as a PyTorch CUDA extension.\n",
"a grouped GEMM kernel and export it as a PyTorch CUDA extension. Note that GEMM and Conv2d kernels can also be exported as PyTorch CUDA extensions.\n",
"\n",
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NVIDIA/cutlass/tree/master/examples/00_basic_gemm.ipynb)\n",
"\n",
@ -230,14 +230,6 @@
"print('Non-Grouped: {:.3f} us'.format(nongrouped * 1e6/num_profile))\n",
"print('Speedup: {:.3f}'.format(nongrouped / grouped))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f22fc696",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {