CUTLASS 3.2 (#1024)

* CUTLASS 3.2
2023-08-07 14:50:32 -10:00
parent a0d787b746
commit 4575443d44
392 changed files with 47559 additions and 7940 deletions
--- a/examples/python/02_pytorch_extension_grouped_gemm.ipynb
+++ b/examples/python/02_pytorch_extension_grouped_gemm.ipynb
@@ -8,7 +8,7 @@
   "source": [
    "# Exporting a CUTLASS grouped GEMM kernel to a PyTorch CUDA extension\n",
    "This notebook walks through a basic example of using the CUTLASS Python interface to declare\n",
-    "a grouped GEMM kernel and export it as a PyTorch CUDA extension.\n",
+    "a grouped GEMM kernel and export it as a PyTorch CUDA extension. Note that GEMM and Conv2d can also be exported as PyTorch CUDA extensions.\n",
    "\n",
    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NVIDIA/cutlass/tree/master/examples/00_basic_gemm.ipynb)\n",
    "\n",
@@ -230,14 +230,6 @@
    "print('Non-Grouped: {:.3f} us'.format(nongrouped * 1e6/num_profile))\n",
    "print('Speedup: {:.3f}'.format(nongrouped / grouped))"
   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "f22fc696",
-   "metadata": {},
-   "outputs": [],
-   "source": []
  }
 ],
 "metadata": {