diff --git a/CHANGELOG.md b/CHANGELOG.md index eda03e71..7dbea286 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # NVIDIA CUTLASS Changelog +## [3.9.1](https://github.com/NVIDIA/cutlass/releases/tag/v3.9.1) (2025-04-30) + +* Fixed Group Gemm hang issue in CUTLASS 3.x +* Improved Hopper [Blockwise](./examples/67_hopper_fp8_warp_specialized_gemm_with_blockwise_scaling/67_hopper_fp8_warp_specialized_gemm_with_blockwise_scaling.cu) and [Groupwise](./examples/67_hopper_fp8_warp_specialized_gemm_with_blockwise_scaling/67_hopper_fp8_warp_specialized_gemm_with_groupwise_scaling.cu) GEMM performance. ## [3.9.0](https://github.com/NVIDIA/cutlass/releases/tag/v3.9.0) (2025-04-24) diff --git a/README.md b/README.md index d82200a1..63bd4d41 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ ![ALT](./media/images/gemm-hierarchy-with-epilogue-no-labels.png "Complete CUDA GEMM decomposition") -# CUTLASS 3.9.0 +# CUTLASS 3.9.1 -_CUTLASS 3.9.0 - April 2025_ +_CUTLASS 3.9.1 - April 2025_ CUTLASS is a collection of CUDA C++ template abstractions for implementing high-performance matrix-matrix multiplication (GEMM) and related computations at all levels diff --git a/include/cutlass/version.h b/include/cutlass/version.h index 4514330c..304f9abf 100644 --- a/include/cutlass/version.h +++ b/include/cutlass/version.h @@ -36,7 +36,7 @@ #define CUTLASS_MAJOR 3 #define CUTLASS_MINOR 9 -#define CUTLASS_PATCH 0 +#define CUTLASS_PATCH 1 #ifdef CUTLASS_VERSIONS_GENERATED #include "cutlass/version_extended.h" diff --git a/python/cutlass/__init__.py b/python/cutlass/__init__.py index ebda2ff2..762b06c3 100644 --- a/python/cutlass/__init__.py +++ b/python/cutlass/__init__.py @@ -133,7 +133,7 @@ def get_option_registry(): this._option_registry = OptionRegistry(device_cc()) return this._option_registry -this.__version__ = '3.9.0' +this.__version__ = '3.9.1' from cutlass.backend import create_memory_pool from cutlass.emit.pytorch import pytorch diff --git a/python/setup_library.py b/python/setup_library.py index d5f74b9a..a4583805 100644 --- a/python/setup_library.py +++ b/python/setup_library.py @@ -36,7 +36,7 @@ from setuptools import setup def perform_setup(): setup( name='cutlass_library', - version='3.9.0', + version='3.9.1', description='CUTLASS library generation scripts', packages=['cutlass_library'] ) diff --git a/python/setup_pycute.py b/python/setup_pycute.py index 31f92295..0be2f108 100644 --- a/python/setup_pycute.py +++ b/python/setup_pycute.py @@ -36,7 +36,7 @@ from setuptools import setup def perform_setup(): setup( name='pycute', - version='3.9.0', + version='3.9.1', description='Python implementation of CuTe', packages=['pycute'], )