Compare commits
2 Commits
thakkarv/4
...
release/3.
| Author | SHA1 | Date | |
|---|---|---|---|
| | 44c704eae8 | | |
| | 6581237a48 | | |
@ -1,5 +1,8 @@
|
||||
# NVIDIA CUTLASS Changelog
|
||||
|
||||
## [3.2.2](https://github.com/NVIDIA/cutlass/releases/tag/v3.2.2) (2023-10-25)
|
||||
* Fixes an illegal memory access issue [1138](https://github.com/NVIDIA/cutlass/issues/1138) hit by FlashAttention tests in PyTorch.
|
||||
|
||||
## [3.2.1](https://github.com/NVIDIA/cutlass/releases/tag/v3.2.1) (2023-09-22)
|
||||
* Python support for the SM90 Epilogue Visitor Tree (EVT), on top of the C++ support released in 3.2.0.
|
||||
* SM80 EVT support in C++ and Python.
|
||||
|
||||
@ -40,7 +40,7 @@ endif()
|
||||
message(STATUS "CMake Version: ${CMAKE_VERSION}")
|
||||
set(IMPLICIT_CMAKE_CXX_STANDARD OFF CACHE BOOL "Do not explicitly specify -std=c++11 if set")
|
||||
|
||||
project(CUTLASS VERSION 3.2.1 LANGUAGES CXX)
|
||||
project(CUTLASS VERSION 3.2.2 LANGUAGES CXX)
|
||||
include(${CMAKE_CURRENT_SOURCE_DIR}/CUDA.cmake)
|
||||
|
||||
if (CUDA_VERSION VERSION_LESS 11.3)
|
||||
|
||||
@ -62,6 +62,9 @@ CUTLASS 3.2.1 is an update to CUTLASS adding:
|
||||
- SM90 rasterization direction support in the CUTLASS profiler.
|
||||
- Improvement for CUTLASS profiler build times.
|
||||
|
||||
CUTLASS 3.2.2 is a minor update to CUTLASS adding:
|
||||
- Bug fix for an illegal memory access issue hit by FlashAttention tests in PyTorch. See [1138](https://github.com/NVIDIA/cutlass/issues/1138) for details.
|
||||
|
||||
Minimum requirements:
|
||||
|
||||
- Architecture: Volta
|
||||
|
||||
@ -93,8 +93,6 @@ struct device_ptr
|
||||
{
|
||||
using value_type = T;
|
||||
|
||||
static const uint32_t ElementsPerStoredItem = sizeof(T) * 8 / sizeof_bits_v<T>;
|
||||
|
||||
CUTE_HOST_DEVICE constexpr
|
||||
device_ptr(T* ptr) : ptr_(ptr) {}
|
||||
|
||||
@ -113,7 +111,7 @@ struct device_ptr
|
||||
|
||||
template <class Index>
|
||||
CUTE_HOST_DEVICE constexpr
|
||||
DerivedType operator+(Index const& i) const { return {ptr_ + i / ElementsPerStoredItem}; }
|
||||
DerivedType operator+(Index const& i) const { return {ptr_ + i}; }
|
||||
|
||||
CUTE_HOST_DEVICE constexpr friend
|
||||
ptrdiff_t operator-(device_ptr<T,DerivedType> const& a,
|
||||
|
||||
Reference in New Issue
Block a user