CUTLASS 3.3.0 (#1167)

* Release 3.3.0

Adds support for mixed-precision GEMMs on Hopper and Ampere
Adds support for < 16B aligned GEMMs on Hopper
Enhancements to EVT
Enhancements to Python interface
Enhancements to sub-byte type handling in CuTe
Several other bug fixes and performance improvements.

* minor doc update
This commit is contained in:
Pradeep Ramani
2023-11-02 08:09:05 -07:00
committed by GitHub
parent 922fb5108b
commit c008b4aea8
263 changed files with 16214 additions and 5008 deletions

View File

@@ -35,12 +35,18 @@ Utilities for emitting Symm kernels
"""
import enum
import os.path
import shutil
import functools
import operator
import os.path
import shutil
from cutlass_library.library import *
try:
import builtins
if hasattr(builtins, "CUTLASS_IGNORE_PACKAGE") and CUTLASS_IGNORE_PACKAGE == True:
raise ImportError("Disabling attempt to import cutlass_library")
from cutlass_library.library import *
except ImportError:
from library import *
###################################################################################################
@@ -82,7 +88,7 @@ class SymmOperation:
#
def is_mixed_input(self):
return self.A.element != self.B.element
#
def is_planar_complex(self):
return False
@@ -241,7 +247,7 @@ using Operation_${operation_name} =
// Symm operator ${operation_name}
using Operation_${operation_name} =
typename cutlass::gemm::device::Symm<
${element_a}, ${layout_a}, ${side_mode}, ${fill_mode},
${element_a}, ${layout_a}, ${side_mode}, ${fill_mode},
${element_b}, ${layout_b},
${element_c}, ${layout_c},
${element_accumulator},