CUTLASS 3.3.0 (#1167)

* Release 3.3.0: Adds support for mixed precision GEMMs on Hopper and Ampere. Adds support for < 16B aligned GEMMs on Hopper. Enhancements to EVT. Enhancements to Python interface. Enhancements to sub-byte type handling in CuTe. Several other bug-fixes and performance improvements.
* minor doc update
2023-11-02 08:09:05 -07:00
parent 922fb5108b
commit c008b4aea8
263 changed files with 16214 additions and 5008 deletions
--- a/python/cutlass_library/rank_k_operation.py
+++ b/python/cutlass_library/rank_k_operation.py
@@ -35,12 +35,18 @@ Utilities for emitting RankK kernels
 """

 import enum
-import os.path
-import shutil
 import functools
 import operator
+import os.path
+import shutil

-from cutlass_library.library import *
+try:
+  import builtins
+  if hasattr(builtins, "CUTLASS_IGNORE_PACKAGE") and CUTLASS_IGNORE_PACKAGE == True:
+    raise ImportError("Disabling attempt to import cutlass_library")
+  from cutlass_library.library import *
+except ImportError:
+  from library import *


 ###################################################################################################
@@ -80,7 +86,7 @@ class RankKOperation:
  #
  def is_mixed_input(self):
    return False
-  
+
  #
  def is_planar_complex(self):
    return False
@@ -259,7 +265,7 @@ using Operation_${operation_name} =
  def emit(self, operation):

    threadblock_shape = operation.tile_description.threadblock_shape
- 
+
    warp_count = operation.tile_description.warp_count
    warp_shape = [threadblock_shape[idx] // warp_count[idx] for idx in range(3)]