v3.9 update (#2203)
* v3.9 update * voidD --------- Co-authored-by: yuzhai <yuzhai@nvidia.com>
This commit is contained in:
@@ -134,7 +134,7 @@ def get_option_registry():
|
||||
this._option_registry = OptionRegistry(device_cc())
|
||||
return this._option_registry
|
||||
|
||||
this.__version__ = '3.8.0'
|
||||
this.__version__ = '3.9.0'
|
||||
|
||||
from cutlass.backend import create_memory_pool
|
||||
from cutlass.emit.pytorch import pytorch
|
||||
|
||||
@@ -282,6 +282,8 @@ def _computeFlopsPerByte(operation, m, n, k, batch_count=1, beta=0.0):
|
||||
def emit_gemm_kernel_testlist(manifest, curr_build_dir, arch, mode
|
||||
):
|
||||
profiler_reference_computing = "--verification-providers=device --providers=cutlass"
|
||||
|
||||
|
||||
# beta values for L0 and L1
|
||||
# TODO: randomize beta values for wider coverage
|
||||
beta_values = [0.5]
|
||||
|
||||
@@ -10025,7 +10025,8 @@ def GenerateSM120_TensorOp_fp4_UMMA_gemm_with_block_scaled(manifest, cuda_versio
|
||||
|
||||
tile_sizes_cooperative = [
|
||||
[128, 128, 128],
|
||||
[128, 128, 256]
|
||||
[128, 128, 256],
|
||||
[256, 128, 128]
|
||||
]
|
||||
|
||||
tile_sizes_pingpong = [
|
||||
|
||||
@@ -36,7 +36,7 @@ from setuptools import setup
|
||||
def perform_setup():
|
||||
setup(
|
||||
name='cutlass_library',
|
||||
version='3.8.0',
|
||||
version='3.9.0',
|
||||
description='CUTLASS library generation scripts',
|
||||
packages=['cutlass_library']
|
||||
)
|
||||
|
||||
@@ -36,7 +36,7 @@ from setuptools import setup
|
||||
def perform_setup():
|
||||
setup(
|
||||
name='pycute',
|
||||
version='3.8.0',
|
||||
version='3.9.0',
|
||||
description='Python implementation of CuTe',
|
||||
packages=['pycute'],
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user