v4.2 tag release. (#2638)
This commit is contained in:
@ -153,7 +153,7 @@ class GemmUniversalLauncher:
|
||||
else:
|
||||
data_cutlass = data_ref.transpose(-1, -2).contiguous()
|
||||
|
||||
data_cutlass = data_cutlass_cppgen.to("cuda")
|
||||
data_cutlass = data_cutlass.to("cuda")
|
||||
|
||||
# As of this writing, few operations in PyTorch are supported with FP8 data.
|
||||
# Thus, we perform computation in FP32 for FP8 reference checks.
|
||||
|
||||
@ -240,8 +240,8 @@ class GemmErrorTests(unittest.TestCase):
|
||||
"""
|
||||
cc = device_cc()
|
||||
|
||||
# F64 Tensor Core operations are only available on devices with CC >= 80
|
||||
supports_tensorop_f64 = cc >= 80
|
||||
# F64 Tensor Core operations are only available on certain devices
|
||||
supports_tensorop_f64 = cc in [80, 89, 90]
|
||||
plan = cutlass_cppgen.op.Gemm(cc=cc, element=cutlass_cppgen.DataType.f64, layout=cutlass_cppgen.LayoutType.RowMajor)
|
||||
|
||||
error_msg = f'Incorrectly raised an exception for availability of TensorOp with F64 operands on SM{cc}'
|
||||
@ -288,7 +288,7 @@ class GemmErrorTests(unittest.TestCase):
|
||||
with ExpectException(cc < 80, f'Requested more than 2 stages on SM{cc}'):
|
||||
td.stages = 3
|
||||
plan.construct(td)
|
||||
else:
|
||||
elif cc == 90:
|
||||
original_kschedule = td.kernel_schedule
|
||||
original_eschedule = td.epilogue_schedule
|
||||
with ExpectException(False, f'Incorrectly flagged an error for insufficient shared memory'):
|
||||
@ -296,10 +296,13 @@ class GemmErrorTests(unittest.TestCase):
|
||||
td.epilogue_schedule = cutlass_cppgen.EpilogueScheduleType.NoSmemWarpSpecialized
|
||||
td.stages = 3
|
||||
plan.construct(td)
|
||||
|
||||
# Reset schedules
|
||||
td.kernel_schedule = original_kschedule
|
||||
td.epilogue_schedule = original_eschedule
|
||||
elif cc in [100, 101, 103]:
|
||||
with ExpectException(False, f'Incorrectly flagged an error for insufficient shared memory'):
|
||||
td.stages = 3
|
||||
plan.construct(td)
|
||||
|
||||
with ExpectException(True, f'Requested too many stages'):
|
||||
td.stages = 100
|
||||
@ -321,12 +324,12 @@ class GemmErrorTests(unittest.TestCase):
|
||||
td.epilogue_schedule = cutlass_cppgen.EpilogueScheduleType.TmaWarpSpecialized
|
||||
plan.construct(td)
|
||||
|
||||
with ExpectException(True, f'Requested a non-auto kernel schedule with an auto epilogue schedule'):
|
||||
with ExpectException(cc == 90, f'Requested a non-auto kernel schedule with an auto epilogue schedule'):
|
||||
td.kernel_schedule = cutlass_cppgen.KernelScheduleType.TmaWarpSpecializedPingpong
|
||||
td.epilogue_schedule = cutlass_cppgen.EpilogueScheduleType.ScheduleAuto
|
||||
plan.construct(td)
|
||||
|
||||
with ExpectException(True, f'Requested an auto kernel schedule with a non-auto epilogue schedule'):
|
||||
with ExpectException(cc == 90, f'Requested an auto kernel schedule with a non-auto epilogue schedule'):
|
||||
td.kernel_schedule = cutlass_cppgen.KernelScheduleType.ScheduleAuto
|
||||
td.epilogue_schedule = cutlass_cppgen.EpilogueScheduleType.TmaWarpSpecialized
|
||||
plan.construct(td)
|
||||
|
||||
Reference in New Issue
Block a user