v4.1 release

This commit is contained in:
Junkai-Wu
2025-07-03 20:07:53 +08:00
committed by GitHub
parent b995f93317
commit a1aaf2300a
155 changed files with 18407 additions and 6068 deletions

View File

@ -117,6 +117,82 @@ class TestEVTCompute(EVTTestCaseBase):
input_keys = ["C", "alpha", "beta"]
result_keys = ["D"]
launcher.verify((m, n, k), input_keys, result_keys, l)
def test_tanh(self):
    """
    Verify an epilogue that writes tanh(accum) to D
    """
    # Handed to EVTTestBed as a function object; its statements and
    # variable names are left exactly as written.
    def evt_tanh(accum):
        D = tanh(accum)
        return D

    for m, n, k, batch_count in self.get_problem_sizes(8):
        tensor_shape = (batch_count, m, n)
        # Both tensors share one shape; insertion order is "accum" then "D".
        example_inputs = {
            name: self.fake_tensor(self.element, tensor_shape)
            for name in ("accum", "D")
        }

        testbed = EVTTestBed(self.element, evt_tanh, example_inputs)
        # No input keys are passed; only D is checked.
        testbed.verify((m, n, k), [], ["D"], batch_count)
def test_sigmoid(self):
    """
    Verify an epilogue that writes sigmoid(accum) to D
    """
    # Handed to EVTTestBed as a function object; its statements and
    # variable names are left exactly as written.
    def evt_sigmoid(accum):
        D = sigmoid(accum)
        return D

    for m, n, k, batch_count in self.get_problem_sizes(8):
        tensor_shape = (batch_count, m, n)
        # Both tensors share one shape; insertion order is "accum" then "D".
        example_inputs = {
            name: self.fake_tensor(self.element, tensor_shape)
            for name in ("accum", "D")
        }

        testbed = EVTTestBed(self.element, evt_sigmoid, example_inputs)
        # No input keys are passed; only D is checked.
        testbed.verify((m, n, k), [], ["D"], batch_count)
def test_gelu(self):
    """
    Verify an epilogue that writes gelu(accum) to D
    """
    # Handed to EVTTestBed as a function object; its statements and
    # variable names are left exactly as written.
    def evt_gelu(accum):
        D = gelu(accum)
        return D

    for m, n, k, batch_count in self.get_problem_sizes(8):
        tensor_shape = (batch_count, m, n)
        # Both tensors share one shape; insertion order is "accum" then "D".
        example_inputs = {
            name: self.fake_tensor(self.element, tensor_shape)
            for name in ("accum", "D")
        }

        testbed = EVTTestBed(self.element, evt_gelu, example_inputs)
        # No input keys are passed; only D is checked.
        testbed.verify((m, n, k), [], ["D"], batch_count)
def test_exp(self):
    """
    Verify an epilogue that writes exp(accum) to D
    """
    # Handed to EVTTestBed as a function object; its statements and
    # variable names are left exactly as written.
    def evt_exp(accum):
        D = exp(accum)
        return D

    for m, n, k, batch_count in self.get_problem_sizes(8):
        tensor_shape = (batch_count, m, n)
        # Both tensors share one shape; insertion order is "accum" then "D".
        example_inputs = {
            name: self.fake_tensor(self.element, tensor_shape)
            for name in ("accum", "D")
        }

        testbed = EVTTestBed(self.element, evt_exp, example_inputs)
        # No input keys are passed; only D is checked.
        testbed.verify((m, n, k), [], ["D"], batch_count)
# Run the test cases of this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()

View File

@ -49,6 +49,51 @@ cutlass.set_log_level(logging.WARNING)
@unittest.skipIf(device_cc() not in [80, 86, 89, 90], "This unittest is only supported on CC [80, 86, 89, 90]")
class TestEVTMixed(EVTTestCaseBase):
def test_same_variable_used_multiple_times(self):
    """
    z0 appears twice in one expression (D = z0 + z0) and is also returned
    """
    # Handed to EVTTestBed as a function object; its statements and
    # variable names are left exactly as written.
    def evt_aux_store(accum):
        z0 = relu(accum)
        D = z0 + z0
        return z0, D

    for m, n, k, batch_count in self.get_problem_sizes(8):
        tensor_shape = (batch_count, m, n)
        # All three tensors share one shape; insertion order is
        # "accum", "D", "z0".
        example_inputs = {
            name: self.fake_tensor(self.element, tensor_shape)
            for name in ("accum", "D", "z0")
        }

        testbed = EVTTestBed(self.element, evt_aux_store, example_inputs)
        # Both returned values, z0 and D, are checked.
        testbed.verify((m, n, k), ["accum"], ["z0", "D"], batch_count)
def test_no_lca(self):
    """
    E feeds three consumers (F, tmp_2 and D) while F is never returned

    NOTE(review): presumably this is the case where the users of E have
    no lowest common ancestor in the epilogue graph. That reading is
    inferred from the test name; confirm against the EVT passes.
    """
    # Handed to EVTTestBed as a function object; its statements and
    # variable names are left exactly as written. F is assigned but not
    # used afterwards, as in the original.
    def evt_no_lca(accum, bias):
        E = relu(accum)
        F = E + bias
        tmp_2 = E + 2
        D = tmp_2 + E
        return D

    for m, n, k, l in self.get_problem_sizes(8):
        example_inputs = {
            "accum": self.fake_tensor(self.element, (l, m, n)),
            "D": self.fake_tensor(self.element, (l, m, n)),
            # bias is the only tensor created with an explicit stride.
            "bias": self.fake_tensor(self.element, (m,1), stride=(1,0)),
        }

        launcher = EVTTestBed(self.element, evt_no_lca, example_inputs)
        input_keys = ["accum", "bias"]
        result_keys = ["D"]
        launcher.verify((m, n, k), input_keys, result_keys, l)
def test_mixed_dag(self):
def evt_mixed_dag(accum, alpha, C, beta, aux, cbias, rbias):
F = alpha * accum + (beta * C + aux)

View File

@ -49,6 +49,31 @@ cutlass.set_log_level(logging.WARNING)
@unittest.skipIf(device_cc() not in [80, 86, 89, 90], "This unittest is only supported on CC [80, 86, 89, 90]")
class TestEVTStore(EVTTestCaseBase):
@unittest.skipIf(device_cc() != 90, "This test is only for CC 90")
def test_invalid_store(self):
    """
    Constructing the test bed must raise RuntimeError when D has a user
    """
    # Handed to EVTTestBed as a function object; its statements and
    # variable names are left exactly as written.
    def evt_invalid_store(accum):
        D = accum
        F = D + 1 # D has users, which is not allowed on SM90 or higher
        return D, F

    expected_message = (
        r"On SM90 or higher, D is expected to be a output node with 0 users "
        r"to enable smem reuse between C and D, but got 1"
    )

    for m, n, _k, batch_count in self.get_problem_sizes(8):
        tensor_shape = (batch_count, m, n)
        # Insertion order is "accum", "D", "F".
        example_inputs = {
            name: self.fake_tensor(self.element, tensor_shape)
            for name in ("accum", "D", "F")
        }

        # The error is expected from the constructor itself; verify() is
        # never called.
        with self.assertRaisesRegex(RuntimeError, expected_message):
            EVTTestBed(self.element, evt_invalid_store, example_inputs)

        # One problem size is enough.
        break
def test_aux_store(self):
"""
Returning a tensor with shape [m, n]

View File

@ -185,7 +185,9 @@ class EVTTestBed:
# Compare the results
for result, ref in zip(result_keys, reference_results):
assert torch.equal(epilogue_args[result].flatten(), ref.flatten())
assert torch.equal(
epilogue_args[result].flatten(),
ref.masked_fill(torch.isnan(ref), float('inf')).flatten())
# Run profile
if self.profile:
@ -210,8 +212,11 @@ class EVTTestCaseBase(unittest.TestCase):
torch.random.manual_seed(42)
def fake_tensor(self, element, shape):
    """
    Build a Tensor with the given element type and shape, tagged RowMajor
    """
    row_major = cutlass.LayoutType.RowMajor
    return Tensor(element=element, shape=shape, layout_tag=row_major)
def fake_tensor(self, element, shape, stride=None):
    """
    Build a Tensor with the given element type and shape

    When stride is given it is forwarded to Tensor as-is; otherwise the
    tensor is tagged with the RowMajor layout.
    """
    if stride is not None:
        return Tensor(element=element, shape=shape, stride=stride)
    return Tensor(element=element, shape=shape, layout_tag=cutlass.LayoutType.RowMajor)
def get_problem_sizes(self, alignment, k=None, batch_count=[3,]):
k = k if k else self.k