Rename python/cutlass to python/cutlass_cppgen (#2652)
This commit is contained in:
33
python/cutlass_cppgen/backend/evt/frontend/__init__.py
Normal file
33
python/cutlass_cppgen/backend/evt/frontend/__init__.py
Normal file
@ -0,0 +1,33 @@
|
||||
#################################################################################################
|
||||
#
|
||||
# Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
#################################################################################################
|
||||
|
||||
from cutlass_cppgen.backend.evt.frontend.python_ast import PythonASTFrontend
|
||||
272
python/cutlass_cppgen/backend/evt/frontend/frontend_base.py
Normal file
272
python/cutlass_cppgen/backend/evt/frontend/frontend_base.py
Normal file
@ -0,0 +1,272 @@
|
||||
#################################################################################################
|
||||
#
|
||||
# Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
#################################################################################################
|
||||
|
||||
"""
|
||||
Base class for Python EVT Frontend
|
||||
"""
|
||||
|
||||
from typing import Union
|
||||
|
||||
from cutlass_library import DataType
|
||||
from cutlass_cppgen.backend.evt.ir import (
|
||||
ComputeNode,
|
||||
DAGIR,
|
||||
LayoutNode,
|
||||
LoadNode,
|
||||
StoreNode,
|
||||
)
|
||||
from cutlass_cppgen.backend.evt.passes import (
|
||||
EVTGraphDrawer,
|
||||
EVTPassManager,
|
||||
GetSmemSize,
|
||||
PassDAG2Tree,
|
||||
PassGetArgumentType,
|
||||
PassGetImpl,
|
||||
PassFixElementD,
|
||||
PassLayoutManipulateElimination,
|
||||
PassPreprocessRed,
|
||||
PassShapeTypePropagation,
|
||||
)
|
||||
from cutlass_cppgen.backend.evt.passes.util import cc_map
|
||||
from cutlass_cppgen.backend.utils import device_cc
|
||||
from cutlass_cppgen.epilogue.evt_ops import permute, reshape
|
||||
from cutlass_cppgen.utils.datatypes import library_type
|
||||
|
||||
|
||||
class EVTFrontendBase:
    """
    Base class for Python EVT frontends.

    A frontend owns a :class:`DAGIR` and an :class:`EVTPassManager`. Subclasses
    implement :meth:`parse` to populate the DAG IR (using the ``add_*`` helpers
    below); :meth:`trace` then runs ``parse`` followed by the pass pipeline and
    copies the results of the passes onto the frontend instance.
    """

    # Layout functions that frontends recognize by name
    layout_fns = {
        "permute": permute,
        "reshape": reshape
    }

    def __init__(self, cc, element_compute=DataType.f32, additional_passes=None, **kwargs) -> None:
        """
        :param cc: compute capability the epilogue is built for
        :param element_compute: data type used for compute nodes; converted with ``library_type``
        :param additional_passes: extra passes appended after the default pass list (optional)
        :param kwargs: accepted for signature compatibility with subclasses; not used here
        """
        self.cc = cc
        self.element_compute = library_type(element_compute)
        self.dag_ir = DAGIR(self.cc, self.element_compute)

        # Counters used to generate default names for compute / layout / immediate nodes
        self.compute_cnt = 0
        self.layout_cnt = 0
        self.imm_cnt = 0

        default_passes = [
            PassPreprocessRed,
            PassGetArgumentType,
            PassShapeTypePropagation,
            PassLayoutManipulateElimination,
            PassGetImpl,
            PassDAG2Tree,
            PassFixElementD
        ]
        # `None` replaces the former mutable `[]` default; any iterable of passes is accepted
        extra_passes = [] if additional_passes is None else list(additional_passes)
        self.pass_manager = EVTPassManager(self.dag_ir, default_passes + extra_passes)

        # The number of epilogue stages is fixed to 1 when cc == 80 and left unset otherwise
        self._epilogue_stages = 1 if self.cc == 80 else None

    @property
    def epilogue_stages(self):
        """
        Number of epilogue stages (``None`` when it has not been set)
        """
        return self._epilogue_stages

    @epilogue_stages.setter
    def epilogue_stages(self, stages):
        self._epilogue_stages = stages

    def parse(self, *args, **kwargs):
        """
        Populate the DAG IR from the frontend input. Must be overridden by subclasses.

        :raises NotImplementedError: always, in the base class
        """
        raise NotImplementedError("The 'parse' function must be overloaded in frontend class")

    def trace(self, *args, **kwargs):
        """
        Parse the input into the DAG IR, verify it, and run the pass pipeline.

        All arguments are forwarded to :meth:`parse`. After the passes have run,
        ``epilogue_thread_type``, ``reduction_names`` and (for some architectures)
        ``arg_c_type`` / ``arg_d_type`` are copied from the DAG IR onto ``self``.

        :raises RuntimeError: if ``cc >= 90`` and node "D" has users in the DAG IR
        """
        # Parse the input
        self.parse(*args, **kwargs)

        # Verify the DAG IR to ensure that "D" is the output node with out_degree = 0
        if self.cc >= 90:
            d_out_degree = self.dag_ir.out_degree("D")
            if d_out_degree != 0:
                raise RuntimeError(
                    f"On SM90 or higher, D is expected to be a output node with 0 users to "
                    f"enable smem reuse between C and D, but got {d_out_degree}")

        # Run the passes
        self.pass_manager()

        # Set the epilogue type
        self.epilogue_thread_type = self.dag_ir.epilogue_thread_type
        # Argument types of C and D are only collected when cc_map resolves cc to 90 or 100
        if cc_map[self.cc] in (90, 100):
            self.arg_c_type = self.dag_ir.arg_c_type
            self.arg_d_type = self.dag_ir.arg_d_type
        # NOTE(review): assumed to be set for every architecture (outside the check above) -- confirm
        self.reduction_names = self.dag_ir.reduction_names

    #
    # Helper functions for DAG IR manipulation
    #

    def add_node(self, node):
        """
        Add a node to the DAG IR
        """
        self.dag_ir.add_node(node)

    def add_edge(self, src, tgt, weight=0):
        """
        Add an edge from node `src` to node `tgt` in the DAG IR

        :param src: name of the source node
        :param tgt: name of the target node
        :param weight: edge weight; frontends use it to order the inputs of `tgt`
        :type weight: int
        """
        self.dag_ir.add_edge(src, tgt, weight=weight)

    def set_tensor(self, node_name, example):
        """
        Add an example tensor to node {node_name} in the DAG IR
        """
        meta = self.dag_ir.get_node_meta(node_name)
        meta.tensor = {"tensor": example}

    def set_store_tensor(self, node_name, example):
        """
        Add an example store tensor to node {node_name} in the DAG IR
        """
        meta = self.dag_ir.get_node_meta(node_name)
        meta.store_tensor = {"tensor": example}

    def mark_output(self, node_name):
        """
        Mark a store node as output

        :raises ValueError: if the node {node_name} is not a StoreNode
        """
        meta = self.dag_ir.get_node_meta(node_name)
        if not isinstance(meta, StoreNode):
            raise ValueError(
                f"Only StoreNodes can be marked as output. "
                f"Got {type(meta).__name__}: {node_name}")
        meta.is_output = True

    # Add node with specific type

    def add_load_node(self, name, example):
        """
        Add a Load node to DAG IR
        :param name: name of the loaded variable
        :type name: str
        :param example: example input
        :type example: np.ndarray|torch.Tensor|cupy.ndarray|float
        :raises ValueError: if `name` or `example` is None, or if the example of
            "accum" is neither rank-2 nor rank-3
        """
        if name is None:
            raise ValueError("Name is not provided.")
        if example is None:
            raise ValueError(f"Example input for {name} is not provided.")
        load_node = LoadNode(name)
        load_node.tensor = {"tensor": example}
        # Special logics for accumulator
        if name == "accum":
            rank = load_node.tensor.rank
            if rank == 2:
                # Prepend a dimension of size 1 so that the accumulator is always rank-3
                new_shape = (1, *load_node.tensor.shape)
                load_node.tensor.broadcast(new_shape)
            elif rank != 3:
                raise ValueError(f"Expect example inputs for 'accum' be a rank-2 or rank-3 tensor. Got {load_node.tensor.shape}.")
        self.add_node(load_node)

    def add_imm(self, value: Union[float,int]):
        """
        Add an immediate scalar value to DAG IR
        :param value: the value of the immediate scalar
        :type value: float|int
        :return: the name of the load node created for the immediate
        :raises ValueError: if `value` cannot be converted to float
        """
        try:
            value = float(value)
        except (TypeError, ValueError, OverflowError) as err:
            # Only conversion failures are translated; the cause is kept for debugging
            raise ValueError(f"{type(value).__name__} cannot be converted to float.") from err

        # '.' is replaced in the generated name; the counter suffix keeps names unique
        name = f"imm_{value}_k{self.imm_cnt}".replace('.', '_')
        self.imm_cnt += 1
        load_node = LoadNode(name)
        load_node.tensor = {"tensor": value, "is_constant": True}
        self.add_node(load_node)
        return name

    def add_compute_node(self, op, name=None):
        """
        Add a compute node.
        :param op: the computation op
        :param name: the node name (optional); defaults to "compute_<counter>"
        :type name: str
        :return: the name of the compute node
        """
        if name is None:
            name = f"compute_{self.compute_cnt}"
            self.compute_cnt += 1
        compute_node = ComputeNode(
            name=name, fn=op,
            element_output=self.element_compute,
            element_compute=self.element_compute)
        self.add_node(compute_node)
        return compute_node.name

    def add_layout_node(self, op, kwargs, name=None):
        """
        Add a layout node.
        :param op: the layout op
        :type op: evt_ops
        :param kwargs: keyword arguments forwarded to the layout node
        :type kwargs: dict
        :param name: the node name (optional); defaults to "layout_<counter>"
        :type name: str
        :return: the name of the layout node
        """
        if name is None:
            name = f"layout_{self.layout_cnt}"
            self.layout_cnt += 1
        layout_node = LayoutNode(name=name, fn=op, kwargs=kwargs)
        self.add_node(layout_node)
        return layout_node.name

    def add_store_node(self, name):
        """
        Add a Store node to DAG IR
        :param name: name of the stored variable
        :type name: str
        """
        store_node = StoreNode(name)
        self.add_node(store_node)

    #
    # Visualize the DAG IR
    #

    def visualize(self, name="dag_ir"):
        """
        Visualize the dag ir with svg file(s) written to the current directory
        :param name: the name of the graph
        :raises RuntimeError: if the graphs cannot be generated or written
        """
        drawer = EVTGraphDrawer(self.dag_ir, name)
        try:
            # A separate loop variable is used so the `name` parameter is not rebound
            for graph_name, graph in drawer.get_dot_graph():
                graph.write_svg(f"./{graph_name}.svg")
        except Exception as err:
            # The original failure is chained so that errors other than a missing
            # 'dot' executable remain diagnosable
            raise RuntimeError(
                "'dot' is not found in path. GraphDrawer is disabled. "
                "Please install it with 'sudo apt-get install graphviz'."
            ) from err

    #
    # Get shared memory size
    #

    def get_smem_size(self, tile_description):
        """
        Get the shared memory size of the epilogue
        :param tile_description: tile description forwarded to ``GetSmemSize``
        :return: the value computed by ``GetSmemSize`` for the DAG IR
        """
        return GetSmemSize(self.dag_ir)(tile_description)
|
||||
194
python/cutlass_cppgen/backend/evt/frontend/python_ast.py
Normal file
194
python/cutlass_cppgen/backend/evt/frontend/python_ast.py
Normal file
@ -0,0 +1,194 @@
|
||||
#################################################################################################
|
||||
#
|
||||
# Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
#################################################################################################
|
||||
|
||||
"""
|
||||
Python AST frontend that parses input into DAG IR
|
||||
"""
|
||||
|
||||
import ast
|
||||
import inspect
|
||||
import textwrap
|
||||
|
||||
from cutlass_library import DataType
|
||||
|
||||
import cutlass_cppgen
|
||||
from cutlass_cppgen.backend.evt.frontend.frontend_base import EVTFrontendBase
|
||||
from cutlass_cppgen.backend.epilogue import identity, relu, tanh, sigmoid, silu, hardswish, gelu
|
||||
from cutlass_cppgen.backend.library import FunctionalOp
|
||||
|
||||
|
||||
class PythonASTFrontend(EVTFrontendBase, ast.NodeVisitor):
    """
    Frontend that builds the DAG IR by visiting the Python AST of ``self.__call__``.

    Each ``visit_*`` method returns the name of the DAG IR node that represents
    the visited expression (or a raw Python value when ``no_imm`` is set), so
    that the caller can connect it with edges.
    """

    def __init__(self, cc, element_compute=DataType.f32, **kwargs):
        """
        :param cc: compute capability, forwarded to the base class
        :param element_compute: compute data type, forwarded to the base class
        :param kwargs: forwarded to the base class
        """
        super().__init__(cc, element_compute, **kwargs)
        # Flags
        # If this state is True, visit_Constant returns values without creating imm node
        self.no_imm = False
        # True while the value of a return statement is being visited
        self.visiting_return = False

    def parse(self, example_inputs):
        """
        Parse the source of ``self.__call__`` into the DAG IR

        :param example_inputs: example tensors indexed by the names of the function
            arguments and of the returned variables
        """
        self.example_inputs = example_inputs
        # The source is dedented so that a method body can be parsed as a standalone module
        self.source = textwrap.dedent(inspect.getsource(self.__call__))
        self.ast = ast.parse(self.source)
        self.visit(self.ast)

    #
    # Helper functions
    #
    @staticmethod
    def ast_op_to_bindings(op):
        """
        Map an AST operator type or a function name to its binding

        :param op: an ``ast`` binary operator class or the name of a supported function
        :return: the binding; "sum" and "max" map to a tuple of two ops
        :raises KeyError: if `op` is not supported
        """
        mapping = {
            ast.Add: FunctionalOp.Plus,
            ast.Sub: FunctionalOp.Minus,
            ast.Mult: FunctionalOp.Multiplies,
            ast.Div: FunctionalOp.Divides,
            "maximum": FunctionalOp.Maximum,
            "minimum": FunctionalOp.Minimum,
            "identity": identity.binding_type,
            "relu": relu.binding_type,
            "tanh": tanh.binding_type,
            "sigmoid": sigmoid.binding_type,
            "silu": silu.binding_type,
            "hardswish": hardswish.binding_type,
            "gelu": gelu.binding_type,
            "multiply_add": FunctionalOp.MultiplyAdd,
            "sum": (FunctionalOp.Plus, FunctionalOp.AtomicAdd),
            "max": (FunctionalOp.Maximum, FunctionalOp.AtomicMaximum),
            "exp": FunctionalOp.Exp
        }
        return mapping[op]

    #
    # Visiting different node types
    #

    def visit_FunctionDef(self, node: ast.FunctionDef):
        """
        Register the positional arguments as load nodes, then visit the body in order
        """
        # Visit args and register load nodes
        for arg in node.args.args:
            self.visit(arg)
        for stmt in node.body:
            self.visit(stmt)

    def visit_arg(self, node: ast.arg):
        """
        Add a load node for a function argument

        :raises RuntimeError: if no example input is registered under the argument name
        """
        # Name of the argument
        name = node.arg
        try:
            example_tensor = self.example_inputs[name]
        except (LookupError, TypeError) as err:
            # Only lookup failures are translated; the cause is kept for debugging
            raise RuntimeError(f"Example input for {name} is not provided.") from err

        self.add_load_node(name, example_tensor)

    def visit_Name(self, node: ast.Name):
        """
        A variable is represented by its identifier, which is also its node name
        """
        return node.id

    def visit_Constant(self, node: ast.Constant):
        """
        Return the raw value when `no_imm` is set; otherwise add an immediate
        node and return its name
        """
        if self.no_imm:
            return node.value
        return self.add_imm(node.value)

    def visit_Tuple(self, node: ast.Tuple):
        """
        Visit every element and return the results as a tuple
        """
        return tuple(self.visit(elt) for elt in node.elts)

    def visit_keyword(self, node: ast.keyword):
        """
        Return a single-entry dict {keyword name: visited value}
        """
        return {node.arg: self.visit(node.value)}

    def visit_BinOp(self, node: ast.BinOp):
        """
        Add a compute node for a binary operation and connect both operands to it

        :return: the name of the compute node
        :raises SyntaxError: if the operation appears directly in a return statement
        """
        if self.visiting_return:
            raise SyntaxError("Return value cannot be an expression")
        lhs = self.visit(node.left)
        rhs = self.visit(node.right)
        op = self.ast_op_to_bindings(type(node.op))
        name = self.add_compute_node(op)

        # Add edges
        # The edge weights are used to sort the input args
        self.add_edge(lhs, name, weight=0)
        self.add_edge(rhs, name, weight=1)
        return name

    def visit_Assign(self, node: ast.Assign):
        """
        Add a store node named after the assignment target and connect the value to it

        Only the first target of the assignment is used.

        :return: the name of the store node
        """
        target = self.visit(node.targets[0])
        value = self.visit(node.value)
        # Create the assign node
        self.add_store_node(target)

        # Add edges
        self.add_edge(value, target)
        return target

    def visit_Call(self, node: ast.Call):
        """
        Add a layout node (for functions in `layout_fns`) or a compute node (for
        any other supported function) and connect the positional arguments to it

        :return: the name of the created node
        :raises SyntaxError: if the call appears directly in a return statement
        """
        if self.visiting_return:
            raise SyntaxError("Return value cannot be an expression")
        func = self.visit(node.func)
        args = [self.visit(arg) for arg in node.args]

        if func in self.layout_fns:
            # Parse kwargs
            # By default, visiting imm automatically creates a load node
            # However, in function call, keyword args are used to set
            # specific function attributes such as indices for permute
            # So no_imm is set to True temporarily
            self.no_imm = True
            try:
                kwargs = {}
                for kw in node.keywords:
                    kwargs.update(self.visit(kw))
            finally:
                # Reset the flag even if visiting a keyword fails
                self.no_imm = False
            op = self.layout_fns[func]
            name = self.add_layout_node(op, kwargs)
        else:
            op = self.ast_op_to_bindings(func)
            name = self.add_compute_node(op)

        # Add edges
        # The position of each argument is used as the edge weight
        for idx, arg in enumerate(args):
            self.add_edge(arg, name, weight=idx)
        return name

    def visit_Return(self, node: ast.Return):
        """
        Register every returned variable as an output store node

        The visited result is kept in `self.return_names` (a single name or a tuple).

        :raises RuntimeError: if no example input is registered under a returned name
        """
        self.visiting_return = True
        try:
            results = self.visit(node.value)
        finally:
            # Reset the flag even if the return value is rejected
            self.visiting_return = False
        self.return_names = results
        if not isinstance(results, tuple):
            results = (results,)
        for rst in results:
            try:
                example_tensor = self.example_inputs[rst]
            except (LookupError, TypeError) as err:
                raise RuntimeError(f"Example input for {rst} is not provided.") from err
            self.set_store_tensor(rst, example_tensor)
            self.mark_output(rst)
|
||||
Reference in New Issue
Block a user