Rename python/cutlass to python/cutlass_cppgen (#2652)

2025-09-18 13:26:57 -05:00
parent 74825181f2
commit b234a8c024
71 changed files with 1 additions and 1 deletions
--- a/python/cutlass_cppgen/backend/evt/frontend/__init__.py
+++ b/python/cutlass_cppgen/backend/evt/frontend/__init__.py
@@ -0,0 +1,33 @@
+#################################################################################################
+#
+# Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+#################################################################################################
+
+from cutlass_cppgen.backend.evt.frontend.python_ast import PythonASTFrontend
--- a/python/cutlass_cppgen/backend/evt/frontend/frontend_base.py
+++ b/python/cutlass_cppgen/backend/evt/frontend/frontend_base.py
@@ -0,0 +1,272 @@
+#################################################################################################
+#
+# Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+#################################################################################################
+
+"""
+Base class for Python EVT Frontend
+"""
+
+from typing import Union
+
+from cutlass_library import DataType
+from cutlass_cppgen.backend.evt.ir import (
+    ComputeNode,
+    DAGIR,
+    LayoutNode,
+    LoadNode,
+    StoreNode,
+)
+from cutlass_cppgen.backend.evt.passes import (
+    EVTGraphDrawer,
+    EVTPassManager,
+    GetSmemSize,
+    PassDAG2Tree,
+    PassGetArgumentType,
+    PassGetImpl,
+    PassFixElementD,
+    PassLayoutManipulateElimination,
+    PassPreprocessRed,
+    PassShapeTypePropagation,
+)
+from cutlass_cppgen.backend.evt.passes.util import cc_map
+from cutlass_cppgen.backend.utils import device_cc
+from cutlass_cppgen.epilogue.evt_ops import permute, reshape
+from cutlass_cppgen.utils.datatypes import library_type
+
+
+class EVTFrontendBase:
+    """
+    Common machinery shared by the Python EVT frontends.
+
+    An instance owns a ``DAGIR`` and an ``EVTPassManager``. Subclasses implement
+    :meth:`parse` to populate the DAG IR through the ``add_*`` helpers below;
+    :meth:`trace` then runs the pass pipeline over the populated IR.
+    """
+    # Layout functions recognized by name, mapped to their evt_ops implementation
+    layout_fns = {
+        "permute": permute,
+        "reshape": reshape
+    }
+
+    def __init__(self, cc, element_compute=DataType.f32, additional_passes=None, **kwargs) -> None:
+        """
+        :param cc: compute capability the epilogue is built for
+        :param element_compute: compute data type, converted with ``library_type``
+        :param additional_passes: extra passes appended after the default pipeline (optional)
+        :type additional_passes: list
+        :param kwargs: accepted for subclass/caller compatibility; not used here
+        """
+        self.cc = cc
+        self.element_compute = library_type(element_compute)
+        self.dag_ir = DAGIR(self.cc, self.element_compute)
+        # Counters used to generate unique names for auto-named nodes
+        self.compute_cnt = 0
+        self.layout_cnt = 0
+        self.imm_cnt = 0
+
+        default_passes = [
+            PassPreprocessRed,
+            PassGetArgumentType,
+            PassShapeTypePropagation,
+            PassLayoutManipulateElimination,
+            PassGetImpl,
+            PassDAG2Tree,
+            PassFixElementD
+        ]
+        # ``None`` stands in for "no extra passes" so that no list object is
+        # shared between calls through a mutable default argument
+        extra_passes = [] if additional_passes is None else list(additional_passes)
+        self.pass_manager = EVTPassManager(self.dag_ir, default_passes + extra_passes)
+
+        # Only cc 80 gets a preset stage count; every other cc starts unset
+        self._epilogue_stages = 1 if self.cc == 80 else None
+
+    @property
+    def epilogue_stages(self):
+        """
+        Number of epilogue stages (``None`` when it has not been set)
+        """
+        return self._epilogue_stages
+
+    @epilogue_stages.setter
+    def epilogue_stages(self, stages):
+        self._epilogue_stages = stages
+
+    def parse(self, *args, **kwargs):
+        """
+        Populate the DAG IR from the frontend-specific input.
+        Must be overridden by every concrete frontend.
+        """
+        raise NotImplementedError("The 'parse' function must be overloaded in frontend class")
+
+    def trace(self, *args, **kwargs):
+        """
+        Parse the input, verify the DAG IR, run the passes and cache the results
+        (epilogue thread type, C/D argument types, reduction names) on ``self``.
+        All arguments are forwarded unchanged to :meth:`parse`.
+        """
+        # Parse the input
+        self.parse(*args, **kwargs)
+
+        # Verify the DAG IR to ensure that "D" is the output node with out_degree = 0
+        if self.cc >= 90:
+            d_users = self.dag_ir.out_degree("D")
+            if d_users != 0:
+                raise RuntimeError(
+                    f"On SM90 or higher, D is expected to be a output node with 0 users to "
+                    f"enable smem reuse between C and D, but got {d_users}")
+
+        # Run the passes
+        self.pass_manager()
+        # Set the epilogue type
+        self.epilogue_thread_type = self.dag_ir.epilogue_thread_type
+        # The C/D argument types are only read for cc values that cc_map maps to 90 or 100
+        if cc_map[self.cc] in [90, 100]:
+            self.arg_c_type = self.dag_ir.arg_c_type
+            self.arg_d_type = self.dag_ir.arg_d_type
+        self.reduction_names = self.dag_ir.reduction_names
+
+    #
+    # Helper functions for DAG IR manipulation
+    #
+
+    def add_node(self, node):
+        """
+        Add a node to the DAG IR
+        """
+        self.dag_ir.add_node(node)
+
+    def add_edge(self, src, tgt, weight=0):
+        """
+        Add an edge from node ``src`` to node ``tgt`` in the DAG IR
+        :param weight: edge weight forwarded to the DAG IR
+        """
+        self.dag_ir.add_edge(src, tgt, weight=weight)
+
+    def set_tensor(self, node_name, example):
+        """
+        Add an example tensor to node {node_name} in the DAG IR
+        """
+        meta = self.dag_ir.get_node_meta(node_name)
+        meta.tensor = {"tensor": example}
+
+    def set_store_tensor(self, node_name, example):
+        """
+        Set the example tensor as the store tensor of node {node_name} in the DAG IR
+        """
+        meta = self.dag_ir.get_node_meta(node_name)
+        meta.store_tensor = {"tensor": example}
+
+    def mark_output(self, node_name):
+        """
+        Mark a store node as output
+        :raises ValueError: if the node is not a StoreNode
+        """
+        meta = self.dag_ir.get_node_meta(node_name)
+        if not isinstance(meta, StoreNode):
+            raise ValueError(
+                f"Only StoreNodes can be marked as output. "
+                f"Got {type(meta).__name__}: {node_name}")
+        meta.is_output = True
+
+    # Add node with specific type
+
+    def add_load_node(self, name, example):
+        """
+        Add a Load node to DAG IR
+        :param name: name of the loaded variable
+        :type name: str
+        :param example: example input
+        :type example: np.ndarray|torch.Tensor|cupy.ndarray|float
+        :raises ValueError: if name or example is None, or if the example for
+            'accum' is neither rank-2 nor rank-3
+        """
+        if name is None:
+            raise ValueError("Name is not provided.")
+        if example is None:
+            raise ValueError(f"Example input for {name} is not provided.")
+        load_node = LoadNode(name)
+        load_node.tensor = {"tensor": example}
+        # Special handling for the accumulator: a rank-2 example is broadcast to
+        # rank 3 by prepending a dimension of size 1; any other rank except 3 is rejected
+        if name == "accum":
+            rank = load_node.tensor.rank
+            if rank == 2:
+                new_shape = (1, *load_node.tensor.shape)
+                load_node.tensor.broadcast(new_shape)
+            elif rank < 2 or rank > 3:
+                raise ValueError(f"Expect example inputs for 'accum' be a rank-2 or rank-3 tensor. Got {load_node.tensor.shape}.")
+        self.add_node(load_node)
+
+    def add_imm(self, value: Union[float,int]):
+        """
+        Add an immediate scalar value to DAG IR
+        :param value: the value of the immediate scalar
+        :type value: float
+        :return: the name of the load node created for the scalar
+        :raises ValueError: if the value cannot be converted to float
+        """
+        try:
+            value = float(value)
+        except (TypeError, ValueError, OverflowError) as exc:
+            # Only the failures float() raises for bad input are translated;
+            # KeyboardInterrupt/SystemExit are no longer swallowed
+            raise ValueError(f"{type(value).__name__} cannot be converted to float.") from exc
+
+        # The counter suffix keeps names unique when the same value appears more than once
+        name = f"imm_{value}_k{self.imm_cnt}".replace('.', '_')
+        self.imm_cnt += 1
+        load_node = LoadNode(name)
+        load_node.tensor = {"tensor": value, "is_constant": True}
+        self.add_node(load_node)
+        return name
+
+    def add_compute_node(self, op, name=None):
+        """
+        Add a compute node.
+        :param op: the computation op
+        :param name: the node name (optional)
+        :type name: str
+        :return: the name of the compute node
+        """
+        if name is None:
+            name = f"compute_{self.compute_cnt}"
+            self.compute_cnt += 1
+        compute_node = ComputeNode(
+            name=name, fn=op,
+            element_output=self.element_compute,
+            element_compute=self.element_compute)
+        self.add_node(compute_node)
+        return compute_node.name
+
+    def add_layout_node(self, op, kwargs, name=None):
+        """
+        Add a layout node.
+        :param op: the layout op
+        :type op: evt_ops
+        :param kwargs: keyword arguments forwarded to the layout node
+        :type kwargs: dict
+        :param name: the node name (optional)
+        :type name: str
+        :return: the name of the layout node
+        """
+        if name is None:
+            name = f"layout_{self.layout_cnt}"
+            self.layout_cnt += 1
+        layout_node = LayoutNode(name=name, fn=op, kwargs=kwargs)
+        self.add_node(layout_node)
+        return layout_node.name
+
+    def add_store_node(self, name):
+        """
+        Add a Store node to DAG IR
+        :param name: name of the stored variable
+        :type name: str
+        """
+        store_node = StoreNode(name)
+        self.add_node(store_node)
+
+    #
+    # Visualization The DAG IR
+    #
+
+    def visualize(self, name="dag_ir"):
+        """
+        Visualize the dag ir with svg file
+        :param name: the name of the graph
+        :raises RuntimeError: if drawing or writing a graph fails
+        """
+        drawer = EVTGraphDrawer(self.dag_ir, name)
+        try:
+            # A separate loop variable keeps the ``name`` parameter intact
+            for graph_name, graph in drawer.get_dot_graph():
+                graph.write_svg(f"./{graph_name}.svg")
+        except Exception as exc:
+            # Chain the original error so the real cause stays visible in the traceback
+            raise RuntimeError(
+                "'dot' is not found in path. GraphDrawer is disabled. "
+                "Please install it with 'sudo apt-get install graphviz'."
+            ) from exc
+
+    #
+    # Get shared memory size
+    #
+
+    def get_smem_size(self, tile_description):
+        """
+        Get the shared memory size of the epilogue
+        :param tile_description: tile description forwarded to ``GetSmemSize``
+        """
+        smem_size = GetSmemSize(self.dag_ir)(tile_description)
+        return smem_size
--- a/python/cutlass_cppgen/backend/evt/frontend/python_ast.py
+++ b/python/cutlass_cppgen/backend/evt/frontend/python_ast.py
@@ -0,0 +1,194 @@
+#################################################################################################
+#
+# Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+#################################################################################################
+
+"""
+Python AST frontend that parses input into DAG IR
+"""
+
+import ast
+import inspect
+import textwrap
+
+from cutlass_library import DataType
+
+import cutlass_cppgen
+from cutlass_cppgen.backend.evt.frontend.frontend_base import EVTFrontendBase
+from cutlass_cppgen.backend.epilogue import identity, relu, tanh, sigmoid, silu, hardswish, gelu
+from cutlass_cppgen.backend.library import FunctionalOp
+
+
+class PythonASTFrontend(EVTFrontendBase, ast.NodeVisitor):
+    """
+    Frontend that walks the Python AST of this object's ``__call__`` method and
+    records every argument, operation, assignment and return value as nodes
+    and edges of the DAG IR.
+    """
+    def __init__(self, cc, element_compute=DataType.f32, **kwargs):
+        super().__init__(cc, element_compute, **kwargs)
+        # Flags
+        # If this state is True, visit_Constant returns values without creating imm node
+        self.no_imm = False
+        # If this state is True, visit_BinOp and visit_Call reject the expression
+        self.visiting_return = False
+
+    def parse(self, example_inputs):
+        """
+        Parse the source code of ``self.__call__`` into the DAG IR
+        :param example_inputs: example tensors indexed by argument / return name
+        """
+        self.example_inputs = example_inputs
+        # Dedent so that the method source parses as a top-level definition
+        self.source = textwrap.dedent(inspect.getsource(self.__call__))
+        self.ast = ast.parse(self.source)
+        self.visit(self.ast)
+
+    #
+    # Helper functions
+    #
+    @staticmethod
+    def ast_op_to_bindings(op):
+        """
+        Map an AST operator type or a function name to its binding
+        :param op: an ``ast`` operator class or the name of a supported function
+        :return: the binding; a tuple of two bindings for "sum" and "max"
+        :raises KeyError: if the op is not supported
+        """
+        mapping = {
+            ast.Add: FunctionalOp.Plus,
+            ast.Sub: FunctionalOp.Minus,
+            ast.Mult: FunctionalOp.Multiplies,
+            ast.Div: FunctionalOp.Divides,
+            "maximum": FunctionalOp.Maximum,
+            "minimum": FunctionalOp.Minimum,
+            "identity": identity.binding_type,
+            "relu": relu.binding_type,
+            "tanh": tanh.binding_type,
+            "sigmoid": sigmoid.binding_type,
+            "silu": silu.binding_type,
+            "hardswish": hardswish.binding_type,
+            "gelu": gelu.binding_type,
+            "multiply_add": FunctionalOp.MultiplyAdd,
+            "sum": (FunctionalOp.Plus, FunctionalOp.AtomicAdd),
+            "max": (FunctionalOp.Maximum, FunctionalOp.AtomicMaximum),
+            "exp": FunctionalOp.Exp
+        }
+        return mapping[op]
+
+    #
+    # Visiting different node types
+    #
+
+    def visit_FunctionDef(self, node: ast.FunctionDef):
+        """
+        Register a load node per positional argument, then visit the body
+        """
+        # Visit args and register load nodes
+        for arg in node.args.args:
+            self.visit(arg)
+        for expr in node.body:
+            self.visit(expr)
+
+    def visit_arg(self, node: ast.arg):
+        """
+        Add a load node for a function argument using its example input
+        :raises RuntimeError: if no example input exists for the argument
+        """
+        # Name of the argument
+        name = node.arg
+        try:
+            example_tensor = self.example_inputs[name]
+        except (KeyError, TypeError) as exc:
+            raise RuntimeError(f"Example input for {name} is not provided.") from exc
+
+        self.add_load_node(name, example_tensor)
+
+    def visit_Name(self, node: ast.Name):
+        """
+        A name is represented by its identifier, which doubles as the node name
+        """
+        return node.id
+
+    def visit_Constant(self, node: ast.Constant):
+        """
+        Return the raw value when ``no_imm`` is set; otherwise create an
+        immediate load node and return its name
+        """
+        if self.no_imm:
+            return node.value
+        return self.add_imm(node.value)
+
+    def visit_Tuple(self, node: ast.Tuple):
+        """
+        Visit every element and return the results as a tuple
+        """
+        return tuple(self.visit(elt) for elt in node.elts)
+
+    def visit_keyword(self, node: ast.keyword):
+        """
+        Return a single-entry dict mapping the keyword name to its visited value
+        """
+        return {node.arg: self.visit(node.value)}
+
+    def visit_BinOp(self, node: ast.BinOp):
+        """
+        Add a compute node for a binary operation
+        :return: the name of the compute node
+        :raises SyntaxError: if the operation appears inside a return statement
+        """
+        if self.visiting_return:
+            raise SyntaxError("Return value cannot be an expression")
+        lhs = self.visit(node.left)
+        rhs = self.visit(node.right)
+        op = self.ast_op_to_bindings(type(node.op))
+        name = self.add_compute_node(op)
+
+        # Add edges
+        # The edge weights are used to sort the input args
+        self.add_edge(lhs, name, weight=0)
+        self.add_edge(rhs, name, weight=1)
+        return name
+
+    def visit_Assign(self, node: ast.Assign):
+        """
+        Add a store node named after the first assignment target and connect
+        the assigned value to it
+        :return: the name of the store node
+        """
+        # Only the first target is used
+        target = self.visit(node.targets[0])
+        value = self.visit(node.value)
+        # Create the assign node
+        self.add_store_node(target)
+
+        # Add edges
+        self.add_edge(value, target)
+        return target
+
+    def visit_Call(self, node: ast.Call):
+        """
+        Add a layout node (for functions in ``layout_fns``) or a compute node
+        (for every other function) and connect the positional args to it
+        :return: the name of the created node
+        :raises SyntaxError: if the call appears inside a return statement
+        """
+        if self.visiting_return:
+            raise SyntaxError("Return value cannot be an expression")
+        func = self.visit(node.func)
+        args = [self.visit(arg) for arg in node.args]
+
+        if func in self.layout_fns:
+            # Parse kwargs
+            # By default, visiting imm automatically creates a load node
+            # However, in function call, keyword args are used to set
+            # specific function attributes such as indices for permute
+            # So no_imm is set to True temporarily
+            self.no_imm = True
+            try:
+                kwargs = {}
+                for kw in node.keywords:
+                    kwargs.update(self.visit(kw))
+            finally:
+                # Restore the flag even if visiting a keyword raises
+                self.no_imm = False
+            op = self.layout_fns[func]
+            name = self.add_layout_node(op, kwargs)
+        else:
+            op = self.ast_op_to_bindings(func)
+            name = self.add_compute_node(op)
+
+        # Add edges
+        # The positional index is used as the edge weight
+        for idx, arg in enumerate(args):
+            self.add_edge(arg, name, weight=idx)
+        return name
+
+    def visit_Return(self, node: ast.Return):
+        """
+        Record the returned names and mark each of them as an output store node
+        :raises RuntimeError: if no example input exists for a returned name
+        """
+        self.visiting_return = True
+        try:
+            results = self.visit(node.value)
+        finally:
+            # visit_BinOp / visit_Call raise SyntaxError while this flag is set,
+            # so it has to be restored on the error path as well
+            self.visiting_return = False
+        self.return_names = results
+        if not isinstance(results, tuple):
+            results = (results,)
+        for rst in results:
+            try:
+                example_tensor = self.example_inputs[rst]
+            except (KeyError, TypeError) as exc:
+                raise RuntimeError(f"Example input for {rst} is not provided.") from exc
+            self.set_store_tensor(rst, example_tensor)
+            self.mark_output(rst)