v4.3 update. (#2709)
* v4.3 update. * Update the cute_dsl_api changelog's doc link * Update version to 4.3.0 * Update the example link * Update doc to encourage user to install DSL from requirements.txt --------- Co-authored-by: Larry Wu <larwu@nvidia.com>
This commit is contained in:
@ -30,11 +30,12 @@ cmake_minimum_required(VERSION 3.15)
|
||||
project(tensor)
|
||||
|
||||
# Find Python
|
||||
find_package(Python COMPONENTS Interpreter Development REQUIRED)
|
||||
find_package(Python3 COMPONENTS Interpreter Development REQUIRED)
|
||||
|
||||
# Get Python site-packages directory using Python
|
||||
execute_process(
|
||||
COMMAND ${Python_EXECUTABLE} -c "import site; print(site.getsitepackages()[0])"
|
||||
COMMAND ${Python3_EXECUTABLE} -c "import site; print(site.getsitepackages()[0])"
|
||||
OUTPUT_VARIABLE Python_SITE_PACKAGES
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
)
|
||||
@ -45,7 +46,13 @@ message(STATUS "Python site-packages directory: ${Python_SITE_PACKAGES}")
|
||||
list(APPEND CMAKE_PREFIX_PATH ${Python_SITE_PACKAGES}/nanobind/cmake)
|
||||
|
||||
# Find nanobind
|
||||
find_package(nanobind REQUIRED)
|
||||
find_package(nanobind)
|
||||
if(NOT nanobind_FOUND)
|
||||
message(FATAL_ERROR
|
||||
"nanobind not found!\n"
|
||||
"Please install nanobind with: pip install nanobind\n"
|
||||
)
|
||||
endif()
|
||||
|
||||
# Add the module
|
||||
nanobind_add_module(tensor tensor.cpp)
|
||||
|
||||
@ -54,7 +54,6 @@ import cutlass.cute as cute
|
||||
|
||||
from cutlass._mlir import ir
|
||||
from cutlass._mlir.dialects import llvm
|
||||
import cutlass._mlir.extras.types as T
|
||||
|
||||
|
||||
class ExampleTensorValue(ir.Value):
|
||||
@ -244,7 +243,7 @@ import tempfile
|
||||
import torch
|
||||
|
||||
|
||||
def run_test(tmpdir=None):
|
||||
def run_test(tmpdir=None, cmake_args=""):
|
||||
# Skip cleanup if user provides tmpdir
|
||||
cleanup = tmpdir is None
|
||||
# Initialize temporary build directory
|
||||
@ -253,7 +252,8 @@ def run_test(tmpdir=None):
|
||||
try:
|
||||
current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
subprocess.run(["cmake", "-B", tmpdir, current_dir], check=True)
|
||||
cmake_args = cmake_args.split()
|
||||
subprocess.run(["cmake", "-B", tmpdir, current_dir] + cmake_args, check=True)
|
||||
subprocess.run(["cmake", "--build", tmpdir], check=True)
|
||||
|
||||
sys.path.append(tmpdir)
|
||||
@ -284,7 +284,10 @@ def run_test(tmpdir=None):
|
||||
# Execute compiled function
|
||||
compiled_func(tensor)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
import traceback
|
||||
|
||||
traceback.print_exception(type(e), e, e.__traceback__)
|
||||
raise e
|
||||
finally:
|
||||
if cleanup:
|
||||
# Clean up the temporary directory
|
||||
@ -298,8 +301,17 @@ if __name__ == "__main__":
|
||||
description="Set temporary directory for building C modules"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tmp-dir", type=str, help="Temporary directory path for building C modules"
|
||||
"--tmp-dir",
|
||||
type=str,
|
||||
default=None,
|
||||
help="Temporary directory path for building C modules",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--cmake-args",
|
||||
type=str,
|
||||
default="",
|
||||
help="Extra CMake arguments for building C modules",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
run_test(args.tmp_dir)
|
||||
run_test(tmpdir=args.tmp_dir, cmake_args=args.cmake_args)
|
||||
|
||||
77
examples/python/CuTeDSL/cute/torch_fake_tensor.py
Normal file
77
examples/python/CuTeDSL/cute/torch_fake_tensor.py
Normal file
@ -0,0 +1,77 @@
|
||||
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
import torch
|
||||
|
||||
import cutlass.cute as cute
|
||||
from cutlass.cute.runtime import from_dlpack
|
||||
|
||||
|
||||
"""Example demonstrating how to use CuTe with PyTorch's FakeTensor mode.
|
||||
|
||||
This example shows how to:
|
||||
1. Use PyTorch's FakeTensor mode to compile a CuTe function without real data
|
||||
2. Execute the compiled function on real data later
|
||||
|
||||
FakeTensor mode allows compiling code without allocating real memory, which is useful
|
||||
for ahead-of-time compilation scenarios. The compiled function can then be executed
|
||||
on real tensors that match the expected shapes and dtypes.
|
||||
|
||||
The primary goal of this example is to demonstrate how to use PyTorch's FakeTensor mode with CuTe
|
||||
to enable ahead-of-time compilation without real data allocation.
|
||||
|
||||
The example:
|
||||
1. Creates a fake tensor in PyTorch using FakeTensor mode
|
||||
2. Compiles a CuTe function using the fake tensor without allocating real memory
|
||||
3. Creates a real tensor with matching shape and dtype
|
||||
4. Executes the compiled function on the real tensor
|
||||
|
||||
To run this example:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
python examples/python/CuTeDSL/cute/torch_fake_tensor.py
|
||||
"""
|
||||
|
||||
|
||||
@cute.jit
|
||||
def print_tensor(t: cute.Tensor):
|
||||
cute.print_tensor(t)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from torch._subclasses.fake_tensor import FakeTensorMode
|
||||
|
||||
shape = (3, 4)
|
||||
with FakeTensorMode():
|
||||
fake_tensor = torch.zeros(shape, dtype=torch.float32)
|
||||
compiled_fn = cute.compile(print_tensor, from_dlpack(fake_tensor))
|
||||
|
||||
real_tensor = torch.randn(shape, dtype=torch.float32)
|
||||
compiled_fn(from_dlpack(real_tensor))
|
||||
Reference in New Issue
Block a user