Files
cutlass/python/CuTeDSL/base_dsl/runtime/device_tensor.py
2025-05-13 15:55:29 -04:00

122 lines
4.0 KiB
Python

# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
#
# Use of this software is governed by the terms and conditions of the
# NVIDIA End User License Agreement (EULA), available at:
# https://docs.nvidia.com/cutlass/media/docs/pythonDSL/license.html
#
# Any use, reproduction, disclosure, or distribution of this software
# and related documentation outside the scope permitted by the EULA
# is strictly prohibited.
import copy
from . import cuda as cuda_helpers
from .tensor_descriptor import *
from ..common import *
def allocate(tensor: TensorDescriptor, stream=None):
    """
    Allocates GPU memory for ``tensor`` and records the device pointer on it.

    :param tensor: Descriptor to allocate for. Its ``size_in_bytes`` is the
        requested allocation size, and its ``device_pointer`` is set to the
        value returned by ``cuda_helpers.allocate``.
    :param stream: Forwarded unchanged to ``cuda_helpers.allocate``.
    :raises DSLRuntimeError: If the tensor is managed by the framework, or if
        it already has a device pointer.
    """
    if tensor._check_is_managed_by_framework():
        raise DSLRuntimeError(
            "GPU tensors are managed by the framework and cannot be modified."
        )
    # Refuse to overwrite an existing pointer; doing so would lose the only
    # reference this descriptor holds to the earlier allocation.
    if tensor.device_pointer is not None:
        raise DSLRuntimeError("Tensor is already allocated on the device.")
    tensor.device_pointer = cuda_helpers.allocate(tensor.size_in_bytes, stream)
    log().info("Allocate done tensor=[%s] dev_ptr=[%s]", tensor, tensor.device_pointer)
def deallocate(tensor: TensorDescriptor, stream=None):
    """
    Releases the GPU memory referenced by ``tensor`` and clears its pointer.

    :param tensor: Descriptor whose ``device_pointer`` is passed to
        ``cuda_helpers.deallocate`` and then reset to ``None``.
    :param stream: Forwarded unchanged to ``cuda_helpers.deallocate``.
    :raises DSLRuntimeError: If the tensor is managed by the framework, or if
        it has no device pointer.
    """
    framework_owned = tensor._check_is_managed_by_framework()
    if framework_owned:
        raise DSLRuntimeError(
            "GPU tensors are managed by the framework and cannot be modified."
        )

    if tensor.device_pointer is None:
        raise DSLRuntimeError("Tensor is not allocated on the device.")

    # The message is emitted before the release call, while the pointer value
    # is still set on the descriptor and can be reported.
    logger = log()
    logger.info(
        "Deallocating done tensor=[%s] dev_ptr=[%s]", tensor, tensor.device_pointer
    )

    cuda_helpers.deallocate(tensor.device_pointer, stream)
    tensor.device_pointer = None
def copy_to_gpu(tensor: TensorDescriptor, do_allocate=True, stream=None):
    """
    Copies data from host memory to the GPU memory.

    :param tensor: Descriptor providing the host pointer (``data_ptr``), the
        device pointer (``device_pointer``) and the byte count
        (``size_in_bytes``) for the copy.
    :param do_allocate: If True, ``allocate`` is called first to obtain the
        device buffer. If False, the tensor must already have a device pointer.
    :param stream: Forwarded unchanged to ``allocate`` and
        ``cuda_helpers.memcpy_h2d``.
    :returns: The same ``tensor`` object that was passed in.
    :raises DSLRuntimeError: Propagated from ``allocate`` when
        ``do_allocate`` is True; raised here when ``do_allocate`` is False and
        the tensor has no device pointer.
    """
    log().info("copyin tensor=[%s] dev_ptr=[%s]", tensor, tensor.device_pointer)
    if do_allocate:
        allocate(tensor, stream)
    elif tensor.device_pointer is None:
        # Without this guard a None destination would be handed to the memcpy
        # helper; fail with the same error copy_from_gpu uses instead.
        raise DSLRuntimeError("Tensor is not allocated on the device.")
    # NOTE(review): unlike copy_from_gpu, the framework-managed check only runs
    # via allocate(); confirm whether do_allocate=False should also enforce it.
    cuda_helpers.memcpy_h2d(
        tensor.data_ptr, tensor.device_pointer, tensor.size_in_bytes, stream
    )
    log().info("copyin done tensor=[%s] dev_ptr=[%s]", tensor, tensor.device_pointer)
    return tensor
def copy_from_gpu(tensor: TensorDescriptor, do_deallocate=True, stream=None):
    """
    Copies data from GPU memory back to the host.

    :param tensor: Descriptor providing the host pointer (``data_ptr``), the
        device pointer (``device_pointer``) and the byte count
        (``size_in_bytes``) for the copy.
    :param do_deallocate: If True, ``deallocate`` is called after the copy,
        which also resets ``tensor.device_pointer`` to ``None``.
    :param stream: Forwarded unchanged to ``cuda_helpers.memcpy_d2h`` and
        ``deallocate``.
    :raises DSLRuntimeError: If the tensor is managed by the framework, or if
        it has no device pointer.
    """
    log().info("copyout tensor=[%s] dev_ptr=[%s]", tensor, tensor.device_pointer)

    framework_owned = tensor._check_is_managed_by_framework()
    if framework_owned:
        raise DSLRuntimeError(
            "GPU tensors are managed by the framework and cannot be modified."
        )

    missing_on_device = tensor.device_pointer is None
    if missing_on_device:
        raise DSLRuntimeError("Tensor is not allocated on the device.")

    host_ptr = tensor.data_ptr
    dev_ptr = tensor.device_pointer
    nbytes = tensor.size_in_bytes
    cuda_helpers.memcpy_d2h(host_ptr, dev_ptr, nbytes, stream)

    if do_deallocate:
        deallocate(tensor, stream)

    # The pointer is re-read here so the log reflects the state after any
    # deallocation performed above.
    log().info("copyout done tensor=[%s] dev_ptr=[%s]", tensor, tensor.device_pointer)
def to_gpu(tensor, stream=None) -> TensorDescriptor:
    """
    Copies the tensor from host memory to GPU memory via a new descriptor.

    :param tensor: Either a ``TensorDescriptor`` (a shallow copy of it is
        used) or any object accepted by
        ``TensorDescriptor.can_transformed_to_dlpack`` (a new descriptor is
        built from it).
    :param stream: Forwarded to ``copy_to_gpu``.
    :returns: The new descriptor after ``copy_to_gpu`` has run on it.
    :raises DSLRuntimeError: If ``tensor`` is neither of the supported kinds.
    """
    if isinstance(tensor, TensorDescriptor):
        device_copy = copy.copy(tensor)
    elif TensorDescriptor.can_transformed_to_dlpack(tensor):
        device_copy = TensorDescriptor(tensor)
    else:
        raise DSLRuntimeError("Unsupported type")

    copy_to_gpu(device_copy, stream=stream)
    return device_copy
def from_gpu(tensor, stream=None) -> TensorDescriptor:
    """
    Copies the tensor from GPU memory back to host memory via a new descriptor.

    :param tensor: Either a ``TensorDescriptor`` (a shallow copy of it is
        used) or any object accepted by
        ``TensorDescriptor.can_transformed_to_dlpack`` (a new descriptor is
        built from it).
    :param stream: Forwarded to ``copy_from_gpu``.
    :returns: The new descriptor after ``copy_from_gpu`` has run on it.
        ``copy_from_gpu`` is called with its default ``do_deallocate=True``,
        so the device buffer is released as part of this call.
    :raises DSLRuntimeError: If ``tensor`` is neither of the supported kinds,
        or propagated from ``copy_from_gpu``.
    """
    if isinstance(tensor, TensorDescriptor):
        # NOTE(review): the deallocation clears device_pointer on this shallow
        # copy only; presumably the caller's descriptor still holds the freed
        # pointer afterwards. Confirm against TensorDescriptor.
        host_copy = copy.copy(tensor)
    elif TensorDescriptor.can_transformed_to_dlpack(tensor):
        host_copy = TensorDescriptor(tensor)
    else:
        raise DSLRuntimeError("Unsupported type")

    copy_from_gpu(host_copy, stream=stream)
    return host_copy