Updates for 3.1 (#932)
This commit is contained in:
@ -200,7 +200,7 @@ class ArtifactManager:
|
||||
self.compiled_cache_host.insert(key, compiled_host_fns)
|
||||
return True
|
||||
|
||||
def emit_compile_(self, operation_list, compilation_options, requires_nvcc_hostlib_compilation):
|
||||
def emit_compile_(self, operation_list, compilation_options):
|
||||
"""
|
||||
Compile a list of kernels and store them into database
|
||||
"""
|
||||
@ -306,41 +306,17 @@ class ArtifactManager:
|
||||
cubin_image = file.read()
|
||||
|
||||
# Set up the host-side library code
|
||||
if requires_nvcc_hostlib_compilation:
|
||||
cmd_template = (
|
||||
"echo '%s'|${cuda_install_path}/bin/nvcc -x cu -Xcompiler=\"-fpermissive -w -fPIC\" ${options}"
|
||||
% source_buffer_host
|
||||
)
|
||||
cmd = SubstituteTemplate(
|
||||
cmd_template,
|
||||
{
|
||||
"cuda_install_path": CUDA_INSTALL_PATH,
|
||||
"options": compilation_options.get_str(),
|
||||
},
|
||||
)
|
||||
else:
|
||||
options = compilation_options.get()
|
||||
cmd = (
|
||||
"echo '%s'|g++ -x c++ -fpermissive -w -fPIC -DCUTLASS_PYTHON_HOST_CC=1"
|
||||
% source_buffer_host
|
||||
)
|
||||
filtered_opts = [
|
||||
"-default-device",
|
||||
"-Xcicc",
|
||||
"-Xllc",
|
||||
"--expt-relaxed-constexpr",
|
||||
"-Xcudafe --diag_suppress=esa_on_defaulted_function_ignored",
|
||||
]
|
||||
for opt in options:
|
||||
opt = opt.decode("utf-8")
|
||||
if opt not in filtered_opts and "-arch=sm_" not in opt:
|
||||
if "--include-path=" in opt:
|
||||
cmd += " " + opt.replace(
|
||||
"--include-path=",
|
||||
"-I",
|
||||
)
|
||||
else:
|
||||
cmd += " " + opt
|
||||
cmd_template = (
|
||||
"echo '%s'|${cuda_install_path}/bin/nvcc -x cu -Xcompiler=\"-fpermissive -w -fPIC\" ${options}"
|
||||
% source_buffer_host
|
||||
)
|
||||
cmd = SubstituteTemplate(
|
||||
cmd_template,
|
||||
{
|
||||
"cuda_install_path": CUDA_INSTALL_PATH,
|
||||
"options": compilation_options.get_str(),
|
||||
},
|
||||
)
|
||||
|
||||
tempfile.tempdir = "./"
|
||||
temp = tempfile.NamedTemporaryFile(
|
||||
@ -375,7 +351,6 @@ class ArtifactManager:
|
||||
# save the cubin
|
||||
operation_key = []
|
||||
operation_list = []
|
||||
requires_nvcc_hostlib_compilation = False
|
||||
for operation in operations:
|
||||
# step 1: get kernel string as key
|
||||
key = operation.rt_module.emit() + operation.procedural_name() + self.backend
|
||||
@ -398,17 +373,9 @@ class ArtifactManager:
|
||||
operation_list.append(operation.rt_module)
|
||||
operation_key.append(key)
|
||||
|
||||
# Creating the Params structures for certain 3.0 kernels currently requires CUDA. For these cases, use NVCC to generate
|
||||
# the PyCUTLASS host-side library. Otherwise, g++ will be used.
|
||||
if isinstance(operation, GemmOperationUniversal) and operation.api == ApiVersion.v3x:
|
||||
if self.backend == "nvrtc":
|
||||
raise RuntimeError("CUTLASS 3 kernels currently require NVCC for compilation.")
|
||||
|
||||
requires_nvcc_hostlib_compilation = True
|
||||
|
||||
if len(operation_list) > 0:
|
||||
cubin_image, host_lib, host_file = self.emit_compile_(
|
||||
operation_list, compile_options, requires_nvcc_hostlib_compilation)
|
||||
operation_list, compile_options)
|
||||
|
||||
err, module = cuda.cuModuleLoadData(cubin_image)
|
||||
if err != cuda.CUresult.CUDA_SUCCESS:
|
||||
|
||||
@ -43,10 +43,10 @@
|
||||
template<typename T, typename L, typename TF>
|
||||
void bind_tensor_ref_view(py::module &m, std::string name) {
|
||||
py::class_<cutlass::TensorRef<T, L>>(m, ("TensorRef" + name).c_str())
|
||||
.def("__init__", [](cutlass::TensorRef<T, L>& tensor_ref, int64_t address, const L& layout_ ) {
|
||||
.def(py::init([](int64_t address, const L& layout_ ) {
|
||||
T* ptr = reinterpret_cast< T*>(address);
|
||||
new (&tensor_ref) cutlass::TensorRef<T, L>(ptr, layout_);
|
||||
})
|
||||
return new cutlass::TensorRef<T, L>(ptr, layout_);
|
||||
}))
|
||||
.def("data", [](cutlass::TensorRef<T, L>& tensor_ref) {
|
||||
T* ptr = tensor_ref.data();
|
||||
return int64_t(ptr);
|
||||
|
||||
@ -29,9 +29,12 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
#################################################################################################
|
||||
|
||||
import copy
|
||||
import os
|
||||
from pybind11.setup_helpers import Pybind11Extension
|
||||
import setuptools
|
||||
from setuptools import setup
|
||||
from setuptools.command.build_ext import build_ext
|
||||
|
||||
|
||||
def _cutlass_path_from_dir() -> str:
|
||||
@ -61,31 +64,57 @@ cutlass_path = (
|
||||
else _cutlass_path_from_dir()
|
||||
)
|
||||
|
||||
|
||||
cuda_install_path = (
|
||||
os.getenv('CUDA_INSTALL_PATH')
|
||||
if os.getenv('CUDA_INSTALL_PATH') is not None
|
||||
else _cuda_install_path_from_nvcc()
|
||||
)
|
||||
|
||||
ext_modules = []
|
||||
|
||||
try:
|
||||
from pybind11.setup_helpers import Pybind11Extension, build_ext
|
||||
include_dirs = [
|
||||
cutlass_path + '/include',
|
||||
cuda_install_path + '/include',
|
||||
cutlass_path + '/tools/util/include',
|
||||
cutlass_path + '/test',
|
||||
]
|
||||
class BuildExtension(build_ext):
|
||||
"""
|
||||
Wrapper around `build_ext` to use NVCC when compiling the CUTLASS Python-C++ bindings.
|
||||
"""
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
ext_modules = [
|
||||
Pybind11Extension('cutlass_bindings',
|
||||
['cutlass/cpp/cutlass_bindings.cpp'],
|
||||
include_dirs=include_dirs,
|
||||
extra_compile_args=['-fpermissive', '-w', '-std=c++17', '-DCUTLASS_PYTHON_HOST_CC=1'])
|
||||
]
|
||||
except ImportError:
|
||||
pass
|
||||
def build_extensions(self):
|
||||
original_compile = self.compiler._compile
|
||||
|
||||
def custom_compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
|
||||
"""
|
||||
Wrapper around build_ext.compiler._compile method
|
||||
"""
|
||||
postargs = copy.deepcopy(extra_postargs)
|
||||
postargs = [f for f in postargs if f not in ['-g0', '-fvisibility=hidden']]
|
||||
postargs.extend(["-Xcompiler='-fPIC'", "-Xcompiler='-g0'", "-Xcompiler='-O3'", '-x', 'cu'])
|
||||
try:
|
||||
original_compiler = self.compiler.compiler_so
|
||||
self.compiler.set_executable('compiler_so', [f'{cuda_install_path}/bin/nvcc'])
|
||||
original_compile(obj, src, ext, cc_args, postargs, pp_opts)
|
||||
finally:
|
||||
self.compiler.set_executable('compiler_so', original_compiler)
|
||||
|
||||
self.compiler._compile = custom_compile
|
||||
super().build_extensions()
|
||||
|
||||
|
||||
include_dirs = [
|
||||
cutlass_path + '/include',
|
||||
cuda_install_path + '/include',
|
||||
cutlass_path + '/tools/util/include',
|
||||
cutlass_path + '/test',
|
||||
]
|
||||
|
||||
|
||||
ext_modules = [
|
||||
Pybind11Extension('cutlass_bindings',
|
||||
['cutlass/cpp/cutlass_bindings.cpp'],
|
||||
include_dirs=include_dirs,
|
||||
extra_compile_args=['-Xcompiler="-fpermissive"', '-w', '-std=c++17'],
|
||||
libraries=['cudart'])
|
||||
]
|
||||
|
||||
|
||||
setup(
|
||||
@ -103,4 +132,7 @@ setup(
|
||||
'treelib'
|
||||
],
|
||||
ext_modules=ext_modules,
|
||||
cmdclass={
|
||||
'build_ext': BuildExtension
|
||||
}
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user