cutlass/python/CuTeDSL/base_dsl/env_manager.py

# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
#
# Use of this software is governed by the terms and conditions of the
# NVIDIA End User License Agreement (EULA), available at:
# https://docs.nvidia.com/cutlass/media/docs/pythonDSL/license.html
#
# Any use, reproduction, disclosure, or distribution of this software
# and related documentation outside the scope permitted by the EULA
# is strictly prohibited.
"""
This module provides utilities for the environment variables setup.
It provides an EnvironmentVarManager, which reads environment variables for the DSL
and caches them for efficient access.
It also provides utilities to automatically setup a subset of environment variables
based on heuristics.
"""
import os
import sys
import shutil
import glob
from pathlib import Path
from functools import lru_cache
from typing import Any
from ..base_dsl.runtime.cuda import get_compute_capability_major_minor
from .utils.logger import log
IS_WINDOWS = sys.platform == "win32"
CLIB_EXT = ".dll" if IS_WINDOWS else ".so"
# =============================================================================
# Environment Variable Helpers
# =============================================================================
@lru_cache(maxsize=None)
def get_str_env_var(var_name, default_value=None):
    value = os.getenv(var_name)
    return value if value is not None else default_value
@lru_cache(maxsize=None)
def get_bool_env_var(var_name, default_value=False):
    value = get_str_env_var(var_name)
    if value is None:
        return default_value
    return value not in {"False", "0", ""}
@lru_cache(maxsize=None)
def get_int_env_var(var_name, default_value=0):
    value = get_str_env_var(var_name)
    return int(value) if value and value.isdigit() else default_value
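
# Illustrative reads of the helpers above (the environment values are assumptions,
# not part of the original file). Because results are memoized with lru_cache, the
# first read of a given (name, default) pair is reused for the rest of the process:
#
#   get_bool_env_var("DSL_PRINT_IR")                    # True  if DSL_PRINT_IR="1"
#   get_bool_env_var("DSL_KEEP_IR")                     # False if DSL_KEEP_IR is unset
#   get_int_env_var("DSL_LOG_LEVEL", 1)                 # 3     if DSL_LOG_LEVEL="3"
#   get_int_env_var("DSL_FILE_CACHING_CAPACITY", 1000)  # 1000  if set to a non-digit string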
def detect_gpu_arch(prefix):
"""
Attempts to detect the machine's GPU architecture.
Returns:
A string representing the GPU architecture (e.g. "70" for compute capability 7.0),
or a default value(e.g. "sm_100") if the GPU architecture cannot be determined.
"""
arch = (None, None)
try:
arch = get_compute_capability_major_minor()
except Exception as e:
log().info(f"Failed to get CUDA compute capability: {e}")
if arch == (None, None):
# default to sm_100
arch = (10, 0)
major, minor = arch
suffix = ""
if major >= 9:
suffix = "a"
return f"sm_{major}{minor}{suffix}"
def find_libs_in_ancestors(start, target_libs, lib_folder_guesses):
"""
Search ancestor directories for a candidate library folder containing all required libraries.
Starting from the given path, this function traverses up through each parent directory.
For every ancestor, it checks candidate subdirectories (specified by lib_folder_guesses)
for files that match the required library extension (CLIB_EXT). Library file names are
canonicalized by removing the "lib" prefix from their stem. If a candidate directory contains
all of the required libraries (as specified in target_libs), the function returns a list of
absolute paths to these library files.
Parameters:
start (str or Path): The starting directory from which to begin the search.
target_libs (iterable of str): A collection of required library names (without the "lib" prefix).
lib_folder_guesses (iterable of str): Relative paths from an ancestor directory that may contain the libraries.
Returns:
list[str] or None: A list of resolved paths to the required library files if found; otherwise, None.
"""
# Traverse through all parent directories of the resolved starting path.
for ancestor in Path(start).resolve().parents:
# Iterate over each candidate relative directory path.
for rel_path in lib_folder_guesses:
target_dir = ancestor / rel_path
# Skip if the candidate directory does not exist.
if not target_dir.is_dir():
continue
# Initialize a list to hold the resolved paths of matching library files.
libs_cand = []
# Create a set of the remaining libraries we need to find.
remaining_libs = set(target_libs)
# Iterate over all items in the candidate directory.
for p in target_dir.iterdir():
# Consider only files with the expected library extension.
if p.suffix == CLIB_EXT:
# Canonicalize the library name by removing the "lib" prefix.
lib_name = p.stem.removeprefix("lib")
# If this library is required, add its resolved path and mark it as found.
if lib_name in remaining_libs:
libs_cand.append(str(p.resolve()))
remaining_libs.remove(lib_name)
# If all required libraries have been found, return the list of library paths.
if len(remaining_libs) == 0:
return libs_cand
# Return None if no candidate directory contains all required libraries.
return None
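
# Hypothetical example for find_libs_in_ancestors (paths and names are illustrative
# assumptions). Searching upward from /opt/dsl/python/pkg/module.py with a "lib"
# folder guess would return the resolved .so path if /opt/dsl/python/lib contains
# libmlir_runner_utils.so, since "libmlir_runner_utils" canonicalizes to
# "mlir_runner_utils":
#
#   libs = find_libs_in_ancestors(
#       "/opt/dsl/python/pkg/module.py",  # hypothetical start path
#       {"mlir_runner_utils"},            # required names, without the "lib" prefix
#       ["lib"],                          # candidate sub-folders of each ancestor
#   )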
def _find_cuda_home():
"""Find the CUDA installation path using a series of heuristic methods.
Methods below are checked in order, and the function returns on first match:
1. Checking the environment variables CUDA_HOME and CUDA_PATH.
2. Searching for the 'nvcc' compiler in the system PATH and deriving the path of cuda.
3. Scanning common installation directories based on the operating system.
- On Windows systems (when IS_WINDOWS is True), it searches in:
C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*
- On Unix-like systems, it searches in:
/usr/local/cuda*
Returns:
Optional[str]: The absolute CUDA installation path if found; otherwise, None.
Note:
The variable IS_WINDOWS is defined in the module scope.
"""
# Guess #1
cuda_home = get_str_env_var("CUDA_HOME") or get_str_env_var("CUDA_PATH")
if cuda_home is None:
# Guess #2
nvcc_path = shutil.which("nvcc")
if nvcc_path is not None:
cuda_home = os.path.dirname(os.path.dirname(nvcc_path))
else:
# Guess #3
if IS_WINDOWS:
glob_pat = "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*"
else:
glob_pat = "/usr/local/cuda*"
cuda_homes = glob.glob(glob_pat)
if len(cuda_homes) == 0:
cuda_home = ""
else:
cuda_home = cuda_homes[0]
if not os.path.exists(cuda_home):
cuda_home = None
return cuda_home
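
# Illustrative walk-through of _find_cuda_home (assumed machine state, not part of the
# original file): with CUDA_HOME/CUDA_PATH unset and nvcc resolving to
# /usr/local/cuda-12.4/bin/nvcc, guess #2 strips two path components and returns
# "/usr/local/cuda-12.4"; if nvcc is not on PATH either, guess #3 globs
# /usr/local/cuda* (or the Program Files pattern on Windows) and takes the first match.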
def get_cuda_toolkit_path():
"""
Get cuda_toolkit_path. It returns get_str_env_var('CUDA_TOOLKIT_PATH') if
set. Otherwise, attempts to discover a valid CUDA toolkit location and
return. If not found, return None.
"""
# Check if the environment variable is already set, if so, return it immediately.
try:
cuda_toolkit_path_existing = get_str_env_var("CUDA_TOOLKIT_PATH")
if cuda_toolkit_path_existing:
return cuda_toolkit_path_existing
found_cuda_home = _find_cuda_home()
if found_cuda_home:
return found_cuda_home
except Exception as e:
log().info("default_env: exception on get_cuda_toolkit_path", e)
return None
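
# Illustrative precedence for get_cuda_toolkit_path (assumed environment): if
# CUDA_TOOLKIT_PATH=/opt/cuda is set, it wins over any auto-detected location and
# "/opt/cuda" is returned; with neither it nor a discoverable toolkit present,
# the function returns None.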
def get_prefix_dsl_libs(prefix: str):
"""
Returns get_str_env_var('{prefix}_LIBS') if set.
Otherwise, attempts to discover libs based on heuristics and return
If not found, return None.
"""
# Check if the environment variable is already set, if so, return it immediately.
try:
prefix_libs_existing = get_str_env_var(f"{prefix}_LIBS")
if prefix_libs_existing:
return prefix_libs_existing
def get_libs_cand(start):
target_libs = {
"mlir_c_runner_utils",
"mlir_runner_utils",
"mlir_cuda_runtime",
}
lib_folder_guesses = [
"lib",
]
libs_cand = find_libs_in_ancestors(start, target_libs, lib_folder_guesses)
if libs_cand:
dsl_libs = ":".join(libs_cand)
return dsl_libs
return None
# find from install folder
dsl_libs = get_libs_cand(__file__)
if not dsl_libs:
# try to find from build folder structure
dsl_libs = get_libs_cand(Path(__file__).parent.parent.resolve())
return dsl_libs
except Exception as e:
log().info(f"default_env: exception on get_prefix_dsl_libs", e)
return None
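
# Illustrative result for get_prefix_dsl_libs (hypothetical install layout): when the
# three MLIR runtime libraries are found in a "lib" folder of an ancestor directory,
# the return value is a single ":"-joined string, e.g.
#   "/.../lib/libmlir_c_runner_utils.so:/.../lib/libmlir_runner_utils.so:/.../lib/libmlir_cuda_runtime.so"
# whereas setting [prefix]_LIBS explicitly bypasses the directory search entirely.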
class EnvironmentVarManager:
"""Manages environment variables for configuration options.
Printing options:
- [DSL_NAME]_LOG_TO_CONSOLE: Print logging to stderr (default: False)
- [DSL_NAME]_PRINT_AFTER_PREPROCESSOR: Print after preprocess (default: False)
- [DSL_NAME]_PRINT_IR: Print generated IR (default: False)
- [DSL_NAME]_FILTER_STACKTRACE: Filter internal stacktrace (default: True)
File options:
- [DSL_NAME]_KEEP_IR: Save generated IR in a file (default: False)
- [DSL_NAME]_LOG_TO_FILE: Store all logging into a file, excluding COMPILE_LOGS (default: False)
Other options:
- [DSL_NAME]_LOG_LEVEL: Logging level to set, for LOG_TO_CONSOLE or LOG_TO_FILE (default: 1).
- [DSL_NAME]_DRYRUN: Generates IR only (default: False)
- [DSL_NAME]_ARCH: GPU architecture (default: "sm_100")
- [DSL_NAME]_WARNINGS_AS_ERRORS: Enable warnings as error (default: False)
- [DSL_NAME]_WARNINGS_IGNORE: Ignore warnings (default: False)
- [DSL_NAME]_JIT_TIME_PROFILING: Whether or not to profile the IR generation/compilation/execution time (default: False)
- [DSL_NAME]_DISABLE_FILE_CACHING: Disable file caching (default: False)
- [DSL_NAME]_FILE_CACHING_CAPACITY: Limits the number of the cache save/load files (default: 1000)
- [DSL_NAME]_LIBS: Path to dependent shared libraries (default: None)
- [DSL_NAME]_NO_SOURCE_LOCATION: Generate source location (default: False)
"""
    def __init__(self, prefix="DSL"):
        self.prefix = prefix  # change if needed
        # Printing options
        self.log_to_console = get_bool_env_var(f"{prefix}_LOG_TO_CONSOLE", False)
        self.print_after_preprocessor = get_bool_env_var(
            f"{prefix}_PRINT_AFTER_PREPROCESSOR", False
        )
        self.printIR = get_bool_env_var(f"{prefix}_PRINT_IR", False)
        self.filterStacktrace = get_bool_env_var(f"{prefix}_FILTER_STACKTRACE", True)
        # File options
        self.keepIR = get_bool_env_var(f"{prefix}_KEEP_IR", False)
        self.log_to_file = get_bool_env_var(f"{prefix}_LOG_TO_FILE", False)
        # Other options
        self.log_level = get_int_env_var(f"{prefix}_LOG_LEVEL", 1)
        self.dryrun = get_bool_env_var(f"{prefix}_DRYRUN", False)
        self.arch = get_str_env_var(f"{prefix}_ARCH", detect_gpu_arch(prefix))
        self.warnings_as_errors = get_bool_env_var(
            f"{prefix}_WARNINGS_AS_ERRORS", False
        )
        self.warnings_ignore = get_bool_env_var(f"{prefix}_WARNINGS_IGNORE", False)
        self.jitTimeProfiling = get_bool_env_var(f"{prefix}_JIT_TIME_PROFILING", False)
        self.disable_file_caching = get_bool_env_var(
            f"{prefix}_DISABLE_FILE_CACHING", False
        )
        self.file_caching_capacity = get_int_env_var(
            f"{prefix}_FILE_CACHING_CAPACITY", 1000
        )
        self.generate_source_location = not get_bool_env_var(
            f"{prefix}_NO_SOURCE_LOCATION", False
        )
        # set cuda
        self.cuda_toolkit = get_cuda_toolkit_path()
        # set mlir shared libraries
        self.shared_libs = get_prefix_dsl_libs(prefix)
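
# Minimal usage sketch (illustrative; the environment values are assumptions and must
# be set before the first read because the helper functions cache their results):
#
#   os.environ["DSL_PRINT_IR"] = "1"
#   os.environ["DSL_ARCH"] = "sm_90a"
#   env = EnvironmentVarManager(prefix="DSL")
#   env.printIR        # True
#   env.arch           # "sm_90a" (the environment variable overrides detect_gpu_arch)
#   env.cuda_toolkit   # e.g. "/usr/local/cuda" if a toolkit was discovered
#   env.shared_libs    # ":"-joined MLIR runtime library paths, or None if not found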