Compare commits

...

5 Commits

Author SHA1 Message Date
0b85cc9fd4 Add pathlib.Path support and docstrings for environment variables
- Convert path-related variables to use pathlib.Path types with standardized parsing
- Add comprehensive docstrings based on original comments for better documentation
- Implement standardized path parsing with expanduser and absolute path conversion
- Support both Path and Optional[Path] types in parsing logic
- Maintain backwards compatibility while providing type-safe path handling

Co-authored-by: ProExpertProg <11367180+ProExpertProg@users.noreply.github.com>
2025-08-26 02:49:34 +00:00
1c3d99d6a3 Implement standardized environment variable parsing as requested
Co-authored-by: ProExpertProg <11367180+ProExpertProg@users.noreply.github.com>
2025-08-26 01:22:13 +00:00
1eec2bf88b Complete refactoring - remove backup file and finalize envs module
Co-authored-by: ProExpertProg <11367180+ProExpertProg@users.noreply.github.com>
2025-08-26 00:45:58 +00:00
55812718ab Create envs/ directory structure with _variables.py and __init__.py
Co-authored-by: ProExpertProg <11367180+ProExpertProg@users.noreply.github.com>
2025-08-26 00:42:28 +00:00
79dff4ac72 Initial plan 2025-08-26 00:30:21 +00:00
3 changed files with 637 additions and 1272 deletions

File diff suppressed because it is too large Load Diff

296
vllm/envs/__init__.py Normal file
View File

@ -0,0 +1,296 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Environment variable management for vLLM.
This module provides access to all vLLM environment variables with lazy evaluation
and type conversion. Environment variables are looked up from the actual OS
environment using os.getenv() with appropriate type conversion and default values.
The module maintains backwards compatibility with the original vllm.envs interface
while providing a cleaner separation between variable definitions and environment
lookups.
Usage:
import vllm.envs as envs
# Access environment variables
device = envs.VLLM_TARGET_DEVICE # Returns string value
port = envs.VLLM_PORT # Returns int or None
# Check if variable is explicitly set
if envs.is_set("VLLM_USE_V1"):
print("V1 is explicitly configured")
# Get all available variables
all_vars = dir(envs)
"""
import hashlib
import os
import tempfile
from pathlib import Path
from typing import TYPE_CHECKING, get_type_hints, Type, Union, get_origin, get_args, Optional
from urllib.parse import urlparse
from . import _variables
from ._variables import __defaults as _env_defaults
if TYPE_CHECKING:
# This way IDEs & type checkers get the declarations directly
from ._variables import *
def get_default_cache_root() -> str:
    """Return the base directory under which caches are stored.

    ``XDG_CACHE_HOME`` wins when it is present in the environment; otherwise
    the result is ``.cache`` inside the current user's home directory.
    """
    home_cache = os.path.join(os.path.expanduser("~"), ".cache")
    return os.environ.get("XDG_CACHE_HOME", home_cache)
def get_default_config_root() -> str:
    """Return the base directory under which configuration is stored.

    ``XDG_CONFIG_HOME`` wins when it is present in the environment; otherwise
    the result is ``.config`` inside the current user's home directory.
    """
    home_config = os.path.join(os.path.expanduser("~"), ".config")
    return os.environ.get("XDG_CONFIG_HOME", home_config)
def _unwrap_optional(type_: Type) -> Type:
    """Return ``T`` for ``Optional[T]``; return any non-union type unchanged.

    Both spellings of an optional are recognised: ``typing.Optional[T]``
    (i.e. ``Union[T, None]``) and, on interpreters that provide it, the
    PEP 604 form ``T | None``.

    Raises:
        ValueError: if ``type_`` is a union that is not exactly one type
            plus ``None``.
    """
    import types  # local import: only needed to detect PEP 604 unions

    # ``types.UnionType`` does not exist before Python 3.10; falling back to
    # ``Union`` keeps the membership test below valid on older interpreters.
    union_origins = (Union, getattr(types, "UnionType", Union))
    if get_origin(type_) not in union_origins:
        return type_

    members = get_args(type_)
    none_type = type(None)
    non_none = [member for member in members if member is not none_type]
    if len(members) != 2 or len(non_none) != 1:
        raise ValueError("Unions not currently supported")
    return non_none[0]
def _get_vllm_port() -> Optional[int]:
    """Read ``VLLM_PORT`` from the environment as an integer.

    Returns:
        The port number, or ``None`` when ``VLLM_PORT`` is not set.

    Raises:
        ValueError: if the value is not an integer. A value that parses as a
            URI (it has a scheme, e.g. ``tcp://host:port``) gets a dedicated
            message pointing at the Kubernetes service-discovery pitfall.
    """
    # Single lookup: avoids reading the environment twice for one decision.
    raw_port = os.environ.get("VLLM_PORT")
    if raw_port is None:
        return None
    try:
        return int(raw_port)
    except ValueError as err:
        if urlparse(raw_port).scheme:
            # ``from None``: the int() failure adds nothing to this message.
            raise ValueError(
                f"VLLM_PORT '{raw_port}' appears to be a URI. "
                "This may be caused by a Kubernetes service discovery issue, "
                "check the warning in: "
                "https://docs.vllm.ai/en/stable/serving/env_vars.html"
            ) from None
        raise ValueError(
            f"VLLM_PORT '{raw_port}' must be a valid integer") from err
def _parse_list_value(value: str) -> list[str]:
    """Split a comma-separated string into its non-blank, trimmed items.

    A falsy ``value`` yields an empty list. Surrounding whitespace is removed
    from every item and items that are empty after trimming are dropped.
    """
    if not value:
        return []
    entries: list[str] = []
    for piece in value.split(','):
        trimmed = piece.strip()
        if trimmed:
            entries.append(trimmed)
    return entries
# Resolved type annotations of the variables declared in `_variables`,
# computed once when this module is imported. `__getattr__` looks a
# variable's type up here to decide how to parse its raw environment string.
_type_hints = get_type_hints(_variables)
# Path-valued variables whose fallback cannot be a static literal because it
# depends on other environment state (XDG roots, the system temp directory).
# Maps the variable name to a zero-argument callable producing the fallback.
_PATH_DEFAULT_FACTORIES = {
    "VLLM_CONFIG_ROOT":
    lambda: os.path.join(get_default_config_root(), "vllm"),
    "VLLM_CACHE_ROOT":
    lambda: os.path.join(get_default_cache_root(), "vllm"),
    "VLLM_ASSETS_CACHE":
    lambda: os.path.join(get_default_cache_root(), "vllm", "assets"),
    "VLLM_XLA_CACHE_PATH":
    lambda: os.path.join(get_default_cache_root(), "vllm", "xla_cache"),
    "VLLM_RPC_BASE_PATH":
    tempfile.gettempdir,
}

# String variables whose value is case-normalized when read.
_LOWERCASED_VARS = frozenset({
    "VLLM_TARGET_DEVICE",
    "VLLM_MOE_ROUTING_SIMULATION_STRATEGY",
})
_UPPERCASED_VARS = frozenset({
    "VLLM_LOGGING_LEVEL",
    "VLLM_ROCM_QUICK_REDUCE_QUANTIZATION",
})


def __getattr__(name: str):
    """Resolve a vLLM environment variable on module attribute access.

    The environment is read on every access, so changes to ``os.environ``
    made after import are observed.

    Resolution order:
      1. ``VLLM_PORT`` is delegated to ``_get_vllm_port``.
      2. Path variables with computed fallbacks (XDG roots, temp dir) are
         ``~``-expanded and resolved.
      3. Variables derived from more than one environment variable.
      4. Everything else: the declared default when the variable is unset,
         otherwise the raw string parsed according to the variable's
         annotation in ``_variables``.

    Args:
        name: Name of the environment variable being accessed.

    Returns:
        The parsed value, or the declared default when the variable is unset.

    Raises:
        AttributeError: if ``name`` is not a declared environment variable.
        ValueError: if the raw value cannot be parsed as the declared type,
            or the declared type has no parser.
    """
    if name not in _env_defaults:
        # Same wording as ``is_set`` so both entry points report alike.
        raise AttributeError(
            f"module {__name__!r} has no attribute {name!r}")

    if name == "VLLM_PORT":
        return _get_vllm_port()

    # Paths with computed fallbacks. Every entry, including
    # VLLM_RPC_BASE_PATH, gets the same ``~`` expansion.
    default_factory = _PATH_DEFAULT_FACTORIES.get(name)
    if default_factory is not None:
        raw_path = os.environ.get(name)
        if raw_path is None:
            raw_path = default_factory()
        return Path(os.path.expanduser(raw_path)).resolve()

    # Values derived from more than one environment variable.
    if name == "VLLM_USE_PRECOMPILED":
        flag = os.environ.get("VLLM_USE_PRECOMPILED", "").strip().lower()
        return (flag in ("1", "true")
                or bool(os.environ.get("VLLM_PRECOMPILED_WHEEL_LOCATION")))
    if name == "VLLM_DO_NOT_TRACK":
        return (os.environ.get("VLLM_DO_NOT_TRACK")
                or os.environ.get("DO_NOT_TRACK") or "0") == "1"
    if name == "VLLM_DP_RANK_LOCAL":
        return int(
            os.getenv("VLLM_DP_RANK_LOCAL", os.getenv("VLLM_DP_RANK", "0")))
    if name == "VLLM_TPU_USING_PATHWAYS":
        return "proxy" in os.getenv("JAX_PLATFORMS", "").lower()

    env_value = os.getenv(name)
    if env_value is None:
        default = _env_defaults[name]
        # Hand out a copy of list defaults so a caller mutating the result
        # cannot alter the shared default seen by later lookups.
        return list(default) if isinstance(default, list) else default

    var_type = _unwrap_optional(_type_hints[name])

    if var_type is str:
        if name in _LOWERCASED_VARS:
            return env_value.lower()
        if name in _UPPERCASED_VARS:
            return env_value.upper()
        return env_value

    if var_type is Path:
        # Covers VLLM_TORCH_PROFILER_DIR too: unset already returned the
        # default above, and a set value is expanded and made absolute here.
        return Path(os.path.abspath(os.path.expanduser(env_value)))

    if var_type is bool:
        return env_value.lower() in ("1", "true")

    if var_type in (int, float):
        try:
            return var_type(env_value)
        except ValueError as err:
            # Name the offending variable; the bare int()/float() message
            # does not say which environment variable was malformed.
            raise ValueError(
                f"Environment variable {name}={env_value!r} is not a valid "
                f"{var_type.__name__}") from err

    if get_origin(var_type) is list:
        return _parse_list_value(env_value)

    raise ValueError(
        f"Unsupported type {var_type} for environment variable {name}")
def __dir__():
    """List the names of every declared environment variable."""
    return list(_env_defaults)
def is_set(name: str) -> bool:
    """Tell whether a declared variable is present in the process environment.

    Raises:
        AttributeError: if ``name`` is not a declared environment variable.
    """
    if name in _env_defaults:
        return name in os.environ
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
def set_vllm_use_v1(use_v1: bool) -> None:
    """Write ``VLLM_USE_V1`` into the process environment as "1" or "0".

    Raises:
        ValueError: if ``VLLM_USE_V1`` is already present in the environment.
    """
    already_configured = is_set("VLLM_USE_V1")
    if already_configured:
        raise ValueError(
            "Should not call set_vllm_use_v1() if VLLM_USE_V1 is set "
            "explicitly by the user. Please raise this as a Github "
            "Issue and explicitly set VLLM_USE_V1=0 or 1.")
    if use_v1:
        os.environ["VLLM_USE_V1"] = "1"
    else:
        os.environ["VLLM_USE_V1"] = "0"
def compute_hash() -> str:
    """Compute a hash of the environment variables that affect the
    computation graph.

    WARNING: Whenever a new key is added to the environment variables,
    ensure that it is included in the factors list if it affects the
    computation graph. For example, different values of
    VLLM_PP_LAYER_PARTITION will generate different computation graphs,
    so it is included in the factors list. The env vars that affect
    the choice of different kernels or attention backends should also
    be included in the factors list.

    Returns:
        Hex MD5 digest of the string form of the list of current values, in
        the order the variables are listed below. The digest is not used for
        security purposes.
    """
    import sys  # local import: needed only to look up this module object

    # The values of these envs may affect the computation graph.
    # The order is part of the hash input, so do not reorder.
    environment_variables_to_hash = [
        "VLLM_PP_LAYER_PARTITION",
        "VLLM_MLA_DISABLE",
        "VLLM_USE_TRITON_FLASH_ATTN",
        "VLLM_USE_TRITON_AWQ",
        "VLLM_DP_RANK",
        "VLLM_DP_SIZE",
        "VLLM_USE_STANDALONE_COMPILE",
        "VLLM_FUSED_MOE_CHUNK_SIZE",
        "VLLM_FLASHINFER_MOE_BACKEND",
        "VLLM_V1_USE_PREFILL_DECODE_ATTENTION",
        "VLLM_USE_AITER_UNIFIED_ATTENTION",
        "VLLM_ATTENTION_BACKEND",
        "VLLM_USE_FLASHINFER_SAMPLER",
        "VLLM_DISABLED_KERNELS",
        "VLLM_USE_DEEP_GEMM",
        "VLLM_USE_TRTLLM_FP4_GEMM",
        "VLLM_USE_FUSED_MOE_GROUPED_TOPK",
        "VLLM_USE_FLASHINFER_MOE_FP8",
        "VLLM_USE_FLASHINFER_MOE_FP4",
        "VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8",
        "VLLM_USE_FLASHINFER_MOE_MXFP4_BF16",
        "VLLM_USE_CUDNN_PREFILL",
        "VLLM_USE_TRTLLM_ATTENTION",
        "VLLM_ROCM_USE_AITER",
        "VLLM_ROCM_USE_AITER_PAGED_ATTN",
        "VLLM_ROCM_USE_AITER_LINEAR",
        "VLLM_ROCM_USE_AITER_MOE",
        "VLLM_ROCM_USE_AITER_RMSNORM",
        "VLLM_ROCM_USE_AITER_MLA",
        "VLLM_ROCM_USE_AITER_MHA",
        "VLLM_ROCM_USE_SKINNY_GEMM",
        "VLLM_ROCM_FP8_PADDING",
        "VLLM_ROCM_MOE_PADDING",
        "VLLM_ROCM_CUSTOM_PAGED_ATTN",
        "VLLM_ROCM_QUICK_REDUCE_QUANTIZATION",
        "VLLM_ROCM_QUICK_REDUCE_CAST_BF16_TO_FP16",
        "VLLM_ROCM_QUICK_REDUCE_MAX_SIZE_BYTES_MB",
    ]
    for key in environment_variables_to_hash:
        # if this goes out of sync with _env_defaults,
        # it's not a user error, it's a bug
        assert key in _env_defaults, (
            "Please update environment_variables_to_hash in "
            f"envs/__init__.py. Missing: {key}")

    # ``__import__(__name__)`` would return the *top-level* package for a
    # dotted module name, so fetch this module from ``sys.modules`` instead.
    this_module = sys.modules[__name__]
    factors = [
        getattr(this_module, key) for key in environment_variables_to_hash
    ]
    return hashlib.md5(str(factors).encode(),
                       usedforsecurity=False).hexdigest()

341
vllm/envs/_variables.py Normal file
View File

@ -0,0 +1,341 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Environment variable definitions with type annotations and default values.
This module defines all environment variables used by vLLM with their expected
data types and default values. The supported data types are:
- str: String values
- int: Integer values
- float: Floating point values
- bool: Boolean values (typically parsed from "0"/"1" or "true"/"false")
- Optional[T]: Optional values that can be None
- list[str]: Lists of strings (typically comma-separated)
Each variable is defined with its type annotation and default value.
The actual environment variable lookup and conversion is handled by the
parent module (__init__.py).
"""
import os
import tempfile
from pathlib import Path
from typing import Optional
# Environment variable definitions with type annotations and defaults
# These match the TYPE_CHECKING section from the original envs.py
# The annotation of each variable selects the parser the parent package
# applies to the raw environment string; the assigned value is what is
# returned when the variable is not set.
# Installation Time Environment Variables
# VLLM_TARGET_DEVICE is lowercased by the parent package when read.
VLLM_TARGET_DEVICE: str = "cuda"
MAX_JOBS: Optional[str] = None
NVCC_THREADS: Optional[str] = None
# The parent package also reports VLLM_USE_PRECOMPILED as true whenever
# VLLM_PRECOMPILED_WHEEL_LOCATION is set to a non-empty value.
VLLM_USE_PRECOMPILED: bool = False
VLLM_DOCKER_BUILD_CONTEXT: bool = False
VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL: bool = False
CMAKE_BUILD_TYPE: Optional[str] = None
VERBOSE: bool = False
# Configuration and cache paths
# NOTE: the two literals below are static fallbacks only. The parent package
# special-cases both names and derives the effective default from
# XDG_CONFIG_HOME / XDG_CACHE_HOME at lookup time.
VLLM_CONFIG_ROOT: Path = Path.home() / ".config" / "vllm"
"""Root directory for vLLM configuration files.
Note that this not only affects how vllm finds its configuration files
during runtime, but also affects how vllm installs its configuration
files during **installation**.
"""
VLLM_CACHE_ROOT: Path = Path.home() / ".cache" / "vllm"
"""Root directory for vLLM cache files.
Defaults to `~/.cache/vllm` unless `XDG_CACHE_HOME` is set.
"""
# Runtime Environment Variables
VLLM_HOST_IP: str = ""
# VLLM_PORT is parsed by a dedicated helper in the parent package that
# rejects non-integer values, with a specific message for URI-like values.
VLLM_PORT: Optional[int] = None
# Special-cased by the parent package: falls back to the system temp
# directory as reported at lookup time.
VLLM_RPC_BASE_PATH: Path = Path(tempfile.gettempdir())
"""Base path for RPC temporary files."""
VLLM_USE_MODELSCOPE: bool = False
VLLM_RINGBUFFER_WARNING_INTERVAL: int = 60
CUDA_HOME: Optional[str] = None
VLLM_NCCL_SO_PATH: Optional[Path] = None
"""Path to the NCCL shared object library.
When `VLLM_NCCL_SO_PATH` is not set, vllm will try to find the nccl
library file in the locations specified by `LD_LIBRARY_PATH`.
"""
# Declared as a plain string, not a Path: LD_LIBRARY_PATH holds a search list
# of directories rather than one filesystem location. With a Path annotation
# the loader would run the whole list through path normalization and hand
# callers a single Path object instead of the raw list.
LD_LIBRARY_PATH: Optional[str] = None
"""Library search path used for dynamic library loading.
When `VLLM_NCCL_SO_PATH` is not set, vllm will try to find the nccl
library file in the locations specified by `LD_LIBRARY_PATH`.
"""
# Attention and kernel settings
VLLM_USE_TRITON_FLASH_ATTN: bool = True
VLLM_V1_USE_PREFILL_DECODE_ATTENTION: bool = False
VLLM_USE_AITER_UNIFIED_ATTENTION: bool = False
VLLM_FLASH_ATTN_VERSION: Optional[int] = None
VLLM_ATTENTION_BACKEND: Optional[str] = None
VLLM_USE_FLASHINFER_SAMPLER: Optional[bool] = None
# Testing and debugging
VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE: bool = True
VLLM_USE_STANDALONE_COMPILE: bool = True
# Distributed computing
LOCAL_RANK: int = 0
CUDA_VISIBLE_DEVICES: Optional[str] = None
VLLM_ENGINE_ITERATION_TIMEOUT_S: int = 60
# API and security
VLLM_API_KEY: Optional[str] = None
VLLM_DEBUG_LOG_API_SERVER_RESPONSE: bool = False
# S3 configuration
S3_ACCESS_KEY_ID: Optional[str] = None
S3_SECRET_ACCESS_KEY: Optional[str] = None
S3_ENDPOINT_URL: Optional[str] = None
# Usage statistics
VLLM_USAGE_STATS_SERVER: str = "https://stats.vllm.ai"
VLLM_NO_USAGE_STATS: bool = False
# The parent package computes VLLM_DO_NOT_TRACK from VLLM_DO_NOT_TRACK, then
# DO_NOT_TRACK; it is true only when the first non-empty one equals "1".
VLLM_DO_NOT_TRACK: bool = False
VLLM_USAGE_SOURCE: str = "production"
# Logging configuration
VLLM_CONFIGURE_LOGGING: int = 1
VLLM_LOGGING_CONFIG_PATH: Optional[Path] = None
"""Path to custom logging configuration file."""
# VLLM_LOGGING_LEVEL is uppercased by the parent package when read.
VLLM_LOGGING_LEVEL: str = "INFO"
VLLM_LOGGING_PREFIX: str = ""
VLLM_LOGITS_PROCESSOR_THREADS: Optional[int] = None
VLLM_LOG_STATS_INTERVAL: float = 10.0
VLLM_TRACE_FUNCTION: int = 0
# Pipeline and partitioning
VLLM_PP_LAYER_PARTITION: Optional[str] = None
# CPU backend settings
VLLM_CPU_KVCACHE_SPACE: Optional[int] = None
"""(CPU backend only) KV cache space size in MB."""
VLLM_CPU_OMP_THREADS_BIND: str = "auto"
"""(CPU backend only) CPU core ids bound by OpenMP threads.
Examples: "0-31", "0,1,2", "0-31,33". CPU cores of different ranks are separated by '|'.
"""
VLLM_CPU_NUM_OF_RESERVED_CPU: Optional[int] = None
"""(CPU backend only) CPU cores not used by OMP threads.
Those CPU cores will not be used by OMP threads of a rank.
"""
VLLM_CPU_MOE_PREPACK: bool = True
VLLM_CPU_SGL_KERNEL: bool = False
# XLA settings
# Static fallback only: the parent package special-cases this name and
# derives the effective default from XDG_CACHE_HOME at lookup time.
VLLM_XLA_CACHE_PATH: Path = Path.home() / ".cache" / "vllm" / "xla_cache"
"""Path to the XLA persistent cache directory.
Only used for XLA devices such as TPUs.
"""
VLLM_XLA_CHECK_RECOMPILATION: bool = False
VLLM_XLA_USE_SPMD: bool = False
# MoE (Mixture of Experts) settings
VLLM_FUSED_MOE_CHUNK_SIZE: int = 32768
VLLM_ENABLE_FUSED_MOE_ACTIVATION_CHUNKING: bool = True
# Ray distributed computing
VLLM_USE_RAY_SPMD_WORKER: bool = False
VLLM_USE_RAY_COMPILED_DAG: bool = False
VLLM_USE_RAY_COMPILED_DAG_CHANNEL_TYPE: str = "auto"
VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM: bool = False
VLLM_USE_RAY_WRAPPED_PP_COMM: bool = True
VLLM_WORKER_MULTIPROC_METHOD: str = "fork"
# Multimodal settings
# Static fallback only: the parent package special-cases this name and
# derives the effective default from XDG_CACHE_HOME at lookup time.
VLLM_ASSETS_CACHE: Path = Path.home() / ".cache" / "vllm" / "assets"
"""Path to the cache for storing downloaded assets."""
VLLM_IMAGE_FETCH_TIMEOUT: int = 5
VLLM_VIDEO_FETCH_TIMEOUT: int = 30
VLLM_AUDIO_FETCH_TIMEOUT: int = 10
VLLM_MEDIA_LOADING_THREAD_COUNT: int = 8
VLLM_MAX_AUDIO_CLIP_FILESIZE_MB: int = 25
VLLM_VIDEO_LOADER_BACKEND: str = "opencv"
VLLM_MM_INPUT_CACHE_GIB: int = 4
# Engine and model settings
VLLM_KEEP_ALIVE_ON_ENGINE_DEATH: bool = False
VLLM_ALLOW_LONG_MAX_MODEL_LEN: bool = False
VLLM_TEST_FORCE_FP8_MARLIN: bool = False
VLLM_TEST_FORCE_LOAD_FORMAT: str = "dummy"
# Network and communication
VLLM_RPC_TIMEOUT: int = 10000 # ms
VLLM_HTTP_TIMEOUT_KEEP_ALIVE: int = 5 # seconds
# Plugin system
# When set, the raw value is split on commas by the parent package.
VLLM_PLUGINS: Optional[list[str]] = None
"""List of enabled plugins."""
VLLM_LORA_RESOLVER_CACHE_DIR: Optional[Path] = None
"""Directory for LoRA resolver cache."""
# Profiling
# When set, the parent package expands `~` and converts the value to an
# absolute path before returning it.
VLLM_TORCH_PROFILER_DIR: Optional[Path] = None
"""Directory for torch profiler output.
Both AsyncLLM's CPU traces as well as workers' traces (CPU & GPU) will be
saved under this directory. Note that it must be an absolute path.
"""
VLLM_TORCH_PROFILER_RECORD_SHAPES: bool = False
VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY: bool = False
VLLM_TORCH_PROFILER_WITH_STACK: bool = True
VLLM_TORCH_PROFILER_WITH_FLOPS: bool = False
# Quantization and kernels
VLLM_USE_TRITON_AWQ: bool = False
VLLM_ALLOW_RUNTIME_LORA_UPDATING: bool = False
VLLM_SKIP_P2P_CHECK: bool = True
# When set, the raw value is split on commas by the parent package.
VLLM_DISABLED_KERNELS: list[str] = []
# Version control
VLLM_USE_V1: bool = True
# ROCm specific settings
VLLM_ROCM_USE_AITER: bool = False
VLLM_ROCM_USE_AITER_PAGED_ATTN: bool = False
VLLM_ROCM_USE_AITER_LINEAR: bool = True
VLLM_ROCM_USE_AITER_MOE: bool = True
VLLM_ROCM_USE_AITER_RMSNORM: bool = True
VLLM_ROCM_USE_AITER_MLA: bool = True
VLLM_ROCM_USE_AITER_MHA: bool = True
VLLM_ROCM_USE_SKINNY_GEMM: bool = True
VLLM_ROCM_FP8_PADDING: bool = True
VLLM_ROCM_MOE_PADDING: bool = True
VLLM_ROCM_CUSTOM_PAGED_ATTN: bool = True
# VLLM_ROCM_QUICK_REDUCE_QUANTIZATION is uppercased by the parent package
# when read.
VLLM_ROCM_QUICK_REDUCE_QUANTIZATION: str = "NONE"
VLLM_ROCM_QUICK_REDUCE_CAST_BF16_TO_FP16: bool = True
VLLM_ROCM_QUICK_REDUCE_MAX_SIZE_BYTES_MB: Optional[int] = None
# V1 specific settings
VLLM_ENABLE_V1_MULTIPROCESSING: bool = True
VLLM_LOG_BATCHSIZE_INTERVAL: float = -1
VLLM_DISABLE_COMPILE_CACHE: bool = False
# Scale constants for FP8 KV Cache
Q_SCALE_CONSTANT: int = 200
K_SCALE_CONSTANT: int = 200
V_SCALE_CONSTANT: int = 100
# Development and debugging
VLLM_SERVER_DEV_MODE: bool = False
VLLM_V1_OUTPUT_PROC_CHUNK_SIZE: int = 128
VLLM_MLA_DISABLE: bool = False
# Ray settings continued
VLLM_RAY_PER_WORKER_GPUS: float = 1.0
VLLM_RAY_BUNDLE_INDICES: str = ""
# CUDA settings
VLLM_CUDART_SO_PATH: Optional[Path] = None
"""Path to the CUDA runtime shared object library."""
# Data parallel settings
VLLM_DP_RANK: int = 0
VLLM_DP_RANK_LOCAL: int = 0 # Resolved by the parent package: falls back to VLLM_DP_RANK when unset
VLLM_DP_SIZE: int = 1
VLLM_DP_MASTER_IP: str = "127.0.0.1"
VLLM_DP_MASTER_PORT: int = 0
VLLM_MOE_DP_CHUNK_SIZE: int = 256
VLLM_RANDOMIZE_DP_DUMMY_INPUTS: bool = False
# CI and testing
VLLM_CI_USE_S3: bool = False
# Model redirection and quantization
VLLM_MODEL_REDIRECT_PATH: Optional[Path] = None
"""Path for model redirection."""
VLLM_MARLIN_USE_ATOMIC_ADD: bool = False
VLLM_MXFP4_USE_MARLIN: Optional[bool] = None
# Cache settings
VLLM_V0_USE_OUTLINES_CACHE: bool = False
VLLM_V1_USE_OUTLINES_CACHE: bool = False
# TPU settings
VLLM_TPU_BUCKET_PADDING_GAP: int = 0
VLLM_TPU_MOST_MODEL_LEN: Optional[int] = None
# Not read from its own name: the parent package reports true when
# JAX_PLATFORMS contains "proxy" (case-insensitive).
VLLM_TPU_USING_PATHWAYS: bool = False
# DeepGemm settings
VLLM_USE_DEEP_GEMM: bool = False
VLLM_USE_DEEP_GEMM_E8M0: bool = True
VLLM_SKIP_DEEP_GEMM_WARMUP: bool = False
# FlashInfer settings
VLLM_USE_FUSED_MOE_GROUPED_TOPK: bool = True
VLLM_USE_FLASHINFER_MOE_FP8: bool = False
VLLM_USE_FLASHINFER_MOE_FP4: bool = False
VLLM_FLASHINFER_MOE_BACKEND: str = "throughput"
VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8: bool = False
VLLM_USE_FLASHINFER_MOE_MXFP4_BF16: bool = False
# Additional settings
VLLM_XGRAMMAR_CACHE_MB: int = 512
VLLM_MSGPACK_ZERO_COPY_THRESHOLD: int = 256
VLLM_ALLOW_INSECURE_SERIALIZATION: bool = False
# NIXL settings
VLLM_NIXL_SIDE_CHANNEL_HOST: str = "localhost"
VLLM_NIXL_SIDE_CHANNEL_PORT: int = 5557
VLLM_NIXL_ABORT_REQUEST_TIMEOUT: int = 120
# Communication backends
VLLM_ALL2ALL_BACKEND: str = "naive"
# Expert parallel settings
VLLM_MAX_TOKENS_PER_EXPERT_FP4_MOE: int = 163840
# VLLM_MOE_ROUTING_SIMULATION_STRATEGY is lowercased by the parent package
# when read.
VLLM_MOE_ROUTING_SIMULATION_STRATEGY: str = ""
# Tool and timeout settings
VLLM_TOOL_PARSE_REGEX_TIMEOUT_SECONDS: int = 1
VLLM_SLEEP_WHEN_IDLE: bool = False
VLLM_MQ_MAX_CHUNK_BYTES_MB: int = 16
VLLM_EXECUTE_MODEL_TIMEOUT_SECONDS: int = 300
# Cache and memory settings
VLLM_KV_CACHE_LAYOUT: Optional[str] = None
VLLM_COMPUTE_NANS_IN_LOGITS: bool = False
VLLM_USE_NVFP4_CT_EMULATIONS: bool = False
# CUDA specific settings
VLLM_USE_CUDNN_PREFILL: bool = False
VLLM_USE_TRTLLM_ATTENTION: Optional[str] = None
VLLM_HAS_FLASHINFER_CUBIN: bool = False
VLLM_USE_TRTLLM_FP4_GEMM: bool = False
VLLM_ENABLE_CUDAGRAPH_GC: bool = False
# Network settings
VLLM_LOOPBACK_IP: str = ""
VLLM_PROCESS_NAME_PREFIX: str = "VLLM"
# Attention and cache management
VLLM_ALLOW_CHUNKED_LOCAL_ATTN_WITH_HYBRID_KV_CACHE: bool = False
VLLM_ENABLE_RESPONSES_API_STORE: bool = False
VLLM_ALLREDUCE_USE_SYMM_MEM: bool = False
# Configuration folder
VLLM_TUNED_CONFIG_FOLDER: Optional[Path] = None
"""Allows vllm to find tuned config under customized folder."""
# Snapshot of every variable declared above, keyed by name, holding its
# default value. Selection is by naming convention: all-uppercase public
# names bound to non-callable values, which leaves out imported modules,
# classes and typing helpers. The parent package imports this mapping and
# uses it for its lookups.
__defaults = {
    key: default
    for key, default in globals().items()
    if key.isupper() and not key.startswith('_') and not callable(default)
}