Merge branch 'main' into woosuk-tpu
This commit is contained in:
56
setup.py
56
setup.py
@ -15,6 +15,8 @@ from torch.utils.cpp_extension import CUDA_HOME
|
||||
|
||||
ROOT_DIR = os.path.dirname(__file__)
|
||||
logger = logging.getLogger(__name__)
|
||||
# Target device of vLLM, supporting [cuda (by default), rocm, neuron, cpu]
|
||||
VLLM_TARGET_DEVICE = os.getenv("VLLM_TARGET_DEVICE", "cuda")
|
||||
|
||||
# vLLM only supports Linux platform
|
||||
assert sys.platform.startswith(
|
||||
@ -112,6 +114,7 @@ class cmake_build_ext(build_ext):
|
||||
'-DCMAKE_BUILD_TYPE={}'.format(cfg),
|
||||
'-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={}'.format(outdir),
|
||||
'-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY={}'.format(self.build_temp),
|
||||
'-DVLLM_TARGET_DEVICE={}'.format(VLLM_TARGET_DEVICE),
|
||||
]
|
||||
|
||||
verbose = bool(int(os.getenv('VERBOSE', '0')))
|
||||
@ -186,11 +189,14 @@ class cmake_build_ext(build_ext):
|
||||
|
||||
def _is_cuda() -> bool:
    """Return True when this build should target CUDA.

    All of the following must hold:
      * ``VLLM_TARGET_DEVICE`` is ``"cuda"``,
      * the installed torch reports a CUDA version, and
      * neither ``_is_neuron()`` nor ``_is_tpu()`` claims the environment.
    """
    has_cuda = torch.version.cuda is not None
    # The target-device check comes first so that an explicit non-CUDA
    # target short-circuits before the other backend probes run.
    return (VLLM_TARGET_DEVICE == "cuda"
            and has_cuda
            and not (_is_neuron() or _is_tpu()))
|
||||
|
||||
|
||||
def _is_hip() -> bool:
    """Return True when this build should target ROCm/HIP.

    Requires ``VLLM_TARGET_DEVICE`` to be ``"cuda"`` or ``"rocm"`` and the
    installed torch to report a HIP version.
    """
    return (VLLM_TARGET_DEVICE in ("cuda", "rocm")
            and torch.version.hip is not None)
|
||||
|
||||
|
||||
def _is_neuron() -> bool:
|
||||
@ -206,8 +212,12 @@ def _is_tpu() -> bool:
|
||||
return True # FIXME
|
||||
|
||||
|
||||
def _is_cpu() -> bool:
    """Return True when ``VLLM_TARGET_DEVICE`` selects the CPU backend."""
    targets_cpu = VLLM_TARGET_DEVICE == "cpu"
    return targets_cpu
|
||||
|
||||
|
||||
def _build_custom_ops() -> bool:
    """Return True when the build targets CUDA, HIP, or CPU."""
    return _is_cuda() or _is_hip() or _is_cpu()
|
||||
|
||||
|
||||
def _install_punica() -> bool:
|
||||
@ -307,6 +317,8 @@ def get_vllm_version() -> str:
|
||||
version += f"+neuron{neuron_version_str}"
|
||||
elif _is_tpu():
|
||||
version += "+tpu"
|
||||
elif _is_cpu():
|
||||
version += "+cpu"
|
||||
else:
|
||||
raise RuntimeError("Unknown runtime environment")
|
||||
|
||||
@ -324,22 +336,40 @@ def read_readme() -> str:
|
||||
|
||||
def get_requirements() -> List[str]:
    """Get Python package dependencies for the detected target platform.

    The requirements file is chosen by the first platform check that
    succeeds, in this order: CUDA, HIP, Neuron, TPU, CPU.

    Returns:
        The requirement lines of the selected file, with any ``-r <file>``
        include lines replaced by the contents of the referenced file.

    Raises:
        ValueError: If none of the platform checks succeeds.
    """

    def _read_requirements(filename: str) -> List[str]:
        """Read *filename* (located via ``get_path``), expanding ``-r``."""
        with open(get_path(filename)) as f:
            requirements = f.read().strip().split("\n")
        resolved_requirements = []
        for line in requirements:
            if line.startswith("-r "):
                # "-r other.txt" pulls in another requirements file; inline
                # its (recursively resolved) contents in place of the line.
                resolved_requirements += _read_requirements(line.split()[1])
            else:
                resolved_requirements.append(line)
        return resolved_requirements

    if _is_cuda():
        requirements = _read_requirements("requirements-cuda.txt")
        # The requirements file pins the CUDA 12 NCCL wheel; rewrite it to
        # match the CUDA major version torch was built against.
        cuda_major = torch.version.cuda.split(".")[0]
        requirements = [
            req.replace("vllm-nccl-cu12", f"vllm-nccl-cu{cuda_major}")
            for req in requirements
        ]
    elif _is_hip():
        requirements = _read_requirements("requirements-rocm.txt")
    elif _is_neuron():
        requirements = _read_requirements("requirements-neuron.txt")
    elif _is_tpu():
        requirements = _read_requirements("requirements-tpu.txt")
    elif _is_cpu():
        requirements = _read_requirements("requirements-cpu.txt")
    else:
        raise ValueError(
            "Unsupported platform, please use CUDA, ROCm, Neuron, or CPU.")
    return requirements
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user