Merge branch 'main' into woosuk-tpu

2024-04-10 07:51:35 +00:00
parent d899009a63 b3104b2a10
commit 60ff6b8c5c
149 changed files with 11618 additions and 1383 deletions
--- a/setup.py
+++ b/setup.py
@ -15,6 +15,8 @@ from torch.utils.cpp_extension import CUDA_HOME

 ROOT_DIR = os.path.dirname(__file__)
 logger = logging.getLogger(__name__)
+# Target device for vLLM. Supported values: cuda (default), rocm, neuron, cpu.
+VLLM_TARGET_DEVICE = os.getenv("VLLM_TARGET_DEVICE", "cuda")

 # vLLM only supports the Linux platform
 assert sys.platform.startswith(
@ -112,6 +114,7 @@ class cmake_build_ext(build_ext):
            '-DCMAKE_BUILD_TYPE={}'.format(cfg),
            '-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={}'.format(outdir),
            '-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY={}'.format(self.build_temp),
+            '-DVLLM_TARGET_DEVICE={}'.format(VLLM_TARGET_DEVICE),
        ]

        verbose = bool(int(os.getenv('VERBOSE', '0')))
@ -186,11 +189,14 @@ class cmake_build_ext(build_ext):

 def _is_cuda() -> bool:
    has_cuda = torch.version.cuda is not None
-    return has_cuda and not (_is_neuron() or _is_tpu())
+    return (VLLM_TARGET_DEVICE == "cuda"
+            and has_cuda
+            and not (_is_neuron() or _is_tpu()))


 def _is_hip() -> bool:
-    return torch.version.hip is not None
+    return (VLLM_TARGET_DEVICE == "cuda"
+            or VLLM_TARGET_DEVICE == "rocm") and torch.version.hip is not None


 def _is_neuron() -> bool:
@ -206,8 +212,12 @@ def _is_tpu() -> bool:
    return True  # FIXME: hard-coded to True; no actual TPU detection is performed here


+def _is_cpu() -> bool:
+    return VLLM_TARGET_DEVICE == "cpu"
+
+
 def _build_custom_ops() -> bool:
-    return _is_cuda() or _is_hip()
+    return _is_cuda() or _is_hip() or _is_cpu()


 def _install_punica() -> bool:
@ -307,6 +317,8 @@ def get_vllm_version() -> str:
            version += f"+neuron{neuron_version_str}"
    elif _is_tpu():
        version += "+tpu"
+    elif _is_cpu():
+        version += "+cpu"
    else:
        raise RuntimeError("Unknown runtime environment")

@ -324,22 +336,40 @@ def read_readme() -> str:

 def get_requirements() -> List[str]:
    """Get Python package dependencies from the target platform's requirements file."""
+
+    def _read_requirements(filename: str) -> List[str]:
+        with open(get_path(filename)) as f:
+            requirements = f.read().strip().split("\n")
+        resolved_requirements = []
+        for line in requirements:
+            if line.startswith("-r "):
+                resolved_requirements += _read_requirements(line.split()[1])
+            else:
+                resolved_requirements.append(line)
+        return resolved_requirements
+
    if _is_cuda():
-        with open(get_path("requirements.txt")) as f:
-            requirements = f.read().strip().split("\n")
+        requirements = _read_requirements("requirements-cuda.txt")
+        cuda_major = torch.version.cuda.split(".")[0]
+        modified_requirements = []
+        for req in requirements:
+            if "vllm-nccl-cu12" in req:
+                modified_requirements.append(
+                    req.replace("vllm-nccl-cu12", f"vllm-nccl-cu{cuda_major}"))
+            else:
+                modified_requirements.append(req)
+        requirements = modified_requirements
    elif _is_hip():
-        with open(get_path("requirements-rocm.txt")) as f:
-            requirements = f.read().strip().split("\n")
+        requirements = _read_requirements("requirements-rocm.txt")
    elif _is_neuron():
-        with open(get_path("requirements-neuron.txt")) as f:
-            requirements = f.read().strip().split("\n")
+        requirements = _read_requirements("requirements-neuron.txt")
    elif _is_tpu():
-        with open(get_path("requirements-tpu.txt")) as f:
-            requirements = f.read().strip().split("\n")
+        requirements = _read_requirements("requirements-tpu.txt")
+    elif _is_cpu():
+        requirements = _read_requirements("requirements-cpu.txt")
    else:
        raise ValueError(
-            "Unsupported platform, please use CUDA, ROCM or Neuron.")
-
+            "Unsupported platform, please use CUDA, ROCm, Neuron, or CPU.")
    return requirements