Revert precompile wheel changes (#22055)

2025-08-01 01:26:24 -07:00
parent 98df153abf
commit da31f6ad3d
4 changed files with 107 additions and 137 deletions
--- a/setup.py
+++ b/setup.py
@ -7,7 +7,6 @@ import json
 import logging
 import os
 import re
-import shutil
 import subprocess
 import sys
 from pathlib import Path
@ -282,69 +281,10 @@ class cmake_build_ext(build_ext):
            self.copy_file(file, dst_file)


-class precompiled_wheel_utils:
+class repackage_wheel(build_ext):
    """Extracts libraries and other files from an existing wheel."""

-    @staticmethod
-    def extract_precompiled_and_patch_package(wheel_url_or_path: str) -> dict:
-        import tempfile
-        import zipfile
-
-        temp_dir = None
-        try:
-            if not os.path.isfile(wheel_url_or_path):
-                wheel_filename = wheel_url_or_path.split("/")[-1]
-                temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
-                wheel_path = os.path.join(temp_dir, wheel_filename)
-                print(f"Downloading wheel from {wheel_url_or_path} "
-                      f"to {wheel_path}")
-                from urllib.request import urlretrieve
-                urlretrieve(wheel_url_or_path, filename=wheel_path)
-            else:
-                wheel_path = wheel_url_or_path
-                print(f"Using existing wheel at {wheel_path}")
-
-            package_data_patch = {}
-
-            with zipfile.ZipFile(wheel_path) as wheel:
-                files_to_copy = [
-                    "vllm/_C.abi3.so",
-                    "vllm/_moe_C.abi3.so",
-                    "vllm/_flashmla_C.abi3.so",
-                    "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
-                    "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
-                    "vllm/cumem_allocator.abi3.so",
-                ]
-
-                compiled_regex = re.compile(
-                    r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")
-                file_members = list(
-                    filter(lambda x: x.filename in files_to_copy,
-                           wheel.filelist))
-                file_members += list(
-                    filter(lambda x: compiled_regex.match(x.filename),
-                           wheel.filelist))
-
-                for file in file_members:
-                    print(f"[extract] {file.filename}")
-                    target_path = os.path.join(".", file.filename)
-                    os.makedirs(os.path.dirname(target_path), exist_ok=True)
-                    with wheel.open(file.filename) as src, open(
-                            target_path, "wb") as dst:
-                        shutil.copyfileobj(src, dst)
-
-                    pkg = os.path.dirname(file.filename).replace("/", ".")
-                    package_data_patch.setdefault(pkg, []).append(
-                        os.path.basename(file.filename))
-
-            return package_data_patch
-        finally:
-            if temp_dir is not None:
-                print(f"Removing temporary directory {temp_dir}")
-                shutil.rmtree(temp_dir)
-
-    @staticmethod
-    def get_base_commit_in_main_branch() -> str:
+    def get_base_commit_in_main_branch(self) -> str:
        # Force to use the nightly wheel. This is mainly used for CI testing.
        if envs.VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL:
            return "nightly"
@ -357,10 +297,6 @@ class precompiled_wheel_utils:
            ]).decode("utf-8")
            upstream_main_commit = json.loads(resp_json)["sha"]

-            # In Docker build context, .git may be immutable or missing.
-            if envs.VLLM_DOCKER_BUILD_CONTEXT:
-                return upstream_main_commit
-
            # Check if the upstream_main_commit exists in the local repo
            try:
                subprocess.check_output(
@ -393,15 +329,92 @@ class precompiled_wheel_utils:
                "wheel may not be compatible with your dev branch: %s", err)
            return "nightly"

+    def run(self) -> None:
+        assert _is_cuda(
+        ), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
+
+        wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
+        if wheel_location is None:
+            base_commit = self.get_base_commit_in_main_branch()
+            wheel_location = f"https://wheels.vllm.ai/{base_commit}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
+            # Fallback to nightly wheel if latest commit wheel is unavailable,
+            # in this rare case, the nightly release CI hasn't finished on main.
+            if not is_url_available(wheel_location):
+                wheel_location = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
+
+        import zipfile
+
+        if os.path.isfile(wheel_location):
+            wheel_path = wheel_location
+            print(f"Using existing wheel={wheel_path}")
+        else:
+            # Download the wheel from a given URL, assume
+            # the filename is the last part of the URL
+            wheel_filename = wheel_location.split("/")[-1]
+
+            import tempfile
+
+            # create a temporary directory to store the wheel
+            temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
+            wheel_path = os.path.join(temp_dir, wheel_filename)
+
+            print(f"Downloading wheel from {wheel_location} to {wheel_path}")
+
+            from urllib.request import urlretrieve
+
+            try:
+                urlretrieve(wheel_location, filename=wheel_path)
+            except Exception as e:
+                from setuptools.errors import SetupError
+
+                raise SetupError(
+                    f"Failed to get vLLM wheel from {wheel_location}") from e
+
+        with zipfile.ZipFile(wheel_path) as wheel:
+            files_to_copy = [
+                "vllm/_C.abi3.so",
+                "vllm/_moe_C.abi3.so",
+                "vllm/_flashmla_C.abi3.so",
+                "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
+                "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
+                "vllm/cumem_allocator.abi3.so",
+                # "vllm/_version.py", # not available in nightly wheels yet
+            ]
+
+            file_members = list(
+                filter(lambda x: x.filename in files_to_copy, wheel.filelist))
+
+            # vllm_flash_attn python code:
+            # Regex from
+            #  `glob.translate('vllm/vllm_flash_attn/**/*.py', recursive=True)`
+            compiled_regex = re.compile(
+                r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")
+            file_members += list(
+                filter(lambda x: compiled_regex.match(x.filename),
+                       wheel.filelist))
+
+            for file in file_members:
+                print(f"Extracting and including {file.filename} "
+                      "from existing wheel")
+                package_name = os.path.dirname(file.filename).replace("/", ".")
+                file_name = os.path.basename(file.filename)
+
+                if package_name not in package_data:
+                    package_data[package_name] = []
+
+                wheel.extract(file)
+                if file_name.endswith(".py"):
+                    # python files shouldn't be added to package_data
+                    continue
+
+                package_data[package_name].append(file_name)
+

 def _no_device() -> bool:
    return VLLM_TARGET_DEVICE == "empty"


 def _is_cuda() -> bool:
-    # Allow forced CUDA in Docker/precompiled builds, even without torch.cuda
-    if envs.VLLM_USE_PRECOMPILED and envs.VLLM_DOCKER_BUILD_CONTEXT:
-        return True
    has_cuda = torch.version.cuda is not None
    return (VLLM_TARGET_DEVICE == "cuda" and has_cuda
            and not (_is_neuron() or _is_tpu()))
@ -626,37 +639,16 @@ package_data = {
    ]
 }

-# If using precompiled, extract and patch package_data (in advance of setup)
-if envs.VLLM_USE_PRECOMPILED:
-    assert _is_cuda(), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
-    wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
-    if wheel_location is not None:
-        wheel_url = wheel_location
-    else:
-        base_commit = precompiled_wheel_utils.get_base_commit_in_main_branch()
-        wheel_url = f"https://wheels.vllm.ai/{base_commit}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
-        from urllib.request import urlopen
-        try:
-            with urlopen(wheel_url) as resp:
-                if resp.status != 200:
-                    wheel_url = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
-        except Exception as e:
-            print(f"[warn] Falling back to nightly wheel: {e}")
-            wheel_url = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
-
-    patch = precompiled_wheel_utils.extract_precompiled_and_patch_package(
-        wheel_url)
-    for pkg, files in patch.items():
-        package_data.setdefault(pkg, []).extend(files)
-
 if _no_device():
    ext_modules = []

-if not ext_modules or envs.VLLM_USE_PRECOMPILED:
-    # Disable build_ext when using precompiled wheel
+if not ext_modules:
    cmdclass = {}
 else:
-    cmdclass = {"build_ext": cmake_build_ext}
+    cmdclass = {
+        "build_ext":
+        repackage_wheel if envs.VLLM_USE_PRECOMPILED else cmake_build_ext
+    }

 setup(
    # static metadata should rather go in pyproject.toml