Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
255
setup.py
255
setup.py
@@ -34,32 +34,36 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
# cannot import envs directly because it depends on vllm,
|
||||
# which is not installed yet
|
||||
envs = load_module_from_path('envs', os.path.join(ROOT_DIR, 'vllm', 'envs.py'))
|
||||
envs = load_module_from_path("envs", os.path.join(ROOT_DIR, "vllm", "envs.py"))
|
||||
|
||||
VLLM_TARGET_DEVICE = envs.VLLM_TARGET_DEVICE
|
||||
|
||||
if sys.platform.startswith("darwin") and VLLM_TARGET_DEVICE != "cpu":
|
||||
logger.warning(
|
||||
"VLLM_TARGET_DEVICE automatically set to `cpu` due to macOS")
|
||||
logger.warning("VLLM_TARGET_DEVICE automatically set to `cpu` due to macOS")
|
||||
VLLM_TARGET_DEVICE = "cpu"
|
||||
elif not (sys.platform.startswith("linux")
|
||||
or sys.platform.startswith("darwin")):
|
||||
elif not (sys.platform.startswith("linux") or sys.platform.startswith("darwin")):
|
||||
logger.warning(
|
||||
"vLLM only supports Linux platform (including WSL) and MacOS."
|
||||
"Building on %s, "
|
||||
"so vLLM may not be able to run correctly", sys.platform)
|
||||
"so vLLM may not be able to run correctly",
|
||||
sys.platform,
|
||||
)
|
||||
VLLM_TARGET_DEVICE = "empty"
|
||||
elif (sys.platform.startswith("linux") and torch.version.cuda is None
|
||||
and os.getenv("VLLM_TARGET_DEVICE") is None
|
||||
and torch.version.hip is None):
|
||||
elif (
|
||||
sys.platform.startswith("linux")
|
||||
and torch.version.cuda is None
|
||||
and os.getenv("VLLM_TARGET_DEVICE") is None
|
||||
and torch.version.hip is None
|
||||
):
|
||||
# if cuda or hip is not available and VLLM_TARGET_DEVICE is not set,
|
||||
# fallback to cpu
|
||||
VLLM_TARGET_DEVICE = "cpu"
|
||||
|
||||
|
||||
def is_sccache_available() -> bool:
|
||||
return which("sccache") is not None and \
|
||||
not bool(int(os.getenv("VLLM_DISABLE_SCCACHE", "0")))
|
||||
return which("sccache") is not None and not bool(
|
||||
int(os.getenv("VLLM_DISABLE_SCCACHE", "0"))
|
||||
)
|
||||
|
||||
|
||||
def is_ccache_available() -> bool:
|
||||
@@ -83,8 +87,7 @@ def is_url_available(url: str) -> bool:
|
||||
|
||||
|
||||
class CMakeExtension(Extension):
|
||||
|
||||
def __init__(self, name: str, cmake_lists_dir: str = '.', **kwa) -> None:
|
||||
def __init__(self, name: str, cmake_lists_dir: str = ".", **kwa) -> None:
|
||||
super().__init__(name, sources=[], py_limited_api=True, **kwa)
|
||||
self.cmake_lists_dir = os.path.abspath(cmake_lists_dir)
|
||||
|
||||
@@ -121,8 +124,8 @@ class cmake_build_ext(build_ext):
|
||||
if nvcc_threads is not None:
|
||||
nvcc_threads = int(nvcc_threads)
|
||||
logger.info(
|
||||
"Using NVCC_THREADS=%d as the number of nvcc threads.",
|
||||
nvcc_threads)
|
||||
"Using NVCC_THREADS=%d as the number of nvcc threads.", nvcc_threads
|
||||
)
|
||||
else:
|
||||
nvcc_threads = 1
|
||||
num_jobs = max(1, num_jobs // nvcc_threads)
|
||||
@@ -146,36 +149,36 @@ class cmake_build_ext(build_ext):
|
||||
cfg = envs.CMAKE_BUILD_TYPE or default_cfg
|
||||
|
||||
cmake_args = [
|
||||
'-DCMAKE_BUILD_TYPE={}'.format(cfg),
|
||||
'-DVLLM_TARGET_DEVICE={}'.format(VLLM_TARGET_DEVICE),
|
||||
"-DCMAKE_BUILD_TYPE={}".format(cfg),
|
||||
"-DVLLM_TARGET_DEVICE={}".format(VLLM_TARGET_DEVICE),
|
||||
]
|
||||
|
||||
verbose = envs.VERBOSE
|
||||
if verbose:
|
||||
cmake_args += ['-DCMAKE_VERBOSE_MAKEFILE=ON']
|
||||
cmake_args += ["-DCMAKE_VERBOSE_MAKEFILE=ON"]
|
||||
|
||||
if is_sccache_available():
|
||||
cmake_args += [
|
||||
'-DCMAKE_C_COMPILER_LAUNCHER=sccache',
|
||||
'-DCMAKE_CXX_COMPILER_LAUNCHER=sccache',
|
||||
'-DCMAKE_CUDA_COMPILER_LAUNCHER=sccache',
|
||||
'-DCMAKE_HIP_COMPILER_LAUNCHER=sccache',
|
||||
"-DCMAKE_C_COMPILER_LAUNCHER=sccache",
|
||||
"-DCMAKE_CXX_COMPILER_LAUNCHER=sccache",
|
||||
"-DCMAKE_CUDA_COMPILER_LAUNCHER=sccache",
|
||||
"-DCMAKE_HIP_COMPILER_LAUNCHER=sccache",
|
||||
]
|
||||
elif is_ccache_available():
|
||||
cmake_args += [
|
||||
'-DCMAKE_C_COMPILER_LAUNCHER=ccache',
|
||||
'-DCMAKE_CXX_COMPILER_LAUNCHER=ccache',
|
||||
'-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache',
|
||||
'-DCMAKE_HIP_COMPILER_LAUNCHER=ccache',
|
||||
"-DCMAKE_C_COMPILER_LAUNCHER=ccache",
|
||||
"-DCMAKE_CXX_COMPILER_LAUNCHER=ccache",
|
||||
"-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache",
|
||||
"-DCMAKE_HIP_COMPILER_LAUNCHER=ccache",
|
||||
]
|
||||
|
||||
# Pass the python executable to cmake so it can find an exact
|
||||
# match.
|
||||
cmake_args += ['-DVLLM_PYTHON_EXECUTABLE={}'.format(sys.executable)]
|
||||
cmake_args += ["-DVLLM_PYTHON_EXECUTABLE={}".format(sys.executable)]
|
||||
|
||||
# Pass the python path to cmake so it can reuse the build dependencies
|
||||
# on subsequent calls to python.
|
||||
cmake_args += ['-DVLLM_PYTHON_PATH={}'.format(":".join(sys.path))]
|
||||
cmake_args += ["-DVLLM_PYTHON_PATH={}".format(":".join(sys.path))]
|
||||
|
||||
# Override the base directory for FetchContent downloads to $ROOT/.deps
|
||||
# This allows sharing dependencies between profiles,
|
||||
@@ -183,7 +186,7 @@ class cmake_build_ext(build_ext):
|
||||
# To override this, set the FETCHCONTENT_BASE_DIR environment variable.
|
||||
fc_base_dir = os.path.join(ROOT_DIR, ".deps")
|
||||
fc_base_dir = os.environ.get("FETCHCONTENT_BASE_DIR", fc_base_dir)
|
||||
cmake_args += ['-DFETCHCONTENT_BASE_DIR={}'.format(fc_base_dir)]
|
||||
cmake_args += ["-DFETCHCONTENT_BASE_DIR={}".format(fc_base_dir)]
|
||||
|
||||
#
|
||||
# Setup parallelism and build tool
|
||||
@@ -191,35 +194,36 @@ class cmake_build_ext(build_ext):
|
||||
num_jobs, nvcc_threads = self.compute_num_jobs()
|
||||
|
||||
if nvcc_threads:
|
||||
cmake_args += ['-DNVCC_THREADS={}'.format(nvcc_threads)]
|
||||
cmake_args += ["-DNVCC_THREADS={}".format(nvcc_threads)]
|
||||
|
||||
if is_ninja_available():
|
||||
build_tool = ['-G', 'Ninja']
|
||||
build_tool = ["-G", "Ninja"]
|
||||
cmake_args += [
|
||||
'-DCMAKE_JOB_POOL_COMPILE:STRING=compile',
|
||||
'-DCMAKE_JOB_POOLS:STRING=compile={}'.format(num_jobs),
|
||||
"-DCMAKE_JOB_POOL_COMPILE:STRING=compile",
|
||||
"-DCMAKE_JOB_POOLS:STRING=compile={}".format(num_jobs),
|
||||
]
|
||||
else:
|
||||
# Default build tool to whatever cmake picks.
|
||||
build_tool = []
|
||||
# Make sure we use the nvcc from CUDA_HOME
|
||||
if _is_cuda():
|
||||
cmake_args += [f'-DCMAKE_CUDA_COMPILER={CUDA_HOME}/bin/nvcc']
|
||||
cmake_args += [f"-DCMAKE_CUDA_COMPILER={CUDA_HOME}/bin/nvcc"]
|
||||
|
||||
other_cmake_args = os.environ.get("CMAKE_ARGS")
|
||||
if other_cmake_args:
|
||||
cmake_args += other_cmake_args.split()
|
||||
|
||||
subprocess.check_call(
|
||||
['cmake', ext.cmake_lists_dir, *build_tool, *cmake_args],
|
||||
cwd=self.build_temp)
|
||||
["cmake", ext.cmake_lists_dir, *build_tool, *cmake_args],
|
||||
cwd=self.build_temp,
|
||||
)
|
||||
|
||||
def build_extensions(self) -> None:
|
||||
# Ensure that CMake is present and working
|
||||
try:
|
||||
subprocess.check_output(['cmake', '--version'])
|
||||
subprocess.check_output(["cmake", "--version"])
|
||||
except OSError as e:
|
||||
raise RuntimeError('Cannot find CMake executable') from e
|
||||
raise RuntimeError("Cannot find CMake executable") from e
|
||||
|
||||
# Create build directory if it does not exist.
|
||||
if not os.path.exists(self.build_temp):
|
||||
@@ -258,13 +262,18 @@ class cmake_build_ext(build_ext):
|
||||
# CMake appends the extension prefix to the install path,
|
||||
# and outdir already contains that prefix, so we need to remove it.
|
||||
prefix = outdir
|
||||
for _ in range(ext.name.count('.')):
|
||||
for _ in range(ext.name.count(".")):
|
||||
prefix = prefix.parent
|
||||
|
||||
# prefix here should actually be the same for all components
|
||||
install_args = [
|
||||
"cmake", "--install", ".", "--prefix", prefix, "--component",
|
||||
target_name(ext.name)
|
||||
"cmake",
|
||||
"--install",
|
||||
".",
|
||||
"--prefix",
|
||||
prefix,
|
||||
"--component",
|
||||
target_name(ext.name),
|
||||
]
|
||||
subprocess.check_call(install_args, cwd=self.build_temp)
|
||||
|
||||
@@ -275,12 +284,15 @@ class cmake_build_ext(build_ext):
|
||||
# copy vllm/vllm_flash_attn/**/*.py from self.build_lib to current
|
||||
# directory so that they can be included in the editable build
|
||||
import glob
|
||||
files = glob.glob(os.path.join(self.build_lib, "vllm",
|
||||
"vllm_flash_attn", "**", "*.py"),
|
||||
recursive=True)
|
||||
|
||||
files = glob.glob(
|
||||
os.path.join(self.build_lib, "vllm", "vllm_flash_attn", "**", "*.py"),
|
||||
recursive=True,
|
||||
)
|
||||
for file in files:
|
||||
dst_file = os.path.join("vllm/vllm_flash_attn",
|
||||
file.split("vllm/vllm_flash_attn/")[-1])
|
||||
dst_file = os.path.join(
|
||||
"vllm/vllm_flash_attn", file.split("vllm/vllm_flash_attn/")[-1]
|
||||
)
|
||||
print(f"Copying {file} to {dst_file}")
|
||||
os.makedirs(os.path.dirname(dst_file), exist_ok=True)
|
||||
self.copy_file(file, dst_file)
|
||||
@@ -290,8 +302,7 @@ class precompiled_build_ext(build_ext):
|
||||
"""Disables extension building when using precompiled binaries."""
|
||||
|
||||
def run(self) -> None:
|
||||
assert _is_cuda(
|
||||
), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
|
||||
assert _is_cuda(), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
|
||||
|
||||
def build_extensions(self) -> None:
|
||||
print("Skipping build_ext: using precompiled extensions.")
|
||||
@@ -312,9 +323,9 @@ class precompiled_wheel_utils:
|
||||
wheel_filename = wheel_url_or_path.split("/")[-1]
|
||||
temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
|
||||
wheel_path = os.path.join(temp_dir, wheel_filename)
|
||||
print(f"Downloading wheel from {wheel_url_or_path} "
|
||||
f"to {wheel_path}")
|
||||
print(f"Downloading wheel from {wheel_url_or_path} to {wheel_path}")
|
||||
from urllib.request import urlretrieve
|
||||
|
||||
urlretrieve(wheel_url_or_path, filename=wheel_path)
|
||||
else:
|
||||
wheel_path = wheel_url_or_path
|
||||
@@ -335,25 +346,29 @@ class precompiled_wheel_utils:
|
||||
]
|
||||
|
||||
compiled_regex = re.compile(
|
||||
r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")
|
||||
r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py"
|
||||
)
|
||||
file_members = list(
|
||||
filter(lambda x: x.filename in files_to_copy,
|
||||
wheel.filelist))
|
||||
filter(lambda x: x.filename in files_to_copy, wheel.filelist)
|
||||
)
|
||||
file_members += list(
|
||||
filter(lambda x: compiled_regex.match(x.filename),
|
||||
wheel.filelist))
|
||||
filter(lambda x: compiled_regex.match(x.filename), wheel.filelist)
|
||||
)
|
||||
|
||||
for file in file_members:
|
||||
print(f"[extract] {file.filename}")
|
||||
target_path = os.path.join(".", file.filename)
|
||||
os.makedirs(os.path.dirname(target_path), exist_ok=True)
|
||||
with wheel.open(file.filename) as src, open(
|
||||
target_path, "wb") as dst:
|
||||
with (
|
||||
wheel.open(file.filename) as src,
|
||||
open(target_path, "wb") as dst,
|
||||
):
|
||||
shutil.copyfileobj(src, dst)
|
||||
|
||||
pkg = os.path.dirname(file.filename).replace("/", ".")
|
||||
package_data_patch.setdefault(pkg, []).append(
|
||||
os.path.basename(file.filename))
|
||||
os.path.basename(file.filename)
|
||||
)
|
||||
|
||||
return package_data_patch
|
||||
finally:
|
||||
@@ -369,10 +384,13 @@ class precompiled_wheel_utils:
|
||||
|
||||
try:
|
||||
# Get the latest commit hash of the upstream main branch.
|
||||
resp_json = subprocess.check_output([
|
||||
"curl", "-s",
|
||||
"https://api.github.com/repos/vllm-project/vllm/commits/main"
|
||||
]).decode("utf-8")
|
||||
resp_json = subprocess.check_output(
|
||||
[
|
||||
"curl",
|
||||
"-s",
|
||||
"https://api.github.com/repos/vllm-project/vllm/commits/main",
|
||||
]
|
||||
).decode("utf-8")
|
||||
upstream_main_commit = json.loads(resp_json)["sha"]
|
||||
|
||||
# In Docker build context, .git may be immutable or missing.
|
||||
@@ -382,25 +400,32 @@ class precompiled_wheel_utils:
|
||||
# Check if the upstream_main_commit exists in the local repo
|
||||
try:
|
||||
subprocess.check_output(
|
||||
["git", "cat-file", "-e", f"{upstream_main_commit}"])
|
||||
["git", "cat-file", "-e", f"{upstream_main_commit}"]
|
||||
)
|
||||
except subprocess.CalledProcessError:
|
||||
# If not present, fetch it from the remote repository.
|
||||
# Note that this does not update any local branches,
|
||||
# but ensures that this commit ref and its history are
|
||||
# available in our local repo.
|
||||
subprocess.check_call([
|
||||
"git", "fetch", "https://github.com/vllm-project/vllm",
|
||||
"main"
|
||||
])
|
||||
subprocess.check_call(
|
||||
["git", "fetch", "https://github.com/vllm-project/vllm", "main"]
|
||||
)
|
||||
|
||||
# Then get the commit hash of the current branch that is the same as
|
||||
# the upstream main commit.
|
||||
current_branch = subprocess.check_output(
|
||||
["git", "branch", "--show-current"]).decode("utf-8").strip()
|
||||
current_branch = (
|
||||
subprocess.check_output(["git", "branch", "--show-current"])
|
||||
.decode("utf-8")
|
||||
.strip()
|
||||
)
|
||||
|
||||
base_commit = subprocess.check_output([
|
||||
"git", "merge-base", f"{upstream_main_commit}", current_branch
|
||||
]).decode("utf-8").strip()
|
||||
base_commit = (
|
||||
subprocess.check_output(
|
||||
["git", "merge-base", f"{upstream_main_commit}", current_branch]
|
||||
)
|
||||
.decode("utf-8")
|
||||
.strip()
|
||||
)
|
||||
return base_commit
|
||||
except ValueError as err:
|
||||
raise ValueError(err) from None
|
||||
@@ -408,7 +433,9 @@ class precompiled_wheel_utils:
|
||||
logger.warning(
|
||||
"Failed to get the base commit in the main branch. "
|
||||
"Using the nightly wheel. The libraries in this "
|
||||
"wheel may not be compatible with your dev branch: %s", err)
|
||||
"wheel may not be compatible with your dev branch: %s",
|
||||
err,
|
||||
)
|
||||
return "nightly"
|
||||
|
||||
|
||||
@@ -418,12 +445,13 @@ def _no_device() -> bool:
|
||||
|
||||
def _is_cuda() -> bool:
|
||||
has_cuda = torch.version.cuda is not None
|
||||
return (VLLM_TARGET_DEVICE == "cuda" and has_cuda and not _is_tpu())
|
||||
return VLLM_TARGET_DEVICE == "cuda" and has_cuda and not _is_tpu()
|
||||
|
||||
|
||||
def _is_hip() -> bool:
|
||||
return (VLLM_TARGET_DEVICE == "cuda"
|
||||
or VLLM_TARGET_DEVICE == "rocm") and torch.version.hip is not None
|
||||
return (
|
||||
VLLM_TARGET_DEVICE == "cuda" or VLLM_TARGET_DEVICE == "rocm"
|
||||
) and torch.version.hip is not None
|
||||
|
||||
|
||||
def _is_tpu() -> bool:
|
||||
@@ -462,8 +490,12 @@ def get_rocm_version():
|
||||
minor = ctypes.c_uint32()
|
||||
patch = ctypes.c_uint32()
|
||||
|
||||
if (get_rocm_core_version(ctypes.byref(major), ctypes.byref(minor),
|
||||
ctypes.byref(patch)) == 0):
|
||||
if (
|
||||
get_rocm_core_version(
|
||||
ctypes.byref(major), ctypes.byref(minor), ctypes.byref(patch)
|
||||
)
|
||||
== 0
|
||||
):
|
||||
return f"{major.value}.{minor.value}.{patch.value}"
|
||||
return None
|
||||
except Exception:
|
||||
@@ -476,8 +508,9 @@ def get_nvcc_cuda_version() -> Version:
|
||||
Adapted from https://github.com/NVIDIA/apex/blob/8b7a1ff183741dd8f9b87e7bafd04cfde99cea28/setup.py
|
||||
"""
|
||||
assert CUDA_HOME is not None, "CUDA_HOME is not set"
|
||||
nvcc_output = subprocess.check_output([CUDA_HOME + "/bin/nvcc", "-V"],
|
||||
universal_newlines=True)
|
||||
nvcc_output = subprocess.check_output(
|
||||
[CUDA_HOME + "/bin/nvcc", "-V"], universal_newlines=True
|
||||
)
|
||||
output = nvcc_output.split()
|
||||
release_idx = output.index("release") + 1
|
||||
nvcc_cuda_version = parse(output[release_idx].split(",")[0])
|
||||
@@ -489,14 +522,20 @@ def get_gaudi_sw_version():
|
||||
Returns the driver version.
|
||||
"""
|
||||
# Enable console printing for `hl-smi` check
|
||||
output = subprocess.run("hl-smi",
|
||||
shell=True,
|
||||
text=True,
|
||||
capture_output=True,
|
||||
env={"ENABLE_CONSOLE": "true"})
|
||||
output = subprocess.run(
|
||||
"hl-smi",
|
||||
shell=True,
|
||||
text=True,
|
||||
capture_output=True,
|
||||
env={"ENABLE_CONSOLE": "true"},
|
||||
)
|
||||
if output.returncode == 0 and output.stdout:
|
||||
return output.stdout.split("\n")[2].replace(
|
||||
" ", "").split(":")[1][:-1].split("-")[0]
|
||||
return (
|
||||
output.stdout.split("\n")[2]
|
||||
.replace(" ", "")
|
||||
.split(":")[1][:-1]
|
||||
.split("-")[0]
|
||||
)
|
||||
return "0.0.0" # when hl-smi is not available
|
||||
|
||||
|
||||
@@ -546,8 +585,11 @@ def get_requirements() -> list[str]:
|
||||
for line in requirements:
|
||||
if line.startswith("-r "):
|
||||
resolved_requirements += _read_requirements(line.split()[1])
|
||||
elif not line.startswith("--") and not line.startswith(
|
||||
"#") and line.strip() != "":
|
||||
elif (
|
||||
not line.startswith("--")
|
||||
and not line.startswith("#")
|
||||
and line.strip() != ""
|
||||
):
|
||||
resolved_requirements.append(line)
|
||||
return resolved_requirements
|
||||
|
||||
@@ -558,7 +600,7 @@ def get_requirements() -> list[str]:
|
||||
cuda_major, cuda_minor = torch.version.cuda.split(".")
|
||||
modified_requirements = []
|
||||
for req in requirements:
|
||||
if ("vllm-flash-attn" in req and cuda_major != "12"):
|
||||
if "vllm-flash-attn" in req and cuda_major != "12":
|
||||
# vllm-flash-attn is built only for CUDA 12.x.
|
||||
# Skip for other versions.
|
||||
continue
|
||||
@@ -573,8 +615,7 @@ def get_requirements() -> list[str]:
|
||||
elif _is_xpu():
|
||||
requirements = _read_requirements("xpu.txt")
|
||||
else:
|
||||
raise ValueError(
|
||||
"Unsupported platform, please use CUDA, ROCm, or CPU.")
|
||||
raise ValueError("Unsupported platform, please use CUDA, ROCm, or CPU.")
|
||||
return requirements
|
||||
|
||||
|
||||
@@ -590,14 +631,13 @@ if _is_cuda():
|
||||
ext_modules.append(CMakeExtension(name="vllm.vllm_flash_attn._vllm_fa2_C"))
|
||||
if envs.VLLM_USE_PRECOMPILED or get_nvcc_cuda_version() >= Version("12.3"):
|
||||
# FA3 requires CUDA 12.3 or later
|
||||
ext_modules.append(
|
||||
CMakeExtension(name="vllm.vllm_flash_attn._vllm_fa3_C"))
|
||||
ext_modules.append(CMakeExtension(name="vllm.vllm_flash_attn._vllm_fa3_C"))
|
||||
# Optional since this doesn't get built (produce an .so file) when
|
||||
# not targeting a hopper system
|
||||
ext_modules.append(CMakeExtension(name="vllm._flashmla_C", optional=True))
|
||||
ext_modules.append(
|
||||
CMakeExtension(name="vllm._flashmla_C", optional=True))
|
||||
ext_modules.append(
|
||||
CMakeExtension(name="vllm._flashmla_extension_C", optional=True))
|
||||
CMakeExtension(name="vllm._flashmla_extension_C", optional=True)
|
||||
)
|
||||
ext_modules.append(CMakeExtension(name="vllm.cumem_allocator"))
|
||||
|
||||
if _build_custom_ops():
|
||||
@@ -619,6 +659,7 @@ if envs.VLLM_USE_PRECOMPILED:
|
||||
wheel_url = wheel_location
|
||||
else:
|
||||
import platform
|
||||
|
||||
arch = platform.machine()
|
||||
if arch == "x86_64":
|
||||
wheel_tag = "manylinux1_x86_64"
|
||||
@@ -628,8 +669,11 @@ if envs.VLLM_USE_PRECOMPILED:
|
||||
raise ValueError(f"Unsupported architecture: {arch}")
|
||||
base_commit = precompiled_wheel_utils.get_base_commit_in_main_branch()
|
||||
wheel_url = f"https://wheels.vllm.ai/{base_commit}/vllm-1.0.0.dev-cp38-abi3-{wheel_tag}.whl"
|
||||
nightly_wheel_url = f"https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-{wheel_tag}.whl"
|
||||
nightly_wheel_url = (
|
||||
f"https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-{wheel_tag}.whl"
|
||||
)
|
||||
from urllib.request import urlopen
|
||||
|
||||
try:
|
||||
with urlopen(wheel_url) as resp:
|
||||
if resp.status != 200:
|
||||
@@ -638,8 +682,7 @@ if envs.VLLM_USE_PRECOMPILED:
|
||||
print(f"[warn] Falling back to nightly wheel: {e}")
|
||||
wheel_url = nightly_wheel_url
|
||||
|
||||
patch = precompiled_wheel_utils.extract_precompiled_and_patch_package(
|
||||
wheel_url)
|
||||
patch = precompiled_wheel_utils.extract_precompiled_and_patch_package(wheel_url)
|
||||
for pkg, files in patch.items():
|
||||
package_data.setdefault(pkg, []).extend(files)
|
||||
|
||||
@@ -650,8 +693,9 @@ if not ext_modules:
|
||||
cmdclass = {}
|
||||
else:
|
||||
cmdclass = {
|
||||
"build_ext":
|
||||
precompiled_build_ext if envs.VLLM_USE_PRECOMPILED else cmake_build_ext
|
||||
"build_ext": precompiled_build_ext
|
||||
if envs.VLLM_USE_PRECOMPILED
|
||||
else cmake_build_ext
|
||||
}
|
||||
|
||||
setup(
|
||||
@@ -664,8 +708,11 @@ setup(
|
||||
"tensorizer": ["tensorizer==2.10.1"],
|
||||
"fastsafetensors": ["fastsafetensors >= 0.1.10"],
|
||||
"runai": ["runai-model-streamer[s3,gcs] >= 0.14.0"],
|
||||
"audio": ["librosa", "soundfile",
|
||||
"mistral_common[audio]"], # Required for audio processing
|
||||
"audio": [
|
||||
"librosa",
|
||||
"soundfile",
|
||||
"mistral_common[audio]",
|
||||
], # Required for audio processing
|
||||
"video": [], # Kept for backwards compatibility
|
||||
# FlashInfer should be updated together with the Dockerfile
|
||||
"flashinfer": ["flashinfer-python==0.3.1"],
|
||||
|
||||
Reference in New Issue
Block a user