Removes source compilation of nixl dependency (#24874)

Signed-off-by: bbartels <benjamin@bartels.dev>
Signed-off-by: Benjamin Bartels <benjamin@bartels.dev>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Daniele <36171005+dtrifiro@users.noreply.github.com>
This commit is contained in:
Benjamin Bartels
2025-09-17 02:33:18 +01:00
committed by GitHub
parent cef32104b4
commit 64ad551878
5 changed files with 77 additions and 116 deletions

57
tools/install_gdrcopy.sh Executable file
View File

@ -0,0 +1,57 @@
#!/usr/bin/env bash
set -euo pipefail

# Download NVIDIA's prebuilt libgdrapi (GDRCopy) .deb and install it with apt.
#
# Usage: install_gdrcopy.sh <GDRCOPY_OS_VERSION> <GDRCOPY_CUDA_VERSION> <uuarch>
#   GDRCOPY_OS_VERSION    OS tag exactly as it appears in the .deb file name;
#                         its lowercase form is used as the URL directory.
#   GDRCOPY_CUDA_VERSION  CUDA version placed in the "CUDA <ver>" URL directory.
#   uuarch                "x64" or "aarch64" (the aliases x86_64/amd64 and
#                         arm64 are also accepted, case-insensitively).
# Optional: set GDRCOPY_VERSION to override the libgdrapi package version
#           (default: 2.5.1-1).
# Requires: curl, apt-get, root privileges
# Exit status: non-zero on bad usage, unsupported arch, or any failed step.

if [[ $(id -u) -ne 0 ]]; then
  echo "Must be run as root" >&2
  exit 1
fi

if [[ $# -ne 3 ]]; then
  echo "Usage: $0 <GDRCOPY_OS_VERSION> <GDRCOPY_CUDA_VERSION> <uuarch(x64|aarch64)>" >&2
  exit 1
fi

OS_VER="$1"
CUDA_VER="$2"
UUARCH_RAW="$3"

# Empty values pass the argument-count check but would only surface later as
# an opaque HTTP error from curl, so reject them up front.
if [[ -z "${OS_VER}" || -z "${CUDA_VER}" ]]; then
  echo "GDRCOPY_OS_VERSION and GDRCOPY_CUDA_VERSION must be non-empty" >&2
  exit 1
fi

# Normalize/validate arch: the download URL and the .deb file name use
# different spellings for the same architecture.
case "${UUARCH_RAW,,}" in
  aarch64 | arm64)
    URL_ARCH="aarch64"
    DEB_ARCH="arm64"
    ;;
  x64 | x86_64 | amd64)
    URL_ARCH="x64"
    DEB_ARCH="amd64"
    ;;
  *)
    echo "Unsupported uuarch: ${UUARCH_RAW}. Use 'x64' or 'aarch64'." >&2
    exit 1
    ;;
esac

# The URL directory is lowercase while the .deb name keeps the caller's case.
OS_VER_LOWER="${OS_VER,,}"
GDRCOPY_PKG_VER="${GDRCOPY_VERSION:-2.5.1-1}"
DEB_NAME="libgdrapi_${GDRCOPY_PKG_VER}_${DEB_ARCH}.${OS_VER}.deb"
BASE_URL="https://developer.download.nvidia.com/compute/redist/gdrcopy"
URL="${BASE_URL}/CUDA%20${CUDA_VER}/${OS_VER_LOWER}/${URL_ARCH}/${DEB_NAME}"

echo "Downloading: ${URL}"

# Use a private variable name: assigning to TMPDIR would override the standard
# temp-dir variable and, if the caller exported it, leak into child processes.
WORK_DIR="$(mktemp -d)"
trap 'rm -rf -- "${WORK_DIR}"' EXIT

# -f makes curl exit non-zero on HTTP errors, so set -e aborts the script.
curl -fSL "${URL}" -o "${WORK_DIR}/${DEB_NAME}"

export DEBIAN_FRONTEND=noninteractive
apt-get update
# The absolute path makes apt-get install the local file rather than look up
# a package by name.
apt-get install -y "${WORK_DIR}/${DEB_NAME}"
apt-get clean
rm -rf /var/lib/apt/lists/*

echo "Installed ${DEB_NAME}"

View File

@ -1,109 +0,0 @@
#!/bin/bash
# Builds gdrcopy, UCX and NIXL from source and installs them under /usr/local.
# Usage: ./install_nixl.sh [--force]
#   --force  build every component even when it is already detected

FORCE=false
if [ "${1:-}" = "--force" ]; then
  FORCE=true
fi

# SUDO becomes true only when sudo exists and works without prompting (-n).
SUDO=false
if command -v sudo >/dev/null 2>&1 && sudo -n true 2>/dev/null; then
  SUDO=true
fi

ARCH=$(uname -m)
ROOT_DIR="/usr/local"
mkdir -p "$ROOT_DIR"

# Install prefixes used by the build sections below.
GDR_HOME="$ROOT_DIR/gdrcopy"
UCX_HOME="$ROOT_DIR/ucx"
NIXL_HOME="$ROOT_DIR/nixl"
CUDA_HOME=/usr/local/cuda

export PATH="$GDR_HOME/bin:$UCX_HOME/bin:$NIXL_HOME/bin:$PATH"
# Append the inherited value only when it is non-empty. A trailing ':' would
# create an empty entry, which the dynamic loader treats as the current
# working directory.
export LD_LIBRARY_PATH="$GDR_HOME/lib:$UCX_HOME/lib:$NIXL_HOME/lib/$ARCH-linux-gnu${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# All downloads and builds happen inside this working directory.
TEMP_DIR="nixl_installer"
mkdir -p "$TEMP_DIR"
# Stop if the work directory is unusable; otherwise the sources would be
# downloaded and built in whatever directory the caller happened to be in.
cd "$TEMP_DIR" || exit 1

# Build tools needed later by the NIXL meson/ninja build.
pip install meson ninja pybind11
# Build and install gdrcopy v2.5 unless /dev/gdrdrv already exists
# (or --force was given). Reads FORCE, GDR_HOME, CUDA_HOME and SUDO.
if [ ! -e "/dev/gdrdrv" ] || [ "$FORCE" = true ]; then
  # bash's builtin echo does not expand "\n" by default, so the previous
  # message printed a literal backslash-n.
  echo "Installing gdrcopy"
  wget https://github.com/NVIDIA/gdrcopy/archive/refs/tags/v2.5.tar.gz || exit 1
  tar xzf v2.5.tar.gz || exit 1
  rm v2.5.tar.gz
  # Abort instead of running make (and the trailing "cd ..") from the wrong
  # directory when extraction did not produce the source tree.
  cd gdrcopy-2.5 || exit 1
  make prefix="$GDR_HOME" CUDA="$CUDA_HOME" all install || exit 1
  if [ "$SUDO" = true ]; then
    echo "Running insmod.sh with sudo"
    sudo ./insmod.sh
  else
    echo "Skipping insmod.sh - sudo not available"
    echo "Please run 'sudo ./gdrcopy-2.5/insmod.sh' manually if needed"
  fi
  # Return to the shared work directory for the next component.
  cd ..
else
  echo "Found /dev/gdrdrv. Skipping gdrcopy installation"
fi
# Build and install UCX 1.18.0 unless ucx_info is already on PATH
# (or --force was given). Reads FORCE, UCX_HOME, CUDA_HOME, GDR_HOME and SUDO.
if ! command -v ucx_info &> /dev/null || [ "$FORCE" = true ]; then
  echo "Installing UCX"
  wget https://github.com/openucx/ucx/releases/download/v1.18.0/ucx-1.18.0.tar.gz || exit 1
  tar xzf ucx-1.18.0.tar.gz || exit 1
  rm ucx-1.18.0.tar.gz
  # Abort instead of configuring/building in the wrong directory.
  cd ucx-1.18.0 || exit 1

  # Checking Mellanox NICs
  # Extra configure flags are kept in an array so each flag stays one word
  # without relying on unquoted word splitting.
  MLX_OPTS=()
  if lspci | grep -i mellanox > /dev/null || command -v ibstat > /dev/null; then
    echo "Mellanox NIC detected, adding Mellanox-specific options"
    MLX_OPTS=(--with-rdmacm --with-mlx5-dv --with-ib-hw-tm)
  fi

  ./configure --prefix="$UCX_HOME" \
    --enable-shared \
    --disable-static \
    --disable-doxygen-doc \
    --enable-optimizations \
    --enable-cma \
    --enable-devel-headers \
    --with-cuda="$CUDA_HOME" \
    --with-dm \
    --with-gdrcopy="$GDR_HOME" \
    --with-verbs \
    --enable-mt \
    "${MLX_OPTS[@]}" || exit 1

  # A bare "make -j" places no limit on parallel jobs; cap it at the number
  # of available CPUs (falling back to 1 if nproc is unavailable).
  build_jobs="$(nproc 2> /dev/null || echo 1)"
  make -j"$build_jobs" || exit 1
  make -j"$build_jobs" install-strip || exit 1

  if [ "$SUDO" = true ]; then
    echo "Running ldconfig with sudo"
    sudo ldconfig
  else
    echo "Skipping ldconfig - sudo not available"
    echo "Please run 'sudo ldconfig' manually if needed"
  fi
  # Return to the shared work directory for the next component.
  cd ..
else
  echo "Found existing UCX. Skipping UCX installation"
fi
# Build and install NIXL 0.2.0 unless nixl_test is already on PATH
# (or --force was given). Reads FORCE, NIXL_HOME and UCX_HOME.
if ! command -v nixl_test &> /dev/null || [ "$FORCE" = true ]; then
  echo "Installing NIXL"
  wget https://github.com/ai-dynamo/nixl/archive/refs/tags/0.2.0.tar.gz || exit 1
  tar xzf 0.2.0.tar.gz || exit 1
  rm 0.2.0.tar.gz
  # Every step below depends on the previous one; stop at the first failure
  # so ninja never runs in the wrong directory and "cd ../.." cannot climb
  # out of the work directory.
  cd nixl-0.2.0 || exit 1
  meson setup build --prefix="$NIXL_HOME" -Ducx_path="$UCX_HOME" || exit 1
  cd build || exit 1
  ninja || exit 1
  ninja install || exit 1
  # Back out of nixl-0.2.0/build to the shared work directory.
  cd ../..
else
  echo "Found existing NIXL. Skipping NIXL installation"
fi