Removes source compilation of nixl dependency (#24874)
Signed-off-by: bbartels <benjamin@bartels.dev>
Signed-off-by: Benjamin Bartels <benjamin@bartels.dev>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Daniele <36171005+dtrifiro@users.noreply.github.com>
This commit is contained in:
57
tools/install_gdrcopy.sh
Executable file
57
tools/install_gdrcopy.sh
Executable file
@ -0,0 +1,57 @@
|
||||
#!/usr/bin/env bash
#
# Download the prebuilt libgdrapi .deb from NVIDIA's gdrcopy redistributable
# server and install it with apt-get.
#
# Usage: install_gdrcopy.sh <GDRCOPY_OS_VERSION> <GDRCOPY_CUDA_VERSION> <uuarch>
#   GDRCOPY_OS_VERSION    OS tag; used verbatim in the .deb file name and
#                         lowercased in the download URL path
#   GDRCOPY_CUDA_VERSION  CUDA version; selects the "CUDA <version>" URL directory
#   uuarch                must be "x64" or "aarch64"
#                         (x86_64/amd64 and arm64 are accepted as aliases)
# Optional: set GDRCOPY_VERSION to override the libgdrapi package version (default: 2.5.1-1)
# Requires: curl, apt-get, root privileges
set -euo pipefail

if [[ $(id -u) -ne 0 ]]; then
  echo "Must be run as root" >&2
  exit 1
fi

if [[ $# -ne 3 ]]; then
  echo "Usage: $0 <GDRCOPY_OS_VERSION> <GDRCOPY_CUDA_VERSION> <uuarch(x64|aarch64)>" >&2
  exit 1
fi

OS_VER="$1"
CUDA_VER="$2"
UUARCH_RAW="$3"

# Three arguments can still be empty strings (e.g. unset build args passed
# through as ""); fail early instead of requesting a malformed URL.
if [[ -z "${OS_VER}" || -z "${CUDA_VER}" ]]; then
  echo "GDRCOPY_OS_VERSION and GDRCOPY_CUDA_VERSION must be non-empty" >&2
  exit 1
fi

# Normalize/validate arch. The URL path and the .deb file name use different
# spellings for the same architecture, so both are derived here.
case "${UUARCH_RAW,,}" in
  aarch64 | arm64)
    URL_ARCH="aarch64"
    DEB_ARCH="arm64"
    ;;
  x64 | x86_64 | amd64)
    URL_ARCH="x64"
    DEB_ARCH="amd64"
    ;;
  *)
    echo "Unsupported uuarch: ${UUARCH_RAW}. Use 'x64' or 'aarch64'." >&2
    exit 1
    ;;
esac

# The URL directory is the lowercased OS tag, while the file name keeps the
# tag exactly as given by the caller.
OS_VER_LOWER="$(tr '[:upper:]' '[:lower:]' <<<"$OS_VER")"
GDRCOPY_PKG_VER="${GDRCOPY_VERSION:-2.5.1-1}"

DEB_NAME="libgdrapi_${GDRCOPY_PKG_VER}_${DEB_ARCH}.${OS_VER}.deb"
BASE_URL="https://developer.download.nvidia.com/compute/redist/gdrcopy"
URL="${BASE_URL}/CUDA%20${CUDA_VER}/${OS_VER_LOWER}/${URL_ARCH}/${DEB_NAME}"

echo "Downloading: ${URL}"

# Use a private variable name rather than TMPDIR: TMPDIR is the standard
# temp-dir variable, and overwriting it would silently redirect the temp
# files of every child process when it is already exported.
WORK_DIR="$(mktemp -d)"
# ':?' aborts the removal if the variable is ever empty; '--' ends option parsing.
trap 'rm -rf -- "${WORK_DIR:?}"' EXIT

# -f: fail on HTTP errors, -S: still show the error, -L: follow redirects.
# --retry covers transient network failures.
curl -fSL --retry 3 "${URL}" -o "${WORK_DIR}/${DEB_NAME}"

export DEBIAN_FRONTEND=noninteractive
apt-get update
# Passing a file path makes apt-get install the local .deb and resolve its dependencies.
apt-get install -y "${WORK_DIR}/${DEB_NAME}"
apt-get clean
rm -rf /var/lib/apt/lists/*

echo "Installed ${DEB_NAME}"
|
||||
@ -1,109 +0,0 @@
|
||||
#!/bin/bash
#
# Build and install gdrcopy, UCX and NIXL from source under /usr/local.
#
# Usage: ./install_nixl.sh [--force]
#   --force  build every component even if it is already detected
#
# Without --force a component is skipped when it is detected:
#   gdrcopy - /dev/gdrdrv exists
#   UCX     - ucx_info is on PATH
#   NIXL    - nixl_test is on PATH
#
# Sources are downloaded and built in ./nixl_installer (relative to the
# directory the script is started from).

# Abort on the first failed step: without this a failed download or 'cd'
# would let the following build/install commands run in the wrong directory.
set -euo pipefail

FORCE=false
if [[ "${1:-}" == "--force" ]]; then
  FORCE=true
fi

# Privileged steps (insmod, ldconfig) only run when passwordless sudo works.
SUDO=false
if command -v sudo >/dev/null 2>&1 && sudo -n true 2>/dev/null; then
  SUDO=true
fi

ARCH=$(uname -m)

ROOT_DIR="/usr/local"
mkdir -p "$ROOT_DIR"
GDR_HOME="$ROOT_DIR/gdrcopy"
UCX_HOME="$ROOT_DIR/ucx"
NIXL_HOME="$ROOT_DIR/nixl"
CUDA_HOME=/usr/local/cuda

export PATH="$GDR_HOME/bin:$UCX_HOME/bin:$NIXL_HOME/bin:$PATH"
# Append the previous value only when it is non-empty: a trailing ':' would
# add an empty entry, which the dynamic loader treats as the current directory.
export LD_LIBRARY_PATH="$GDR_HOME/lib:$UCX_HOME/lib:$NIXL_HOME/lib/$ARCH-linux-gnu${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# Bound build parallelism; a bare 'make -j' spawns an unlimited number of jobs.
JOBS="$(nproc)"

TEMP_DIR="nixl_installer"
mkdir -p "$TEMP_DIR"
cd "$TEMP_DIR"

pip install meson ninja pybind11

# --- gdrcopy ---------------------------------------------------------------
if [[ ! -e "/dev/gdrdrv" || "$FORCE" == true ]]; then
  # printf, because bash's echo prints "\n" literally.
  printf 'Installing gdrcopy\n\n'
  wget https://github.com/NVIDIA/gdrcopy/archive/refs/tags/v2.5.tar.gz
  tar xzf v2.5.tar.gz
  rm v2.5.tar.gz
  cd gdrcopy-2.5
  make prefix="$GDR_HOME" CUDA="$CUDA_HOME" all install

  if $SUDO; then
    echo "Running insmod.sh with sudo"
    # Kept non-fatal: the libraries are already installed at this point, so a
    # failure here only produces a warning and the script continues.
    sudo ./insmod.sh || echo "Warning: insmod.sh failed; continuing" >&2
  else
    echo "Skipping insmod.sh - sudo not available"
    echo "Please run 'sudo ./gdrcopy-2.5/insmod.sh' manually if needed"
  fi

  cd ..
else
  echo "Found /dev/gdrdrv. Skipping gdrcopy installation"
fi

# --- UCX -------------------------------------------------------------------
if ! command -v ucx_info &>/dev/null || [[ "$FORCE" == true ]]; then
  echo "Installing UCX"
  wget https://github.com/openucx/ucx/releases/download/v1.18.0/ucx-1.18.0.tar.gz
  tar xzf ucx-1.18.0.tar.gz
  rm ucx-1.18.0.tar.gz
  cd ucx-1.18.0

  # Collect configure options in an array so every option stays one word.
  UCX_CONFIGURE_ARGS=(
    --prefix="$UCX_HOME"
    --enable-shared
    --disable-static
    --disable-doxygen-doc
    --enable-optimizations
    --enable-cma
    --enable-devel-headers
    --with-cuda="$CUDA_HOME"
    --with-dm
    --with-gdrcopy="$GDR_HOME"
    --with-verbs
    --enable-mt
  )

  # Checking Mellanox NICs.
  # grep writes to /dev/null instead of using -q so it reads all of lspci's
  # output; an early exit could fail the pipeline under 'pipefail'.
  if lspci | grep -i mellanox >/dev/null || command -v ibstat >/dev/null; then
    echo "Mellanox NIC detected, adding Mellanox-specific options"
    UCX_CONFIGURE_ARGS+=(--with-rdmacm --with-mlx5-dv --with-ib-hw-tm)
  fi

  ./configure "${UCX_CONFIGURE_ARGS[@]}"
  make -j"$JOBS"
  make -j"$JOBS" install-strip

  if $SUDO; then
    echo "Running ldconfig with sudo"
    sudo ldconfig
  else
    echo "Skipping ldconfig - sudo not available"
    echo "Please run 'sudo ldconfig' manually if needed"
  fi

  cd ..
else
  echo "Found existing UCX. Skipping UCX installation"
fi

# --- NIXL ------------------------------------------------------------------
if ! command -v nixl_test &>/dev/null || [[ "$FORCE" == true ]]; then
  echo "Installing NIXL"
  wget https://github.com/ai-dynamo/nixl/archive/refs/tags/0.2.0.tar.gz
  tar xzf 0.2.0.tar.gz
  rm 0.2.0.tar.gz
  cd nixl-0.2.0
  meson setup build --prefix="$NIXL_HOME" -Ducx_path="$UCX_HOME"
  cd build
  ninja
  ninja install

  cd ../..
else
  echo "Found existing NIXL. Skipping NIXL installation"
fi
|
||||
Reference in New Issue
Block a user