[CI Fix] Pin deepep and pplx tags in tools/ep_kernels/, gate multigpu tests (#23568)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin
2025-08-25 21:29:00 -04:00
committed by GitHub
parent 2a97ffc33d
commit 906e461ed6
9 changed files with 40 additions and 12 deletions

View File

@ -77,6 +77,7 @@ clone_repo() {
local repo_url=$1
local dir_name=$2
local key_file=$3
local commit_hash=$4
if [ -d "$dir_name" ]; then
# Check if directory has uncommitted changes (dirty)
@ -87,17 +88,27 @@ clone_repo() {
echo "$dir_name directory exists but clone appears incomplete, cleaning up and re-cloning"
rm -rf "$dir_name"
git clone "$repo_url"
if [ -n "$commit_hash" ]; then
cd "$dir_name"
git checkout "$commit_hash"
cd ..
fi
else
echo "$dir_name directory exists and appears complete; manually update if needed"
fi
else
git clone "$repo_url"
if [ -n "$commit_hash" ]; then
cd "$dir_name"
git checkout "$commit_hash"
cd ..
fi
fi
}
# build and install pplx; requires PyTorch to be installed
pushd $WORKSPACE
clone_repo "https://github.com/ppl-ai/pplx-kernels" "pplx-kernels" "setup.py"
clone_repo "https://github.com/ppl-ai/pplx-kernels" "pplx-kernels" "setup.py" "c336faf"
cd pplx-kernels
# see https://github.com/pypa/pip/issues/9955#issuecomment-838065925
# PIP_NO_BUILD_ISOLATION=0 disables build isolation
@ -106,7 +117,7 @@ popd
# build and install deepep; requires PyTorch to be installed
pushd $WORKSPACE
clone_repo "https://github.com/deepseek-ai/DeepEP" "DeepEP" "setup.py"
clone_repo "https://github.com/deepseek-ai/DeepEP" "DeepEP" "setup.py" "e3908bf"
cd DeepEP
export NVSHMEM_DIR=$WORKSPACE/nvshmem_install
PIP_NO_BUILD_ISOLATION=0 pip install -vvv -e .