[Kernel] Build flash-attn from source (#8245)

This commit is contained in:
Luka Govedič
2024-09-21 02:27:10 -04:00
committed by GitHub
parent 0faab90eb0
commit 71c60491f2
9 changed files with 124 additions and 41 deletions

View File

@@ -15,5 +15,6 @@ $python_executable -m pip install -r requirements-cuda.txt
# Build settings. They are assigned first and exported together so that the
# build process launched below inherits all of them.
# NOTE(review): MAX_JOBS=1 presumably caps the build at a single parallel
# compile job -- confirm where MAX_JOBS is consumed.
MAX_JOBS=1
# Architectures that the release wheels must be built for.
TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
# NOTE(review): presumably the GPU architectures used for the flash-attn
# CMake build -- confirm where VLLM_FA_CMAKE_GPU_ARCHES is read.
VLLM_FA_CMAKE_GPU_ARCHES="80-real;90-real"
export MAX_JOBS TORCH_CUDA_ARCH_LIST VLLM_FA_CMAKE_GPU_ARCHES

# Run the wheel build, passing dist as the distribution directory.
$python_executable setup.py bdist_wheel --dist-dir=dist