[Kernel] Build flash-attn from source (#8245)

2024-09-21 02:27:10 -04:00
parent 0faab90eb0
commit 71c60491f2
9 changed files with 124 additions and 41 deletions
--- a/.github/workflows/scripts/build.sh
+++ b/.github/workflows/scripts/build.sh
@@ -15,5 +15,6 @@ $python_executable -m pip install -r requirements-cuda.txt
 export MAX_JOBS=1
 # Make sure release wheels are built for the following architectures
 export TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
+export VLLM_FA_CMAKE_GPU_ARCHES="80-real;90-real"
 # Build
 $python_executable setup.py bdist_wheel --dist-dir=dist