
Commit 3ec63b3

ci: Install flash-attn using the same torch version before reqs
This should help with failures due to mixed C-bindings symbols we see in CI.

This is modeled on instructlab/instructlab#3399. In the future, the repo should adopt `scripts/install-ilab-with-cuda.sh` from the ilab repo to avoid duplication.

Signed-off-by: Ihar Hrachyshka <[email protected]>
1 parent 1448ffb commit 3ec63b3
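
In short, the workflows now build flash-attn against the torch that will actually be present at runtime, instead of letting pip compile it in an isolated build environment with a possibly different torch. A condensed sketch of the ordering the change introduces (the real steps also run with `-v` and `-c constraints-dev.txt`):

    # 1. pre-install torch and flash-attn's other build dependencies
    python3.11 -m pip install torch packaging setuptools wheel psutil ninja
    # 2. build flash-attn against the torch installed above
    python3.11 -m pip install flash-attn --no-build-isolation
    # 3. only then install the project and its CUDA requirements
    CMAKE_ARGS="-DGGML_CUDA=on" python3.11 -m pip install .
    python3.11 -m pip install .[cuda] -r requirements-vllm-cuda.txt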

2 files changed: +40 −6 lines changed


.github/workflows/e2e-nvidia-l4-x1.yml

Lines changed: 21 additions & 3 deletions
@@ -120,11 +120,29 @@ jobs:
 python3.11 -m pip cache remove llama_cpp_python
 
 pip_install="python3.11 -m pip install -v -c constraints-dev.txt"
-CMAKE_ARGS="-DGGML_CUDA=on" $pip_install .
 
-# https://github.com/instructlab/instructlab/issues/1821
-# install with Torch and build dependencies installed
+pip_install="python3.11 -m pip install -v -c constraints-dev.txt"
+
+# pre-install some build dependencies
 $pip_install packaging wheel setuptools-scm
+
+# flash-attn has a bug in the setup.py that causes pip to attempt installing it
+# before torch is installed. This is a bug because their setup.py depends on
+# importing the module, so it should have been listed in build_requires. Alas!
+#
+# See: https://github.com/Dao-AILab/flash-attention/pull/958
+# Also: https://github.com/instructlab/instructlab/issues/1821
+#
+# first, pre-install flash-attn build dependencies
+$pip_install torch packaging setuptools wheel psutil ninja
+
+# now build flash-attn using the pre-installed build dependencies; this will
+# guarantee that the build version of torch will match the runtime version of
+# torch; otherwise, all kinds of problems may occur, like missing symbols when
+# accessing C extensions and such
+$pip_install flash-attn --no-build-isolation
+
+CMAKE_ARGS="-DGGML_CUDA=on" $pip_install .
 $pip_install .[cuda] -r requirements-vllm-cuda.txt
 
 - name: Update instructlab-eval library
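
Not part of the commit, but a quick way to confirm that flash-attn was built against the torch present at runtime is to import both in the same environment; a mismatch typically surfaces as an ImportError about undefined symbols in the C extension:

    python3.11 -c "import flash_attn, torch; print(torch.__version__, torch.version.cuda)"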

.github/workflows/e2e-nvidia-l40s-x4.yml

Lines changed: 19 additions & 3 deletions
@@ -151,11 +151,27 @@ jobs:
 python3.11 -m pip cache remove llama_cpp_python
 
 pip_install="python3.11 -m pip install -v -c constraints-dev.txt"
-CMAKE_ARGS="-DGGML_CUDA=on" $pip_install .
 
-# https://github.com/instructlab/instructlab/issues/1821
-# install with Torch and build dependencies installed
+# pre-install some build dependencies
 $pip_install packaging wheel setuptools-scm
+
+# flash-attn has a bug in the setup.py that causes pip to attempt installing it
+# before torch is installed. This is a bug because their setup.py depends on
+# importing the module, so it should have been listed in build_requires. Alas!
+#
+# See: https://github.com/Dao-AILab/flash-attention/pull/958
+# Also: https://github.com/instructlab/instructlab/issues/1821
+#
+# first, pre-install flash-attn build dependencies
+$pip_install torch packaging setuptools wheel psutil ninja
+
+# now build flash-attn using the pre-installed build dependencies; this will
+# guarantee that the build version of torch will match the runtime version of
+# torch; otherwise, all kinds of problems may occur, like missing symbols when
+# accessing C extensions and such
+$pip_install flash-attn --no-build-isolation
+
+CMAKE_ARGS="-DGGML_CUDA=on" $pip_install .
 $pip_install .[cuda] -r requirements-vllm-cuda.txt
 
 - name: Update instructlab-eval library
