diff --git a/.github/workflows/e2e-nvidia-l4-x1.yml b/.github/workflows/e2e-nvidia-l4-x1.yml
index 1d81b1dd..e37e869c 100644
--- a/.github/workflows/e2e-nvidia-l4-x1.yml
+++ b/.github/workflows/e2e-nvidia-l4-x1.yml
@@ -112,19 +112,38 @@ jobs:
         working-directory: ./instructlab
         run: |
           export CUDA_HOME="/usr/local/cuda"
-          export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
+          export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_HOME/lib64:$CUDA_HOME/extras/CUPTI/lib64"
           export PATH="$PATH:$CUDA_HOME/bin"
           python3.11 -m venv --upgrade-deps venv
           . venv/bin/activate
           nvidia-smi
           python3.11 -m pip cache remove llama_cpp_python
-          CMAKE_ARGS="-DLLAMA_CUDA=on" python3.11 -m pip install -v .
+          pip_install="python3.11 -m pip install -v -c constraints-dev.txt"

-          # https://github.com/instructlab/instructlab/issues/1821
-          # install with Torch and build dependencies installed
-          python3.11 -m pip install -v packaging wheel setuptools-scm
-          python3.11 -m pip install -v .[cuda] -r requirements-vllm-cuda.txt
+          pip_install="python3.11 -m pip install -v -c constraints-dev.txt"
+
+          # pre-install some build dependencies
+          $pip_install packaging wheel setuptools-scm
+
+          # flash-attn has a bug in the setup.py that causes pip to attempt installing it
+          # before torch is installed. This is a bug because their setup.py depends on
+          # importing the module, so it should have been listed in build_requires. Alas!
+          #
+          # See: https://github.com/Dao-AILab/flash-attention/pull/958
+          # Also: https://github.com/instructlab/instructlab/issues/1821
+          #
+          # first, pre-install flash-attn build dependencies
+          $pip_install torch packaging setuptools wheel psutil ninja
+
+          # now build flash-attn using the pre-installed build dependencies; this will
+          # guarantee that the build version of torch will match the runtime version of
+          # torch; otherwise, all kinds of problems may occur, like missing symbols when
+          # accessing C extensions and such
+          $pip_install flash-attn --no-build-isolation
+
+          CMAKE_ARGS="-DGGML_CUDA=on" $pip_install .
+          $pip_install .[cuda] -r requirements-vllm-cuda.txt

       - name: Update instructlab-eval library
         working-directory: ./eval
diff --git a/.github/workflows/e2e-nvidia-l40s-x4.yml b/.github/workflows/e2e-nvidia-l40s-x4.yml
index d802c3d9..aed5864f 100644
--- a/.github/workflows/e2e-nvidia-l40s-x4.yml
+++ b/.github/workflows/e2e-nvidia-l40s-x4.yml
@@ -143,19 +143,36 @@ jobs:
         working-directory: ./instructlab
         run: |
           export CUDA_HOME="/usr/local/cuda"
-          export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
+          export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_HOME/lib64:$CUDA_HOME/extras/CUPTI/lib64"
           export PATH="$PATH:$CUDA_HOME/bin"
           python3.11 -m venv --upgrade-deps venv
           . venv/bin/activate
           nvidia-smi
           python3.11 -m pip cache remove llama_cpp_python
-          CMAKE_ARGS="-DLLAMA_CUDA=on" python3.11 -m pip install .
-
-          # https://github.com/instructlab/instructlab/issues/1821
-          # install with Torch and build dependencies installed
-          python3.11 -m pip install packaging wheel setuptools-scm
-          python3.11 -m pip install .[cuda] -r requirements-vllm-cuda.txt
+          pip_install="python3.11 -m pip install -v -c constraints-dev.txt"
+
+          # pre-install some build dependencies
+          $pip_install packaging wheel setuptools-scm
+
+          # flash-attn has a bug in the setup.py that causes pip to attempt installing it
+          # before torch is installed. This is a bug because their setup.py depends on
+          # importing the module, so it should have been listed in build_requires. Alas!
+          #
+          # See: https://github.com/Dao-AILab/flash-attention/pull/958
+          # Also: https://github.com/instructlab/instructlab/issues/1821
+          #
+          # first, pre-install flash-attn build dependencies
+          $pip_install torch packaging setuptools wheel psutil ninja
+
+          # now build flash-attn using the pre-installed build dependencies; this will
+          # guarantee that the build version of torch will match the runtime version of
+          # torch; otherwise, all kinds of problems may occur, like missing symbols when
+          # accessing C extensions and such
+          $pip_install flash-attn --no-build-isolation
+
+          CMAKE_ARGS="-DGGML_CUDA=on" $pip_install .
+          $pip_install .[cuda] -r requirements-vllm-cuda.txt

       - name: Update instructlab-eval library
         working-directory: ./eval
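Note: the two-phase flash-attn install in both hunks is the general workaround for
any package whose setup.py imports torch at build time: first install the build
dependencies into the target environment, then build with pip's build isolation
disabled so the package compiles against the exact torch it will run against. A
minimal standalone sketch of the same pattern, lifted out of the workflows
(assumes a constraints-dev.txt at the current directory root, as these workflows do):

    # same Python 3.11 virtualenv the workflows create
    python3.11 -m venv --upgrade-deps venv
    . venv/bin/activate

    pip_install="python3.11 -m pip install -v -c constraints-dev.txt"

    # phase 1: put flash-attn's build dependencies, torch included, into the venv
    $pip_install torch packaging setuptools wheel psutil ninja

    # phase 2: --no-build-isolation makes pip build flash-attn inside this venv,
    # so the build-time torch is the runtime torch and C-extension symbols match
    $pip_install flash-attn --no-build-isolation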