diff --git a/.github/workflows/e2e-nvidia-l4-x1.yml b/.github/workflows/e2e-nvidia-l4-x1.yml
index 1d81b1dd..e37e869c 100644
--- a/.github/workflows/e2e-nvidia-l4-x1.yml
+++ b/.github/workflows/e2e-nvidia-l4-x1.yml
@@ -112,19 +112,38 @@ jobs:
         working-directory: ./instructlab
         run: |
           export CUDA_HOME="/usr/local/cuda"
-          export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
+          export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_HOME/lib64:$CUDA_HOME/extras/CUPTI/lib64"
           export PATH="$PATH:$CUDA_HOME/bin"
           python3.11 -m venv --upgrade-deps venv
           . venv/bin/activate
           nvidia-smi
           python3.11 -m pip cache remove llama_cpp_python
-          CMAKE_ARGS="-DLLAMA_CUDA=on" python3.11 -m pip install -v .
+          pip_install="python3.11 -m pip install -v -c constraints-dev.txt"

-          # https://github.com/instructlab/instructlab/issues/1821
-          # install with Torch and build dependencies installed
-          python3.11 -m pip install -v packaging wheel setuptools-scm
-          python3.11 -m pip install -v .[cuda] -r requirements-vllm-cuda.txt
+          pip_install="python3.11 -m pip install -v -c constraints-dev.txt"
+
+          # pre-install some build dependencies
+          $pip_install packaging wheel setuptools-scm
+
+          # flash-attn has a bug in the setup.py that causes pip to attempt installing it
+          # before torch is installed. This is a bug because their setup.py depends on
+          # importing the module, so it should have been listed in build_requires. Alas!
+          #
+          # See: https://github.com/Dao-AILab/flash-attention/pull/958
+          # Also: https://github.com/instructlab/instructlab/issues/1821
+          #
+          # first, pre-install flash-attn build dependencies
+          $pip_install torch packaging setuptools wheel psutil ninja
+
+          # now build flash-attn using the pre-installed build dependencies; this will
+          # guarantee that the build version of torch will match the runtime version of
+          # torch; otherwise, all kinds of problems may occur, like missing symbols when
+          # accessing C extensions and such
+          $pip_install flash-attn --no-build-isolation
+
+          CMAKE_ARGS="-DGGML_CUDA=on" $pip_install .
+          $pip_install .[cuda] -r requirements-vllm-cuda.txt

       - name: Update instructlab-eval library
         working-directory: ./eval
diff --git a/.github/workflows/e2e-nvidia-l40s-x4.yml b/.github/workflows/e2e-nvidia-l40s-x4.yml
index d802c3d9..aed5864f 100644
--- a/.github/workflows/e2e-nvidia-l40s-x4.yml
+++ b/.github/workflows/e2e-nvidia-l40s-x4.yml
@@ -143,19 +143,36 @@ jobs:
         working-directory: ./instructlab
         run: |
           export CUDA_HOME="/usr/local/cuda"
-          export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
+          export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_HOME/lib64:$CUDA_HOME/extras/CUPTI/lib64"
           export PATH="$PATH:$CUDA_HOME/bin"
           python3.11 -m venv --upgrade-deps venv
           . venv/bin/activate
           nvidia-smi
           python3.11 -m pip cache remove llama_cpp_python
-          CMAKE_ARGS="-DLLAMA_CUDA=on" python3.11 -m pip install .
-
-          # https://github.com/instructlab/instructlab/issues/1821
-          # install with Torch and build dependencies installed
-          python3.11 -m pip install packaging wheel setuptools-scm
-          python3.11 -m pip install .[cuda] -r requirements-vllm-cuda.txt
+          pip_install="python3.11 -m pip install -v -c constraints-dev.txt"
+
+          # pre-install some build dependencies
+          $pip_install packaging wheel setuptools-scm
+
+          # flash-attn has a bug in the setup.py that causes pip to attempt installing it
+          # before torch is installed. This is a bug because their setup.py depends on
+          # importing the module, so it should have been listed in build_requires. Alas!
+          #
+          # See: https://github.com/Dao-AILab/flash-attention/pull/958
+          # Also: https://github.com/instructlab/instructlab/issues/1821
+          #
+          # first, pre-install flash-attn build dependencies
+          $pip_install torch packaging setuptools wheel psutil ninja
+
+          # now build flash-attn using the pre-installed build dependencies; this will
+          # guarantee that the build version of torch will match the runtime version of
+          # torch; otherwise, all kinds of problems may occur, like missing symbols when
+          # accessing C extensions and such
+          $pip_install flash-attn --no-build-isolation
+
+          CMAKE_ARGS="-DGGML_CUDA=on" $pip_install .
+          $pip_install .[cuda] -r requirements-vllm-cuda.txt

       - name: Update instructlab-eval library
         working-directory: ./eval
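Note: the two-phase flash-attn install in both hunks is the general workaround for
any package whose setup.py imports torch at build time: first install the build
dependencies into the target environment, then build with pip's build isolation
disabled so the package compiles against the exact torch it will run against. A
minimal standalone sketch of the same pattern, lifted out of the workflows
(assumes a constraints-dev.txt at the current directory root, as these workflows do):

    # same Python 3.11 virtualenv the workflows create
    python3.11 -m venv --upgrade-deps venv
    . venv/bin/activate

    pip_install="python3.11 -m pip install -v -c constraints-dev.txt"

    # phase 1: put flash-attn's build dependencies, torch included, into the venv
    $pip_install torch packaging setuptools wheel psutil ninja

    # phase 2: --no-build-isolation makes pip build flash-attn inside this venv,
    # so the build-time torch is the runtime torch and C-extension symbols match
    $pip_install flash-attn --no-build-isolation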