Commit a0c3dbe

Merge pull request #260 from booxter/fix-e2e-jobs

ci: Fix e2e job failures

2 parents: 820ad90 + 3ec63b3

File tree: 2 files changed, +49 -13 lines

.github/workflows/e2e-nvidia-l4-x1.yml
.github/workflows/e2e-nvidia-l40s-x4.yml

.github/workflows/e2e-nvidia-l4-x1.yml (25 additions, 6 deletions)
```diff
@@ -112,19 +112,38 @@ jobs:
         working-directory: ./instructlab
         run: |
           export CUDA_HOME="/usr/local/cuda"
-          export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
+          export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_HOME/lib64:$CUDA_HOME/extras/CUPTI/lib64"
           export PATH="$PATH:$CUDA_HOME/bin"
           python3.11 -m venv --upgrade-deps venv
           . venv/bin/activate
           nvidia-smi
           python3.11 -m pip cache remove llama_cpp_python

-          CMAKE_ARGS="-DLLAMA_CUDA=on" python3.11 -m pip install -v .
+          pip_install="python3.11 -m pip install -v -c constraints-dev.txt"

-          # https://github.com/instructlab/instructlab/issues/1821
-          # install with Torch and build dependencies installed
-          python3.11 -m pip install -v packaging wheel setuptools-scm
-          python3.11 -m pip install -v .[cuda] -r requirements-vllm-cuda.txt
+          pip_install="python3.11 -m pip install -v -c constraints-dev.txt"
+
+          # pre-install some build dependencies
+          $pip_install packaging wheel setuptools-scm
+
+          # flash-attn has a bug in the setup.py that causes pip to attempt installing it
+          # before torch is installed. This is a bug because their setup.py depends on
+          # importing the module, so it should have been listed in build_requires. Alas!
+          #
+          # See: https://github.com/Dao-AILab/flash-attention/pull/958
+          # Also: https://github.com/instructlab/instructlab/issues/1821
+          #
+          # first, pre-install flash-attn build dependencies
+          $pip_install torch packaging setuptools wheel psutil ninja
+
+          # now build flash-attn using the pre-installed build dependencies; this will
+          # guarantee that the build version of torch will match the runtime version of
+          # torch; otherwise, all kinds of problems may occur, like missing symbols when
+          # accessing C extensions and such
+          $pip_install flash-attn --no-build-isolation
+
+          CMAKE_ARGS="-DGGML_CUDA=on" $pip_install .
+          $pip_install .[cuda] -r requirements-vllm-cuda.txt

       - name: Update instructlab-eval library
         working-directory: ./eval
```
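Both workflows now funnel every install through a `pip_install` helper so that the repository's constraints-dev.txt governs each step (in this hunk the helper happens to be assigned twice; the second assignment is a harmless duplicate). A minimal sketch of how the constraints pattern behaves, with a hypothetical pin standing in for the real constraints-dev.txt:

```bash
# Hypothetical constraints file for illustration only; the real pins live in
# the repository's constraints-dev.txt and are not shown in this diff.
cat > constraints-dev.txt <<'EOF'
torch==2.3.1
EOF

# One helper, reused for every install step, so all steps resolve against
# the same pins and cannot drift apart across separate pip invocations.
pip_install="python3.11 -m pip install -v -c constraints-dev.txt"

# A constraint installs nothing by itself; it only fixes the version pip may
# choose when a package is requested directly or pulled in as a dependency.
$pip_install packaging wheel setuptools-scm
$pip_install torch   # resolves to the pinned 2.3.1 under the constraint
```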

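The long comment block documents a general workaround for source packages whose setup.py imports one of their own dependencies at build time, as flash-attn does with torch. Reduced to its essentials, the pattern from the diff is:

```bash
# Under pip's default build isolation, flash-attn would be built in a
# throwaway environment whose torch (if resolved at all) can differ from the
# runtime torch, causing missing C-extension symbols at import time.

# Step 1: install the build dependencies into the real environment.
python3.11 -m pip install torch packaging setuptools wheel psutil ninja

# Step 2: build flash-attn against that environment, not an isolated one,
# so the build-time and runtime torch versions are guaranteed to match.
python3.11 -m pip install flash-attn --no-build-isolation
```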
.github/workflows/e2e-nvidia-l40s-x4.yml (24 additions, 7 deletions)

The four-GPU L40S workflow receives the same fix; the only differences from the hunk above are that this job's old install commands ran without -v and that the replacement defines pip_install once.
```diff
@@ -143,19 +143,36 @@ jobs:
         working-directory: ./instructlab
         run: |
           export CUDA_HOME="/usr/local/cuda"
-          export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
+          export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_HOME/lib64:$CUDA_HOME/extras/CUPTI/lib64"
           export PATH="$PATH:$CUDA_HOME/bin"
           python3.11 -m venv --upgrade-deps venv
           . venv/bin/activate
           nvidia-smi
           python3.11 -m pip cache remove llama_cpp_python

-          CMAKE_ARGS="-DLLAMA_CUDA=on" python3.11 -m pip install .
-
-          # https://github.com/instructlab/instructlab/issues/1821
-          # install with Torch and build dependencies installed
-          python3.11 -m pip install packaging wheel setuptools-scm
-          python3.11 -m pip install .[cuda] -r requirements-vllm-cuda.txt
+          pip_install="python3.11 -m pip install -v -c constraints-dev.txt"
+
+          # pre-install some build dependencies
+          $pip_install packaging wheel setuptools-scm
+
+          # flash-attn has a bug in the setup.py that causes pip to attempt installing it
+          # before torch is installed. This is a bug because their setup.py depends on
+          # importing the module, so it should have been listed in build_requires. Alas!
+          #
+          # See: https://github.com/Dao-AILab/flash-attention/pull/958
+          # Also: https://github.com/instructlab/instructlab/issues/1821
+          #
+          # first, pre-install flash-attn build dependencies
+          $pip_install torch packaging setuptools wheel psutil ninja
+
+          # now build flash-attn using the pre-installed build dependencies; this will
+          # guarantee that the build version of torch will match the runtime version of
+          # torch; otherwise, all kinds of problems may occur, like missing symbols when
+          # accessing C extensions and such
+          $pip_install flash-attn --no-build-isolation
+
+          CMAKE_ARGS="-DGGML_CUDA=on" $pip_install .
+          $pip_install .[cuda] -r requirements-vllm-cuda.txt

       - name: Update instructlab-eval library
         working-directory: ./eval
```
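One more change rides along in both hunks: the CMake toggle moves from -DLLAMA_CUDA=on to -DGGML_CUDA=on, matching upstream llama.cpp's rename of the option after its GGML backend split; the old spelling is ignored, which yields a CPU-only build. The `python3.11 -m pip cache remove llama_cpp_python` line serves the same goal, forcing a fresh source build instead of reusing a cached wheel built without CUDA. A hedged sketch of the standalone equivalent (the workflows instead pass the flag while installing `.`, letting it reach the llama-cpp-python dependency build):

```bash
# CMAKE_ARGS is forwarded by llama-cpp-python's build backend to CMake.
# GGML_CUDA is the current name of the CUDA switch; the pre-rename
# LLAMA_CUDA spelling would be ignored, silently producing a CPU-only wheel.
CMAKE_ARGS="-DGGML_CUDA=on" python3.11 -m pip install -v --no-cache-dir llama-cpp-python
```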
