Skip to content

Commit da73149

Browse files
add timeouts
Signed-off-by: Brian Gallagher <[email protected]>
1 parent e24db03 commit da73149

File tree

3 files changed

+27
-2
lines changed

3 files changed

+27
-2
lines changed

.tekton/universal-image-py312-cuda128-torch280-pull-request.yaml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,8 @@ metadata:
1818
name: universal-image-py312-cuda128-torch280-on-pull-request
1919
namespace: open-data-hub-tenant
2020
spec:
21+
timeouts:
22+
pipeline: 9h
2123
params:
2224
- name: git-url
2325
value: '{{source_url}}'

.tekton/universal-image-py312-cuda128-torch280-push.yaml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,8 @@ metadata:
1717
name: universal-image-py312-cuda128-torch280-on-push
1818
namespace: open-data-hub-tenant
1919
spec:
20+
timeouts:
21+
pipeline: 9h
2022
params:
2123
- name: git-url
2224
value: '{{source_url}}'

images/universal/training/py312-cuda128-torch280/Dockerfile

Lines changed: 23 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -154,9 +154,30 @@ RUN pip install --retries 5 --timeout 300 --no-cache-dir \
154154
&& chmod -R g+w /opt/app-root/lib/python3.12/site-packages \
155155
&& fix-permissions /opt/app-root -P
156156

157+
# Provide CUDA user-space libraries via pip, aligning with runtime for extension builds
158+
RUN pip install --retries 5 --timeout 300 --no-cache-dir \
159+
nvidia-nccl-cu12==2.27.3 \
160+
nvidia-cublas-cu12==12.8.4.1 \
161+
nvidia-cuda-cupti-cu12==12.8.90 \
162+
nvidia-cuda-nvrtc-cu12==12.8.93 \
163+
nvidia-cuda-runtime-cu12==12.8.90 \
164+
nvidia-cudnn-cu12==9.10.2.21 \
165+
nvidia-cufft-cu12==11.3.3.83 \
166+
nvidia-cufile-cu12==1.13.1.3 \
167+
nvidia-curand-cu12==10.3.9.90 \
168+
nvidia-cusolver-cu12==11.7.3.90 \
169+
nvidia-cusparse-cu12==12.5.8.93 \
170+
nvidia-cusparselt-cu12==0.7.1 \
171+
nvidia-nvjitlink-cu12==12.8.93 \
172+
nvidia-nvtx-cu12==12.8.90 \
173+
&& fix-permissions /opt/app-root -P
174+
175+
# Ensure cuDNN from pip is discoverable during source builds
176+
ENV LD_LIBRARY_PATH="/opt/app-root/lib/python3.12/site-packages/nvidia/cudnn/lib:${LD_LIBRARY_PATH}"
177+
157178
# Deterministic 2-step: sub-dep first, then parent without deps (align with runtime)
158-
RUN pip install --retries 5 --timeout 300 --no-cache-dir --no-build-isolation causal-conv1d==1.5.3.post1 && \
159-
pip install --retries 5 --timeout 300 --no-cache-dir --no-build-isolation mamba-ssm==2.2.6.post3 --no-deps && \
179+
RUN pip install --no-cache-dir --no-build-isolation causal-conv1d==1.5.3.post1 && \
180+
pip install --no-cache-dir --no-build-isolation mamba-ssm==2.2.6.post3 --no-deps && \
160181
fix-permissions /opt/app-root -P
161182

162183
# Provide a POSIX entrypoint wrapper to choose behavior based on invocation

0 commit comments

Comments
 (0)