Skip to content

Commit b01bd20

Browse files
authored
Merge pull request #198 from red-hat-data-services/revert-196-test3
Revert "updating docker file"
2 parents 6083d6a + ae89ef5 commit b01bd20

File tree

2 files changed

+10
-5
lines changed

2 files changed

+10
-5
lines changed

.tekton/distributed-workloads-poc1-push.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ spec:
 - default: ""
   description: Build dependencies to be prefetched
   name: prefetch-input
-  value: '[{"type": "pipenv", "path": "images/runtime/training/py312-cuda128-torch280"}, {"type": "rpm", "path": "images/runtime/training/py312-cuda128-torch280"}]'
+  value: '{"type": "pipenv", "path": "images/runtime/training/py312-cuda128-torch280"}'
   type: string
 - default: ""
   description: Image tag expiration time, time values could be something like

images/runtime/training/py312-cuda128-torch280/Dockerfile

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,11 @@ ENV CUDA_VERSION=12.8.0 \
 
 # Ref: https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/12.8.0/ubi9/base/Dockerfile
 # nvcc is required for Flash Attention
-# CUDA packages are now managed via rpms.lock.yaml - install from locked versions
-RUN dnf install -y \
+RUN dnf config-manager \
+    --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
+    && dnf install -y \
+    --disablerepo=rhel-9-for-x86_64-baseos-rpms \
+    --disablerepo=rhel-9-for-x86_64-appstream-rpms \
     cuda-cudart-12-8-${NV_CUDA_CUDART_VERSION} \
     cuda-compat-12-8-${NV_CUDA_COMPAT_VERSION} \
     cuda-nvcc-12-8-${NV_CUDA_NVCC_VERSION} \
@@ -48,8 +51,10 @@ ENV CUDA_HOME="/usr/local/cuda" \
     LD_LIBRARY_PATH="/usr/local/nvidia/lib:/usr/local/nvidia/lib64:$CUDA_HOME/lib64:$CUDA_HOME/extras/CUPTI/lib64:$LD_LIBRARY_PATH"
 
 # Install InfiniBand and RDMA packages
-# InfiniBand/RDMA packages are now managed via rpms.lock.yaml - install from locked versions
-RUN dnf install -y \
+RUN dnf config-manager \
+    --add-repo https://linux.mellanox.com/public/repo/mlnx_ofed/latest/rhel9.5/mellanox_mlnx_ofed.repo
+
+RUN dnf install -y --disablerepo="*" --enablerepo="cuda-rhel9-x86_64,mlnx_ofed_24.10-1.1.4.0_base,ubi-9-appstream-rpms,ubi-9-baseos-rpms" \
     libibverbs-utils \
     infiniband-diags \
     libibumad3 \

0 commit comments

Comments
 (0)