
Commit 0235a63

chipspeak authored and openshift-merge-bot[bot] committed
task(RHOAIENG-33492): Bump Ray runtime image to CUDA 12.8
Signed-off-by: Pat O'Connor <[email protected]>
1 parent 1c7bc95 commit 0235a63

4 files changed: +70 −60 lines

.tekton/ray-cuda-push.yaml

Lines changed: 2 additions & 2 deletions
@@ -24,9 +24,9 @@ spec:
     - name: revision
       value: '{{revision}}'
     - name: output-image
-      value: quay.io/modh/ray:2.47.1-py312-cu121
+      value: quay.io/modh/ray:2.47.1-py312-cu128
     - name: additional-tag
-      value: 2.47.1-py312-cu121-{{revision}}
+      value: 2.47.1-py312-cu128-{{revision}}
     - name: dockerfile
       value: Dockerfile
     - name: path-context
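
The two bumped parameters control where the pipeline publishes the image: `output-image` is the floating CUDA 12.8 tag, and `additional-tag` pins each build to the commit revision. As a usage sketch, assuming the pipeline has already pushed both tags (the revision suffix is whatever `{{revision}}` resolves to for a given run):

    # Floating tag for the CUDA 12.8 runtime image
    podman pull quay.io/modh/ray:2.47.1-py312-cu128

    # Revision-pinned tag for a specific pipeline run
    podman pull quay.io/modh/ray:2.47.1-py312-cu128-<revision>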

README.md

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 * `MINIO_CLI_IMAGE` (Optional) - Minio CLI image used for uploading/downloading data from/into s3 bucket
 * `TEST_TIER` (Optional) - Specifies test tier to run, skipping tests which don't belong to specified test tier. Supported test tiers: Smoke, Sanity, Tier1, Tier2, Tier3, Pre-Upgrade and Post-Upgrade. Test tier can also be provided using test parameter `testTier`.
 
-NOTE: `quay.io/modh/ray:2.35.0-py311-cu121` is the default image used for creating a RayCluster resource. If you have your own custom ray image which suits your purposes, specify it in `TEST_RAY_IMAGE` environment variable.
+NOTE: `quay.io/modh/ray:2.47.1-py312-cu128` is the default image used for creating a RayCluster resource. If you have your own custom ray image which suits your purposes, specify it in `TEST_RAY_IMAGE` environment variable.
 
 ### Environment variables for fms-hf-tuning test suite
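
Overriding the default is just a matter of exporting the variable before running the test suite. A minimal sketch; the image reference below is a hypothetical custom build, not something defined in this repo:

    # Point the tests at a custom Ray image instead of the default cu128 tag
    export TEST_RAY_IMAGE=quay.io/example/ray:2.47.1-custom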

images/runtime/ray/cuda/Dockerfile

Lines changed: 66 additions & 56 deletions
@@ -3,34 +3,43 @@ ARG IMAGE_TAG=9.6-1755735361
 
 FROM registry.access.redhat.com/ubi9/python-${PYTHON_VERSION}:${IMAGE_TAG}
 
-LABEL name="ray-ubi9-py312-cu121" \
-    summary="CUDA 12.1 Python 3.12 image based on UBI9 for Ray" \
-    description="CUDA 12.1 Python 3.12 image based on UBI9 for Ray" \
-    io.k8s.display-name="CUDA 12.1 Python 3.12 base image for Ray" \
-    io.k8s.description="CUDA 12.1 Python 3.12 image based on UBI9 for Ray" \
+ARG TARGETARCH
+
+LABEL name="ray-ubi9-py312-cu128" \
+    summary="CUDA 12.8 Python 3.12 image based on UBI9 for Ray" \
+    description="CUDA 12.8 Python 3.12 image based on UBI9 for Ray" \
+    io.k8s.display-name="CUDA 12.8 Python 3.12 base image for Ray" \
+    io.k8s.description="CUDA 12.8 Python 3.12 image based on UBI9 for Ray" \
     authoritative-source-url="https://github.com/opendatahub-io/distributed-workloads"
 
 # Install CUDA base from:
-# https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/12.1.1/ubi9/base/Dockerfile
+# https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/12.8.0/ubi9/base/Dockerfile
 USER 0
 WORKDIR /opt/app-root/bin
 
-ENV NVARCH=x86_64
-ENV NVIDIA_REQUIRE_CUDA="cuda>=12.1 brand=tesla,driver>=470,driver<471 brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 brand=quadrortx,driver>=470,driver<471 brand=titan,driver>=470,driver<471 brand=titanrtx,driver>=470,driver<471 brand=tesla,driver>=525,driver<526 brand=unknown,driver>=525,driver<526 brand=nvidia,driver>=525,driver<526 brand=nvidiartx,driver>=525,driver<526 brand=geforce,driver>=525,driver<526 brand=geforcertx,driver>=525,driver<526 brand=quadro,driver>=525,driver<526 brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526"
-ENV NV_CUDA_CUDART_VERSION=12.1.105-1
-
-COPY cuda.repo-x86_64 /etc/yum.repos.d/cuda.repo
+ENV NVIDIA_REQUIRE_CUDA="cuda>=12.8 brand=unknown,driver>=470,driver<471 brand=grid,driver>=470,driver<471 brand=tesla,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 brand=quadrortx,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 brand=vapps,driver>=470,driver<471 brand=vpc,driver>=470,driver<471 brand=vcs,driver>=470,driver<471 brand=vws,driver>=470,driver<471 brand=cloudgaming,driver>=470,driver<471 brand=unknown,driver>=535,driver<536 brand=grid,driver>=535,driver<536 brand=tesla,driver>=535,driver<536 brand=nvidia,driver>=535,driver<536 brand=quadro,driver>=535,driver<536 brand=quadrortx,driver>=535,driver<536 brand=nvidiartx,driver>=535,driver<536 brand=vapps,driver>=535,driver<536 brand=vpc,driver>=535,driver<536 brand=vcs,driver>=535,driver<536 brand=vws,driver>=535,driver<536 brand=cloudgaming,driver>=535,driver<536 brand=unknown,driver>=550,driver<551 brand=grid,driver>=550,driver<551 brand=tesla,driver>=550,driver<551 brand=nvidia,driver>=550,driver<551 brand=quadro,driver>=550,driver<551 brand=quadrortx,driver>=550,driver<551 brand=nvidiartx,driver>=550,driver<551 brand=vapps,driver>=550,driver<551 brand=vpc,driver>=550,driver<551 brand=vcs,driver>=550,driver<551 brand=vws,driver>=550,driver<551 brand=cloudgaming,driver>=550,driver<551 brand=unknown,driver>=560,driver<561 brand=grid,driver>=560,driver<561 brand=tesla,driver>=560,driver<561 brand=nvidia,driver>=560,driver<561 brand=quadro,driver>=560,driver<561 brand=quadrortx,driver>=560,driver<561 brand=nvidiartx,driver>=560,driver<561 brand=vapps,driver>=560,driver<561 brand=vpc,driver>=560,driver<561 brand=vcs,driver>=560,driver<561 brand=vws,driver>=560,driver<561 brand=cloudgaming,driver>=560,driver<561 brand=unknown,driver>=565,driver<566 brand=grid,driver>=565,driver<566 brand=tesla,driver>=565,driver<566 brand=nvidia,driver>=565,driver<566 brand=quadro,driver>=565,driver<566 brand=quadrortx,driver>=565,driver<566 brand=nvidiartx,driver>=565,driver<566 brand=vapps,driver>=565,driver<566 brand=vpc,driver>=565,driver<566 brand=vcs,driver>=565,driver<566 brand=vws,driver>=565,driver<566 brand=cloudgaming,driver>=565,driver<566"
+ENV NV_CUDA_CUDART_VERSION=12.8.57-1
 
 RUN NVIDIA_GPGKEY_SUM=d0664fbbdb8c32356d45de36c5984617217b2d0bef41b93ccecd326ba3b80c87 && \
+    if [ "${TARGETARCH}" = "arm64" ]; then NVARCH=sbsa; else NVARCH=x86_64; fi && \
     curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel9/${NVARCH}/D42D0685.pub | sed '/^Version/d' > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
     echo "$NVIDIA_GPGKEY_SUM /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA" | sha256sum -c --strict -
 
-ENV CUDA_VERSION=12.1.1
+ENV CUDA_VERSION=12.8.0
+
+COPY cuda.repo-* ./
+COPY NGC-DL-CONTAINER-LICENSE /
+
+RUN if [ "${TARGETARCH}" = "arm64" ]; then \
+        cp cuda.repo-arm64 /etc/yum.repos.d/cuda.repo; \
+    else \
+        cp cuda.repo-x86_64 /etc/yum.repos.d/cuda.repo; \
+    fi
 
 # For libraries in the cuda-compat-* package: https://docs.nvidia.com/cuda/eula/index.html#attachment-a
 RUN yum upgrade -y && yum install -y \
-    cuda-cudart-12-1-${NV_CUDA_CUDART_VERSION} \
-    cuda-compat-12-1 \
+    cuda-cudart-12-8-${NV_CUDA_CUDART_VERSION} \
+    cuda-compat-12-8 \
     && yum clean all \
     && rm -rf /var/cache/yum/*
 
@@ -41,30 +50,28 @@ RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
 ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}
 ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
 
-COPY NGC-DL-CONTAINER-LICENSE /
-
 # nvidia-container-runtime
 ENV NVIDIA_VISIBLE_DEVICES=all
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 
 # Install CUDA runtime from:
-# https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/12.1.1/ubi9/runtime/Dockerfile
-ENV NV_CUDA_LIB_VERSION=12.1.1-1
-ENV NV_NVTX_VERSION=12.1.105-1
-ENV NV_LIBNPP_VERSION=12.1.0.40-1
-ENV NV_LIBNPP_PACKAGE=libnpp-12-1-${NV_LIBNPP_VERSION}
-ENV NV_LIBCUBLAS_VERSION=12.1.3.1-1
+# https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/12.8.0/ubi9/runtime/Dockerfile
+ENV NV_CUDA_LIB_VERSION=12.8.0-1
+ENV NV_NVTX_VERSION=12.8.55-1
+ENV NV_LIBNPP_VERSION=12.3.3.65-1
+ENV NV_LIBNPP_PACKAGE=libnpp-12-8-${NV_LIBNPP_VERSION}
+ENV NV_LIBCUBLAS_VERSION=12.8.3.14-1
 ENV NV_LIBNCCL_PACKAGE_NAME=libnccl
-ENV NV_LIBNCCL_PACKAGE_VERSION=2.17.1-1
-ENV NV_LIBNCCL_VERSION=2.17.1
-ENV NCCL_VERSION=2.17.1
-ENV NV_LIBNCCL_PACKAGE=${NV_LIBNCCL_PACKAGE_NAME}-${NV_LIBNCCL_PACKAGE_VERSION}+cuda12.1
+ENV NV_LIBNCCL_PACKAGE_VERSION=2.25.1-1
+ENV NV_LIBNCCL_VERSION=2.25.1
+ENV NCCL_VERSION=2.25.1
+ENV NV_LIBNCCL_PACKAGE=${NV_LIBNCCL_PACKAGE_NAME}-${NV_LIBNCCL_PACKAGE_VERSION}+cuda12.8
 
 RUN yum install -y \
-    cuda-libraries-12-1-${NV_CUDA_LIB_VERSION} \
-    cuda-nvtx-12-1-${NV_NVTX_VERSION} \
+    cuda-libraries-12-8-${NV_CUDA_LIB_VERSION} \
+    cuda-nvtx-12-8-${NV_NVTX_VERSION} \
     ${NV_LIBNPP_PACKAGE} \
-    libcublas-12-1-${NV_LIBCUBLAS_VERSION} \
+    libcublas-12-8-${NV_LIBCUBLAS_VERSION} \
     ${NV_LIBNCCL_PACKAGE} \
     && yum clean all \
     && rm -rf /var/cache/yum/*
 
@@ -73,45 +80,48 @@ RUN yum install -y \
 ENV XLA_FLAGS=--xla_gpu_cuda_data_dir=/usr/local/cuda
 
 # Install CUDA devel from:
-# https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/12.1.1/ubi9/devel/Dockerfile
-ENV NV_CUDA_LIB_VERSION=12.1.1-1
-ENV NV_NVPROF_VERSION=12.1.105-1
-ENV NV_NVPROF_DEV_PACKAGE=cuda-nvprof-12-1-${NV_NVPROF_VERSION}
-ENV NV_CUDA_CUDART_DEV_VERSION=12.1.105-1
-ENV NV_NVML_DEV_VERSION=12.1.105-1
-ENV NV_LIBCUBLAS_DEV_VERSION=12.1.3.1-1
-ENV NV_LIBNPP_DEV_VERSION=12.1.0.40-1
-ENV NV_LIBNPP_DEV_PACKAGE=libnpp-devel-12-1-${NV_LIBNPP_DEV_VERSION}
+# https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/12.8.0/ubi9/devel/Dockerfile
+ENV NV_CUDA_LIB_VERSION=12.8.0-1
+# ARM64 doesn't have nvprof package - set in runtime
+ENV NV_NVPROF_VERSION=12.8.57-1
+ENV NV_NVPROF_DEV_PACKAGE=cuda-nvprof-12-8-${NV_NVPROF_VERSION}
+ENV NV_CUDA_CUDART_DEV_VERSION=12.8.57-1
+ENV NV_NVML_DEV_VERSION=12.8.55-1
+ENV NV_LIBCUBLAS_DEV_VERSION=12.8.3.14-1
+ENV NV_LIBNPP_DEV_VERSION=12.3.3.65-1
+ENV NV_LIBNPP_DEV_PACKAGE=libnpp-devel-12-8-${NV_LIBNPP_DEV_VERSION}
 ENV NV_LIBNCCL_DEV_PACKAGE_NAME=libnccl-devel
-ENV NV_LIBNCCL_DEV_PACKAGE_VERSION=2.17.1-1
-ENV NCCL_VERSION=2.17.1
-ENV NV_LIBNCCL_DEV_PACKAGE=${NV_LIBNCCL_DEV_PACKAGE_NAME}-${NV_LIBNCCL_DEV_PACKAGE_VERSION}+cuda12.1
-ENV NV_CUDA_NSIGHT_COMPUTE_VERSION=12.1.1-1
-ENV NV_CUDA_NSIGHT_COMPUTE_DEV_PACKAGE=cuda-nsight-compute-12-1-${NV_CUDA_NSIGHT_COMPUTE_VERSION}
+ENV NV_LIBNCCL_DEV_PACKAGE_VERSION=2.25.1-1
+ENV NCCL_VERSION=2.25.1
+ENV NV_LIBNCCL_DEV_PACKAGE=${NV_LIBNCCL_DEV_PACKAGE_NAME}-${NV_LIBNCCL_DEV_PACKAGE_VERSION}+cuda12.8
+ENV NV_CUDA_NSIGHT_COMPUTE_VERSION=12.8.0-1
+ENV NV_CUDA_NSIGHT_COMPUTE_DEV_PACKAGE=cuda-nsight-compute-12-8-${NV_CUDA_NSIGHT_COMPUTE_VERSION}
 
 RUN yum install -y \
     make \
     findutils \
-    cuda-command-line-tools-12-1-${NV_CUDA_LIB_VERSION} \
-    cuda-libraries-devel-12-1-${NV_CUDA_LIB_VERSION} \
-    cuda-minimal-build-12-1-${NV_CUDA_LIB_VERSION} \
-    cuda-cudart-devel-12-1-${NV_CUDA_CUDART_DEV_VERSION} \
-    ${NV_NVPROF_DEV_PACKAGE} \
-    cuda-nvml-devel-12-1-${NV_NVML_DEV_VERSION} \
-    libcublas-devel-12-1-${NV_LIBCUBLAS_DEV_VERSION} \
+    cuda-command-line-tools-12-8-${NV_CUDA_LIB_VERSION} \
+    cuda-libraries-devel-12-8-${NV_CUDA_LIB_VERSION} \
+    cuda-minimal-build-12-8-${NV_CUDA_LIB_VERSION} \
+    cuda-cudart-devel-12-8-${NV_CUDA_CUDART_DEV_VERSION} \
+    cuda-nvml-devel-12-8-${NV_NVML_DEV_VERSION} \
+    libcublas-devel-12-8-${NV_LIBCUBLAS_DEV_VERSION} \
    ${NV_LIBNPP_DEV_PACKAGE} \
     ${NV_LIBNCCL_DEV_PACKAGE} \
     ${NV_CUDA_NSIGHT_COMPUTE_DEV_PACKAGE} \
+    && if [ "${TARGETARCH}" != "arm64" ]; then \
+        yum install -y ${NV_NVPROF_DEV_PACKAGE}; \
+    fi \
     && yum clean all \
     && rm -rf /var/cache/yum/*
 
 ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs
 
-# Install CUDA devel cudnn8 from:
-# https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/12.1.1/ubi9/devel/cudnn8/Dockerfile
-ENV NV_CUDNN_VERSION=8.9.0.131-1
-ENV NV_CUDNN_PACKAGE=libcudnn8-${NV_CUDNN_VERSION}.cuda12.1
-ENV NV_CUDNN_PACKAGE_DEV=libcudnn8-devel-${NV_CUDNN_VERSION}.cuda12.1
+# Install CUDA devel cudnn from:
+# https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/12.8.0/ubi9/devel/cudnn/Dockerfile
+ENV NV_CUDNN_VERSION=9.7.0.66-1
+ENV NV_CUDNN_PACKAGE=libcudnn9-cuda-12-${NV_CUDNN_VERSION}
+ENV NV_CUDNN_PACKAGE_DEV=libcudnn9-devel-cuda-12-${NV_CUDNN_VERSION}
 
 LABEL com.nvidia.cudnn.version="${NV_CUDNN_VERSION}"
 
@@ -133,4 +143,4 @@ RUN micropipenv install && rm -f ./Pipfile.lock
 
 # Restore user workspace
 USER 1001
-WORKDIR /opt/app-root/src
+WORKDIR /opt/app-root/src
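
The `TARGETARCH`-gated steps above (the `sbsa` repo key, the `cuda.repo-arm64` copy, and the skipped `nvprof` install) only take effect when that build arg is populated, which BuildKit-style builders do automatically from the requested platform. A local-build sketch against this repo's layout; the tag name is a hypothetical local one, `podman build --platform` should behave the same way, and cross-building on an x86_64 host additionally needs qemu user emulation configured:

    # TARGETARCH=arm64 is injected automatically by --platform
    docker buildx build --platform linux/arm64 \
        -t ray-cuda:local-arm64 \
        images/runtime/ray/cuda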

images/runtime/ray/cuda/README.md

Lines changed: 1 addition & 1 deletion
@@ -5,5 +5,5 @@ CUDA enabled container image for Ray in OpenShift AI.
 It includes the following layers:
 * UBI 9
 * Python 3.12
-* CUDA 12.1
+* CUDA 12.8
 * Ray 2.47.1
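
As a quick sanity check of those layers once the image is available, a sketch assuming the published cu128 tag and that `python` and `ray` are on the default path inside the image:

    # Print the Python and Ray versions baked into the runtime image
    podman run --rm quay.io/modh/ray:2.47.1-py312-cu128 \
        python -c 'import sys, ray; print(sys.version.split()[0], ray.__version__)'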
