Skip to content

Commit c2c5ea6

Browse files
sutaakar authored and openshift-merge-bot[bot] committed
Remove unnecessary CUDA dependencies from CUDA training image
1 parent 4d955c4 commit c2c5ea6

File tree

1 file changed

+8
-43
lines changed

1 file changed

+8
-43
lines changed

images/runtime/training/cuda/Dockerfile

Lines changed: 8 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -32,59 +32,24 @@ ENV CUDA_VERSION=12.1.0 \
3232
NVIDIA_VISIBLE_DEVICES=all \
3333
NVIDIA_DRIVER_CAPABILITIES=compute,utility \
3434
NV_CUDA_CUDART_VERSION=12.1.55-1 \
35-
NV_CUDA_COMPAT_VERSION=530.30.02-1
35+
NV_CUDA_COMPAT_VERSION=530.30.02-1 \
36+
NV_CUDA_NVCC_VERSION=12.1.66-1
3637

38+
# Ref: https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/12.1.1/ubi9/base/Dockerfile
39+
# nvcc is required for Flash Attention
3740
RUN dnf config-manager \
3841
--add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
3942
&& dnf install -y \
4043
cuda-cudart-12-1-${NV_CUDA_CUDART_VERSION} \
4144
cuda-compat-12-1-${NV_CUDA_COMPAT_VERSION} \
45+
cuda-nvcc-12-1-${NV_CUDA_NVCC_VERSION} \
4246
&& echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf \
4347
&& echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf \
4448
&& dnf clean all
4549

46-
RUN dnf -y install --allowerasing cudnn9-cuda-12
47-
4850
ENV CUDA_HOME="/usr/local/cuda" \
4951
PATH="/usr/local/nvidia/bin:${CUDA_HOME}/bin:${PATH}" \
50-
LD_LIBRARY_PATH="/usr/local/nvidia/lib:/usr/local/nvidia/lib64:$CUDA_HOME/lib64:$CUDA_HOME/extras/CUPTI/lib64:$LD_LIBRARY_PATH" \
51-
LD_LIBRARY_PATH="/usr/local/cuda-9.0/lib64:$LD_LIBRARY_PATH"
52-
53-
# Ref: https://developer.nvidia.com/nccl/nccl-legacy-downloads
54-
ENV NV_CUDA_CUDART_DEV_VERSION=12.1.55-1 \
55-
NV_NVML_DEV_VERSION=12.1.55-1 \
56-
NV_LIBCUBLAS_DEV_VERSION=12.1.0.26-1 \
57-
NV_LIBNPP_DEV_VERSION=12.0.2.50-1 \
58-
NV_LIBNCCL_DEV_PACKAGE_VERSION=2.18.3-1+cuda12.1
59-
60-
RUN dnf config-manager \
61-
--add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
62-
&& dnf install -y \
63-
cuda-command-line-tools-12-1-${NV_CUDA_LIB_VERSION} \
64-
cuda-libraries-devel-12-1-${NV_CUDA_LIB_VERSION} \
65-
cuda-minimal-build-12-1-${NV_CUDA_LIB_VERSION} \
66-
cuda-cudart-devel-12-1-${NV_CUDA_CUDART_DEV_VERSION} \
67-
cuda-nvml-devel-12-1-${NV_NVML_DEV_VERSION} \
68-
libcublas-devel-12-1-${NV_LIBCUBLAS_DEV_VERSION} \
69-
libnpp-devel-12-1-${NV_LIBNPP_DEV_VERSION} \
70-
libnccl-devel-${NV_LIBNCCL_DEV_PACKAGE_VERSION} \
71-
&& dnf clean all
72-
73-
ENV LIBRARY_PATH="$CUDA_HOME/lib64/stubs"
74-
75-
# Install CUDA devel cudnn8 from:
76-
# https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/12.1.1/ubi9/devel/cudnn8/Dockerfile
77-
ENV NV_CUDNN_VERSION=8.9.0.131-1
78-
ENV NV_CUDNN_PACKAGE=libcudnn8-${NV_CUDNN_VERSION}.cuda12.1
79-
ENV NV_CUDNN_PACKAGE_DEV=libcudnn8-devel-${NV_CUDNN_VERSION}.cuda12.1
80-
81-
LABEL com.nvidia.cudnn.version="${NV_CUDNN_VERSION}"
82-
83-
RUN dnf install -y \
84-
${NV_CUDNN_PACKAGE} \
85-
${NV_CUDNN_PACKAGE_DEV} \
86-
&& dnf clean all \
87-
&& rm -rf /var/cache/dnf/*
52+
LD_LIBRARY_PATH="/usr/local/nvidia/lib:/usr/local/nvidia/lib64:$CUDA_HOME/lib64:$CUDA_HOME/extras/CUPTI/lib64:$LD_LIBRARY_PATH"
8853

8954
# Install InfiniBand and RDMA packages
9055
RUN dnf config-manager \
@@ -106,15 +71,15 @@ RUN pip install --no-cache-dir -U "micropipenv[toml]"
10671
# Install Python dependencies from Pipfile.lock file
10772
COPY Pipfile.lock ./
10873

109-
RUN micropipenv install && \
74+
RUN micropipenv install -- --no-cache-dir && \
11075
rm -f ./Pipfile.lock && \
11176
# Fix permissions to support pip in OpenShift environments \
11277
chmod -R g+w /opt/app-root/lib/python3.11/site-packages && \
11378
fix-permissions /opt/app-root -P
11479

11580
# Install Flash Attention
11681
RUN pip install wheel
117-
RUN pip install flash-attn==2.7.4.post1 --no-build-isolation
82+
RUN pip install --no-cache-dir flash-attn==2.7.4.post1 --no-build-isolation
11883

11984
# Restore user workspace
12085
USER 1001

0 commit comments

Comments
 (0)