Skip to content

Commit 8b1d6cc

Browse files
authored
ci: build devel image with cuda 12.8 for blackwell (#391)
1 parent abf6344 commit 8b1d6cc

File tree

3 files changed

+65
-25
lines changed

3 files changed

+65
-25
lines changed

.github/workflows/publish_devel_image.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ jobs:
2121
strategy:
2222
fail-fast: false
2323
matrix:
24-
cuda: ["12.1", "12.4", "12.6"]
24+
cuda: ["12.1", "12.4", "12.6", "12.8"]
2525
gcc: ["12"]
2626
include: # build cuda 11.8 with gcc 11
2727
- cuda: "11.8"

docker/Dockerfile.devel

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,6 @@ RUN if [ -n "${CCACHE_VERSION}" ]; then bash ./install_ccache.sh; fi
4040
RUN rm install_ccache.sh
4141
RUN ccache --version
4242

43-
# Install cuda, cudnn and nccl
44-
ARG CUDA_VERSION=12.1
45-
COPY ./common/install_cuda.sh install_cuda.sh
46-
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh
47-
ENV DESIRED_CUDA=${CUDA_VERSION}
48-
ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH
49-
RUN nvcc --version
50-
5143
# Install rust
5244
ENV RUSTUP_HOME=/usr/local/rustup
5345
ENV CARGO_HOME=/usr/local/cargo
@@ -57,13 +49,12 @@ RUN curl https://sh.rustup.rs -sSf | sh -s -- -y
5749
RUN chmod -R a+w ${RUSTUP_HOME} ${CARGO_HOME}
5850
RUN rustup --version; cargo --version; rustc --version
5951

60-
# Install jemalloc (optional)
61-
RUN cd /tmp && \
62-
wget -nc --no-check-certificate https://github.com/jemalloc/jemalloc/releases/download/5.3.0/jemalloc-5.3.0.tar.bz2 && \
63-
tar -xvf jemalloc-5.3.0.tar.bz2 && \
64-
(cd jemalloc-5.3.0 && \
65-
./configure --enable-prof --disable-initial-exec-tls && \
66-
make -j$(nproc) && make install && \
67-
ldconfig)
52+
# Install cuda, cudnn and nccl
53+
ARG CUDA_VERSION=12.1
54+
COPY ./common/install_cuda.sh install_cuda.sh
55+
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh
56+
ENV DESIRED_CUDA=${CUDA_VERSION}
57+
ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH
58+
RUN nvcc --version
6859

6960
CMD ["bash"]

docker/common/install_cuda.sh

Lines changed: 57 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
set -ex
66

77
NCCL_VERSION=v2.21.5-1
8-
CUDNN_VERSION=9.1.0.70
8+
CUDNN_VERSION=9.5.1.17
99

1010
function install_cusparselt_040 {
1111
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
@@ -40,7 +40,19 @@ function install_cusparselt_062 {
4040
rm -rf tmp_cusparselt
4141
}
4242

43+
# Download the cuSparseLt 0.6.3.2 redistributable for linux-x86_64 and copy its
# headers and libraries into the active CUDA toolkit under /usr/local/cuda.
# Works inside a scratch directory (tmp_cusparselt) created in the current
# working directory and removes it again afterwards.
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
install_cusparselt_063() {
  local archive_dir=libcusparse_lt-linux-x86_64-0.6.3.2-archive
  local tarball="${archive_dir}.tar.xz"
  local redist_url=https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64

  # Kept as a single && list on purpose: it matches how the other
  # install_cusparselt_* helpers enter their scratch directory.
  mkdir tmp_cusparselt && pushd tmp_cusparselt
  wget -q "${redist_url}/${tarball}"
  tar xf "${tarball}"
  # -a preserves the symlink chain between the versioned shared objects.
  cp -a "${archive_dir}"/include/* /usr/local/cuda/include/
  cp -a "${archive_dir}"/lib/* /usr/local/cuda/lib64/
  popd
  rm -rf tmp_cusparselt
}
53+
4354
function install_118 {
55+
CUDNN_VERSION=9.1.0.70
4456
echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.4.0"
4557
rm -rf /usr/local/cuda-11.8 /usr/local/cuda
4658
# install CUDA 11.8.0 in the same container
@@ -107,6 +119,7 @@ function install_121 {
107119
}
108120

109121
function install_124 {
122+
CUDNN_VERSION=9.1.0.70
110123
echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
111124
rm -rf /usr/local/cuda-12.4 /usr/local/cuda
112125
# install CUDA 12.4.1 in the same container
@@ -140,13 +153,13 @@ function install_124 {
140153
}
141154

142155
function install_126 {
143-
echo "Installing CUDA 12.6.2 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
156+
echo "Installing CUDA 12.6.3 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
144157
rm -rf /usr/local/cuda-12.6 /usr/local/cuda
145-
# install CUDA 12.6.2 in the same container
146-
wget -q https://developer.download.nvidia.com/compute/cuda/12.6.2/local_installers/cuda_12.6.2_560.35.03_linux.run
147-
chmod +x cuda_12.6.2_560.35.03_linux.run
148-
./cuda_12.6.2_560.35.03_linux.run --toolkit --silent
149-
rm -f cuda_12.6.2_560.35.03_linux.run
158+
# install CUDA 12.6.3 in the same container
159+
wget -q https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda_12.6.3_560.35.05_linux.run
160+
chmod +x cuda_12.6.3_560.35.05_linux.run
161+
./cuda_12.6.3_560.35.05_linux.run --toolkit --silent
162+
rm -f cuda_12.6.3_560.35.05_linux.run
150163
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.6 /usr/local/cuda
151164

152165
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
@@ -167,7 +180,7 @@ function install_126 {
167180
cd ..
168181
rm -rf nccl
169182

170-
install_cusparselt_062
183+
install_cusparselt_063
171184

172185
ldconfig
173186
}
@@ -302,6 +315,40 @@ function prune_126 {
302315
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.3.2 $CUDA_BASE/nsight-systems-2024.5.1/
303316
}
304317

318+
# Install the CUDA 12.8.0 toolkit plus cuDNN, NCCL and cuSparseLt 0.6.3, and
# point /usr/local/cuda at the new toolkit.
# Globals:
#   NCCL_VERSION  (read) git tag of NVIDIA/nccl that is cloned and built.
#   CUDNN_VERSION shadowed locally with the 12.8-specific pin; the file-level
#                 value is left untouched for other install_* functions.
# Downloads into, and builds inside, the current working directory; writes to
# /usr/local/cuda-12.8 and /usr/local/cuda.
function install_128 {
  # `local` matters: the argument loop at the bottom of this script can run
  # several install_* functions in one invocation, and a plain assignment here
  # would silently change the cuDNN version used by any later install.
  local CUDNN_VERSION=9.7.0.66
  local cuda_runfile=cuda_12.8.0_570.86.10_linux.run
  local cudnn_archive="cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive"

  echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
  rm -rf /usr/local/cuda-12.8 /usr/local/cuda
  # install CUDA 12.8.0 in the same container
  wget -q "https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/${cuda_runfile}"
  chmod +x "${cuda_runfile}"
  # --toolkit: install only the toolkit, not the bundled display driver.
  "./${cuda_runfile}" --toolkit --silent
  rm -f "${cuda_runfile}"
  rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.8 /usr/local/cuda

  # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
  mkdir tmp_cudnn && cd tmp_cudnn
  wget -q "https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/${cudnn_archive}.tar.xz" -O "${cudnn_archive}.tar.xz"
  tar xf "${cudnn_archive}.tar.xz"
  cp -a "${cudnn_archive}"/include/* /usr/local/cuda/include/
  cp -a "${cudnn_archive}"/lib/* /usr/local/cuda/lib64/
  cd ..
  rm -rf tmp_cudnn

  # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
  # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
  git clone -b "${NCCL_VERSION}" --depth 1 https://github.com/NVIDIA/nccl.git
  # NOTE(review): a bare `-j` puts no limit on parallel jobs; kept as in the
  # original build recipe - confirm the CI runners have enough memory.
  cd nccl && make -j src.build
  cp -a build/include/* /usr/local/cuda/include/
  cp -a build/lib/* /usr/local/cuda/lib64/
  cd ..
  rm -rf nccl

  install_cusparselt_063

  # Refresh the dynamic linker cache so the newly copied libraries resolve.
  ldconfig
}
351+
305352
# idiomatic parameter and option handling in sh
306353
while test $# -gt 0
307354
do
@@ -314,6 +361,8 @@ do
314361
;;
315362
12.6) install_126; prune_126
316363
;;
364+
12.8) install_128;
365+
;;
317366
*) echo "bad argument $1"; exit 1
318367
;;
319368
esac

0 commit comments

Comments
 (0)