Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/publish_devel_image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
strategy:
fail-fast: false
matrix:
cuda: ["12.1", "12.4", "12.6"]
cuda: ["12.1", "12.4", "12.6", "12.8"]
gcc: ["12"]
include: # build cuda 11.8 with gcc 11
- cuda: "11.8"
Expand Down
23 changes: 7 additions & 16 deletions docker/Dockerfile.devel
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,6 @@ RUN if [ -n "${CCACHE_VERSION}" ]; then bash ./install_ccache.sh; fi
RUN rm install_ccache.sh
RUN ccache --version

# Install cuda, cudnn and nccl
ARG CUDA_VERSION=12.1
COPY ./common/install_cuda.sh install_cuda.sh
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh
ENV DESIRED_CUDA=${CUDA_VERSION}
ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH
RUN nvcc --version

# Install rust
ENV RUSTUP_HOME=/usr/local/rustup
ENV CARGO_HOME=/usr/local/cargo
Expand All @@ -57,13 +49,12 @@ RUN curl https://sh.rustup.rs -sSf | sh -s -- -y
RUN chmod -R a+w ${RUSTUP_HOME} ${CARGO_HOME}
RUN rustup --version; cargo --version; rustc --version

# Install jemalloc (optional)
RUN cd /tmp && \
wget -nc --no-check-certificate https://github.com/jemalloc/jemalloc/releases/download/5.3.0/jemalloc-5.3.0.tar.bz2 && \
tar -xvf jemalloc-5.3.0.tar.bz2 && \
(cd jemalloc-5.3.0 && \
./configure --enable-prof --disable-initial-exec-tls && \
make -j$(nproc) && make install && \
ldconfig)
# Install cuda, cudnn and nccl
ARG CUDA_VERSION=12.1
COPY ./common/install_cuda.sh install_cuda.sh
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh
ENV DESIRED_CUDA=${CUDA_VERSION}
ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH
RUN nvcc --version

CMD ["bash"]
65 changes: 57 additions & 8 deletions docker/common/install_cuda.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
set -ex

NCCL_VERSION=v2.21.5-1
CUDNN_VERSION=9.1.0.70
CUDNN_VERSION=9.5.1.17

function install_cusparselt_040 {
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
Expand Down Expand Up @@ -40,7 +40,19 @@ function install_cusparselt_062 {
rm -rf tmp_cusparselt
}

function install_cusparselt_063 {
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && pushd tmp_cusparselt
wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.6.3.2-archive.tar.xz
tar xf libcusparse_lt-linux-x86_64-0.6.3.2-archive.tar.xz
cp -a libcusparse_lt-linux-x86_64-0.6.3.2-archive/include/* /usr/local/cuda/include/
cp -a libcusparse_lt-linux-x86_64-0.6.3.2-archive/lib/* /usr/local/cuda/lib64/
popd
rm -rf tmp_cusparselt
}

function install_118 {
CUDNN_VERSION=9.1.0.70
echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.4.0"
rm -rf /usr/local/cuda-11.8 /usr/local/cuda
# install CUDA 11.8.0 in the same container
Expand Down Expand Up @@ -107,6 +119,7 @@ function install_121 {
}

function install_124 {
CUDNN_VERSION=9.1.0.70
echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
rm -rf /usr/local/cuda-12.4 /usr/local/cuda
# install CUDA 12.4.1 in the same container
Expand Down Expand Up @@ -140,13 +153,13 @@ function install_124 {
}

function install_126 {
echo "Installing CUDA 12.6.2 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
echo "Installing CUDA 12.6.3 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
rm -rf /usr/local/cuda-12.6 /usr/local/cuda
# install CUDA 12.6.2 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.6.2/local_installers/cuda_12.6.2_560.35.03_linux.run
chmod +x cuda_12.6.2_560.35.03_linux.run
./cuda_12.6.2_560.35.03_linux.run --toolkit --silent
rm -f cuda_12.6.2_560.35.03_linux.run
# install CUDA 12.6.3 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda_12.6.3_560.35.05_linux.run
chmod +x cuda_12.6.3_560.35.05_linux.run
./cuda_12.6.3_560.35.05_linux.run --toolkit --silent
rm -f cuda_12.6.3_560.35.05_linux.run
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.6 /usr/local/cuda

# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
Expand All @@ -167,7 +180,7 @@ function install_126 {
cd ..
rm -rf nccl

install_cusparselt_062
install_cusparselt_063

ldconfig
}
Expand Down Expand Up @@ -302,6 +315,40 @@ function prune_126 {
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.3.2 $CUDA_BASE/nsight-systems-2024.5.1/
}

function install_128 {
CUDNN_VERSION=9.7.0.66
echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
rm -rf /usr/local/cuda-12.8 /usr/local/cuda
# install CUDA 12.8.0 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_570.86.10_linux.run
chmod +x cuda_12.8.0_570.86.10_linux.run
./cuda_12.8.0_570.86.10_linux.run --toolkit --silent
rm -f cuda_12.8.0_570.86.10_linux.run
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.8 /usr/local/cuda

# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn && cd tmp_cudnn
wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_cudnn

# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
cd nccl && make -j src.build
cp -a build/include/* /usr/local/cuda/include/
cp -a build/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf nccl

install_cusparselt_063

ldconfig
}

# idiomatic parameter and option handling in sh
while test $# -gt 0
do
Expand All @@ -314,6 +361,8 @@ do
;;
12.6) install_126; prune_126
;;
12.8) install_128;
;;
*) echo "bad argument $1"; exit 1
;;
esac
Expand Down
Loading