Skip to content

Commit a86f12a

Browse files
MartinMarciniszyn authored and codego7250 committed
[None][chore] reduce the layers of the devel docker image (NVIDIA#9077)
Signed-off-by: Martin Marciniszyn Mehringer <11665257+MartinMarciniszyn@users.noreply.github.com>
1 parent 4276a8e commit a86f12a

File tree

4 files changed

+52
-46
lines changed

4 files changed

+52
-46
lines changed

docker/Dockerfile.multi

Lines changed: 34 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -12,9 +12,10 @@ LABEL com.nvidia.eula="https://www.nvidia.com/en-us/agreements/enterprise-softwa
1212
LABEL com.nvidia.ai-terms="https://www.nvidia.com/en-us/agreements/enterprise-software/product-specific-terms-for-ai-products/"
1313

1414
# https://www.gnu.org/software/bash/manual/html_node/Bash-Startup-Files.html
15-
# The default values come from `nvcr.io/nvidia/pytorch`
16-
ENV BASH_ENV=${BASH_ENV:-/etc/bash.bashrc}
17-
ENV ENV=${ENV:-/etc/shinit_v2}
15+
ARG SH_ENV="/etc/shinit_v2"
16+
ENV ENV=${SH_ENV}
17+
ARG BASH_ENV="/etc/bash.bashrc"
18+
ENV BASH_ENV=${BASH_ENV}
1819

1920
ARG GITHUB_MIRROR=""
2021
RUN echo "Using GitHub mirror: $GITHUB_MIRROR"
@@ -43,48 +44,41 @@ COPY docker/common/install.sh \
4344
docker/common/install_ucx.sh \
4445
docker/common/install_nixl.sh \
4546
docker/common/install_etcd.sh \
46-
docker/common/install_mooncake.sh \
4747
./
4848

49-
RUN GITHUB_MIRROR=${GITHUB_MIRROR} \
50-
PYTHON_VERSION=${PYTHON_VERSION} \
51-
bash ./install.sh --base && rm install_base.sh
52-
53-
RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install.sh --cmake && rm install_cmake.sh
54-
55-
RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install.sh --ccache && rm install_ccache.sh
56-
57-
RUN bash ./install.sh --cuda_toolkit && rm install_cuda_toolkit.sh
58-
5949
ARG TRT_VER
6050
ARG CUDA_VER
6151
ARG CUDNN_VER
6252
ARG NCCL_VER
6353
ARG CUBLAS_VER
64-
RUN TRT_VER=${TRT_VER} \
54+
ARG TORCH_INSTALL_TYPE="skip"
55+
RUN GITHUB_MIRROR=${GITHUB_MIRROR} \
56+
PYTHON_VERSION=${PYTHON_VERSION} \
57+
TRT_VER=${TRT_VER} \
6558
CUDA_VER=${CUDA_VER} \
6659
CUDNN_VER=${CUDNN_VER} \
6760
NCCL_VER=${NCCL_VER} \
6861
CUBLAS_VER=${CUBLAS_VER} \
69-
bash ./install.sh --tensorrt && rm install_tensorrt.sh
70-
71-
RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install.sh --polygraphy && rm install_polygraphy.sh
72-
73-
RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install.sh --mpi4py && rm install_mpi4py.sh
74-
75-
ARG TORCH_INSTALL_TYPE="skip"
76-
RUN TORCH_INSTALL_TYPE=${TORCH_INSTALL_TYPE} bash ./install.sh --pytorch && rm install_pytorch.sh
77-
78-
RUN bash ./install.sh --opencv && rm install.sh
79-
80-
# Install UCX first
81-
RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install_ucx.sh && rm install_ucx.sh
82-
83-
# Install NIXL
84-
RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install_nixl.sh && rm install_nixl.sh
85-
86-
# Install etcd
87-
RUN bash ./install_etcd.sh && rm install_etcd.sh
62+
TORCH_INSTALL_TYPE=${TORCH_INSTALL_TYPE} \
63+
bash ./install.sh --base --cmake --ccache --cuda_toolkit --tensorrt --polygraphy --mpi4py --pytorch --opencv && \
64+
rm install_base.sh && \
65+
rm install_cmake.sh && \
66+
rm install_ccache.sh && \
67+
rm install_cuda_toolkit.sh && \
68+
rm install_tensorrt.sh && \
69+
rm install_polygraphy.sh && \
70+
rm install_mpi4py.sh && \
71+
rm install_pytorch.sh && \
72+
rm install.sh
73+
74+
# Install UCX, NIXL, etcd
75+
# TODO: Combine these into the main install.sh script
76+
RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install_ucx.sh && \
77+
GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install_nixl.sh && \
78+
bash ./install_etcd.sh && \
79+
rm install_ucx.sh && \
80+
rm install_nixl.sh && \
81+
rm install_etcd.sh
8882

8983
FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton
9084

@@ -99,16 +93,18 @@ COPY --from=triton /opt/tritonserver/caches /opt/tritonserver/caches
9993

10094
# Copy all installation scripts at once to reduce layers
10195
COPY docker/common/install_triton.sh \
96+
docker/common/install_mooncake.sh \
10297
./
10398

104-
RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install_triton.sh && rm install_triton.sh
105-
10699
# Install Mooncake, after triton handles boost requirement
107-
RUN if [ -f /etc/redhat-release ]; then \
100+
RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install_triton.sh && \
101+
if [ -f /etc/redhat-release ]; then \
108102
echo "Rocky8 detected, skipping mooncake installation"; \
109103
else \
110104
bash ./install_mooncake.sh; \
111-
fi && rm install_mooncake.sh
105+
fi && \
106+
rm install_triton.sh && \
107+
rm install_mooncake.sh
112108

113109
FROM ${DEVEL_IMAGE} AS wheel
114110
WORKDIR /src/tensorrt_llm

docker/Makefile

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,16 @@ define rewrite_tag
7575
$(shell echo $(IMAGE_WITH_TAG) | sed "s/\/tensorrt-llm:/\/tensorrt-llm-staging:/g")
7676
endef
7777

78+
base_pull:
79+
@echo "Pulling base image: $(BASE_IMAGE):$(BASE_TAG)"
80+
docker pull $(BASE_IMAGE):$(BASE_TAG)
81+
7882
%_build: DEVEL_IMAGE = $(if $(findstring 1,$(JENKINS_DEVEL)),$(shell . ../jenkins/current_image_tags.properties && echo $$LLM_DOCKER_IMAGE))
79-
%_build:
83+
%_build: SH_ENV = $(shell docker inspect --format='{{range .Config.Env}}{{println .}}{{end}}' $(BASE_IMAGE):$(BASE_TAG) \
84+
| grep '^ENV=' | sed 's/^[^=]*=//' 2>/dev/null)
85+
%_build: BASH_ENV = $(shell docker inspect --format='{{range .Config.Env}}{{println .}}{{end}}' $(BASE_IMAGE):$(BASE_TAG) \
86+
| grep '^BASH_ENV=' | sed 's/^[^=]*=//' 2>/dev/null)
87+
%_build: base_pull
8088
@echo "Building docker image: $(IMAGE_WITH_TAG)"
8189
docker buildx build $(DOCKER_BUILD_OPTS) $(DOCKER_BUILD_ARGS) \
8290
--progress $(DOCKER_PROGRESS) \
@@ -97,6 +105,8 @@ endef
97105
$(if $(GIT_COMMIT), --build-arg GIT_COMMIT="$(GIT_COMMIT)") \
98106
$(if $(GITHUB_MIRROR), --build-arg GITHUB_MIRROR="$(GITHUB_MIRROR)") \
99107
$(if $(PYTHON_VERSION), --build-arg PYTHON_VERSION="$(PYTHON_VERSION)") \
108+
$(if $(SH_ENV), --build-arg SH_ENV="$(SH_ENV)") \
109+
$(if $(BASH_ENV), --build-arg BASH_ENV="$(BASH_ENV)") \
100110
$(if $(STAGE), --target $(STAGE)) \
101111
--file Dockerfile.multi \
102112
--tag $(IMAGE_WITH_TAG) \

docker/common/install.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#!/bin/bash
1+
#!/usr/bin/env bash
22
set -Eeo pipefail
33
shopt -s nullglob
44
trap 'echo "[install.sh] Error on line $LINENO" >&2' ERR
@@ -125,7 +125,7 @@ fi
125125

126126
if [ $opencv -eq 1 ]; then
127127
echo "Installing OpenCV..."
128-
pip3 uninstall -y opencv
128+
bash -c "pip3 uninstall -y opencv"
129129
rm -rf /usr/local/lib/python3*/dist-packages/cv2/
130-
pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir
130+
bash -c "pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir"
131131
fi

jenkins/current_image_tags.properties

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead.
1414
IMAGE_NAME=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm
1515

16-
LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-x86_64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511271125-9294
17-
LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-aarch64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511271125-9294
18-
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py310-trt10.13.3.9-skip-tritondevel-202511271125-9294
19-
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py312-trt10.13.3.9-skip-tritondevel-202511271125-9294
16+
LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-x86_64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511281406-9077
17+
LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-aarch64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511281406-9077
18+
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py310-trt10.13.3.9-skip-tritondevel-202511281406-9077
19+
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py312-trt10.13.3.9-skip-tritondevel-202511281406-9077

0 commit comments

Comments
 (0)