Skip to content

Commit 99d7476

Browse files
committed
temp: DCGM - internal repository.
1 parent 99c5882 commit 99d7476

File tree

2 files changed

+31
-11
lines changed

2 files changed

+31
-11
lines changed

Dockerfile.sdk

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,13 @@ RUN pip3 install --upgrade "numpy<2" pillow attrdict && \
263263
"tritonclient-*linux*.whl" | xargs printf -- '%s[all]' | \
264264
xargs pip3 install --upgrade
265265

266+
ARG DCGM_SOURCE_LIST
266267
# Install DCGM
268+
RUN if [ -n "${DCGM_SOURCE_LIST}" ]; then \
269+
echo "deb [trusted=yes] $DCGM_SOURCE_LIST / " > /etc/apt/sources.list.d/dcgm-list.list && \
270+
cat /etc/apt/sources.list.d/dcgm-list.list; \
271+
fi
272+
267273
RUN if [ "$TRITON_ENABLE_GPU" = "ON" ]; then \
268274
[ "$(uname -m)" != "x86_64" ] && arch="sbsa" || arch="x86_64" && \
269275
curl -o /tmp/cuda-keyring.deb \

build.py

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -841,7 +841,15 @@ def tensorrtllm_cmake_args(images):
841841
return cargs
842842

843843

844-
def install_dcgm_libraries(dcgm_version, target_machine):
844+
def install_dcgm_libraries(dcgm_version):
845+
if os.getenv("DCGM_SOURCE_LIST"):
846+
dcgm_source_list = """
847+
RUN echo "deb [trusted=yes] {} / " > /etc/apt/sources.list.d/dcgm-list.list \\
848+
&& cat /etc/apt/sources.list.d/dcgm-list.list""".format(
849+
os.getenv("DCGM_SOURCE_LIST")
850+
)
851+
else:
852+
dcgm_source_list = ""
845853
if dcgm_version == "":
846854
fail(
847855
"unable to determine default repo-tag, DCGM version not known for {}".format(
@@ -852,11 +860,13 @@ def install_dcgm_libraries(dcgm_version, target_machine):
852860
else:
853861
# RHEL has the same install instructions for both aarch64 and x86
854862
if target_platform() == "rhel":
855-
if target_machine == "aarch64":
856-
return """
863+
return (
864+
dcgm_source_list
865+
+ """
857866
ENV DCGM_VERSION {}
858867
# Install DCGM. Steps from https://developer.nvidia.com/dcgm#Downloads
859-
RUN dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo \\
868+
RUN ARCH=$( [ $(uname -m) = "x86_64" ] && echo "$(uname -m)" || echo "sbsa" ) \\
869+
&& dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/${{ARCH}}/cuda-rhel8.repo \\
860870
&& dnf clean expire-cache \\
861871
&& dnf install --assumeyes \\
862872
datacenter-gpu-manager-4-core=1:{} \\
@@ -876,13 +886,16 @@ def install_dcgm_libraries(dcgm_version, target_machine):
876886
""".format(
877887
dcgm_version, dcgm_version, dcgm_version
878888
)
889+
)
879890
else:
880-
if target_machine == "aarch64":
881-
return """
891+
return (
892+
dcgm_source_list
893+
+ """
882894
ENV DCGM_VERSION {}
883895
# Install DCGM. Steps from https://developer.nvidia.com/dcgm#Downloads
884-
RUN curl -o /tmp/cuda-keyring.deb \\
885-
https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/sbsa/cuda-keyring_1.1-1_all.deb \\
896+
RUN ARCH=$( [ $(uname -m) = "x86_64" ] && echo "$(uname -m)" || echo "sbsa" ) \\
897+
&& curl -o /tmp/cuda-keyring.deb \\
898+
https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/${{ARCH}}/cuda-keyring_1.1-1_all.deb \\
886899
&& apt install /tmp/cuda-keyring.deb \\
887900
&& rm /tmp/cuda-keyring.deb \\
888901
&& apt update \\
@@ -907,6 +920,7 @@ def install_dcgm_libraries(dcgm_version, target_machine):
907920
""".format(
908921
dcgm_version, dcgm_version, dcgm_version
909922
)
923+
)
910924

911925

912926
def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap):
@@ -1007,7 +1021,7 @@ def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap):
10071021
&& mv /tmp/boost_1_80_0/boost /usr/include/boost
10081022
"""
10091023
if FLAGS.enable_gpu:
1010-
df += install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine())
1024+
df += install_dcgm_libraries(argmap["DCGM_VERSION"])
10111025
df += """
10121026
ENV TRITON_SERVER_VERSION ${TRITON_VERSION}
10131027
ENV NVIDIA_TRITON_SERVER_VERSION ${TRITON_CONTAINER_VERSION}
@@ -1120,7 +1134,7 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):
11201134
"""
11211135

11221136
if FLAGS.enable_gpu:
1123-
df += install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine())
1137+
df += install_dcgm_libraries(argmap["DCGM_VERSION"])
11241138

11251139
df += """
11261140
ENV TRITON_SERVER_VERSION ${TRITON_VERSION}
@@ -1412,7 +1426,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
14121426
df += fastertransformer_buildscript.create_postbuild(is_multistage_build=False)
14131427

14141428
if enable_gpu:
1415-
df += install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine)
1429+
df += install_dcgm_libraries(argmap["DCGM_VERSION"])
14161430
# This segment will break the RHEL SBSA build. Need to determine whether
14171431
# this is necessary to incorporate.
14181432
if target_platform() != "rhel":

0 commit comments

Comments
 (0)