@@ -841,7 +841,15 @@ def tensorrtllm_cmake_args(images):
841
841
return cargs
842
842
843
843
844
- def install_dcgm_libraries (dcgm_version , target_machine ):
844
+ def install_dcgm_libraries (dcgm_version ):
845
+ if os .getenv ("DCGM_SOURCE_LIST" ):
846
+ dcgm_source_list = """
847
+ RUN echo "deb [trusted=yes] {} / " > /etc/apt/sources.list.d/dcgm-list.list \\
848
+ && cat /etc/apt/sources.list.d/dcgm-list.list""" .format (
849
+ os .getenv ("DCGM_SOURCE_LIST" )
850
+ )
851
+ else :
852
+ dcgm_source_list = ""
845
853
if dcgm_version == "" :
846
854
fail (
847
855
"unable to determine default repo-tag, DCGM version not known for {}" .format (
@@ -852,11 +860,13 @@ def install_dcgm_libraries(dcgm_version, target_machine):
852
860
else :
853
861
# RHEL has the same install instructions for both aarch64 and x86
854
862
if target_platform () == "rhel" :
855
- if target_machine == "aarch64" :
856
- return """
863
+ return (
864
+ dcgm_source_list
865
+ + """
857
866
ENV DCGM_VERSION {}
858
867
# Install DCGM. Steps from https://developer.nvidia.com/dcgm#Downloads
859
- RUN dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo \\
868
+ RUN ARCH=$( [ $(uname -m) = "x86_64" ] && echo "$(uname -m)" || echo "sbsa" ) \\
869
+ && dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/${{ARCH}}/cuda-rhel8.repo \\
860
870
&& dnf clean expire-cache \\
861
871
&& dnf install --assumeyes \\
862
872
datacenter-gpu-manager-4-core=1:{} \\
@@ -876,13 +886,16 @@ def install_dcgm_libraries(dcgm_version, target_machine):
876
886
""" .format (
877
887
dcgm_version , dcgm_version , dcgm_version
878
888
)
889
+ )
879
890
else :
880
- if target_machine == "aarch64" :
881
- return """
891
+ return (
892
+ dcgm_source_list
893
+ + """
882
894
ENV DCGM_VERSION {}
883
895
# Install DCGM. Steps from https://developer.nvidia.com/dcgm#Downloads
884
- RUN curl -o /tmp/cuda-keyring.deb \\
885
- https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/sbsa/cuda-keyring_1.1-1_all.deb \\
896
+ RUN ARCH=$( [ $(uname -m) = "x86_64" ] && echo "$(uname -m)" || echo "sbsa" ) \\
897
+ && curl -o /tmp/cuda-keyring.deb \\
898
+ https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/${{ARCH}}/cuda-keyring_1.1-1_all.deb \\
886
899
&& apt install /tmp/cuda-keyring.deb \\
887
900
&& rm /tmp/cuda-keyring.deb \\
888
901
&& apt update \\
@@ -907,6 +920,7 @@ def install_dcgm_libraries(dcgm_version, target_machine):
907
920
""" .format (
908
921
dcgm_version , dcgm_version , dcgm_version
909
922
)
923
+ )
910
924
911
925
912
926
def create_dockerfile_buildbase_rhel (ddir , dockerfile_name , argmap ):
@@ -1007,7 +1021,7 @@ def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap):
1007
1021
&& mv /tmp/boost_1_80_0/boost /usr/include/boost
1008
1022
"""
1009
1023
if FLAGS .enable_gpu :
1010
- df += install_dcgm_libraries (argmap ["DCGM_VERSION" ], target_machine () )
1024
+ df += install_dcgm_libraries (argmap ["DCGM_VERSION" ])
1011
1025
df += """
1012
1026
ENV TRITON_SERVER_VERSION ${TRITON_VERSION}
1013
1027
ENV NVIDIA_TRITON_SERVER_VERSION ${TRITON_CONTAINER_VERSION}
@@ -1120,7 +1134,7 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):
1120
1134
"""
1121
1135
1122
1136
if FLAGS .enable_gpu :
1123
- df += install_dcgm_libraries (argmap ["DCGM_VERSION" ], target_machine () )
1137
+ df += install_dcgm_libraries (argmap ["DCGM_VERSION" ])
1124
1138
1125
1139
df += """
1126
1140
ENV TRITON_SERVER_VERSION ${TRITON_VERSION}
@@ -1412,7 +1426,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
1412
1426
df += fastertransformer_buildscript .create_postbuild (is_multistage_build = False )
1413
1427
1414
1428
if enable_gpu :
1415
- df += install_dcgm_libraries (argmap ["DCGM_VERSION" ], target_machine )
1429
+ df += install_dcgm_libraries (argmap ["DCGM_VERSION" ])
1416
1430
# This segment will break the RHEL SBSA build. Need to determine whether
1417
1431
# this is necessary to incorporate.
1418
1432
if target_platform () != "rhel" :
0 commit comments