Skip to content

Commit af4fe7f

Browse files
authored
Merge branch 'main' into mwittwer/explicit_model_load_parsing
2 parents 4d46bd0 + ace2c5b commit af4fe7f

File tree

4 files changed

+16 -12 lines changed

Dockerfile.sdk

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ ARG JAVA_BINDINGS_MAVEN_VERSION=3.8.4
4343
ARG JAVA_BINDINGS_JAVACPP_PRESETS_TAG=1.5.8
4444
ARG TRITON_PERF_ANALYZER_BUILD=1
4545
# DCGM version to install for Model Analyzer
46-
ARG DCGM_VERSION=3.3.6
46+
ARG DCGM_VERSION=4.2.3-2
4747

4848
ARG NVIDIA_TRITON_SERVER_SDK_VERSION=unknown
4949
ARG NVIDIA_BUILD_ID=unknown
@@ -265,7 +265,7 @@ RUN if [ "$TRITON_ENABLE_GPU" = "ON" ]; then \
265265
curl -o /tmp/cuda-keyring.deb \
266266
https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/$arch/cuda-keyring_1.1-1_all.deb \
267267
&& apt install /tmp/cuda-keyring.deb && rm /tmp/cuda-keyring.deb && \
268-
apt-get update && apt-get install -y datacenter-gpu-manager=1:${DCGM_VERSION}; \
268+
apt update && apt install --yes datacenter-gpu-manager-4-dev=1:${DCGM_VERSION}; \
269269
fi
270270

271271
# Build expects "python" executable (not python3).

build.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@
7777
"ort_version": "1.22.0",
7878
"ort_openvino_version": "2025.2.0",
7979
"standalone_openvino_version": "2025.2.0",
80-
"dcgm_version": "3.3.6",
80+
"dcgm_version": "4.2.3-2",
8181
"vllm_version": "0.9.0.1",
8282
"rhel_py_version": "3.12.3",
8383
}
@@ -858,7 +858,7 @@ def install_dcgm_libraries(dcgm_version, target_machine):
858858
# Install DCGM. Steps from https://developer.nvidia.com/dcgm#Downloads
859859
RUN dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo \\
860860
&& dnf clean expire-cache \\
861-
&& dnf install -y datacenter-gpu-manager-{}
861+
&& dnf install --assumeyes datacenter-gpu-manager-4-devel=1:{}
862862
""".format(
863863
dcgm_version, dcgm_version
864864
)
@@ -868,7 +868,7 @@ def install_dcgm_libraries(dcgm_version, target_machine):
868868
# Install DCGM. Steps from https://developer.nvidia.com/dcgm#Downloads
869869
RUN dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo \\
870870
&& dnf clean expire-cache \\
871-
&& dnf install -y datacenter-gpu-manager-{}
871+
&& dnf install --assumeyes datacenter-gpu-manager-4-devel=1:{}
872872
""".format(
873873
dcgm_version, dcgm_version
874874
)
@@ -881,8 +881,8 @@ def install_dcgm_libraries(dcgm_version, target_machine):
881881
https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/sbsa/cuda-keyring_1.1-1_all.deb \\
882882
&& apt install /tmp/cuda-keyring.deb \\
883883
&& rm /tmp/cuda-keyring.deb \\
884-
&& apt-get update \\
885-
&& apt-get install -y datacenter-gpu-manager=1:{}
884+
&& apt update \\
885+
&& apt install --yes datacenter-gpu-manager-4-dev=1:{}
886886
""".format(
887887
dcgm_version, dcgm_version
888888
)
@@ -894,8 +894,8 @@ def install_dcgm_libraries(dcgm_version, target_machine):
894894
https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb \\
895895
&& apt install /tmp/cuda-keyring.deb \\
896896
&& rm /tmp/cuda-keyring.deb \\
897-
&& apt-get update \\
898-
&& apt-get install -y datacenter-gpu-manager=1:{}
897+
&& apt update \\
898+
&& apt install --yes datacenter-gpu-manager-4-dev=1:{}
899899
""".format(
900900
dcgm_version, dcgm_version
901901
)

compose.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ def create_argmap(images, skip_pull):
298298
dcgm_ver = re.search("DCGM_VERSION=([\S]{4,}) ", vars)
299299
dcgm_version = ""
300300
if dcgm_ver is None:
301-
dcgm_version = "3.3.6"
301+
dcgm_version = "4.2.3-2"
302302
log(
303303
"WARNING: DCGM version not found from image, installing the earlierst version {}".format(
304304
dcgm_version

qa/common/test_util.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,7 @@ def check_gpus_compute_capability(min_capability):
312312

313313
import importlib.util
314314

315-
if importlib.util.find_spec("cuda.core.experimental"):
315+
if importlib.util.find_spec("cuda") is not None:
316316
import cuda.core.experimental as cuda_core_experimental
317317

318318
devices = cuda_core_experimental.system.devices
@@ -325,7 +325,7 @@ def check_gpus_compute_capability(min_capability):
325325
if compute_capability < min_capability:
326326
return False
327327

328-
elif importlib.util.find_spec("pycuda.driver"):
328+
elif importlib.util.find_spec("pycuda") is not None:
329329
import pycuda.driver as cuda
330330

331331
cuda.init()
@@ -339,6 +339,10 @@ def check_gpus_compute_capability(min_capability):
339339

340340
if compute_capability_value < min_capability:
341341
return False
342+
else:
343+
raise RuntimeError(
344+
"No packages found to determine the compute capability. Please check the environment."
345+
)
342346

343347
return True
344348

0 commit comments

Comments
 (0)