File tree Expand file tree Collapse file tree 2 files changed +24
-12
lines changed
gpustack_runtime/detector Expand file tree Collapse file tree 2 files changed +24
-12
lines changed Original file line number Diff line number Diff line change 44import logging
55import math
66import re
7+ import threading
78import time
89from _ctypes import byref
910from functools import lru_cache
@@ -573,6 +574,9 @@ def _get_gpm_metrics(
573574 return list (dev_gpm_metrics .metrics )
574575
575576
577+ _gpm_metrics_lock = threading .Lock ()
578+
579+
576580def _get_sm_util_from_gpm_metrics (
577581 dev : pynvml .c_nvmlDevice_t ,
578582 gpu_instance_id : int | None = None ,
@@ -593,12 +597,14 @@ def _get_sm_util_from_gpm_metrics(
593597 The SM utilization as an integer percentage, or None if failed.
594598
595599 """
596- dev_gpm_metrics = _get_gpm_metrics (
597- metrics = [pynvml .NVML_GPM_METRIC_SM_UTIL ],
598- dev = dev ,
599- gpu_instance_id = gpu_instance_id ,
600- interval = interval ,
601- )
600+ with _gpm_metrics_lock :
601+ dev_gpm_metrics = _get_gpm_metrics (
602+ metrics = [pynvml .NVML_GPM_METRIC_SM_UTIL ],
603+ dev = dev ,
604+ gpu_instance_id = gpu_instance_id ,
605+ interval = interval ,
606+ )
607+
602608 if dev_gpm_metrics and not math .isnan (dev_gpm_metrics [0 ].value ):
603609 return int (dev_gpm_metrics [0 ].value )
604610
Original file line number Diff line number Diff line change 33import contextlib
44import logging
55import math
6+ import threading
67import time
78from functools import lru_cache
89
@@ -544,6 +545,9 @@ def _get_gpm_metrics(
544545 return list (dev_gpm_metrics .metrics )
545546
546547
548+ _gpm_metrics_lock = threading .Lock ()
549+
550+
547551def _get_sm_util_from_gpm_metrics (
548552 dev : pyhgml .c_hgmlDevice_t ,
549553 gpu_instance_id : int | None = None ,
@@ -564,12 +568,14 @@ def _get_sm_util_from_gpm_metrics(
564568 The SM utilization as an integer percentage, or None if failed.
565569
566570 """
567- dev_gpm_metrics = _get_gpm_metrics (
568- metrics = [pyhgml .HGML_GPM_METRIC_SM_UTIL ],
569- dev = dev ,
570- gpu_instance_id = gpu_instance_id ,
571- interval = interval ,
572- )
571+ with _gpm_metrics_lock :
572+ dev_gpm_metrics = _get_gpm_metrics (
573+ metrics = [pyhgml .HGML_GPM_METRIC_SM_UTIL ],
574+ dev = dev ,
575+ gpu_instance_id = gpu_instance_id ,
576+ interval = interval ,
577+ )
578+
573579 if dev_gpm_metrics and not math .isnan (dev_gpm_metrics [0 ].value ):
574580 return int (dev_gpm_metrics [0 ].value )
575581
You can’t perform that action at this time.
0 commit comments