@@ -53,29 +53,17 @@ def get_device_utilization(device_id, device_count, synchronizer_func):
5353 synchronizer_func ()
5454 time .sleep (1 )
5555
56- output = (
57- subprocess . check_output (
58- [
59- "nvidia-smi " ,
60- f"--query-gpu=index,gpu_uuid,utilization.gpu,memory.used,memory.total" ,
61- "--format=csv,noheader,nounits" ,
62- ]
63- )
64- . decode ( )
65- .strip ()
56+ cmd = [
57+ "nvidia-smi" ,
58+ f"--query-gpu=index,gpu_uuid,utilization.gpu,memory.used,memory.total" ,
59+ "--format=csv,noheader,nounits " ,
60+ ]
61+ output = subprocess . check_output ( cmd ). decode (). strip ()
62+ _ , selected_gpu_uuid , gpu_util , used_mem , mem_total = next (
63+ line . split ( ", " )
64+ for line in output . split ( " \n " )
65+ if line .strip () and int ( line . split ( ", " )[ 0 ]) == selected_gpu_id
6666 )
67- for line in output .split ("\n " ):
68- if line .strip ():
69- (
70- gpu_id ,
71- selected_gpu_uuid ,
72- gpu_util ,
73- used_mem ,
74- mem_total ,
75- ) = line .split (", " )
76- if int (gpu_id ) == selected_gpu_id :
77- break
78-
7967 gpu_util = float (gpu_util )
8068 mem_util = float (used_mem ) * 100 / float (mem_total )
8169 print (
@@ -88,22 +76,19 @@ def get_device_utilization(device_id, device_count, synchronizer_func):
8876 max_mem_util = mem_util if mem_util > max_mem_util else max_mem_util
8977
9078 other_tasks = []
91- output = (
92- subprocess .check_output (
93- [
94- "nvidia-smi" ,
95- f"--query-compute-apps=gpu_uuid,pid,used_memory" ,
96- "--format=csv,noheader,nounits" ,
97- ]
98- )
99- .decode ()
100- .strip ()
101- )
102- for line in output .split ("\n " ):
103- if line .strip ():
104- gpu_uuid , pid , used_memory = line .split (", " )
105- if gpu_uuid == selected_gpu_uuid and int (pid ) != current_pid :
106- other_tasks .append (line )
79+ cmd = [
80+ "nvidia-smi" ,
81+ f"--query-compute-apps=gpu_uuid,pid,used_memory" ,
82+ "--format=csv,noheader,nounits" ,
83+ ]
84+ output = subprocess .check_output (cmd ).decode ().strip ()
85+ other_tasks = [
86+ line
87+ for line in output .split ("\n " )
88+ if line .strip ()
89+ and (line .split (", " )[0 ] == selected_gpu_uuid )
90+ and (line .split (", " )[1 ] != current_pid )
91+ ]
10792 # Note: in a Docker container, current_pid may be different from the PID captured by nvidia-smi.
10893 print (
10994 f"Note: There are { len (other_tasks )} tasks running on GPU { selected_gpu_id } (current_pid:{ current_pid } )." ,
0 commit comments