@@ -25,6 +25,10 @@ def naive_timer(duration_box, synchronizer_func):
2525 duration_box .value = (end - start ) * 1000 # Store in milliseconds
2626
2727
def is_gpu_device(device):
    """Whether the device string refers to a GPU-class backend (CUDA or DCU)."""
    return any(tag in device for tag in ("cuda", "dcu"))
31+
2832def get_device_utilization (device_id , device_count , synchronizer_func ):
2933 current_pid = os .getpid ()
3034
@@ -98,6 +102,7 @@ def get_device_utilization(device_id, device_count, synchronizer_func):
98102 gpu_uuid , pid , used_memory = line .split (", " )
99103 if gpu_uuid == selected_gpu_uuid and int (pid ) != current_pid :
100104 other_tasks .append (line )
105+ # Note: in docker container, the current_pid maybe different from that captured by nvidia-smi.
101106 print (
102107 f"Note: There are { len (other_tasks )} tasks running on GPU { selected_gpu_id } (current_pid:{ current_pid } )." ,
103108 file = sys .stderr ,
@@ -169,24 +174,33 @@ def print_basic_config(args, hardware_name, compile_framework_version):
169174 )
170175
171176
def print_running_status(args, eager_success, compiled_success=None):
    """Log the run status of the eager (and, when provided, compiled) pass.

    When *compiled_success* is None the compiled part is omitted entirely,
    so eager-only runs produce a shorter status line.
    """

    def convert_to_str(b):
        return "success" if b else "failed"

    # Build the message once instead of duplicating the print call per branch.
    status = f"eager:{convert_to_str(eager_success)}"
    if compiled_success is not None:
        status += f" compiled:{convert_to_str(compiled_success)}"
    print_with_log_prompt("[Result][status]", status, args.log_prompt)
181193
182194
183195def print_times_and_speedup (args , eager_stats , compiled_stats ):
184- print_with_log_prompt (
185- "[Performance][eager]:" , json .dumps (eager_stats ), args .log_prompt
186- )
187- print_with_log_prompt (
188- "[Performance][compiled]:" , json .dumps (compiled_stats ), args .log_prompt
189- )
196+ if eager_stats :
197+ print_with_log_prompt (
198+ "[Performance][eager]:" , json .dumps (eager_stats ), args .log_prompt
199+ )
200+ if compiled_stats :
201+ print_with_log_prompt (
202+ "[Performance][compiled]:" , json .dumps (compiled_stats ), args .log_prompt
203+ )
190204
191205 e2e_speedup = 0
192206 gpu_speedup = 0
@@ -197,7 +211,7 @@ def print_times_and_speedup(args, eager_stats, compiled_stats):
197211 if eager_e2e_time_ms > 0 and compiled_e2e_time_ms > 0 :
198212 e2e_speedup = eager_e2e_time_ms / compiled_e2e_time_ms
199213
200- if "cuda" in args .device :
214+ if is_gpu_device ( args .device ) :
201215 eager_gpu_time_ms = eager_stats .get ("gpu" , {}).get ("mean" , 0 )
202216 compiled_gpu_time_ms = compiled_stats .get ("gpu" , {}).get ("mean" , 0 )
203217
@@ -207,7 +221,7 @@ def print_times_and_speedup(args, eager_stats, compiled_stats):
207221 if e2e_speedup > 0 :
208222 print_with_log_prompt ("[Speedup][e2e]:" , f"{ e2e_speedup :.5f} " , args .log_prompt )
209223
210- if "cuda" in args .device and gpu_speedup > 0 :
224+ if is_gpu_device ( args .device ) and gpu_speedup > 0 :
211225 print_with_log_prompt ("[Speedup][gpu]:" , f"{ gpu_speedup :.5f} " , args .log_prompt )
212226
213227
0 commit comments