@@ -323,7 +323,7 @@ def __init__(
323323 save_all = False ,
324324 nvidia_smi_fallback = None ,
325325 use_locked_clocks = False ,
326- continous_duration = 1 ,
326+ continuous_duration = 1 ,
327327 ):
328328 """Create an NVMLObserver."""
329329 if nvidia_smi_fallback :
@@ -355,7 +355,7 @@ def __init__(
355355 if any ([obs in self .needs_power for obs in observables ]):
356356 self .measure_power = True
357357 power_observables = [obs for obs in observables if obs in self .needs_power ]
358- self .continuous_observer = NVMLPowerObserver ( power_observables , self , self . nvml , continous_duration )
358+ self .continuous_observer = ContinuousObserver ( "nvml" , power_observables , self , continuous_duration = continuous_duration )
359359
360360 # remove power observables
361361 self .observables = [obs for obs in observables if obs not in self .needs_power ]
@@ -373,6 +373,10 @@ def __init__(
373373 self .during_obs = [obs for obs in observables if obs in ["core_freq" , "mem_freq" , "temperature" ]]
374374 self .iteration = {obs : [] for obs in self .during_obs }
375375
376+ def read_power (self ):
377+ """Return the power in watts: the ``self.nvml.pwr_usage()`` reading divided by 1e3."""
378+ return self .nvml .pwr_usage () / 1e3
379+
376380 def before_start (self ):
377381 # clear results of the observables for next measurement
378382 self .iteration = {obs : [] for obs in self .during_obs }
@@ -428,75 +432,6 @@ def get_results(self):
428432 return averaged_results
429433
430434
431- class NVMLPowerObserver (ContinuousObserver ):
432- """Observer that measures power using NVML and continuous benchmarking."""
433-
434- def __init__ (self , observables , parent , nvml_instance , continous_duration = 1 ):
435- self .parent = parent
436- self .nvml = nvml_instance
437-
438- supported = ["power_readings" , "nvml_power" , "nvml_energy" ]
439- for obs in observables :
440- if obs not in supported :
441- raise ValueError (f"Observable { obs } not in supported: { supported } " )
442- self .observables = observables
443-
444- # duration in seconds
445- self .continuous_duration = continous_duration
446-
447- self .power = 0
448- self .energy = 0
449- self .power_readings = []
450- self .t0 = 0
451-
452- # results from the last iteration-based benchmark
453- self .results = None
454-
455- def before_start (self ):
456- self .parent .before_start ()
457- self .power = 0
458- self .energy = 0
459- self .power_readings = []
460-
461- def after_start (self ):
462- self .parent .after_start ()
463- self .t0 = time .perf_counter ()
464-
465- def during (self ):
466- self .parent .during ()
467- power_usage = self .nvml .pwr_usage ()
468- timestamp = time .perf_counter () - self .t0
469- # only store the result if we get a new measurement from NVML
470- if len (self .power_readings ) == 0 or (
471- self .power_readings [- 1 ][1 ] != power_usage or timestamp - self .power_readings [- 1 ][0 ] > 0.01
472- ):
473- self .power_readings .append ([timestamp , power_usage ])
474-
475- def after_finish (self ):
476- self .parent .after_finish ()
477- # safeguard in case we have no measurements, perhaps the kernel was too short to measure anything
478- if not self .power_readings :
479- return
480-
481- # convert to seconds from milliseconds
482- execution_time = self .results ["time" ] / 1e3
483- self .power = np .median ([d [1 ] / 1e3 for d in self .power_readings ])
484- self .energy = self .power * execution_time
485-
486- def get_results (self ):
487- results = self .parent .get_results ()
488- keys = list (results .keys ())
489- for key in keys :
490- results ["pwr_" + key ] = results .pop (key )
491- if "nvml_energy" in self .observables :
492- results ["nvml_energy" ] = self .energy
493- if "nvml_power" in self .observables :
494- results ["nvml_power" ] = self .power
495- if "power_readings" in self .observables :
496- results ["power_readings" ] = self .power_readings
497- return results
498-
499-
500435# High-level Helper functions
501436
502437
0 commit comments