11import subprocess
22import time
33from pathlib import Path
4+ import os
45
56import numpy as np
67
7- from kernel_tuner .observers .observer import BenchmarkObserver
8+ from kernel_tuner .observers .observer import BenchmarkObserver , ContinuousObserver
9+ from kernel_tuner .observers .pmt import PMTObserver
10+ from kernel_tuner .observers .powersensor import PowerSensorObserver
811
912
1013class tegra :
1114 """Class that gathers the Tegra functionality for one device."""
1215
def __init__(self, powerPath="", tempPath=""):
    """Create object to control GPU core clock on a Tegra device.

    :param powerPath: Directory, written with a trailing "/", that holds the
        ``curr<N>_input`` / ``in<N>_input`` files and the ``of_node``
        directory of the power sensor. When empty, the directory is
        searched for with ``get_power_path()``.
    :param tempPath: Directory, written with a trailing "/", that holds the
        ``temp`` file of the GPU thermal zone. When empty, the directory is
        searched for with ``get_temp_path()``.
    :raises FileNotFoundError: If no power sensor directory can be found.
    """
    # Set before anything that can raise: __del__ reads this flag, also on
    # a partially constructed object.
    self.has_changed_clocks = False

    # Get paths
    self.dev_path = self.get_dev_path()
    self.gpu_temp_path = tempPath if tempPath else self.get_temp_path()
    if not powerPath:
        powerPath = self.get_power_path()
        if powerPath is None:
            # get_power_path() signals "nothing found" with None; fail here
            # with a clear message instead of a TypeError further down.
            raise FileNotFoundError("No GPU sensor for power found")
    self.gpu_power_path = powerPath
    self.gpu_channel = self.get_gpu_channel()

    # Read default clock values
    self.default_min_gr_clock = self._read_clock_file("min_freq")
    self.default_max_gr_clock = self._read_clock_file("max_freq")
    self.supported_gr_clocks = self._read_clock_file("available_frequencies")

    self.default_railgate_status = self._read_railgate_file()
37+
2538 @staticmethod
2639 def get_dev_path ():
2740 """Get the path to device core clock control in /sys"""
@@ -36,6 +49,49 @@ def get_dev_path():
3649 raise FileNotFoundError ("No internal tegra GPU found" )
3750 return root_path
3851
def get_temp_path(self, thermal_root="/sys/class/thermal"):
    """Find the directory which holds the GPU temperature.

    Every entry below ``thermal_root`` is inspected; the entry whose
    ``type`` file contains ``GPU-therm`` is selected.

    :param thermal_root: Directory that contains the thermal entries.
    :returns: The selected directory as a string with a trailing "/", so
        that a file name such as ``"temp"`` can be appended directly.
    :raises FileNotFoundError: If no entry reports the type ``GPU-therm``.
    """
    for zone in Path(thermal_root).iterdir():
        try:
            with open(zone / "type") as fp:
                name = fp.read().strip()
        except OSError:
            # Entry without a readable "type" file; it cannot be the GPU zone
            continue
        if name == "GPU-therm":
            # A Path cannot be concatenated with a str using "+", convert first
            return f"{zone}/"

    raise FileNotFoundError("No GPU sensor for temperature found")
64+
def get_power_path(self, start_path="/sys/bus/i2c/drivers/ina3221"):
    """Search the tree below start_path for a directory with power readings.

    A directory qualifies when it directly contains a regular file named
    ``curr1_input``. Entries whose name occurs as a substring of the path
    searched so far are not descended into.

    :param start_path: Directory in which the search starts.
    :returns: The qualifying directory with a trailing "/", or None when the
        tree does not contain one.
    """
    for name in os.listdir(start_path):
        candidate = os.path.join(start_path, name)
        if name == "curr1_input" and os.path.isfile(candidate):
            return start_path + "/"
        # Skip names already present in the path, and anything that is not
        # a directory
        if name in start_path or not os.path.isdir(candidate):
            continue
        found = self.get_power_path(candidate)
        if found:
            return found
    return None
79+
def get_gpu_channel(self):
    """Get the channel number of the sensor which measures the GPU power.

    Looks at every ``channel@<n>`` entry in the ``of_node`` directory below
    ``self.gpu_power_path`` and picks the one whose ``label`` file mentions
    "GPU".

    :returns: The last character of the entry name, read as an integer and
        incremented by one, as a string.
    :raises FileNotFoundError: If no channel label mentions "GPU".
    """
    of_node = Path(self.gpu_power_path + "of_node/")
    for node in of_node.iterdir():
        if "channel@" not in node.name:
            continue
        with open(node / Path("label")) as fp:
            label = fp.read().strip()
        if "GPU" in label:
            return str(int(node.name[-1]) + 1)

    # Every channel was inspected and none of them belongs to the GPU
    raise FileNotFoundError("No channel found with GPU power readings")
94+
3995 def _read_railgate_file (self ):
4096 """Read railgate status"""
4197 with open (self .dev_path / Path ("device/railgate_enable" )) as fp :
@@ -115,7 +171,22 @@ def __del__(self):
115171 if self .has_changed_clocks :
116172 self .reset_clock ()
117173
118-
def read_gpu_temp(self):
    """Read the GPU temperature.

    :returns: The integer stored in the ``temp`` file below
        ``self.gpu_temp_path``, divided by 1000.
    """
    with open(self.gpu_temp_path + "temp") as fp:
        raw_reading = int(fp.read())
    return raw_reading / 1000
179+
def read_gpu_power(self):
    """Read the current and voltage to calculate and return the power in watt.

    Both values are read with ``sudo cat`` from the ``curr<N>_input`` and
    ``in<N>_input`` files below ``self.gpu_power_path``, where ``<N>`` is
    ``self.gpu_channel``. Each raw reading is divided by 1000 before the two
    are multiplied.
    """

    def read_scaled(prefix):
        # Read one sensor file through sudo and scale the integer by 1/1000
        completed = subprocess.run(
            ["sudo", "cat", f"{self.gpu_power_path}{prefix}{self.gpu_channel}_input"],
            capture_output=True,
            text=True,
        )
        return int(completed.stdout.strip()) / 1000

    current = read_scaled("curr")
    voltage = read_scaled("in")
    return current * voltage
189+
119190class TegraObserver (BenchmarkObserver ):
120191 """Observer that uses /sys/ to monitor and control graphics clock frequencies on a Tegra device.
121192
@@ -131,46 +202,71 @@ class TegraObserver(BenchmarkObserver):
def __init__(
    self,
    observables,
    save_all=False,
    powerPath="",
    tempPath="",
):
    """Create a TegraObserver.

    :param observables: Names of the quantities to observe; each must be one
        of "core_freq", "gpu_temp", "gpu_power" or "gpu_energy".
    :param save_all: Stored on the instance as ``save_all``.
    :param powerPath: Forwarded to ``tegra``; empty means auto-detect.
    :param tempPath: Forwarded to ``tegra``; empty means auto-detect.
    :raises ValueError: If an unsupported observable is requested.
    """
    self.tegra = tegra(powerPath=powerPath, tempPath=tempPath)
    self.save_all = save_all
    self._set_units = False

    supported = ["core_freq", "gpu_temp", "gpu_power", "gpu_energy"]
    unsupported = [name for name in observables if name not in supported]
    if unsupported:
        obs = unsupported[0]
        raise ValueError(f"Observable {obs} not in supported: {supported}")
    self.observables = observables

    # Power-related observables are delegated to a continuous observer and
    # removed from the list this observer handles itself
    self.needs_power = ["gpu_power", "gpu_energy"]
    power_observables = [name for name in observables if name in self.needs_power]
    self.measure_power = bool(power_observables)
    if self.measure_power:
        self.continuous_observer = tegraPowerObserver(
            power_observables, self, continous_duration=3
        )
        self.observables = [
            name for name in observables if name not in self.needs_power
        ]

    # One result list per remaining observable, keyed by its plural name
    self.results = {name + "s": [] for name in self.observables}

    # Observables that are sampled while the kernel is running
    self.during_obs = [
        name for name in observables if name in ("core_freq", "gpu_temp")
    ]
    self.iteration = {name: [] for name in self.during_obs}
243+
157244
def before_start(self):
    """Reset the per-measurement samples and register the power unit once."""
    # Start the next measurement with empty sample lists
    self.iteration = {name: [] for name in self.during_obs}
    # The power unit is set to Watts on the first call only
    if not self._set_units:
        self.dev.units["power"] = "W"
        self._set_units = True
161252
def after_start(self):
    """Record the start time and take a first sample."""
    started_at = time.perf_counter()
    self.t0 = started_at
    # Guarantees at least one sample, however short the measurement is
    self.during()
165257
def during(self):
    """Take one sample of every requested observable that is sampled at runtime."""
    readers = {
        "core_freq": lambda: self.tegra.gr_clock,
        "gpu_temp": lambda: self.tegra.read_gpu_temp(),
    }
    for name, read in readers.items():
        if name in self.observables:
            self.iteration[name].append(read())
169263
def after_finish(self):
    """Store the average of this measurement's samples for each observable."""
    for name in ("core_freq", "gpu_temp"):
        if name in self.observables:
            samples = self.iteration[name]
            self.results[name + "s"].append(np.average(samples))
269+
174270 def get_results (self ):
175271 averaged_results = {}
176272
@@ -207,3 +303,70 @@ def get_tegra_gr_clocks(n=None, quiet=False):
207303 if not quiet :
208304 print ("Using gr frequencies:" , tune_params ["tegra_gr_clock" ])
209305 return tune_params
306+
307+
class tegraPowerObserver(ContinuousObserver):
    """Observer that measures power using tegra and continuous benchmarking."""

    def __init__(self, observables, parent, continous_duration=1):
        """Create the power observer.

        :param observables: Subset of "gpu_power" and "gpu_energy" to report.
        :param parent: Observer whose hooks are called first in every hook of
            this class and whose ``tegra`` attribute supplies the readings.
        :param continous_duration: Benchmark duration in seconds, stored as
            ``continuous_duration``.
        :raises ValueError: If an unsupported observable is requested.
        """
        self.parent = parent

        supported = ["gpu_power", "gpu_energy"]
        rejected = [name for name in observables if name not in supported]
        if rejected:
            obs = rejected[0]
            raise ValueError(f"Observable {obs} not in supported: {supported}")
        self.observables = observables

        # duration in seconds
        self.continuous_duration = continous_duration

        self.power = self.energy = 0
        self.power_readings = []
        self.t0 = 0

        # results from the last iteration-based benchmark
        self.results = None

    def before_start(self):
        """Forward to the parent and discard earlier measurements."""
        self.parent.before_start()
        self.power = self.energy = 0
        self.power_readings = []

    def after_start(self):
        """Forward to the parent and record the start time."""
        self.parent.after_start()
        self.t0 = time.perf_counter()

    def during(self):
        """Forward to the parent and take one timestamped power sample."""
        self.parent.during()
        power_usage = self.parent.tegra.read_gpu_power()
        timestamp = time.perf_counter() - self.t0
        if self.power_readings:
            last_time, last_power = self.power_readings[-1]
            # Drop the sample when it repeats the previous value and at most
            # 0.01 s has passed since that value was stored
            if last_power == power_usage and timestamp - last_time <= 0.01:
                return
        self.power_readings.append([timestamp, power_usage])

    def after_finish(self):
        """Forward to the parent and derive power and energy from the samples."""
        self.parent.after_finish()
        # Without any sample, power and energy keep their reset value of 0
        if not self.power_readings:
            return

        # self.results["time"] is in milliseconds, convert to seconds
        execution_time = self.results["time"] / 1e3
        samples = [power for _, power in self.power_readings]
        self.power = np.median(samples)
        self.energy = self.power * execution_time

    def get_results(self):
        """Return the parent's results with a "pwr_" prefix plus the power results."""
        results = self.parent.get_results()
        # Rename in place; iterate over a snapshot because keys are replaced
        for key in list(results):
            results["pwr_" + key] = results.pop(key)

        if "gpu_power" in self.observables:
            results["gpu_power"] = self.power
        if "gpu_energy" in self.observables:
            results["gpu_energy"] = self.energy

        return results
0 commit comments