Skip to content

Commit 8f63a70

Browse files
committed
Add TegraObserver, which can monitor and control the graphics clock on a Tegra device
1 parent 4dbcb66 commit 8f63a70

File tree

2 files changed

+212
-3
lines changed

2 files changed

+212
-3
lines changed

kernel_tuner/core.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,8 @@
2020
from kernel_tuner.backends.nvcuda import CudaFunctions
2121
from kernel_tuner.backends.opencl import OpenCLFunctions
2222
from kernel_tuner.backends.compiler import CompilerFunctions
23-
from kernel_tuner.backends.opencl import OpenCLFunctions
24-
from kernel_tuner.backends.hip import HipFunctions
2523
from kernel_tuner.observers.nvml import NVMLObserver
24+
from kernel_tuner.observers.tegra import TegraObserver
2625
from kernel_tuner.observers.observer import ContinuousObserver, OutputObserver
2726

2827
try:
@@ -315,15 +314,19 @@ def __init__(
315314
else:
316315
raise ValueError("Sorry, support for languages other than CUDA, OpenCL, HIP, C, and Fortran is not implemented yet")
317316

318-
# look for NVMLObserver in observers, if present, enable special tunable parameters through nvml
317+
# look for NVMLObserver and TegraObserver in observers, if present, enable special tunable parameters through nvml/tegra
319318
self.use_nvml = False
319+
self.use_tegra = False
320320
self.continuous_observers = []
321321
self.output_observers = []
322322
if observers:
323323
for obs in observers:
324324
if isinstance(obs, NVMLObserver):
325325
self.nvml = obs.nvml
326326
self.use_nvml = True
327+
if isinstance(obs, TegraObserver):
328+
self.tegra = obs.tegra
329+
self.use_tegra = True
327330
if hasattr(obs, "continuous_observer"):
328331
self.continuous_observers.append(obs.continuous_observer)
329332
if isinstance(obs, OutputObserver):
@@ -409,6 +412,10 @@ def benchmark(self, func, gpu_args, instance, verbose, objective):
409412
if "nvml_mem_clock" in instance.params:
410413
self.nvml.mem_clock = instance.params["nvml_mem_clock"]
411414

415+
if self.use_tegra:
416+
if "tegra_gr_clock" in instance.params:
417+
self.tegra.gr_clock = instance.params["tegra_gr_clock"]
418+
412419
# Call the observers to register the configuration to be benchmarked
413420
for obs in self.dev.observers:
414421
obs.register_configuration(instance.params)

kernel_tuner/observers/tegra.py

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
import subprocess
2+
import time
3+
from pathlib import Path
4+
5+
import numpy as np
6+
7+
from kernel_tuner.observers.observer import BenchmarkObserver
8+
9+
10+
class tegra:
    """Class that gathers the Tegra functionality for one device.

    Reads the devfreq control files below ``/sys`` to query the GPU core
    ("gr") clock and writes them through ``sudo`` to pin it. The min/max
    frequency limits and the ``railgate_enable`` value found at construction
    are remembered and written back by :meth:`reset_clock`, which is also
    invoked when the object is destroyed.
    """

    def __init__(self, device_id=0):
        """Create object to control GPU core clock on a Tegra device.

        :param device_id: Device ordinal. Accepted so that callers can pass
            their device number; the sysfs lookup itself is fixed to ``gpu.0``.
        :type device_id: integer
        """
        # Set before anything can raise so that __del__ never tries to restore
        # defaults that were not recorded.
        self._initialized = False
        self.device_id = device_id

        self.dev_path = self.get_dev_path(device_id)
        self.default_min_gr_clock = self._read_clock_file("min_freq")
        self.default_max_gr_clock = self._read_clock_file("max_freq")
        # Always keep an array, even when only one frequency is listed, so
        # that len(), iteration and membership tests work on it.
        self.supported_gr_clocks = np.atleast_1d(self._read_clock_file("available_frequencies"))

        self.default_railgate_status = self._read_railgate_file()
        self._initialized = True

    @staticmethod
    def get_dev_path(device_id=0):
        """Get the path to device core clock control in /sys

        Resolves the ``/sys/devices/gpu.0`` symlink and uses its target as the
        entry name below ``/sys/devices/gpu.0/devfreq``.

        :param device_id: Device ordinal, currently not used for the lookup.
        :returns: Path of the directory holding the frequency control files.
        """
        root_path = Path("/sys/devices/gpu.0")
        gpu_id = root_path.readlink()
        return root_path / Path("devfreq") / gpu_id

    @staticmethod
    def _sudo_write(full_path, value):
        """Write ``value`` into ``full_path`` using ``sudo sh -c "echo ... > ..."``.

        :raises subprocess.CalledProcessError: if the command exits non-zero.
        """
        args = [
            "sudo",
            "sh",
            "-c",
            f"echo {value} > {str(full_path)}"
        ]
        subprocess.run(args, check=True)

    def _read_railgate_file(self):
        """Read railgate status"""
        with open(self.dev_path / Path("device/railgate_enable")) as fp:
            data = int(fp.read().strip())
        return data

    def _write_railgate_file(self, value):
        """Set railgate status

        :param value: New content of ``railgate_enable``, must be 0 or 1.
        :raises ValueError: if ``value`` is neither 0 nor 1.
        """
        if value not in (0, 1):
            raise ValueError(f"Illegal railgate value {value}, must be 0 or 1")
        print(f"Writing {value} to railgate file")
        self._sudo_write(self.dev_path / Path("device/railgate_enable"), value)

    def _read_clock_file(self, fname):
        """Read current or available frequency value(s) from a frequency control file

        :param fname: File name relative to the device path, e.g. "cur_freq".
        :returns: A Python int when the file holds one value, otherwise a
            NumPy integer array with one entry per whitespace-separated value.
        :raises ValueError: if the file holds no value at all.
        """
        full_path = self.dev_path / Path(fname)
        with open(full_path) as fp:
            fields = fp.read().split()
        if not fields:
            raise ValueError(f"No frequency value found in {full_path}")
        if len(fields) > 1:
            return np.array(fields).astype(int)
        # Convert the single field directly instead of calling int() on an array.
        return int(fields[0])

    def _write_clock_file(self, fname, value):
        """Write a frequency value to a core clock control file

        :param fname: Either "min_freq" or "max_freq".
        :param value: Frequency to write, must be one of the supported clocks.
        :raises ValueError: for any other file name or an unsupported frequency.
        """
        available_files = ("min_freq", "max_freq")
        if fname not in available_files:
            raise ValueError(f"Illegal filename value: {fname}, must be one of {available_files}")

        # atleast_1d keeps the membership test valid for a scalar as well.
        if value not in np.atleast_1d(self.supported_gr_clocks):
            raise ValueError(f"Illegal frequency value {value}, must be one of {self.supported_gr_clocks}")

        self._sudo_write(self.dev_path / Path(fname), value)

    @property
    def gr_clock(self):
        """Control the core clock frequency"""
        return self._read_clock_file("cur_freq")

    @gr_clock.setter
    def gr_clock(self, new_clock):
        # writes 0 to railgate_enable before touching the clock limits
        self._write_railgate_file(0)
        cur_clock = self._read_clock_file("cur_freq")
        if new_clock >= cur_clock:
            # Raise the upper limit first so min_freq never exceeds max_freq.
            # The equal case is pinned too; otherwise the limits would stay
            # open and the clock could move away from the requested value.
            self._write_clock_file("max_freq", new_clock)
            self._write_clock_file("min_freq", new_clock)
        else:
            # Lower the lower limit first for the same reason.
            self._write_clock_file("min_freq", new_clock)
            self._write_clock_file("max_freq", new_clock)
        # wait for the new clock to be applied
        while self._read_clock_file("cur_freq") != new_clock:
            time.sleep(.001)

    def reset_clock(self):
        """Reset the core clock frequency to the original values"""
        self._write_clock_file("min_freq", self.default_min_gr_clock)
        self._write_clock_file("max_freq", self.default_max_gr_clock)
        self._write_railgate_file(self.default_railgate_status)

    def __del__(self):
        # restore original core clocks, but only if __init__ got far enough to
        # record them; otherwise there is nothing valid to write back
        if getattr(self, "_initialized", False):
            self.reset_clock()
106+
107+
108+
class TegraObserver(BenchmarkObserver):
    """Observer that uses /sys/ to monitor and control graphics clock frequencies on a Tegra device.

    :param observables: List of quantities that should be observed during tuning, supported is: "core_freq"
    :type observables: list of strings

    :param device: Device ordinal used to identify your device, typically 0
    :type device: integer

    :param save_all: If set to True, all data collected by the TegraObserver for every iteration during
        benchmarking will be returned in addition to the averages.
        If set to False, only the data aggregated over the iterations is returned. False by default.
    :type save_all: boolean

    """

    def __init__(self, observables, device=0, save_all=False):
        """Create a TegraObserver"""
        self.tegra = tegra(device)
        self.save_all = save_all

        # reject the first requested quantity that is not known
        supported = ["core_freq"]
        for name in observables:
            if name in supported:
                continue
            raise ValueError(f"Observable {name} not in supported: {supported}")
        self.observables = observables

        # one list per observable, stored under the pluralised name
        self.results = {name + "s": [] for name in self.observables}

        # quantities that are sampled while the kernel is running
        self.during_obs = [name for name in observables if name in ["core_freq"]]

        self.iteration = {name: [] for name in self.during_obs}

    def before_start(self):
        # start every measurement with empty sample lists
        self.iteration = {name: [] for name in self.during_obs}

    def after_start(self):
        # take one sample right away so there is always at least one
        self.during()

    def during(self):
        if "core_freq" not in self.observables:
            return
        self.iteration["core_freq"].append(self.tegra.gr_clock)

    def after_finish(self):
        if "core_freq" not in self.observables:
            return
        samples = self.iteration["core_freq"]
        self.results["core_freqs"].append(np.average(samples))

    def get_results(self):
        """Return the averaged observations and clear the stored ones.

        Every observable is reported as the average over its stored values;
        when save_all is set, the stored values are also returned under the
        observable name with an "s" appended.
        """
        report = {}

        for name in self.observables:
            series_key = name + "s"
            series = self.results[series_key]
            # the full series comes first, but only on request
            if self.save_all:
                report[series_key] = series
            # the average is always part of the report
            report[name] = np.average(series)

        # start the next round with empty lists
        for name in self.observables:
            self.results[name + "s"] = []

        return report
183+
184+
185+
# High-level Helper functions
186+
187+
188+
def get_tegra_gr_clocks(device=0, n=None, quiet=False):
    """Build the "tegra_gr_clock" tunable parameter from the supported graphics clocks.

    :param device: Device ordinal handed to the tegra helper class.
    :param n: Desired number of values; when given and smaller than the number
        of supported clocks, that many clocks are picked at evenly spread positions.
    :param quiet: Set to True to suppress printing the selected frequencies.
    :returns: Dictionary with the single key "tegra_gr_clock" holding the list of clocks.
    """
    # keep the helper object referenced until the function returns
    handle = tegra(device)
    clocks = handle.supported_gr_clocks

    if n and (len(clocks) > n):
        # evenly spread positions over the full range, rounded up to whole indices
        positions = np.linspace(0, len(clocks) - 1, n)
        picks = np.ceil(positions).astype(int)
        clocks = np.array(clocks)[picks]

    tune_params = {"tegra_gr_clock": list(clocks)}

    if not quiet:
        print("Using gr frequencies:", tune_params["tegra_gr_clock"])
    return tune_params

0 commit comments

Comments
 (0)