Skip to content

Commit d844c19

Browse files
committed
Change how timings are collected in all runners
1 parent ceaa96c commit d844c19

File tree

8 files changed

+78
-66
lines changed

8 files changed

+78
-66
lines changed

doc/source/parallel.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ Setting up Ray
6969
--------------
7070

7171
Kernel Tuner uses `Ray <https://docs.ray.io/en/latest/>`_ to distribute kernel evaluations across multiple GPUs.
72-
ay is an open-source framework for distributed computing in Python.
72+
Ray is an open-source framework for distributed computing in Python.
7373

7474
To use parallel tuning, you must first install Ray itself:
7575

kernel_tuner/runners/parallel.py

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -68,16 +68,13 @@ def get_environment(self):
6868
def run(self, params):
6969
# TODO: logging.debug("sequential runner started for " + self.kernel_options.kernel_name)
7070
result = None
71-
warmup_time = 0
7271

7372
# attempt to warmup the GPU by running the first config in the parameter space and ignoring the result
7473
if not self.warmed_up:
75-
warmup_time = perf_counter()
7674
self.dev.compile_and_benchmark(
7775
self.kernel_source, self.gpu_args, params, self.kernel_options, self.tuning_options
7876
)
7977
self.warmed_up = True
80-
warmup_time = 1e3 * (perf_counter() - warmup_time)
8178

8279
result = self.dev.compile_and_benchmark(
8380
self.kernel_source, self.gpu_args, params, self.kernel_options, self.tuning_options
@@ -173,6 +170,8 @@ def __init__(
173170
observers,
174171
num_workers=None,
175172
):
173+
super().__init__()
174+
176175
if not ray.is_initialized():
177176
ray.init()
178177

@@ -209,8 +208,6 @@ def __init__(
209208
# TODO: Get units from the device?
210209
self.units = {"time": "ms"}
211210
self.quiet = device_options.quiet
212-
self.start_time = perf_counter()
213-
self.last_strategy_time = 0
214211

215212
# Print some debugging information
216213
if tuning_options.verbose:
@@ -332,7 +329,7 @@ def run(self, parameter_space, tuning_options) -> List[Optional[dict]]:
332329
# Collect total time spent by worker
333330
total_worker_time += (
334331
result["compile_time"] + result["verification_time"] + result["benchmark_time"]
335-
)
332+
) / 1000
336333

337334
# only compute metrics on configs that have not errored
338335
if not isinstance(result.get(objective), ErrorConfig):
@@ -365,19 +362,18 @@ def run(self, parameter_space, tuning_options) -> List[Optional[dict]]:
365362

366363
# If there are valid results, set timings
367364
if num_valid_results > 0:
368-
total_time = 1000 * (perf_counter() - self.start_time)
369-
self.start_time = perf_counter()
365+
total_time = self.timer.get_and_reset()
370366

371-
strategy_time = self.last_strategy_time
372-
self.last_strategy_time = 0
367+
strategy_time = self.accumulated_strategy_time
368+
self.accumulated_strategy_time = 0
373369

374370
runner_time = total_time - strategy_time
375371
framework_time = max(runner_time * len(self.workers) - total_worker_time, 0)
376372

377-
# Post-process all the results
373+
# Amortize the time over all the results
378374
for result in results:
379-
# Amortize the time over all the results
380375
if result:
376+
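# Time must be in ms (NOTE(review): strategy_time and framework_time are derived from Timer readings in seconds and are not multiplied by 1000 here, unlike sequential.py; confirm the units)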
381377
result["strategy_time"] = strategy_time / num_valid_results
382378
result["framework_time"] = framework_time / num_valid_results
383379

kernel_tuner/runners/runner.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,18 @@
33

44
from abc import ABC, abstractmethod
55

6+
from kernel_tuner.util import Timer
7+
68

79
class Runner(ABC):
810
"""Base class for kernel_tuner runners"""
911

10-
@abstractmethod
11-
def __init__(
12-
self, kernel_source, kernel_options, device_options, iterations, observers
13-
):
14-
pass
12+
def __init__(self):
13+
self.timer = Timer()
14+
self.accumulated_strategy_time = 0
15+
16+
def add_strategy_time(self, seconds):
17+
self.accumulated_strategy_time += seconds
1518

1619
def shutdown(self):
1720
pass

kernel_tuner/runners/sequential.py

Lines changed: 15 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
from kernel_tuner.core import DeviceInterface
77
from kernel_tuner.runners.runner import Runner
8-
from kernel_tuner.util import ErrorConfig, print_config_output, process_metrics, store_cache, disable_benchmark_timings
8+
from kernel_tuner.util import ErrorConfig, Timer, print_config_output, process_metrics, store_cache, disable_benchmark_timings
99

1010

1111
class SequentialRunner(Runner):
@@ -27,16 +27,14 @@ def __init__(self, kernel_source, kernel_options, device_options, iterations, ob
2727
:type iterations: int
2828
"""
2929
# detect language and create high-level device interface
30-
self.dev = DeviceInterface(kernel_source, iterations=iterations, observers=observers, **device_options)
30+
super().__init__()
3131

32+
self.dev = DeviceInterface(kernel_source, iterations=iterations, observers=observers, **device_options)
3233
self.units = self.dev.units
3334
self.quiet = device_options.quiet
3435
self.kernel_source = kernel_source
3536
self.warmed_up = False if self.dev.requires_warmup else True
3637
self.simulation_mode = False
37-
self.start_time = perf_counter()
38-
self.last_strategy_start_time = self.start_time
39-
self.last_strategy_time = 0
4038
self.kernel_options = kernel_options
4139

4240
# move data to the GPU
@@ -64,7 +62,7 @@ def run(self, parameter_space, tuning_options):
6462
logging.debug("sequential runner started for " + self.kernel_options.kernel_name)
6563

6664
results = []
67-
total_worker_time = 0
65+
worker_time = 0
6866

6967
# iterate over parameter space
7068
for element in parameter_space:
@@ -88,21 +86,21 @@ def run(self, parameter_space, tuning_options):
8886
else:
8987
# attempt to warmup the GPU by running the first config in the parameter space and ignoring the result
9088
if not self.warmed_up:
91-
warmup_time = perf_counter()
89+
warmup_timer = Timer()
9290
self.dev.compile_and_benchmark(
9391
self.kernel_source, self.gpu_args, params, self.kernel_options, tuning_options
9492
)
9593
self.warmed_up = True
96-
warmup_time = 1e3 * (perf_counter() - warmup_time)
94+
warmup_time = warmup_timer.get()
9795

9896
result = self.dev.compile_and_benchmark(
9997
self.kernel_source, self.gpu_args, params, self.kernel_options, tuning_options
10098
)
10199

102-
# Collect total time spent by worker
100+
# Collect total time spent by worker in seconds
103101
worker_time += (
104102
result["compile_time"] + result["verification_time"] + result["benchmark_time"]
105-
)
103+
) / 1000
106104

107105
params.update(result)
108106

@@ -128,20 +126,17 @@ def run(self, parameter_space, tuning_options):
128126
num_valid_results = sum(bool(r) for r in results) # Count the number of valid results
129127

130128
if num_valid_results > 0:
131-
# get the framework time by estimating based on other times
132-
total_time = 1000 * (perf_counter() - self.start_time)
133-
self.start_time = perf_counter()
134-
135-
strategy_time = self.last_strategy_time
136-
self.last_strategy_time = 0
129+
strategy_time = self.accumulated_strategy_time
130+
self.accumulated_strategy_time = 0
137131

132+
# get the framework time by estimating based on other times
133+
total_time = self.timer.get_and_reset() - warmup_time
138134
framework_time = max(total_time - strategy_time - worker_time, 0)
139135

140-
# Post-process all the results
136+
# Amortize the time over all the results
141137
for result in results:
142-
# Amortize the time over all the results
143138
if result:
144-
result["strategy_time"] = strategy_time / num_valid_results
145-
result["framework_time"] = framework_time / num_valid_results
139+
result["strategy_time"] = 1000 * strategy_time / num_valid_results
140+
result["framework_time"] = 1000 * framework_time / num_valid_results
146141

147142
return results

kernel_tuner/runners/simulation.py

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -46,17 +46,15 @@ def __init__(self, kernel_source, kernel_options, device_options, iterations, ob
4646
:param iterations: The number of iterations used for benchmarking each kernel instance.
4747
:type iterations: int
4848
"""
49+
super().__init__()
4950
self.quiet = device_options.quiet
5051
self.dev = SimulationDevice(1024, dict(device_name="Simulation"), self.quiet)
5152

5253
self.kernel_source = kernel_source
5354
self.simulation_mode = True
5455
self.kernel_options = kernel_options
5556

56-
self.start_time = perf_counter()
5757
self.total_simulated_time = 0
58-
self.last_strategy_start_time = self.start_time
59-
self.last_strategy_time = 0
6058
self.visited_results = set()
6159
self.units = {}
6260

@@ -85,9 +83,6 @@ def run(self, parameter_space, tuning_options):
8583

8684
results = []
8785

88-
# self.last_strategy_time is set by cost_func
89-
strategy_time_per_config = self.last_strategy_time / len(parameter_space) if len(parameter_space) > 0 else 0
90-
9186
# iterate over parameter space
9287
for element in parameter_space:
9388

@@ -120,9 +115,6 @@ def run(self, parameter_space, tuning_options):
120115
util.print_config_output(tuning_options.tune_params, result, self.quiet, tuning_options.metrics, self.units)
121116
self.visited_results.add(key)
122117

123-
# Everything but the strategy time and framework time are simulated,
124-
result["strategy_time"] = strategy_time_per_config
125-
126118
# Simulate the evaluation of this configuration
127119
tuning_options.budget.add_evaluations(1)
128120
tuning_options.budget.add_time(milliseconds=result["compile_time"])
@@ -136,10 +128,6 @@ def run(self, parameter_space, tuning_options):
136128
"Cannot use simulation mode with a time limit on a cache file that does not have full compile, verification, and benchmark timings on all configurations"
137129
)
138130

139-
total_time = 1000 * (perf_counter() - self.start_time)
140-
self.start_time = perf_counter()
141-
result["framework_time"] = total_time
142-
143131
results.append(result)
144132
continue
145133

@@ -148,12 +136,6 @@ def run(self, parameter_space, tuning_options):
148136
check = util.check_restrictions(tuning_options.restrictions, params_dict, True)
149137
if not check:
150138
result = util.disable_benchmark_timings(params_dict) # Set timings to zero
151-
result['strategy_time'] = strategy_time_per_config
152-
153-
total_time = 1000 * (perf_counter() - self.start_time)
154-
self.start_time = perf_counter()
155-
result['framework_time'] = total_time
156-
157139
result[tuning_options.objective] = util.InvalidConfig()
158140
results.append(result)
159141
warn(f"Configuration {element} not in cache, does not pass restrictions. Will be treated as an InvalidConfig, but make sure you are evaluating the correct cache file.")
@@ -164,4 +146,21 @@ def run(self, parameter_space, tuning_options):
164146
logging.debug(err_string)
165147
raise ValueError(f"{err_string} - in simulation mode, all configurations must be present in the cache")
166148

149+
num_valid_results = sum(bool(r) for r in results)
150+
if num_valid_results:
151+
total_time = self.timer.get_and_reset()
152+
153+
strategy_time = self.accumulated_strategy_time
154+
self.accumulated_strategy_time = 0
155+
156+
framework_time = max(total_time - strategy_time, 0)
157+
158+
# Amortize the time over all the results
159+
for result in results:
160+
if result:
161+
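# Time must be in ms (NOTE(review): total_time and strategy_time come from Timer readings in seconds and are not multiplied by 1000 here, unlike sequential.py; confirm the units)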
162+
result["strategy_time"] = strategy_time / num_valid_results
163+
result["framework_time"] = framework_time / num_valid_results
164+
165+
167166
return results

kernel_tuner/strategies/common.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ def __init__(
9898
self.results = []
9999
self.budget_spent_fraction = 0.0
100100
self.invalid_return_value = invalid_value
101+
self.strategy_timer = util.Timer()
101102

102103
def _normalize_and_validate_config(self, x, check_restrictions=True):
103104
# snap values in x to nearest actual value for each parameter, unscale x if needed
@@ -129,8 +130,8 @@ def _normalize_and_validate_config(self, x, check_restrictions=True):
129130

130131
def _run_configs(self, xs, check_restrictions=True):
131132
""" Takes a list of Euclidean coordinates and evaluates the configurations at those points. """
132-
self.runner.last_strategy_time += 1000 * (perf_counter() - self.runner.last_strategy_start_time)
133-
self.runner.start_time = perf_counter() # start framework time
133+
strategy_time = self.strategy_timer.get()
134+
self.runner.add_strategy_time(strategy_time)
134135

135136
# error value to return for numeric optimizers that need a numerical value
136137
logging.debug("_cost_func called")
@@ -176,9 +177,6 @@ def _run_configs(self, xs, check_restrictions=True):
176177
self.unique_results.setdefault(key, result)
177178
self.results.append(result)
178179

179-
# upon returning from this function control will be given back to the strategy, so reset the start time
180-
self.runner.last_strategy_start_time = perf_counter()
181-
182180
# this check is necessary because some strategies cannot handle partially completed requests
183181
# for example when only half of the configs in a population have been evaluated
184182
self.tuning_options.budget.raise_exception_if_done()
@@ -189,6 +187,9 @@ def _run_configs(self, xs, check_restrictions=True):
189187
if not all(final_results):
190188
raise util.StopCriterionReached("runner did not evaluate all given configurations")
191189

190+
# upon returning from this function, control is given back to the strategy, so restart the strategy timer
191+
self.strategy_timer.reset()
192+
192193
return final_results
193194

194195
def eval_all(self, xs, check_restrictions=True):

kernel_tuner/util.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,26 @@ def check_argument_list(kernel_name, kernel_string, args):
188188
warnings.warn(errors[0], UserWarning)
189189

190190

191+
class Timer:
192+
def __init__(self):
193+
self._start_ns = time.perf_counter_ns()
194+
195+
def get(self) -> float:
196+
"""Elapsed time in seconds."""
197+
now = time.perf_counter_ns()
198+
return (now - self._start_ns) * 1e-9
199+
200+
def get_and_reset(self) -> float:
201+
"""Elapsed time in seconds, then reset."""
202+
now = time.perf_counter_ns()
203+
elapsed_ns = now - self._start_ns
204+
self._start_ns = now
205+
return elapsed_ns * 1e-9
206+
207+
def reset(self) -> None:
208+
self.get_and_reset()
209+
210+
191211
class TuningBudget:
192212
def __init__(self, time_limit=None, max_fevals=None):
193213
if time_limit is not None and not isinstance(time_limit, timedelta):
@@ -199,7 +219,7 @@ def __init__(self, time_limit=None, max_fevals=None):
199219
if time_limit is not None and time_limit <= timedelta(seconds=0):
200220
raise ValueError("time_limit must be greater than zero")
201221

202-
self.start_time_seconds = time.perf_counter()
222+
self.start_timer = Timer()
203223
self.time_spent_extra = timedelta()
204224
self.time_limit = time_limit
205225
self.num_fevals = 0
@@ -212,7 +232,7 @@ def add_time(self, seconds=0, milliseconds=0):
212232
self.time_spent_extra += timedelta(seconds=seconds, milliseconds=milliseconds)
213233

214234
def get_time_spent(self) -> timedelta:
215-
seconds_passed = time.perf_counter() - self.start_time_seconds
235+
seconds_passed = self.start_timer.get()
216236
return timedelta(seconds=seconds_passed) + self.time_spent_extra
217237

218238
def get_time_remaining(self) -> timedelta:
@@ -259,7 +279,6 @@ def get_fraction_consumed(self) -> float:
259279

260280

261281

262-
263282
def check_tune_params_list(tune_params, observers, simulation_mode=False):
264283
"""Raise an exception if a tune parameter has a forbidden name."""
265284
forbidden_names = ("grid_size_x", "grid_size_y", "grid_size_z", "time")

test/strategies/test_common.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ def fake_runner():
2020
'time': 5
2121
}
2222
runner = Mock()
23-
runner.last_strategy_start_time = perf_counter()
2423
runner.run.return_value = [fake_result]
2524
return runner
2625

0 commit comments

Comments
 (0)