
Commit b8acffa

Merge pull request #241 from MiloLurati/parallelTuning
Parallel Tuning
2 parents ec02f89 + 65c6a23

21 files changed (+863 / -40 lines)

doc/source/optimization.rst

Lines changed: 1 addition & 0 deletions
@@ -25,6 +25,7 @@ the ``strategy=`` optional argument of ``tune_kernel()``. Kernel Tuner currently
 * "pso" particle swarm optimization
 * "random_sample" takes a random sample of the search space
 * "simulated_annealing" simulated annealing strategy
+* "ensemble" ensemble strategy
 
 Most strategies have some mechanism built in to detect when to stop tuning, which may be controlled through specific
 parameters that can be passed to the strategies using the ``strategy_options=`` optional argument of ``tune_kernel()``. You
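
For context, a minimal sketch of how the newly documented strategy would be selected through ``tune_kernel()``. Only the strategy name "ensemble" comes from this change; the vector_add kernel, data sizes, and tunable parameters below are illustrative assumptions.

# Minimal sketch: a hypothetical CUDA vector_add kernel tuned with the new
# "ensemble" strategy; everything except strategy="ensemble" is illustrative.
import numpy as np
from kernel_tuner import tune_kernel

kernel_string = """
__global__ void vector_add(float *c, const float *a, const float *b, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {
        c[i] = a[i] + b[i];
    }
}
"""

size = 1_000_000
n = np.int32(size)
a = np.random.randn(size).astype(np.float32)
b = np.random.randn(size).astype(np.float32)
c = np.zeros_like(a)

tune_params = {"block_size_x": [32, 64, 128, 256, 512]}

# "ensemble" is the strategy name added in this commit; the rest is standard
# tune_kernel usage.
results, env = tune_kernel(
    "vector_add", kernel_string, size, [c, a, b, n], tune_params,
    strategy="ensemble",
)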

kernel_tuner/interface.py

Lines changed: 14 additions & 2 deletions
@@ -34,6 +34,7 @@
 from kernel_tuner.integration import get_objective_defaults
 from kernel_tuner.runners.sequential import SequentialRunner
 from kernel_tuner.runners.simulation import SimulationRunner
+from kernel_tuner.runners.parallel import ParallelRunner
 from kernel_tuner.searchspace import Searchspace
 
 try:
@@ -57,6 +58,7 @@
     pso,
     random_sample,
     simulated_annealing,
+    ensemble
 )
 
 strategy_map = {
@@ -75,6 +77,7 @@
     "simulated_annealing": simulated_annealing,
     "firefly_algorithm": firefly_algorithm,
     "bayes_opt": bayes_opt,
+    "ensemble": ensemble,
 }
 
 
@@ -384,6 +387,7 @@ def __deepcopy__(self, _):
     * "pso" particle swarm optimization
     * "random_sample" takes a random sample of the search space
     * "simulated_annealing" simulated annealing strategy
+    * "ensemble" Ensemble Strategy
 
     Strategy-specific parameters and options are explained under strategy_options.
 
@@ -463,6 +467,7 @@ def __deepcopy__(self, _):
         ),
         ("metrics", ("specifies user-defined metrics, please see :ref:`metrics`.", "dict")),
         ("simulation_mode", ("Simulate an auto-tuning search from an existing cachefile", "bool")),
+        ("parallel_mode", ("Run the auto-tuning on multiple devices (brute-force execution)", "bool")),
         ("observers", ("""A list of Observers to use during tuning, please see :ref:`observers`.""", "list")),
     ]
 )
@@ -574,6 +579,7 @@ def tune_kernel(
     cache=None,
     metrics=None,
     simulation_mode=False,
+    parallel_mode=False,
     observers=None,
     objective=None,
     objective_higher_is_better=None,
@@ -611,6 +617,8 @@
         tuning_options["max_fevals"] = strategy_options["max_fevals"]
     if strategy_options and "time_limit" in strategy_options:
         tuning_options["time_limit"] = strategy_options["time_limit"]
+    if strategy_options and "num_gpus" in strategy_options:
+        tuning_options["num_gpus"] = strategy_options["num_gpus"]
 
     logging.debug("tune_kernel called")
     logging.debug("kernel_options: %s", util.get_config_string(kernel_options))
@@ -650,9 +658,13 @@
         strategy = brute_force
 
     # select the runner for this job based on input
-    selected_runner = SimulationRunner if simulation_mode else SequentialRunner
+    selected_runner = SimulationRunner if simulation_mode else (ParallelRunner if parallel_mode else SequentialRunner)
     tuning_options.simulated_time = 0
-    runner = selected_runner(kernelsource, kernel_options, device_options, iterations, observers)
+    if parallel_mode:
+        num_gpus = tuning_options['num_gpus'] if 'num_gpus' in tuning_options else None
+        runner = selected_runner(kernelsource, kernel_options, device_options, iterations, observers, num_gpus=num_gpus)
+    else:
+        runner = selected_runner(kernelsource, kernel_options, device_options, iterations, observers)
 
     # the user-specified function may or may not have an optional atol argument;
     # we normalize it so that it always accepts atol.
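
A hedged sketch of how the new plumbing above is reached from the public API: ``parallel_mode=True`` routes runner selection to ParallelRunner, and a "num_gpus" entry in ``strategy_options`` is copied into ``tuning_options`` and handed to the runner. The kernel setup (kernel_string, size, tune_params, and the argument list) reuses the illustrative vector_add example shown after the documentation diff.

# Hedged sketch: only parallel_mode and the "num_gpus" strategy option are taken
# from this diff; the kernel setup is the illustrative vector_add example above.
results, env = tune_kernel(
    "vector_add", kernel_string, size, [c, a, b, n], tune_params,
    parallel_mode=True,                # selects ParallelRunner instead of SequentialRunner
    strategy_options={"num_gpus": 4},  # copied into tuning_options["num_gpus"] and
                                       # passed to the runner as num_gpus=
)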

kernel_tuner/observers/nvml.py

Lines changed: 9 additions & 0 deletions
@@ -326,6 +326,15 @@ def __init__(
         continuous_duration=1,
     ):
         """Create an NVMLObserver."""
+        # needed for re-initializing observer on ray actor
+        self.init_arguments = {
+            "observables": observables,
+            "device": device,
+            "save_all": save_all,
+            "nvidia_smi_fallback": nvidia_smi_fallback,
+            "use_locked_clocks": use_locked_clocks,
+            "continuous_duration": continuous_duration
+        }
         if nvidia_smi_fallback:
             self.nvml = nvml(
                 device,
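
The stored ``init_arguments`` mirror the constructor signature, so an equivalent observer can be rebuilt from plain data on a remote worker such as a Ray actor. The exact ParallelRunner/Ray wiring is not part of this hunk, so the snippet below is only a plausible sketch of that pattern, with a hypothetical observable choice.

# Plausible sketch of the re-initialization pattern enabled by init_arguments;
# the actual Ray-actor code in this PR may differ.
from kernel_tuner.observers.nvml import NVMLObserver

observer = NVMLObserver(observables=["nvml_energy"])  # created in the main process

# A worker can construct a fresh, equivalent observer from the stored kwargs,
# because init_arguments mirrors __init__'s parameters one-to-one.
rebuilt = NVMLObserver(**observer.init_arguments)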

kernel_tuner/observers/pmt.py

Lines changed: 5 additions & 0 deletions
@@ -49,6 +49,11 @@ class PMTObserver(BenchmarkObserver):
     def __init__(self, observable=None, use_continuous_observer=False, continuous_duration=1):
         if not pmt:
             raise ImportError("could not import pmt")
+
+        # needed for re-initializing observer on ray actor
+        self.init_arguments = {
+            "observable": observable
+        }
 
         # User specifies a dictionary of platforms and corresponding devices
         if type(observable) is dict:

kernel_tuner/observers/powersensor.py

Lines changed: 6 additions & 0 deletions
@@ -27,6 +27,12 @@ class PowerSensorObserver(BenchmarkObserver):
     def __init__(self, observables=None, device=None):
         if not powersensor:
             raise ImportError("could not import powersensor")
+
+        # needed for re-initializing observer on ray actor
+        self.init_arguments = {
+            "observables": observables,
+            "device": device
+        }
 
         supported = ["ps_energy", "ps_power"]
         for obs in observables:
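
The same bookkeeping is applied to PMTObserver and PowerSensorObserver above, and a user-defined observer can follow the identical convention. The class below is a hypothetical example, not part of this commit.

# Hypothetical custom observer following the same convention: keep the constructor
# arguments in self.init_arguments so the observer can be re-created elsewhere
# from plain data.
from kernel_tuner.observers import BenchmarkObserver


class IterationCounter(BenchmarkObserver):
    def __init__(self, label="iterations"):
        self.init_arguments = {"label": label}  # mirrors __init__ exactly
        self.label = label
        self.count = 0

    def after_finish(self):
        # called after each benchmarked kernel run
        self.count += 1

    def get_results(self):
        # report and reset between configurations
        results = {self.label: self.count}
        self.count = 0
        return results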
