@@ -34,6 +34,7 @@
 from kernel_tuner.integration import get_objective_defaults
 from kernel_tuner.runners.sequential import SequentialRunner
 from kernel_tuner.runners.simulation import SimulationRunner
+from kernel_tuner.runners.parallel import ParallelRunner
 from kernel_tuner.searchspace import Searchspace
 
 try:
@@ -57,6 +58,7 @@
     pso,
     random_sample,
     simulated_annealing,
+    ensemble,
 )
 
 strategy_map = {
@@ -75,6 +77,7 @@
     "simulated_annealing": simulated_annealing,
     "firefly_algorithm": firefly_algorithm,
     "bayes_opt": bayes_opt,
+    "ensemble": ensemble,
 }
 
 
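
With the new `strategy_map` entry above, the ensemble strategy becomes selectable by name, like any other strategy. A minimal usage sketch, assuming the standard `tune_kernel` call shape; the kernel, problem size, and tunable parameters below are illustrative and not part of this diff, and any ensemble-specific options (for instance which member strategies to combine) are left at their defaults, since this diff does not show them:

```python
import numpy as np
import kernel_tuner

# Illustrative kernel and inputs (not part of this diff).
kernel_string = """
__global__ void vector_add(float *c, float *a, float *b, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {
        c[i] = a[i] + b[i];
    }
}
"""
size = 1_000_000
a = np.random.randn(size).astype(np.float32)
b = np.random.randn(size).astype(np.float32)
c = np.zeros_like(a)
args = [c, a, b, np.int32(size)]
tune_params = {"block_size_x": [64, 128, 256, 512]}

# "ensemble" is resolved through strategy_map like every other strategy name.
results, env = kernel_tuner.tune_kernel(
    "vector_add", kernel_string, size, args, tune_params,
    strategy="ensemble",
)
```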
@@ -384,6 +387,7 @@ def __deepcopy__(self, _):
     * "pso" particle swarm optimization
     * "random_sample" takes a random sample of the search space
     * "simulated_annealing" simulated annealing strategy
+    * "ensemble" ensemble strategy
 
     Strategy-specific parameters and options are explained under strategy_options.
 
@@ -463,6 +467,7 @@ def __deepcopy__(self, _):
         ),
         ("metrics", ("specifies user-defined metrics, please see :ref:`metrics`.", "dict")),
         ("simulation_mode", ("Simulate an auto-tuning search from an existing cachefile", "bool")),
+        ("parallel_mode", ("Run the auto-tuning on multiple devices (brute-force execution)", "bool")),
         ("observers", ("""A list of Observers to use during tuning, please see :ref:`observers`.""", "list")),
     ]
 )
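
The two mode flags are now documented side by side. As the runner-selection expression later in this diff shows, `simulation_mode` takes precedence over `parallel_mode` when both are set; a standalone sketch of that precedence, with strings standing in for the runner classes:

```python
def select_runner(simulation_mode: bool, parallel_mode: bool) -> str:
    """Mirrors the selection expression added further down in this diff."""
    if simulation_mode:
        return "SimulationRunner"
    if parallel_mode:
        return "ParallelRunner"
    return "SequentialRunner"

assert select_runner(True, True) == "SimulationRunner"   # simulation wins
assert select_runner(False, True) == "ParallelRunner"
assert select_runner(False, False) == "SequentialRunner"
```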
@@ -574,6 +579,7 @@ def tune_kernel(
     cache=None,
     metrics=None,
     simulation_mode=False,
+    parallel_mode=False,
     observers=None,
     objective=None,
     objective_higher_is_better=None,
@@ -611,6 +617,8 @@
         tuning_options["max_fevals"] = strategy_options["max_fevals"]
     if strategy_options and "time_limit" in strategy_options:
         tuning_options["time_limit"] = strategy_options["time_limit"]
+    if strategy_options and "num_gpus" in strategy_options:
+        tuning_options["num_gpus"] = strategy_options["num_gpus"]
 
     logging.debug("tune_kernel called")
     logging.debug("kernel_options: %s", util.get_config_string(kernel_options))
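
A usage sketch of the new passthrough: `num_gpus` travels from `strategy_options` into `tuning_options`, from where the parallel runner picks it up (see the runner construction below). The kernel and the device count here are illustrative assumptions:

```python
import numpy as np
import kernel_tuner

# Illustrative kernel and inputs (not part of this diff).
kernel_string = """
__global__ void scale(float *x, float s, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) x[i] *= s;
}
"""
size = 1_000_000
x = np.random.randn(size).astype(np.float32)
args = [x, np.float32(2.0), np.int32(size)]
tune_params = {"block_size_x": [64, 128, 256]}

results, env = kernel_tuner.tune_kernel(
    "scale", kernel_string, size, args, tune_params,
    parallel_mode=True,                # selects the ParallelRunner
    strategy_options={"num_gpus": 2},  # forwarded into tuning_options above
)
```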
@@ -650,9 +658,13 @@
         strategy = brute_force
 
     # select the runner for this job based on input
-    selected_runner = SimulationRunner if simulation_mode else SequentialRunner
+    selected_runner = SimulationRunner if simulation_mode else (ParallelRunner if parallel_mode else SequentialRunner)
     tuning_options.simulated_time = 0
-    runner = selected_runner(kernelsource, kernel_options, device_options, iterations, observers)
+    if parallel_mode:
+        num_gpus = tuning_options.get("num_gpus")
+        runner = selected_runner(kernelsource, kernel_options, device_options, iterations, observers, num_gpus=num_gpus)
+    else:
+        runner = selected_runner(kernelsource, kernel_options, device_options, iterations, observers)
 
     # the user-specified function may or may not have an optional atol argument;
     # we normalize it so that it always accepts atol.
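
From the call sites above, the `ParallelRunner` constructor is assumed to take the same positional arguments as the sequential runner plus an optional `num_gpus` keyword. A hedged stub of that inferred interface; only the signature is grounded in this diff, while the attribute names and the meaning of the `None` default are assumptions:

```python
class ParallelRunner:
    """Stub inferred from the call site; the real class lives in
    kernel_tuner.runners.parallel and is not shown in this diff."""

    def __init__(self, kernel_source, kernel_options, device_options,
                 iterations, observers, num_gpus=None):
        # num_gpus=None is read here as "use all available devices";
        # that default behavior is an assumption, not confirmed by the diff.
        self.kernel_source = kernel_source
        self.kernel_options = kernel_options
        self.device_options = device_options
        self.iterations = iterations
        self.observers = observers
        self.num_gpus = num_gpus
```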