Skip to content

Commit 4cc4a13

Browse files
committed
Time spent setting NVML parameters (clock & memory frequency, power) goes to framework time instead of benchmark time
1 parent df54145 commit 4cc4a13

File tree

2 files changed

+17
-8
lines changed

2 files changed

+17
-8
lines changed

kernel_tuner/core.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -391,12 +391,8 @@ def benchmark_continuous(self, func, gpu_args, threads, grid, result, duration):
391391
for obs in self.continuous_observers:
392392
result.update(obs.get_results())
393393

394-
def benchmark(self, func, gpu_args, instance, verbose, objective):
395-
"""benchmark the kernel instance"""
396-
logging.debug("benchmark " + instance.name)
397-
logging.debug("thread block dimensions x,y,z=%d,%d,%d", *instance.threads)
398-
logging.debug("grid dimensions x,y,z=%d,%d,%d", *instance.grid)
399-
394+
def set_nvml_parameters(self, instance):
395+
"""Set the NVML parameters. Done separately so the time spent setting them does not leak into the benchmark time."""
400396
if self.use_nvml:
401397
if "nvml_pwr_limit" in instance.params:
402398
new_limit = int(
@@ -409,6 +405,15 @@ def benchmark(self, func, gpu_args, instance, verbose, objective):
409405
if "nvml_mem_clock" in instance.params:
410406
self.nvml.mem_clock = instance.params["nvml_mem_clock"]
411407

408+
def benchmark(self, func, gpu_args, instance, verbose, objective, skip_nvml_setting=False):
409+
"""Benchmark the kernel instance."""
410+
logging.debug("benchmark " + instance.name)
411+
logging.debug("thread block dimensions x,y,z=%d,%d,%d", *instance.threads)
412+
logging.debug("grid dimensions x,y,z=%d,%d,%d", *instance.grid)
413+
414+
if self.use_nvml and not skip_nvml_setting:
415+
self.set_nvml_parameters(instance)
416+
412417
# Call the observers to register the configuration to be benchmarked
413418
for obs in self.dev.observers:
414419
obs.register_configuration(instance.params)
@@ -577,11 +582,15 @@ def compile_and_benchmark(self, kernel_source, gpu_args, params, kernel_options,
577582

578583
# benchmark
579584
if func:
585+
# Setting the NVML parameters here keeps the time this takes from leaking into the benchmark time; it ends up in the framework time instead.
586+
if self.use_nvml:
587+
self.set_nvml_parameters(instance)
580588
start_benchmark = time.perf_counter()
581589
result.update(
582-
self.benchmark(func, gpu_args, instance, verbose, to.objective)
590+
self.benchmark(func, gpu_args, instance, verbose, to.objective, skip_nvml_setting=False)
583591
)
584592
last_benchmark_time = 1000 * (time.perf_counter() - start_benchmark)
593+
print(f"Benchmark time: {last_benchmark_time}")
585594

586595
except Exception as e:
587596
# dump kernel sources to temp file

kernel_tuner/runners/sequential.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def run(self, parameter_space, tuning_options):
100100
params = process_metrics(params, tuning_options.metrics)
101101

102102
# get the framework time by estimating based on other times
103-
total_time = 1000 * (perf_counter() - self.start_time) - warmup_time
103+
total_time = 1000 * ((perf_counter() - self.start_time) - warmup_time) # TODO is it valid that we deduct the warmup time here?
104104
params['strategy_time'] = self.last_strategy_time
105105
params['framework_time'] = max(total_time - (params['compile_time'] + params['verification_time'] + params['benchmark_time'] + params['strategy_time']), 0)
106106
params['timestamp'] = str(datetime.now(timezone.utc))

0 commit comments

Comments
 (0)