55
66from kernel_tuner .core import DeviceInterface
77from kernel_tuner .runners .runner import Runner
8- from kernel_tuner .util import ErrorConfig , print_config_output , process_metrics , store_cache , disable_benchmark_timings
8+ from kernel_tuner .util import ErrorConfig , Timer , print_config_output , process_metrics , store_cache , disable_benchmark_timings
99
1010
1111class SequentialRunner (Runner ):
@@ -27,16 +27,14 @@ def __init__(self, kernel_source, kernel_options, device_options, iterations, ob
2727 :type iterations: int
2828 """
2929 # detect language and create high-level device interface
30- self . dev = DeviceInterface ( kernel_source , iterations = iterations , observers = observers , ** device_options )
30+ super (). __init__ ( )
3131
32+ self .dev = DeviceInterface (kernel_source , iterations = iterations , observers = observers , ** device_options )
3233 self .units = self .dev .units
3334 self .quiet = device_options .quiet
3435 self .kernel_source = kernel_source
3536 self .warmed_up = False if self .dev .requires_warmup else True
3637 self .simulation_mode = False
37- self .start_time = perf_counter ()
38- self .last_strategy_start_time = self .start_time
39- self .last_strategy_time = 0
4038 self .kernel_options = kernel_options
4139
4240 # move data to the GPU
@@ -64,7 +62,7 @@ def run(self, parameter_space, tuning_options):
6462 logging .debug ("sequential runner started for " + self .kernel_options .kernel_name )
6563
6664 results = []
67- total_worker_time = 0
65+ worker_time = 0
6866
6967 # iterate over parameter space
7068 for element in parameter_space :
@@ -88,21 +86,21 @@ def run(self, parameter_space, tuning_options):
8886 else :
8987 # attempt to warmup the GPU by running the first config in the parameter space and ignoring the result
9088 if not self .warmed_up :
91- warmup_time = perf_counter ()
89+ warmup_timer = Timer ()
9290 self .dev .compile_and_benchmark (
9391 self .kernel_source , self .gpu_args , params , self .kernel_options , tuning_options
9492 )
9593 self .warmed_up = True
96- warmup_time = 1e3 * ( perf_counter () - warmup_time )
94+ warmup_time = warmup_timer . get ( )
9795
9896 result = self .dev .compile_and_benchmark (
9997 self .kernel_source , self .gpu_args , params , self .kernel_options , tuning_options
10098 )
10199
102- # Collect total time spent by worker
100+ # Collect total time spent by worker in seconds
103101 worker_time += (
104102 result ["compile_time" ] + result ["verification_time" ] + result ["benchmark_time" ]
105- )
103+ ) / 1000
106104
107105 params .update (result )
108106
@@ -128,20 +126,17 @@ def run(self, parameter_space, tuning_options):
128126 num_valid_results = sum (bool (r ) for r in results ) # Count the number of valid results
129127
130128 if num_valid_results > 0 :
131- # get the framework time by estimating based on other times
132- total_time = 1000 * (perf_counter () - self .start_time )
133- self .start_time = perf_counter ()
134-
135- strategy_time = self .last_strategy_time
136- self .last_strategy_time = 0
129+ strategy_time = self .accumulated_strategy_time
130+ self .accumulated_strategy_time = 0
137131
132+ # get the framework time by estimating based on other times
133+ total_time = self .timer .get_and_reset () - warmup_time
138134 framework_time = max (total_time - strategy_time - worker_time , 0 )
139135
140- # Post-process all the results
136+ # Amortize the time over all the results
141137 for result in results :
142- # Amortize the time over all the results
143138 if result :
144- result ["strategy_time" ] = strategy_time / num_valid_results
145- result ["framework_time" ] = framework_time / num_valid_results
139+ result ["strategy_time" ] = 1000 * strategy_time / num_valid_results
140+ result ["framework_time" ] = 1000 * framework_time / num_valid_results
146141
147142 return results
0 commit comments