 import numpy as np
 import pytest
 
-from kernel_tuner import util, tune_kernel
+from kernel_tuner import util, tune_kernel, core
+from kernel_tuner.interface import Options, _kernel_options, _device_options, _tuning_options
+from kernel_tuner.runners.sequential import SequentialRunner
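+# Options and the _*_options dicts are used by test_runner below to build the option bags a runner expects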
 
 from .context import skip_if_no_pycuda
 
-cache_filename = os.path.dirname(os.path.realpath(__file__)) + "/test_cache_file.json"
+cache_filename = os.path.dirname(
+    os.path.realpath(__file__)) + "/test_cache_file.json"
 
 
 @pytest.fixture
@@ -61,37 +64,55 @@ def test_sequential_runner_alt_block_size_names(env):
 
     block_size_names = ["block_dim_x"]
 
-    result, _ = tune_kernel(*env, grid_div_x=["block_dim_x"], answer=answer, block_size_names=block_size_names)
+    result, _ = tune_kernel(*env,
+                            grid_div_x=["block_dim_x"],
+                            answer=answer,
+                            block_size_names=block_size_names)
 
     assert len(result) == len(tune_params["block_dim_x"])
 
 
 @skip_if_no_pycuda
 def test_smem_args(env):
-    result, _ = tune_kernel(*env, smem_args=dict(size="block_size_x*4"), verbose=True)
+    result, _ = tune_kernel(*env,
+                            smem_args=dict(size="block_size_x*4"),
+                            verbose=True)
     tune_params = env[-1]
     assert len(result) == len(tune_params["block_size_x"])
-    result, _ = tune_kernel(*env, smem_args=dict(size=lambda p: p['block_size_x'] * 4), verbose=True)
+    result, _ = tune_kernel(
+        *env,
+        smem_args=dict(size=lambda p: p['block_size_x'] * 4),
+        verbose=True)
     tune_params = env[-1]
     assert len(result) == len(tune_params["block_size_x"])
 
 
 @skip_if_no_pycuda
 def test_build_cache(env):
     if not os.path.isfile(cache_filename):
-        result, _ = tune_kernel(*env, cache=cache_filename, verbose=False, quiet=True)
+        result, _ = tune_kernel(*env,
+                                cache=cache_filename,
+                                verbose=False,
+                                quiet=True)
         tune_params = env[-1]
         assert len(result) == len(tune_params["block_size_x"])
 
 
 def test_simulation_runner(env):
     kernel_name, kernel_string, size, args, tune_params = env
     start = time.perf_counter()
-    result, res_env = tune_kernel(*env, cache=cache_filename, strategy="random_sample", simulation_mode=True, strategy_options=dict(fraction=1))
-    actual_time = (time.perf_counter() - start) * 1e3  # ms
+    result, res_env = tune_kernel(*env,
+                                  cache=cache_filename,
+                                  strategy="random_sample",
+                                  simulation_mode=True,
+                                  strategy_options=dict(fraction=1))
+    actual_time = (time.perf_counter() - start) * 1e3  # ms
     assert len(result) == len(tune_params["block_size_x"])
 
-    timings = ['total_framework_time', 'total_strategy_time', 'total_compile_time', 'total_benchmark_time', 'overhead_time']
+    timings = [
+        'total_framework_time', 'total_strategy_time', 'total_compile_time',
+        'total_benchmark_time', 'overhead_time'
+    ]
 
     # ensure all keys are there and non zero
     assert all(key in res_env for key in timings)
@@ -111,7 +132,12 @@ def test_simulation_runner(env):
 
 
 def test_diff_evo(env):
-    result, _ = tune_kernel(*env, strategy="diff_evo", strategy_options=dict(popsize=5), verbose=True, cache=cache_filename, simulation_mode=True)
+    result, _ = tune_kernel(*env,
+                            strategy="diff_evo",
+                            strategy_options=dict(popsize=5),
+                            verbose=True,
+                            cache=cache_filename,
+                            simulation_mode=True)
     assert len(result) > 0
 
 
@@ -120,14 +146,25 @@ def test_time_keeping(env):
     kernel_name, kernel_string, size, args, tune_params = env
     answer = [args[1] + args[2], None, None, None]
 
-    options = dict(method="uniform", popsize=10, maxiter=1, mutation_chance=1, max_fevals=10)
+    options = dict(method="uniform",
+                   popsize=10,
+                   maxiter=1,
+                   mutation_chance=1,
+                   max_fevals=10)
     start = time.perf_counter()
-    result, env = tune_kernel(*env, strategy="genetic_algorithm", strategy_options=options, verbose=True, answer=answer)
-    max_time = (time.perf_counter() - start) * 1e3  # ms
+    result, env = tune_kernel(*env,
+                              strategy="genetic_algorithm",
+                              strategy_options=options,
+                              verbose=True,
+                              answer=answer)
+    max_time = (time.perf_counter() - start) * 1e3  # ms
 
     assert len(result) >= 10
 
-    timings = ['total_framework_time', 'total_strategy_time', 'total_compile_time', 'total_verification_time', 'total_benchmark_time', 'overhead_time']
+    timings = [
+        'total_framework_time', 'total_strategy_time', 'total_compile_time',
+        'total_verification_time', 'total_benchmark_time', 'overhead_time'
+    ]
 
     # ensure all keys are there and non zero
     assert all(key in env for key in timings)
@@ -142,15 +179,27 @@ def test_time_keeping(env):
 
 
 def test_bayesian_optimization(env):
-    for method in ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast"]:
+    for method in [
+            "poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced",
+            "multi-fast"
+    ]:
         print(method, flush=True)
         options = dict(popsize=5, max_fevals=10, method=method)
-        result, _ = tune_kernel(*env, strategy="bayes_opt", strategy_options=options, verbose=True, cache=cache_filename, simulation_mode=True)
+        result, _ = tune_kernel(*env,
+                                strategy="bayes_opt",
+                                strategy_options=options,
+                                verbose=True,
+                                cache=cache_filename,
+                                simulation_mode=True)
         assert len(result) > 0
 
 
 def test_random_sample(env):
-    result, _ = tune_kernel(*env, strategy="random_sample", strategy_options={"fraction": 0.1}, cache=cache_filename, simulation_mode=True)
+    result, _ = tune_kernel(*env,
+                            strategy="random_sample",
+                            strategy_options={"fraction": 0.1},
+                            cache=cache_filename,
+                            simulation_mode=True)
     # check that number of benchmarked kernels is 10% (rounded up)
     assert len(result) == 2
     # check all returned results make sense
@@ -182,7 +231,66 @@ def test_interface_handles_compile_failures(env):
     }
     """
 
-    results, env = tune_kernel(kernel_name, kernel_string, size, args, tune_params, verbose=True)
+    results, env = tune_kernel(kernel_name,
+                               kernel_string,
+                               size,
+                               args,
+                               tune_params,
+                               verbose=True)
 
-    failed_config = [record for record in results if record["block_size_x"] == 256][0]
+    failed_config = [
+        record for record in results if record["block_size_x"] == 256
+    ][0]
     assert isinstance(failed_config["time"], util.CompilationFailedConfig)
+
+
+@skip_if_no_pycuda
+def test_runner(env):
+
+    kernel_name, kernel_source, problem_size, arguments, tune_params = env
+
+    # create KernelSource
+    kernelsource = core.KernelSource(kernel_name,
+                                     kernel_source,
+                                     lang=None,
+                                     defines=None)
+
+    # create option bags
+    device = 0
+    atol = 1e-6
+    platform = 0
+    iterations = 7
+    verbose = False
+    objective = "time"
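+    # locals() captures the variables defined above by name; each option bag
+    # below picks out only the keys it needs (missing keys default to None)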
+    opts = locals()
+    kernel_options = Options([(k, opts.get(k, None))
+                              for k in _kernel_options.keys()])
+    tuning_options = Options([(k, opts.get(k, None))
+                              for k in _tuning_options.keys()])
+    device_options = Options([(k, opts.get(k, None))
+                              for k in _device_options.keys()])
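+    # no cache file is used, so results are neither read from nor written to disk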
+    tuning_options.cachefile = None
+
+    # create runner
+    runner = SequentialRunner(kernelsource,
+                              kernel_options,
+                              device_options,
+                              iterations,
+                              observers=None)
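+    # with observers=None, no additional observers collect measurements during benchmarking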
+    runner.warmed_up = True  # disable warm up for this test
+
+    # select a config to run
+    searchspace = []
+
+    # insert configurations to run with this runner in this list
+    # each configuration is described as a list of values, one for each tunable parameter
+    # the order should correspond to the order of parameters specified in tune_params
+    searchspace.append(
+        [32])  # vector_add only has one tunable parameter (block_size_x)
+
+    # call the runner
+    results, _ = runner.run(searchspace, kernel_options, tuning_options)
+
+    assert len(results) == 1
+    assert results[0]['block_size_x'] == 32
+    assert len(results[0]['times']) == iterations