Commit 192f935

Merge branch 'output_file_writer' of github.com:KernelTuner/kernel_tuner into output_file_writer
2 parents: 8a7c265 + 20dcff0

3 files changed: 167 additions, 45 deletions

kernel_tuner/file_utils.py (37 additions, 23 deletions)
@@ -24,12 +24,25 @@ def output_file_schema(target):
     """
     current_version = "1.0.0"
-    file = schema_dir + f"/T4/{current_version}/{target}-schema.json"
-    with open(file, 'r') as fh:
+    output_file = schema_dir + f"/T4/{current_version}/{target}-schema.json"
+    with open(output_file, 'r') as fh:
         json_string = json.load(fh)
     return current_version, json_string


+def get_configuration_validity(objective) -> str:
+    """ Convert internal Kernel Tuner error to string """
+    if not isinstance(objective, util.ErrorConfig):
+        return "correct"
+    else:
+        if isinstance(objective, util.CompilationFailedConfig):
+            return "compile"
+        elif isinstance(objective, util.RuntimeFailedConfig):
+            return "runtime"
+        else:
+            return "constraints"
+
+
 def store_output_file(output_filename, results, tune_params, objective="time"):
     """ Store the obtained auto-tuning results in a JSON output file
@@ -51,8 +64,12 @@ def store_output_file(output_filename, results, tune_params, objective="time"):
     if output_filename[-5:] != ".json":
         output_filename += ".json"

-    timing_keys = ["compile_time", "benchmark_time", "framework_time", "strategy_time", "verification_time"]
-    not_measurement_keys = list(tune_params.keys()) + timing_keys + ["timestamp"] + ["times"]
+    timing_keys = [
+        "compile_time", "benchmark_time", "framework_time", "strategy_time",
+        "verification_time"
+    ]
+    not_measurement_keys = list(
+        tune_params.keys()) + timing_keys + ["timestamp"] + ["times"]

     output_data = []
@@ -61,8 +78,10 @@ def store_output_file(output_filename, results, tune_params, objective="time"):
         out = {}

         out["timestamp"] = result["timestamp"]
-        out["configuration"] = { k: v
-                                 for k, v in result.items() if k in tune_params }
+        out["configuration"] = {
+            k: v
+            for k, v in result.items() if k in tune_params
+        }

         # collect configuration specific timings
         timings = dict()
@@ -75,15 +94,7 @@ def store_output_file(output_filename, results, tune_params, objective="time"):
         out["times"] = timings

         # encode the validity of the configuration
-        if not isinstance(result[objective], util.ErrorConfig):
-            out["invalidity"] = "correct"
-        else:
-            if isinstance(result[objective], util.CompilationFailedConfig):
-                out["invalidity"] = "compile"
-            elif isinstance(result[objective], util.RuntimeFailedConfig):
-                out["invalidity"] = "runtime"
-            else:
-                out["invalidity"] = "constraints"
+        out["invalidity"] = get_configuration_validity(result[objective])

         # Kernel Tuner does not support producing results of configs that fail the correctness check
         # therefore correctness is always 1
@@ -92,11 +103,11 @@ def store_output_file(output_filename, results, tune_params, objective="time"):
         # measurements gathers everything that was measured
         measurements = []
        for key, value in result.items():
-            if not key in not_measurement_keys:
-                if key.startswith("time"):
-                    measurements.append(dict(name=key, value=value, unit="ms"))
-                else:
-                    measurements.append(dict(name=key, value=value, unit=""))
+            if key not in not_measurement_keys:
+                measurements.append(
+                    dict(name=key,
+                         value=value,
+                         unit="ms" if key.startswith("time") else ""))
         out["measurements"] = measurements

         # objectives
@@ -133,12 +144,14 @@ def get_dependencies(package='kernel_tuner'):
 def get_device_query(target):
     """ Get the information about GPUs in the current system, target is any of ['nvidia', 'amd'] """
     if target == "nvidia":
-        nvidia_smi_out = subprocess.run(["nvidia-smi", "--query", "-x"], capture_output=True)
+        nvidia_smi_out = subprocess.run(["nvidia-smi", "--query", "-x"],
+                                        capture_output=True)
         nvidia_smi = xmltodict.parse(nvidia_smi_out.stdout)
         del nvidia_smi["nvidia_smi_log"]["gpu"]["processes"]
         return nvidia_smi
     elif target == "amd":
-        rocm_smi_out = subprocess.run(["rocm-smi", "--showallinfo", "--json"], capture_output=True)
+        rocm_smi_out = subprocess.run(["rocm-smi", "--showallinfo", "--json"],
+                                      capture_output=True)
         return json.loads(rocm_smi_out.stdout)
     else:
         raise ValueError("get_device_query target not supported")
@@ -167,7 +180,8 @@ def store_metadata_file(metadata_filename, target="nvidia"):
     # only works if nvidia-smi (for NVIDIA) or rocm-smi (for AMD) is present, raises FileNotFoundError when not present
     device_query = get_device_query(target)

-    metadata["environment"] = dict(device_query=device_query, requirements=get_dependencies())
+    metadata["environment"] = dict(device_query=device_query,
+                                   requirements=get_dependencies())

     # write metadata to JSON file
     version, _ = output_file_schema("metadata")
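
As a usage note, a minimal sketch of how the new get_configuration_validity helper maps internal error types to the strings stored under "invalidity" in the output file. The objective values below are made up for illustration, and this assumes the ErrorConfig subclasses in kernel_tuner.util can be instantiated without arguments:

    from kernel_tuner import util
    from kernel_tuner.file_utils import get_configuration_validity

    # a configuration that benchmarked successfully: the objective is a plain number
    assert get_configuration_validity(0.42) == "correct"

    # failed configurations carry an ErrorConfig subclass as their objective value
    # (assumption: these classes take no required constructor arguments)
    assert get_configuration_validity(util.CompilationFailedConfig()) == "compile"
    assert get_configuration_validity(util.RuntimeFailedConfig()) == "runtime"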

test/test_file_utils.py (3 additions, 3 deletions)
@@ -1,9 +1,9 @@
 from kernel_tuner.file_utils import store_output_file, store_metadata_file, output_file_schema, validate
+from kernel_tuner.util import delete_temp_file
 from .test_integration import fake_results
 from .test_runners import env, cache_filename, tune_kernel
 import pytest
 import json
-import os


 def test_store_output_file(env):
@@ -24,7 +24,7 @@ def test_store_output_file(env):
     validate(output_json, schema=schema)

     # clean up
-    os.remove(filename)
+    delete_temp_file(filename)


 def test_store_metadata_file():
@@ -46,4 +46,4 @@ def test_store_metadata_file():
     validate(metadata_json, schema=schema)

     # clean up
-    os.remove(filename)
+    delete_temp_file(filename)
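
For reference, the store-then-validate round trip these tests exercise, as a minimal sketch. The fake result dict and the "results" schema target name are assumptions for illustration, not taken from this diff:

    import json
    from kernel_tuner.file_utils import store_output_file, output_file_schema, validate
    from kernel_tuner.util import delete_temp_file

    # hypothetical tuning results for one configuration; a real run would
    # use the results and tune_params returned by tune_kernel
    tune_params = {"block_size_x": [32]}
    results = [dict(block_size_x=32, time=0.42, times=[0.42],
                    timestamp="2022-01-01 00:00:00", compile_time=1.0,
                    benchmark_time=1.0, framework_time=1.0,
                    strategy_time=0.0, verification_time=0.0)]

    store_output_file("output.json", results, tune_params)
    _, schema = output_file_schema("results")  # assumption: schema target is "results"
    with open("output.json", 'r') as fh:
        validate(json.load(fh), schema=schema)
    delete_temp_file("output.json")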

test/test_runners.py (127 additions, 19 deletions)
@@ -5,11 +5,14 @@
 import numpy as np
 import pytest

-from kernel_tuner import util, tune_kernel
+from kernel_tuner import util, tune_kernel, core
+from kernel_tuner.interface import Options, _kernel_options, _device_options, _tuning_options
+from kernel_tuner.runners.sequential import SequentialRunner

 from .context import skip_if_no_pycuda

-cache_filename = os.path.dirname(os.path.realpath(__file__)) + "/test_cache_file.json"
+cache_filename = os.path.dirname(
+    os.path.realpath(__file__)) + "/test_cache_file.json"


 @pytest.fixture
@@ -61,37 +64,55 @@ def test_sequential_runner_alt_block_size_names(env):

     block_size_names = ["block_dim_x"]

-    result, _ = tune_kernel(*env, grid_div_x=["block_dim_x"], answer=answer, block_size_names=block_size_names)
+    result, _ = tune_kernel(*env,
+                            grid_div_x=["block_dim_x"],
+                            answer=answer,
+                            block_size_names=block_size_names)

     assert len(result) == len(tune_params["block_dim_x"])


 @skip_if_no_pycuda
 def test_smem_args(env):
-    result, _ = tune_kernel(*env, smem_args=dict(size="block_size_x*4"), verbose=True)
+    result, _ = tune_kernel(*env,
+                            smem_args=dict(size="block_size_x*4"),
+                            verbose=True)
     tune_params = env[-1]
     assert len(result) == len(tune_params["block_size_x"])
-    result, _ = tune_kernel(*env, smem_args=dict(size=lambda p: p['block_size_x'] * 4), verbose=True)
+    result, _ = tune_kernel(
+        *env,
+        smem_args=dict(size=lambda p: p['block_size_x'] * 4),
+        verbose=True)
     tune_params = env[-1]
     assert len(result) == len(tune_params["block_size_x"])


 @skip_if_no_pycuda
 def test_build_cache(env):
     if not os.path.isfile(cache_filename):
-        result, _ = tune_kernel(*env, cache=cache_filename, verbose=False, quiet=True)
+        result, _ = tune_kernel(*env,
+                                cache=cache_filename,
+                                verbose=False,
+                                quiet=True)
         tune_params = env[-1]
         assert len(result) == len(tune_params["block_size_x"])


 def test_simulation_runner(env):
     kernel_name, kernel_string, size, args, tune_params = env
     start = time.perf_counter()
-    result, res_env = tune_kernel(*env, cache=cache_filename, strategy="random_sample", simulation_mode=True, strategy_options=dict(fraction=1))
-    actual_time = (time.perf_counter() - start) * 1e3  # ms
+    result, res_env = tune_kernel(*env,
+                                  cache=cache_filename,
+                                  strategy="random_sample",
+                                  simulation_mode=True,
+                                  strategy_options=dict(fraction=1))
+    actual_time = (time.perf_counter() - start) * 1e3  # ms
     assert len(result) == len(tune_params["block_size_x"])

-    timings = ['total_framework_time', 'total_strategy_time', 'total_compile_time', 'total_benchmark_time', 'overhead_time']
+    timings = [
+        'total_framework_time', 'total_strategy_time', 'total_compile_time',
+        'total_benchmark_time', 'overhead_time'
+    ]

     # ensure all keys are there and non zero
     assert all(key in res_env for key in timings)
@@ -111,7 +132,12 @@ def test_simulation_runner(env):


 def test_diff_evo(env):
-    result, _ = tune_kernel(*env, strategy="diff_evo", strategy_options=dict(popsize=5), verbose=True, cache=cache_filename, simulation_mode=True)
+    result, _ = tune_kernel(*env,
+                            strategy="diff_evo",
+                            strategy_options=dict(popsize=5),
+                            verbose=True,
+                            cache=cache_filename,
+                            simulation_mode=True)
     assert len(result) > 0
@@ -120,14 +146,25 @@ def test_time_keeping(env):
     kernel_name, kernel_string, size, args, tune_params = env
     answer = [args[1] + args[2], None, None, None]

-    options = dict(method="uniform", popsize=10, maxiter=1, mutation_chance=1, max_fevals=10)
+    options = dict(method="uniform",
+                   popsize=10,
+                   maxiter=1,
+                   mutation_chance=1,
+                   max_fevals=10)
     start = time.perf_counter()
-    result, env = tune_kernel(*env, strategy="genetic_algorithm", strategy_options=options, verbose=True, answer=answer)
-    max_time = (time.perf_counter() - start) * 1e3  # ms
+    result, env = tune_kernel(*env,
+                              strategy="genetic_algorithm",
+                              strategy_options=options,
+                              verbose=True,
+                              answer=answer)
+    max_time = (time.perf_counter() - start) * 1e3  # ms

     assert len(result) >= 10

-    timings = ['total_framework_time', 'total_strategy_time', 'total_compile_time', 'total_verification_time', 'total_benchmark_time', 'overhead_time']
+    timings = [
+        'total_framework_time', 'total_strategy_time', 'total_compile_time',
+        'total_verification_time', 'total_benchmark_time', 'overhead_time'
+    ]

     # ensure all keys are there and non zero
     assert all(key in env for key in timings)
@@ -142,15 +179,27 @@ def test_time_keeping(env):


 def test_bayesian_optimization(env):
-    for method in ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast"]:
+    for method in [
+            "poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced",
+            "multi-fast"
+    ]:
         print(method, flush=True)
         options = dict(popsize=5, max_fevals=10, method=method)
-        result, _ = tune_kernel(*env, strategy="bayes_opt", strategy_options=options, verbose=True, cache=cache_filename, simulation_mode=True)
+        result, _ = tune_kernel(*env,
+                                strategy="bayes_opt",
+                                strategy_options=options,
+                                verbose=True,
+                                cache=cache_filename,
+                                simulation_mode=True)
         assert len(result) > 0


 def test_random_sample(env):
-    result, _ = tune_kernel(*env, strategy="random_sample", strategy_options={ "fraction": 0.1 }, cache=cache_filename, simulation_mode=True)
+    result, _ = tune_kernel(*env,
+                            strategy="random_sample",
+                            strategy_options={"fraction": 0.1},
+                            cache=cache_filename,
+                            simulation_mode=True)
     # check that number of benchmarked kernels is 10% (rounded up)
     assert len(result) == 2
     # check all returned results make sense
@@ -182,7 +231,66 @@ def test_interface_handles_compile_failures(env):
     }
     """

-    results, env = tune_kernel(kernel_name, kernel_string, size, args, tune_params, verbose=True)
+    results, env = tune_kernel(kernel_name,
+                               kernel_string,
+                               size,
+                               args,
+                               tune_params,
+                               verbose=True)

-    failed_config = [record for record in results if record["block_size_x"] == 256][0]
+    failed_config = [
+        record for record in results if record["block_size_x"] == 256
+    ][0]
     assert isinstance(failed_config["time"], util.CompilationFailedConfig)
+
+
+@skip_if_no_pycuda
+def test_runner(env):
+
+    kernel_name, kernel_source, problem_size, arguments, tune_params = env
+
+    # create KernelSource
+    kernelsource = core.KernelSource(kernel_name,
+                                     kernel_source,
+                                     lang=None,
+                                     defines=None)
+
+    # create option bags
+    device = 0
+    atol = 1e-6
+    platform = 0
+    iterations = 7
+    verbose = False
+    objective = "time"
+    opts = locals()
+    kernel_options = Options([(k, opts.get(k, None))
+                              for k in _kernel_options.keys()])
+    tuning_options = Options([(k, opts.get(k, None))
+                              for k in _tuning_options.keys()])
+    device_options = Options([(k, opts.get(k, None))
+                              for k in _device_options.keys()])
+    tuning_options.cachefile = None
+
+    # create runner
+    runner = SequentialRunner(kernelsource,
+                              kernel_options,
+                              device_options,
+                              iterations,
+                              observers=None)
+    runner.warmed_up = True  # disable warm up for this test
+
+    # select a config to run
+    searchspace = []
+
+    # insert configurations to run with this runner in this list
+    # each configuration is described as a list of values, one for each tunable parameter
+    # the order should correspond to the order of parameters specified in tune_params
+    searchspace.append(
+        [32])  # vector_add only has one tunable parameter (block_size_x)
+
+    # call the runner
+    results, _ = runner.run(searchspace, kernel_options, tuning_options)
+
+    assert len(results) == 1
+    assert results[0]['block_size_x'] == 32
+    assert len(results[0]['times']) == iterations
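
As a usage note on the new test_runner pattern: searchspace is an ordinary list, so several configurations can be pushed through the same SequentialRunner in one call. A hedged sketch, reusing runner, kernel_options, and tuning_options exactly as constructed in the test above; the extra block sizes and the assumption that run() returns one result per configuration in order are illustrative, not confirmed by this diff:

    # hypothetical: benchmark three block sizes in a single runner invocation
    searchspace = [[32], [64], [128]]
    results, _ = runner.run(searchspace, kernel_options, tuning_options)

    assert len(results) == 3
    assert [r["block_size_x"] for r in results] == [32, 64, 128]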
