Skip to content

Commit d845b33

Browse files
committed
Merge branch 'master' into refactor_interface
2 parents 84a3563 + 653f617 commit d845b33

File tree

15 files changed

+872
-112
lines changed

15 files changed

+872
-112
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ This project adheres to [Semantic Versioning](http://semver.org/).
88
- Support for using time_limit in simulation mode
99
- Helper functions for energy tuning
1010
- Example to show ridge frequency and power-frequency model
11+
- Functions to store tuning output and metadata
1112

1213
### Changed
1314
- Changed what timings are stored in cache files

examples/cuda/vector_add.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
#!/usr/bin/env python
22
"""This is the minimal example from the README"""
33

4-
import json
54
import numpy
65
from kernel_tuner import tune_kernel
6+
from kernel_tuner.file_utils import store_output_file, store_metadata_file
77

88
def tune():
99

@@ -28,12 +28,15 @@ def tune():
2828
tune_params = dict()
2929
tune_params["block_size_x"] = [128+64*i for i in range(15)]
3030

31-
result = tune_kernel("vector_add", kernel_string, size, args, tune_params)
31+
results, env = tune_kernel("vector_add", kernel_string, size, args, tune_params)
3232

33-
with open("vector_add.json", 'w') as fp:
34-
json.dump(result, fp)
33+
# Store the tuning results in an output file
34+
store_output_file("vector_add.json", results, tune_params)
3535

36-
return result
36+
# Store the metadata of this run
37+
store_metadata_file("vector_add-metadata.json")
38+
39+
return results
3740

3841

3942
if __name__ == "__main__":

examples/opencl/vector_add.py

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
#!/usr/bin/env python
2+
"""This is the minimal example from the README"""
3+
24
import numpy
35
from kernel_tuner import tune_kernel
6+
from kernel_tuner.file_utils import store_output_file, store_metadata_file
7+
8+
def tune():
49

5-
kernel_string = """
10+
kernel_string = """
611
__kernel void vector_add(__global float *c, __global const float *a, __global const float *b, int n) {
712
int i = get_global_id(0);
813
if (i<n) {
@@ -11,17 +16,28 @@
1116
}
1217
"""
1318

14-
size = 10000000
19+
size = 10000000
20+
21+
a = numpy.random.rand(size).astype(numpy.float32)
22+
b = numpy.random.rand(size).astype(numpy.float32)
23+
c = numpy.zeros_like(a)
24+
n = numpy.int32(size)
25+
26+
args = [c, a, b, n]
27+
28+
tune_params = dict()
29+
tune_params["block_size_x"] = [128+64*i for i in range(15)]
30+
31+
results, env = tune_kernel("vector_add", kernel_string, size, args, tune_params)
1532

16-
a = numpy.random.rand(size).astype(numpy.float32)
17-
b = numpy.random.rand(size).astype(numpy.float32)
18-
c = numpy.zeros_like(a)
19-
n = numpy.int32(size)
33+
# Store the tuning results in an output file
34+
store_output_file("vector_add.json", results, tune_params)
2035

21-
args = [c, a, b, n]
36+
# Store the metadata of this run
37+
store_metadata_file("vector_add-metadata.json")
2238

23-
tune_params = dict()
24-
tune_params["block_size_x"] = [128+64*i for i in range(15)]
39+
return results
2540

26-
tune_kernel("vector_add", kernel_string, size, args, tune_params)
2741

42+
if __name__ == "__main__":
43+
tune()

kernel_tuner/energy/energy.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
}
4242
"""
4343

44-
def get_frequency_power_relation_fp32(device, n_samples=10, nvidia_smi_fallback=None, use_locked_clocks=False, cache=None):
44+
def get_frequency_power_relation_fp32(device, n_samples=10, nvidia_smi_fallback=None, use_locked_clocks=False, cache=None, simulation_mode=None):
4545
""" Use NVML and PyCUDA with a synthetic kernel to obtain samples of frequency-power pairs """
4646

4747
# get some numbers about the device
@@ -85,7 +85,7 @@ def get_frequency_power_relation_fp32(device, n_samples=10, nvidia_smi_fallback=
8585

8686
results, _ = tune_kernel("fp32_kernel", fp32_kernel_string, problem_size=(multiprocessor_count, 64),
8787
arguments=arguments, tune_params=tune_params, observers=[nvmlobserver],
88-
verbose=False, quiet=True, metrics=metrics, iterations=10,
88+
verbose=False, quiet=True, metrics=metrics, iterations=10, simulation_mode=simulation_mode,
8989
grid_div_x=[], grid_div_y=[], cache=cache or f"synthetic_fp32_cache_{device_name}.json")
9090

9191
freqs = np.array([res["core_freq"] for res in results])
@@ -147,7 +147,7 @@ def fit_power_frequency_model(freqs, nvml_power):
147147
return clock_threshold + clock_min, fit_parameters, scale_parameters
148148

149149

150-
def create_power_frequency_model(device=0, n_samples=10, verbose=False, nvidia_smi_fallback=None, use_locked_clocks=False, cache=None):
150+
def create_power_frequency_model(device=0, n_samples=10, verbose=False, nvidia_smi_fallback=None, use_locked_clocks=False, cache=None, simulation_mode=None):
151151
""" Calculate the most energy-efficient clock frequency of device
152152
153153
This function uses a performance model to fit the power-frequency curve
@@ -181,7 +181,7 @@ def create_power_frequency_model(device=0, n_samples=10, verbose=False, nvidia_s
181181
:rtype: float
182182
183183
"""
184-
freqs, nvml_power = get_frequency_power_relation_fp32(device, n_samples, nvidia_smi_fallback, use_locked_clocks, cache=cache)
184+
freqs, nvml_power = get_frequency_power_relation_fp32(device, n_samples, nvidia_smi_fallback, use_locked_clocks, cache=cache, simulation_mode=simulation_mode)
185185

186186
if verbose:
187187
print("Clock frequencies:", freqs.tolist())

kernel_tuner/file_utils.py

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
""" This module contains utility functions for operations on files, mostly JSON cache files """
2+
3+
import os
4+
import json
5+
import subprocess
6+
import xmltodict
7+
8+
from importlib.metadata import requires, version, PackageNotFoundError
9+
from packaging.requirements import Requirement
10+
11+
from kernel_tuner import util
12+
13+
schema_dir = os.path.dirname(os.path.realpath(__file__)) + "/schema"
14+
15+
16+
def output_file_schema(target):
    """ Load one of the T4 JSON schemas and report the schema version

    The schema is read from ``<schema_dir>/T4/<version>/<target>-schema.json``.

    :param target: Name of the T4 schema to load, used as the prefix of the schema file name
    :type target: string

    :returns: the current version of the T4 schemas and the contents of the
        target schema as parsed by ``json.load``
    :rtype: string, parsed JSON

    """
    schema_version = "1.0.0"
    schema_path = f"{schema_dir}/T4/{schema_version}/{target}-schema.json"
    with open(schema_path, 'r') as schema_file:
        schema = json.load(schema_file)
    return schema_version, schema
31+
32+
33+
def get_configuration_validity(objective) -> str:
    """ Translate the objective value of a configuration into a validity label

    :param objective: Value stored for the objective of a single configuration
    :type objective: any

    :returns: "correct" when the value is not a util.ErrorConfig, otherwise
        "compile" for util.CompilationFailedConfig, "runtime" for
        util.RuntimeFailedConfig, and "constraints" for any other error
    :rtype: string

    """
    # anything that is not an error marker counts as a valid measurement
    if not isinstance(objective, util.ErrorConfig):
        return "correct"
    if isinstance(objective, util.CompilationFailedConfig):
        return "compile"
    if isinstance(objective, util.RuntimeFailedConfig):
        return "runtime"
    # every remaining kind of ErrorConfig is reported as a constraints violation
    return "constraints"
46+
47+
48+
def filename_ensure_json_extension(filename: str) -> str:
    """ Return filename unchanged if it ends in .json, otherwise with .json appended """
    return filename if filename.endswith(".json") else filename + ".json"
53+
54+
55+
def store_output_file(output_filename, results, tune_params, objective="time"):
    """ Write auto-tuning results to a JSON output file

    This function produces a JSON file that adheres to the T4 auto-tuning output JSON schema.

    :param output_filename: Name of the output file to create, a .json extension is added if missing
    :type output_filename: string

    :param results: Results list as returned by tune_kernel
    :type results: list of dicts

    :param tune_params: Tunable parameters as passed to tune_kernel
    :type tune_params: OrderedDict

    :param objective: The objective used during auto-tuning, default is 'time'.
    :type objective: string

    """
    output_filename = filename_ensure_json_extension(output_filename)

    # keys of a result that are reported in dedicated fields of the record,
    # everything else in a result is treated as a measurement
    reserved_keys = set(tune_params.keys()) | {
        "compile_time", "benchmark_time", "framework_time", "strategy_time",
        "verification_time", "timestamp", "times"
    }

    records = []
    for result in results:
        record = {
            "timestamp": result["timestamp"],
            "configuration": {
                name: value
                for name, value in result.items() if name in tune_params
            },
            # configuration specific timings
            "times": {
                "compilation": result["compile_time"],
                "benchmark": result["benchmark_time"],
                "framework": result["framework_time"],
                "search_algorithm": result["strategy_time"],
                "validation": result["verification_time"],
                "runtimes": result["times"],
            },
            # the validity of the configuration is derived from its objective value
            "invalidity": get_configuration_validity(result[objective]),
            # Kernel Tuner does not support producing results of configs that fail
            # the correctness check, therefore correctness is always 1
            "correctness": 1,
            # measurements gathers everything that was measured
            "measurements": [
                dict(name=key,
                     value=value,
                     unit="ms" if key.startswith("time") else "")
                for key, value in result.items() if key not in reserved_keys
            ],
            # Kernel Tuner currently supports only one objective at a time; this can
            # however be a user-defined metric that combines scores from multiple
            # quantities into a single value to support multi-objective tuning
            "objectives": [objective],
        }
        records.append(record)

    # write the collected records to a JSON file, tagged with the schema version
    schema_version, _ = output_file_schema("results")
    output_json = dict(results=records, schema_version=schema_version)
    with open(output_filename, 'w+') as fh:
        json.dump(output_json, fh)
135+
136+
137+
def get_dependencies(package='kernel_tuner'):
    """ Get the installed Python dependencies of a package and their version numbers

    :param package: Name of the installed distribution whose requirements are listed,
        default is 'kernel_tuner'
    :type package: string

    :returns: one "name==version" entry for every declared requirement that is
        currently installed; an empty list if the package declares no requirements
    :rtype: list of strings

    :raises PackageNotFoundError: if package itself is not installed

    """
    # requires() returns None rather than an empty list when the distribution
    # declares no requirements, so normalize before iterating
    requirements = requires(package) or []
    dep_names = [Requirement(req).name for req in requirements]
    depends = []
    for dep in dep_names:
        try:
            depends.append(f"{dep}=={version(dep)}")
        except PackageNotFoundError:
            # uninstalled packages can not have been used to produce these results
            # so it is safe to ignore
            pass
    return depends
150+
151+
152+
def get_device_query(target):
    """ Get the information about GPUs in the current system

    :param target: Which vendor tool to query, any of ['nvidia', 'amd']
    :type target: string

    :returns: the parsed output of ``nvidia-smi --query -x`` with the
        "processes" entry of every GPU removed, or the parsed output of
        ``rocm-smi --showallinfo --json``
    :rtype: dict

    :raises ValueError: if target is not one of the supported values
    :raises FileNotFoundError: if the vendor tool is not present on this system

    """
    if target == "nvidia":
        nvidia_smi_out = subprocess.run(["nvidia-smi", "--query", "-x"],
                                        capture_output=True)
        nvidia_smi = xmltodict.parse(nvidia_smi_out.stdout)
        gpu_info = nvidia_smi["nvidia_smi_log"]["gpu"]
        # xmltodict returns a list when the <gpu> element is repeated (multi-GPU
        # systems) and a single mapping when there is only one GPU
        gpus = gpu_info if isinstance(gpu_info, list) else [gpu_info]
        for gpu in gpus:
            if isinstance(gpu, dict):
                # drop the process listing; tolerate it being absent
                # NOTE(review): presumably removed because it describes running
                # processes rather than the hardware -- confirm
                gpu.pop("processes", None)
        return nvidia_smi
    elif target == "amd":
        rocm_smi_out = subprocess.run(["rocm-smi", "--showallinfo", "--json"],
                                      capture_output=True)
        return json.loads(rocm_smi_out.stdout)
    else:
        raise ValueError("get_device_query target not supported")
166+
167+
168+
def store_metadata_file(metadata_filename):
    """ Write metadata about the current hardware and software environment to a JSON file

    This function produces a JSON file that adheres to the T4 auto-tuning metadata JSON schema.

    :param metadata_filename: Name of the metadata file to create, a .json extension is added if missing
    :type metadata_filename: string

    """
    metadata_filename = filename_ensure_json_extension(metadata_filename)
    metadata = {}

    # lshw only works on Linux, this intentionally raises a FileNotFoundError when run on systems that do not have it
    lshw_result = subprocess.run(["lshw", "-json"], capture_output=True)

    # sometimes lshw outputs a list of length 1, sometimes just a dict, the schema wants a list
    lshw_text = lshw_result.stdout.decode('utf-8').strip()
    if lshw_text[0] == '{' and lshw_text[-1] == '}':
        lshw_text = '[' + lshw_text + ']'

    metadata["hardware"] = dict(lshw=json.loads(lshw_text))

    # attempt to use nvidia-smi and rocm-smi, a tool that is not found is skipped
    device_query = {}
    for tool, target in (("nvidia-smi", "nvidia"), ("rocm-smi", "amd")):
        try:
            device_query[tool] = get_device_query(target)
        except FileNotFoundError:
            continue

    metadata["environment"] = dict(device_query=device_query,
                                   requirements=get_dependencies())

    # write metadata to a JSON file, tagged with the schema version
    schema_version, _ = output_file_schema("metadata")
    metadata_json = dict(metadata=metadata, schema_version=schema_version)
    with open(metadata_filename, 'w+') as fh:
        json.dump(metadata_json, fh, indent=" ")

kernel_tuner/runners/sequential.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
""" The default runner for sequentially tuning the parameter space """
22
import logging
33
from collections import OrderedDict
4+
from datetime import datetime, timezone
45
from time import perf_counter
56

67
from kernel_tuner.core import DeviceInterface
@@ -106,6 +107,7 @@ def run(self, parameter_space, tuning_options):
106107
total_time = 1000 * (perf_counter() - self.start_time) - warmup_time
107108
params['strategy_time'] = self.last_strategy_time
108109
params['framework_time'] = max(total_time - (params['compile_time'] + params['verification_time'] + params['benchmark_time'] + params['strategy_time']), 0)
110+
params['timestamp'] = str(datetime.now(timezone.utc))
109111
self.start_time = perf_counter()
110112

111113
if result:

0 commit comments

Comments
 (0)