KernelTuner
diff --git a/‎CHANGELOG.md‎
Lines changed: 1 addition & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎examples/cuda/vector_add.py‎
Lines changed: 8 additions & 5 deletions b/‎examples/cuda/vector_add.py‎
Lines changed: 8 additions & 5 deletions
diff --git a/‎examples/opencl/vector_add.py‎
Lines changed: 26 additions & 10 deletions b/‎examples/opencl/vector_add.py‎
Lines changed: 26 additions & 10 deletions
diff --git a/‎kernel_tuner/energy/energy.py‎
Lines changed: 4 additions & 4 deletions b/‎kernel_tuner/energy/energy.py‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎kernel_tuner/file_utils.py‎
Lines changed: 211 additions & 0 deletions b/‎kernel_tuner/file_utils.py‎
Lines changed: 211 additions & 0 deletions
diff --git a/‎kernel_tuner/runners/sequential.py‎
Lines changed: 2 additions & 0 deletions b/‎kernel_tuner/runners/sequential.py‎
Lines changed: 2 additions & 0 deletions
@@ -8,6 +8,7 @@ This project adheres to [Semantic Versioning](http://semver.org/).
 - Support for using time_limit in simulation mode
 - Helper functions for energy tuning
 - Example to show ridge frequency and power-frequency model
+- Functions to store tuning output and metadata
 
 ### Changed
 - Changed what timings are stored in cache files
 
@@ -1,9 +1,9 @@
 #!/usr/bin/env python
 """This is the minimal example from the README"""
 
-import json
 import numpy
 from kernel_tuner import tune_kernel
+from kernel_tuner.file_utils import store_output_file, store_metadata_file
 
 def tune():
 
@@ -28,12 +28,15 @@ def tune():
     tune_params = dict()
     tune_params["block_size_x"] = [128+64*i for i in range(15)]
 
-    result = tune_kernel("vector_add", kernel_string, size, args, tune_params)
+    results, env = tune_kernel("vector_add", kernel_string, size, args, tune_params)
 
-    with open("vector_add.json", 'w') as fp:
-        json.dump(result, fp)
+    # Store the tuning results in an output file
+    store_output_file("vector_add.json", results, tune_params)
 
-    return result
+    # Store the metadata of this run
+    store_metadata_file("vector_add-metadata.json")
+
+    return results
 
 
 if __name__ == "__main__":
 
@@ -1,8 +1,13 @@
 #!/usr/bin/env python
+"""This is the minimal example from the README"""
+
 import numpy
 from kernel_tuner import tune_kernel
+from kernel_tuner.file_utils import store_output_file, store_metadata_file
+
+def tune():
 
-kernel_string = """
+    kernel_string = """
 __kernel void vector_add(__global float *c, __global const float *a, __global const float *b, int n) {
     int i = get_global_id(0);
     if (i<n) {
@@ -11,17 +16,28 @@
 }
 """
 
-size = 10000000
+    size = 10000000
+
+    a = numpy.random.rand(size).astype(numpy.float32)
+    b = numpy.random.rand(size).astype(numpy.float32)
+    c = numpy.zeros_like(a)
+    n = numpy.int32(size)
+
+    args = [c, a, b, n]
+
+    tune_params = dict()
+    tune_params["block_size_x"] = [128+64*i for i in range(15)]
+
+    results, env = tune_kernel("vector_add", kernel_string, size, args, tune_params)
 
-a = numpy.random.rand(size).astype(numpy.float32)
-b = numpy.random.rand(size).astype(numpy.float32)
-c = numpy.zeros_like(a)
-n = numpy.int32(size)
+    # Store the tuning results in an output file
+    store_output_file("vector_add.json", results, tune_params)
 
-args = [c, a, b, n]
+    # Store the metadata of this run
+    store_metadata_file("vector_add-metadata.json")
 
-tune_params = dict()
-tune_params["block_size_x"] = [128+64*i for i in range(15)]
+    return results
 
-tune_kernel("vector_add", kernel_string, size, args, tune_params)
 
+if __name__ == "__main__":
+    tune()
@@ -41,7 +41,7 @@
 }
 """
 
-def get_frequency_power_relation_fp32(device, n_samples=10, nvidia_smi_fallback=None, use_locked_clocks=False, cache=None):
+def get_frequency_power_relation_fp32(device, n_samples=10, nvidia_smi_fallback=None, use_locked_clocks=False, cache=None, simulation_mode=None):
     """ Use NVML and PyCUDA with a synthetic kernel to obtain samples of frequency-power pairs """
 
     # get some numbers about the device
@@ -85,7 +85,7 @@ def get_frequency_power_relation_fp32(device, n_samples=10, nvidia_smi_fallback=
 
     results, _ = tune_kernel("fp32_kernel", fp32_kernel_string, problem_size=(multiprocessor_count, 64),
                              arguments=arguments, tune_params=tune_params, observers=[nvmlobserver],
-                             verbose=False, quiet=True, metrics=metrics, iterations=10,
+                             verbose=False, quiet=True, metrics=metrics, iterations=10, simulation_mode=simulation_mode,
                              grid_div_x=[], grid_div_y=[], cache=cache or f"synthetic_fp32_cache_{device_name}.json")
 
     freqs = np.array([res["core_freq"] for res in results])
@@ -147,7 +147,7 @@ def fit_power_frequency_model(freqs, nvml_power):
     return clock_threshold + clock_min, fit_parameters, scale_parameters
 
 
-def create_power_frequency_model(device=0, n_samples=10, verbose=False, nvidia_smi_fallback=None, use_locked_clocks=False, cache=None):
+def create_power_frequency_model(device=0, n_samples=10, verbose=False, nvidia_smi_fallback=None, use_locked_clocks=False, cache=None, simulation_mode=None):
     """ Calculate the most energy-efficient clock frequency of device
 
     This function uses a performance model to fit the power-frequency curve
@@ -181,7 +181,7 @@ def create_power_frequency_model(device=0, n_samples=10, verbose=False, nvidia_s
     :rtype: float
 
     """
-    freqs, nvml_power = get_frequency_power_relation_fp32(device, n_samples, nvidia_smi_fallback, use_locked_clocks, cache=cache)
+    freqs, nvml_power = get_frequency_power_relation_fp32(device, n_samples, nvidia_smi_fallback, use_locked_clocks, cache=cache, simulation_mode=simulation_mode)
 
     if verbose:
         print("Clock frequencies:", freqs.tolist())
 
@@ -0,0 +1,211 @@
+""" This module contains utility functions for operations on files, mostly JSON cache files """
+
+import os
+import json
+import subprocess
+import xmltodict
+
+from importlib.metadata import requires, version, PackageNotFoundError
+from packaging.requirements import Requirement
+
+from kernel_tuner import util
+
+schema_dir = os.path.dirname(os.path.realpath(__file__)) + "/schema"
+
+
+def output_file_schema(target):
+    """ Get the requested JSON schema and the version number
+
+    :param target: Name of the T4 schema to return, should be any of ['results', 'metadata']
+    :type target: string
+
+    :returns: the current version of the T4 schemas and the parsed JSON content of the target schema
+    :rtype: string, parsed JSON object (as returned by json.load)
+
+    """
+    current_version = "1.0.0"
+    output_file = schema_dir + f"/T4/{current_version}/{target}-schema.json"
+    with open(output_file, 'r') as fh:
+        json_string = json.load(fh)
+    return current_version, json_string
+
+
+def get_configuration_validity(objective) -> str:
+    """ Convert internal Kernel Tuner error to string """
+    errorstring: str
+    if not isinstance(objective, util.ErrorConfig):
+        errorstring = "correct"
+    else:
+        if isinstance(objective, util.CompilationFailedConfig):
+            errorstring = "compile"
+        elif isinstance(objective, util.RuntimeFailedConfig):
+            errorstring = "runtime"
+        else:
+            errorstring = "constraints"
+    return errorstring
+
+
+def filename_ensure_json_extension(filename: str) -> str:
+    """ Check if the filename has a .json extension, if not, add it """
+    if filename[-5:] != ".json":
+        filename += ".json"
+    return filename
+
+
+def store_output_file(output_filename, results, tune_params, objective="time"):
+    """ Store the obtained auto-tuning results in a JSON output file
+
+    This function produces a JSON file that adheres to the T4 auto-tuning output JSON schema.
+
+    :param output_filename: Name of the output file to be created
+    :type output_filename: string
+
+    :param results: Results list as returned by tune_kernel
+    :type results: list of dicts
+
+    :param tune_params: Tunable parameters as passed to tune_kernel
+    :type tune_params: OrderedDict
+
+    :param objective: The objective used during auto-tuning, default is 'time'.
+    :type objective: string
+
+    """
+    output_filename = filename_ensure_json_extension(output_filename)
+
+    timing_keys = [
+        "compile_time", "benchmark_time", "framework_time", "strategy_time",
+        "verification_time"
+    ]
+    not_measurement_keys = list(
+        tune_params.keys()) + timing_keys + ["timestamp"] + ["times"]
+
+    output_data = []
+
+    for result in results:
+
+        out = {}
+
+        out["timestamp"] = result["timestamp"]
+        out["configuration"] = {
+            k: v
+            for k, v in result.items() if k in tune_params
+        }
+
+        # collect configuration specific timings
+        timings = dict()
+        timings["compilation"] = result["compile_time"]
+        timings["benchmark"] = result["benchmark_time"]
+        timings["framework"] = result["framework_time"]
+        timings["search_algorithm"] = result["strategy_time"]
+        timings["validation"] = result["verification_time"]
+        timings["runtimes"] = result["times"]
+        out["times"] = timings
+
+        # encode the validity of the configuration
+        out["invalidity"] = get_configuration_validity(result[objective])
+
+        # Kernel Tuner does not support producing results of configs that fail the correctness check,
+        # therefore correctness is always 1
+        out["correctness"] = 1
+
+        # measurements gathers everything that was measured
+        measurements = []
+        for key, value in result.items():
+            if key not in not_measurement_keys:
+                measurements.append(
+                    dict(name=key,
+                         value=value,
+                         unit="ms" if key.startswith("time") else ""))
+        out["measurements"] = measurements
+
+        # objectives
+        # Kernel Tuner currently supports only one objective at a time. That objective can, however,
+        # be a user-defined metric that combines scores from multiple quantities into a single value,
+        # which is how multi-objective tuning can be supported.
+        out["objectives"] = [objective]
+
+        # append to output
+        output_data.append(out)
+
+    # write output_data to a JSON file
+    version, _ = output_file_schema("results")
+    output_json = dict(results=output_data, schema_version=version)
+    with open(output_filename, 'w+') as fh:
+        json.dump(output_json, fh)
+
+
+def get_dependencies(package='kernel_tuner'):
+    """ Get the Python dependencies of Kernel Tuner currently installed and their version numbers """
+    requirements = requires(package)
+    deps = [Requirement(req).name for req in requirements]
+    depends = []
+    for dep in deps:
+        try:
+            depends.append(f"{dep}=={version(dep)}")
+        except PackageNotFoundError:
+            # packages that are not installed cannot have been used to produce these results,
+            # so it is safe to ignore them
+            pass
+    return depends
+
+
+def get_device_query(target):
+    """ Get the information about GPUs in the current system, target is any of ['nvidia', 'amd'] """
+    if target == "nvidia":
+        nvidia_smi_out = subprocess.run(["nvidia-smi", "--query", "-x"],
+                                        capture_output=True)
+        nvidia_smi = xmltodict.parse(nvidia_smi_out.stdout)
+        del nvidia_smi["nvidia_smi_log"]["gpu"]["processes"]
+        return nvidia_smi
+    elif target == "amd":
+        rocm_smi_out = subprocess.run(["rocm-smi", "--showallinfo", "--json"],
+                                      capture_output=True)
+        return json.loads(rocm_smi_out.stdout)
+    else:
+        raise ValueError("get_device_query target not supported")
+
+
+def store_metadata_file(metadata_filename):
+    """ Store the metadata about the current hardware and software environment in a JSON output file
+
+    This function produces a JSON file that adheres to the T4 auto-tuning metadata JSON schema.
+
+    :param metadata_filename: Name of the metadata file to be created
+    :type metadata_filename: string
+
+    """
+    metadata_filename = filename_ensure_json_extension(metadata_filename)
+    metadata = {}
+
+    # lshw only works on Linux, this intentionally raises a FileNotFoundError when run on systems that do not have it
+    lshw_out = subprocess.run(["lshw", "-json"], capture_output=True)
+
+    # sometimes lshw outputs a list of length 1, sometimes just a dict; the schema wants a list
+    lshw_string = lshw_out.stdout.decode('utf-8').strip()
+    if lshw_string[0] == '{' and lshw_string[-1] == '}':
+        lshw_string = '[' + lshw_string + ']'
+
+    metadata["hardware"] = dict(lshw=json.loads(lshw_string))
+
+    # attempts to use nvidia-smi or rocm-smi if present
+    device_query = {}
+    try:
+        device_query['nvidia-smi'] = get_device_query("nvidia")
+    except FileNotFoundError:
+        # ignore if nvidia-smi is not found
+        pass
+
+    try:
+        device_query['rocm-smi'] = get_device_query("amd")
+    except FileNotFoundError:
+        # ignore if rocm-smi is not found
+        pass
+
+    metadata["environment"] = dict(device_query=device_query,
+                                   requirements=get_dependencies())
+
+    # write metadata to JSON file
+    version, _ = output_file_schema("metadata")
+    metadata_json = dict(metadata=metadata, schema_version=version)
+    with open(metadata_filename, 'w+') as fh:
+        json.dump(metadata_json, fh, indent="  ")
@@ -1,6 +1,7 @@
 """ The default runner for sequentially tuning the parameter space """
 import logging
 from collections import OrderedDict
+from datetime import datetime, timezone
 from time import perf_counter
 
 from kernel_tuner.core import DeviceInterface
@@ -106,6 +107,7 @@ def run(self, parameter_space, tuning_options):
             total_time = 1000 * (perf_counter() - self.start_time) - warmup_time
             params['strategy_time'] = self.last_strategy_time
             params['framework_time'] = max(total_time - (params['compile_time'] + params['verification_time'] + params['benchmark_time'] + params['strategy_time']), 0)
+            params['timestamp'] = str(datetime.now(timezone.utc))
             self.start_time = perf_counter()
 
             if result: