 import numpy as np
 import pytest
 
-from kernel_tuner import util, tune_kernel
+from kernel_tuner import util, tune_kernel, core
+from kernel_tuner.interface import Options, _kernel_options, _device_options, _tuning_options
+from kernel_tuner.runners.sequential import SequentialRunner
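+# Options and the _*_options dicts are used by test_runner below to build the option bags a runner expects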
 
 from .context import skip_if_no_pycuda
 
-cache_filename = os.path.dirname(os.path.realpath(__file__)) + "/test_cache_file.json"
+cache_filename = os.path.dirname(
+    os.path.realpath(__file__)) + "/test_cache_file.json"
 
 
 @pytest.fixture
@@ -61,37 +64,55 @@ def test_sequential_runner_alt_block_size_names(env):
 
     block_size_names = ["block_dim_x"]
 
-    result, _ = tune_kernel(*env, grid_div_x=["block_dim_x"], answer=answer, block_size_names=block_size_names)
+    result, _ = tune_kernel(*env,
+                            grid_div_x=["block_dim_x"],
+                            answer=answer,
+                            block_size_names=block_size_names)
 
     assert len(result) == len(tune_params["block_dim_x"])
 
 
 @skip_if_no_pycuda
 def test_smem_args(env):
-    result, _ = tune_kernel(*env, smem_args=dict(size="block_size_x*4"), verbose=True)
+    result, _ = tune_kernel(*env,
+                            smem_args=dict(size="block_size_x*4"),
+                            verbose=True)
     tune_params = env[-1]
     assert len(result) == len(tune_params["block_size_x"])
-    result, _ = tune_kernel(*env, smem_args=dict(size=lambda p: p['block_size_x'] * 4), verbose=True)
+    result, _ = tune_kernel(
+        *env,
+        smem_args=dict(size=lambda p: p['block_size_x'] * 4),
+        verbose=True)
     tune_params = env[-1]
     assert len(result) == len(tune_params["block_size_x"])
 
 
 @skip_if_no_pycuda
 def test_build_cache(env):
     if not os.path.isfile(cache_filename):
-        result, _ = tune_kernel(*env, cache=cache_filename, verbose=False, quiet=True)
+        result, _ = tune_kernel(*env,
+                                cache=cache_filename,
+                                verbose=False,
+                                quiet=True)
         tune_params = env[-1]
         assert len(result) == len(tune_params["block_size_x"])
 
 
 def test_simulation_runner(env):
     kernel_name, kernel_string, size, args, tune_params = env
     start = time.perf_counter()
-    result, res_env = tune_kernel(*env, cache=cache_filename, strategy="random_sample", simulation_mode=True, strategy_options=dict(fraction=1))
-    actual_time = (time.perf_counter() - start) * 1e3  # ms
+    result, res_env = tune_kernel(*env,
+                                  cache=cache_filename,
+                                  strategy="random_sample",
+                                  simulation_mode=True,
+                                  strategy_options=dict(fraction=1))
+    actual_time = (time.perf_counter() - start) * 1e3  # ms
     assert len(result) == len(tune_params["block_size_x"])
 
-    timings = ['total_framework_time', 'total_strategy_time', 'total_compile_time', 'total_benchmark_time', 'overhead_time']
+    timings = [
+        'total_framework_time', 'total_strategy_time', 'total_compile_time',
+        'total_benchmark_time', 'overhead_time'
+    ]
 
     # ensure all keys are there and non zero
     assert all(key in res_env for key in timings)
@@ -111,7 +132,12 @@ def test_simulation_runner(env):
 
 
 def test_diff_evo(env):
-    result, _ = tune_kernel(*env, strategy="diff_evo", strategy_options=dict(popsize=5), verbose=True, cache=cache_filename, simulation_mode=True)
+    result, _ = tune_kernel(*env,
+                            strategy="diff_evo",
+                            strategy_options=dict(popsize=5),
+                            verbose=True,
+                            cache=cache_filename,
+                            simulation_mode=True)
     assert len(result) > 0
 
 
@@ -120,14 +146,25 @@ def test_time_keeping(env):
     kernel_name, kernel_string, size, args, tune_params = env
     answer = [args[1] + args[2], None, None, None]
 
-    options = dict(method="uniform", popsize=10, maxiter=1, mutation_chance=1, max_fevals=10)
+    options = dict(method="uniform",
+                   popsize=10,
+                   maxiter=1,
+                   mutation_chance=1,
+                   max_fevals=10)
     start = time.perf_counter()
-    result, env = tune_kernel(*env, strategy="genetic_algorithm", strategy_options=options, verbose=True, answer=answer)
-    max_time = (time.perf_counter() - start) * 1e3  # ms
+    result, env = tune_kernel(*env,
+                              strategy="genetic_algorithm",
+                              strategy_options=options,
+                              verbose=True,
+                              answer=answer)
+    max_time = (time.perf_counter() - start) * 1e3  # ms
 
     assert len(result) >= 10
 
-    timings = ['total_framework_time', 'total_strategy_time', 'total_compile_time', 'total_verification_time', 'total_benchmark_time', 'overhead_time']
+    timings = [
+        'total_framework_time', 'total_strategy_time', 'total_compile_time',
+        'total_verification_time', 'total_benchmark_time', 'overhead_time'
+    ]
 
     # ensure all keys are there and non zero
     assert all(key in env for key in timings)
@@ -142,15 +179,27 @@ def test_time_keeping(env):
 
 
 def test_bayesian_optimization(env):
-    for method in ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast"]:
+    for method in [
+            "poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced",
+            "multi-fast"
+    ]:
         print(method, flush=True)
         options = dict(popsize=5, max_fevals=10, method=method)
-        result, _ = tune_kernel(*env, strategy="bayes_opt", strategy_options=options, verbose=True, cache=cache_filename, simulation_mode=True)
+        result, _ = tune_kernel(*env,
+                                strategy="bayes_opt",
+                                strategy_options=options,
+                                verbose=True,
+                                cache=cache_filename,
+                                simulation_mode=True)
         assert len(result) > 0
 
 
 def test_random_sample(env):
-    result, _ = tune_kernel(*env, strategy="random_sample", strategy_options={"fraction": 0.1}, cache=cache_filename, simulation_mode=True)
+    result, _ = tune_kernel(*env,
+                            strategy="random_sample",
+                            strategy_options={"fraction": 0.1},
+                            cache=cache_filename,
+                            simulation_mode=True)
     # check that number of benchmarked kernels is 10% (rounded up)
     assert len(result) == 2
     # check all returned results make sense
@@ -182,7 +231,66 @@ def test_interface_handles_compile_failures(env):
     }
     """
 
-    results, env = tune_kernel(kernel_name, kernel_string, size, args, tune_params, verbose=True)
+    results, env = tune_kernel(kernel_name,
+                               kernel_string,
+                               size,
+                               args,
+                               tune_params,
+                               verbose=True)
 
-    failed_config = [record for record in results if record["block_size_x"] == 256][0]
+    failed_config = [
+        record for record in results if record["block_size_x"] == 256
+    ][0]
     assert isinstance(failed_config["time"], util.CompilationFailedConfig)
+
+
+@skip_if_no_pycuda
+def test_runner(env):
+
+    kernel_name, kernel_source, problem_size, arguments, tune_params = env
+
+    # create KernelSource
+    kernelsource = core.KernelSource(kernel_name,
+                                     kernel_source,
+                                     lang=None,
+                                     defines=None)
+
+    # create option bags
+    device = 0
+    atol = 1e-6
+    platform = 0
+    iterations = 7
+    verbose = False
+    objective = "time"
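+    # locals() captures the variables defined above by name; each option bag
+    # below picks out only the keys it needs (missing keys default to None)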
+    opts = locals()
+    kernel_options = Options([(k, opts.get(k, None))
+                              for k in _kernel_options.keys()])
+    tuning_options = Options([(k, opts.get(k, None))
+                              for k in _tuning_options.keys()])
+    device_options = Options([(k, opts.get(k, None))
+                              for k in _device_options.keys()])
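+    # no cache file is used, so results are neither read from nor written to disk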
+    tuning_options.cachefile = None
+
+    # create runner
+    runner = SequentialRunner(kernelsource,
+                              kernel_options,
+                              device_options,
+                              iterations,
+                              observers=None)
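+    # with observers=None, no additional observers collect measurements during benchmarking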
+    runner.warmed_up = True  # disable warm up for this test
+
+    # select a config to run
+    searchspace = []
+
+    # insert configurations to run with this runner in this list
+    # each configuration is described as a list of values, one for each tunable parameter
+    # the order should correspond to the order of parameters specified in tune_params
+    searchspace.append(
+        [32])  # vector_add only has one tunable parameter (block_size_x)
+
+    # call the runner
+    results, _ = runner.run(searchspace, kernel_options, tuning_options)
+
+    assert len(results) == 1
+    assert results[0]['block_size_x'] == 32
+    assert len(results[0]['times']) == iterations