Merge pull request #186 from KernelTuner/additional_tests

benvanwerkhoven · web-flow · commit 653f617bc292 · 2023-01-13T14:43:49.000+01:00
Additional tests
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,7 @@ This project adheres to [Semantic Versioning](http://semver.org/).
 - Support for using time_limit in simulation mode
 - Helper functions for energy tuning
 - Example to show ridge frequency and power-frequency model
+- Functions to store tuning output and metadata
 
 ### Changed
 - Changed what timings are stored in cache files
diff --git a/examples/cuda/vector_add.py b/examples/cuda/vector_add.py
@@ -1,9 +1,9 @@
 #!/usr/bin/env python
 """This is the minimal example from the README"""
 
-import json
 import numpy
 from kernel_tuner import tune_kernel
+from kernel_tuner.file_utils import store_output_file, store_metadata_file
 
 def tune():
 
@@ -28,12 +28,15 @@ def tune():
     tune_params = dict()
     tune_params["block_size_x"] = [128+64*i for i in range(15)]
 
-    result = tune_kernel("vector_add", kernel_string, size, args, tune_params)
+    results, env = tune_kernel("vector_add", kernel_string, size, args, tune_params)
 
-    with open("vector_add.json", 'w') as fp:
-        json.dump(result, fp)
+    # Store the tuning results in an output file
+    store_output_file("vector_add.json", results, tune_params)
 
-    return result
+    # Store the metadata of this run
+    store_metadata_file("vector_add-metadata.json")
+
+    return results
 
 
 if __name__ == "__main__":
diff --git a/examples/opencl/vector_add.py b/examples/opencl/vector_add.py
@@ -1,8 +1,13 @@
 #!/usr/bin/env python
+"""This is the minimal example from the README"""
+
 import numpy
 from kernel_tuner import tune_kernel
+from kernel_tuner.file_utils import store_output_file, store_metadata_file
+
+def tune():
 
-kernel_string = """
+    kernel_string = """
 __kernel void vector_add(__global float *c, __global const float *a, __global const float *b, int n) {
     int i = get_global_id(0);
     if (i<n) {
@@ -11,17 +16,28 @@
 }
 """
 
-size = 10000000
+    size = 10000000
+
+    a = numpy.random.rand(size).astype(numpy.float32)
+    b = numpy.random.rand(size).astype(numpy.float32)
+    c = numpy.zeros_like(a)
+    n = numpy.int32(size)
+
+    args = [c, a, b, n]
+
+    tune_params = dict()
+    tune_params["block_size_x"] = [128+64*i for i in range(15)]
+
+    results, env = tune_kernel("vector_add", kernel_string, size, args, tune_params)
 
-a = numpy.random.rand(size).astype(numpy.float32)
-b = numpy.random.rand(size).astype(numpy.float32)
-c = numpy.zeros_like(a)
-n = numpy.int32(size)
+    # Store the tuning results in an output file
+    store_output_file("vector_add.json", results, tune_params)
 
-args = [c, a, b, n]
+    # Store the metadata of this run
+    store_metadata_file("vector_add-metadata.json")
 
-tune_params = dict()
-tune_params["block_size_x"] = [128+64*i for i in range(15)]
+    return results
 
-tune_kernel("vector_add", kernel_string, size, args, tune_params)
 
+if __name__ == "__main__":
+    tune()
diff --git a/kernel_tuner/energy/energy.py b/kernel_tuner/energy/energy.py
@@ -41,7 +41,7 @@
 }
 """
 
-def get_frequency_power_relation_fp32(device, n_samples=10, nvidia_smi_fallback=None, use_locked_clocks=False, cache=None):
+def get_frequency_power_relation_fp32(device, n_samples=10, nvidia_smi_fallback=None, use_locked_clocks=False, cache=None, simulation_mode=None):
     """ Use NVML and PyCUDA with a synthetic kernel to obtain samples of frequency-power pairs """
 
     # get some numbers about the device
@@ -85,7 +85,7 @@ def get_frequency_power_relation_fp32(device, n_samples=10, nvidia_smi_fallback=
 
     results, _ = tune_kernel("fp32_kernel", fp32_kernel_string, problem_size=(multiprocessor_count, 64),
                              arguments=arguments, tune_params=tune_params, observers=[nvmlobserver],
-                             verbose=False, quiet=True, metrics=metrics, iterations=10,
+                             verbose=False, quiet=True, metrics=metrics, iterations=10, simulation_mode=simulation_mode,
                              grid_div_x=[], grid_div_y=[], cache=cache or f"synthetic_fp32_cache_{device_name}.json")
 
     freqs = np.array([res["core_freq"] for res in results])
@@ -147,7 +147,7 @@ def fit_power_frequency_model(freqs, nvml_power):
     return clock_threshold + clock_min, fit_parameters, scale_parameters
 
 
-def create_power_frequency_model(device=0, n_samples=10, verbose=False, nvidia_smi_fallback=None, use_locked_clocks=False, cache=None):
+def create_power_frequency_model(device=0, n_samples=10, verbose=False, nvidia_smi_fallback=None, use_locked_clocks=False, cache=None, simulation_mode=None):
     """ Calculate the most energy-efficient clock frequency of device
 
     This function uses a performance model to fit the power-frequency curve
@@ -181,7 +181,7 @@ def create_power_frequency_model(device=0, n_samples=10, verbose=False, nvidia_s
     :rtype: float
 
     """
-    freqs, nvml_power = get_frequency_power_relation_fp32(device, n_samples, nvidia_smi_fallback, use_locked_clocks, cache=cache)
+    freqs, nvml_power = get_frequency_power_relation_fp32(device, n_samples, nvidia_smi_fallback, use_locked_clocks, cache=cache, simulation_mode=simulation_mode)
 
     if verbose:
         print("Clock frequencies:", freqs.tolist())
diff --git a/kernel_tuner/file_utils.py b/kernel_tuner/file_utils.py
@@ -165,27 +165,41 @@ def get_device_query(target):
         raise ValueError("get_device_query target not supported")
 
 
-def store_metadata_file(metadata_filename, target="nvidia"):
+def store_metadata_file(metadata_filename):
     """ Store the metadata about the current hardware and software environment in a JSON output file
 
     This function produces a JSON file that adheres to the T4 auto-tuning metadata JSON schema.
 
     :param metadata_filename: Name of the to be created metadata file
     :type metadata_filename: string
 
-    :param target: Target specifies whether to include the metadata of the 'nvidia' or 'amd' GPUs in the system
-    :type target: string
-
     """
     metadata_filename = filename_ensure_json_extension(metadata_filename)
     metadata = {}
 
     # lshw only works on Linux, this intentionally raises a FileNotFoundError when ran on systems that do not have it
     lshw_out = subprocess.run(["lshw", "-json"], capture_output=True)
-    metadata["hardware"] = dict(lshw=json.loads(lshw_out.stdout))
 
-    # only works if nvidia-smi (for NVIDIA) or rocm-smi (for AMD) is present, raises FileNotFoundError when not present
-    device_query = get_device_query(target)
+    # lshw sometimes outputs a list of length 1 and sometimes a bare dict; the schema wants a list
+    lshw_string = lshw_out.stdout.decode('utf-8').strip()
+    if lshw_string[0] == '{' and lshw_string[-1] == '}':
+        lshw_string = '[' + lshw_string + ']'
+
+    metadata["hardware"] = dict(lshw=json.loads(lshw_string))
+
+    # query both nvidia-smi and rocm-smi; a tool that is not installed is skipped
+    device_query = {}
+    try:
+        device_query['nvidia-smi'] = get_device_query("nvidia")
+    except FileNotFoundError:
+        # ignore if nvidia-smi is not found
+        pass
+
+    try:
+        device_query['rocm-smi'] = get_device_query("amd")
+    except FileNotFoundError:
+        # ignore if rocm-smi is not found
+        pass
 
     metadata["environment"] = dict(device_query=device_query,
                                    requirements=get_dependencies())
diff --git a/test/test_energy.py b/test/test_energy.py
@@ -8,6 +8,6 @@
 @skip_if_no_pycuda
 def test_create_power_frequency_model():
 
-    ridge_frequency, freqs, nvml_power, fitted_params, scaling = energy.create_power_frequency_model(cache=cache_filename)
+    ridge_frequency, freqs, nvml_power, fitted_params, scaling = energy.create_power_frequency_model(cache=cache_filename, simulation_mode=True)
     assert ridge_frequency == 1350
 
diff --git a/test/test_file_utils.py b/test/test_file_utils.py
@@ -11,41 +11,46 @@
 def test_store_output_file(env):
     # setup variables
     filename = "test_output_file.json"
-    results, _ = tune_kernel(*env, cache=cache_filename, simulation_mode=True)
-    tune_params = env[-1]
 
-    # run store_output_file
-    store_output_file(filename, results, tune_params)
+    try:
+        results, _ = tune_kernel(*env, cache=cache_filename, simulation_mode=True)
+        tune_params = env[-1]
+
+        # run store_output_file
+        store_output_file(filename, results, tune_params)
 
-    # retrieve output file
-    _, schema = output_file_schema("results")
-    with open(filename) as json_file:
-        output_json = json.load(json_file)
+        # retrieve output file
+        _, schema = output_file_schema("results")
+        with open(filename) as json_file:
+            output_json = json.load(json_file)
 
-    # validate
-    validate(output_json, schema=schema)
+        # validate
+        validate(output_json, schema=schema)
 
-    # clean up
-    delete_temp_file(filename)
+    finally:
+        # clean up
+        delete_temp_file(filename)
 
 
 def test_store_metadata_file():
     # setup variables
     filename = "test_metadata_file.json"
 
-    # run store_metadata_file
     try:
-        store_metadata_file(filename, target="nvidia")
-    except FileNotFoundError:
-        pytest.skip("'lshw' or 'nvidia-smi' not present on this system")
-
-    # retrieve metadata file
-    _, schema = output_file_schema("metadata")
-    with open(filename) as json_file:
-        metadata_json = json.load(json_file)
-
-    # validate
-    validate(metadata_json, schema=schema)
-
-    # clean up
-    delete_temp_file(filename)
+        # run store_metadata_file
+        try:
+            store_metadata_file(filename)
+        except FileNotFoundError:
+            pytest.skip("'lshw' not present on this system")
+
+        # retrieve metadata file
+        _, schema = output_file_schema("metadata")
+        with open(filename) as json_file:
+            metadata_json = json.load(json_file)
+
+        # validate
+        validate(metadata_json, schema=schema)
+
+    finally:
+        # clean up
+        delete_temp_file(filename)