Add support for a custom per-workload validation function

AlexanderKalistratov · ZzEeKkAa · commit 768d9bc3bbd4 · 2023-12-26T16:20:01.000-05:00
diff --git a/dpbench/benchmarks/pca/pca_validate.py b/dpbench/benchmarks/pca/pca_validate.py
@@ -0,0 +1,11 @@
+# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+from dpbench.infrastructure.benchmark_validation import (
+    validate as default_validate,
+)
+
+
+def validate(expected: dict[str, any], actual: dict[str, any], rel_error=1e-05):
+    """Validate pca results with the default validator (TODO: pca-specific)."""
+    return default_validate(expected, actual, rel_error)
diff --git a/dpbench/config/benchmark.py b/dpbench/config/benchmark.py
@@ -80,6 +80,8 @@ class Benchmark:
     module_name: str = ""
     package_path: str = ""
     func_name: str = ""
+    validate_package_path: str = ""
+    validate_func_name: str = ""
     kind: str = ""
     domain: str = ""
     parameters: Presets = field(default_factory=Presets)
@@ -100,6 +102,8 @@ def from_dict(obj: Any) -> "Benchmark":
         _module_name = str(obj.get("module_name") or "")
         _package_path = str(obj.get("package_path") or "")
         _func_name = str(obj.get("func_name") or "")
+        _validate_package_path = str(obj.get("validate_package_path") or "")
+        _validate_func_name = str(obj.get("validate_func_name") or "validate")
         _kind = str(obj.get("kind") or "")
         _domain = str(obj.get("domain") or "")
         _parameters = Presets(obj.get("parameters"))
@@ -122,6 +126,8 @@ def from_dict(obj: Any) -> "Benchmark":
             _module_name,
             _package_path,
             _func_name,
+            _validate_package_path,
+            _validate_func_name,
             _kind,
             _domain,
             _parameters,
diff --git a/dpbench/config/reader.py b/dpbench/config/reader.py
@@ -340,6 +340,7 @@ def read_benchmark_implementations(
 
     setup_init(config, modules)
     set_default_reference_implementation_postfix(config, modules)
+    set_validate_func(config, modules)
 
     for module in modules:
         module_name, postfix = discover_module_name_and_postfix(module, config)
@@ -386,6 +387,49 @@ def read_benchmark_implementations(
         )
 
 
+def set_validate_func(
+    config: Benchmark,
+    modules: set[str] = None,
+):
+    """Read, discover and populate config with validation module and function.
+
+    Validation package priority, if found/set:
+    1. package specified in config
+    2. validation package at <benchmark>/<benchmark>_validate.py
+    3. default validation package
+
+    Args:
+        config: Benchmark configuration object where settings should be
+            populated.
+        modules: Set of module names available for the benchmark.
+    """
+    if config.validate_package_path != "":
+        if importlib.util.find_spec(config.validate_package_path) is None:
+            logging.fatal(
+                f"validation package path is specified but not found for {config.module_name}"
+            )
+    else:
+        validate_package_path = "dpbench.infrastructure.benchmark_validation"
+
+        for module_name in [
+            config.short_name + "_validate",
+            config.module_name + "_validate",
+        ]:
+            if modules and module_name in modules:
+                validate_package_path = config.package_path + "." + module_name
+                break
+
+        config.validate_package_path = validate_package_path
+
+    val_mod = importlib.import_module(config.validate_package_path)
+
+    if not hasattr(val_mod, config.validate_func_name):
+        logging.fatal(
+            f"validation function '{config.validate_func_name}' not found for "
+            + f"{config.module_name} at '{config.validate_package_path}'"
+        )
+
+
 def set_default_reference_implementation_postfix(
     config: Benchmark,
     modules: set[str] = None,
diff --git a/dpbench/configs/framework_info/dpcpp.toml b/dpbench/configs/framework_info/dpcpp.toml
@@ -10,7 +10,7 @@ postfix = "dpcpp"
 class = "DpcppFramework"
 arch = "cpu"
 sycl_device = "cpu"
-dpcpp_version = "IntelLLVM 2023.2.0"
+dpcpp_version = "IntelLLVM 2024.0.0"
 
 [[framework.postfixes]]
 impl_postfix = "sycl"
diff --git a/dpbench/infrastructure/benchmark.py b/dpbench/infrastructure/benchmark.py
@@ -52,6 +52,12 @@ def init_mod_path(self):
     def init_fn_name(self):
         return self.info.init.func_name if self.info.init else None
 
+    def get_validation_func(self):
+        """Return the validation callable configured for this benchmark."""
+        module_path = self.info.validate_package_path
+        validation_module = importlib.import_module(module_path)
+        return getattr(validation_module, self.info.validate_func_name)
+
     def get_implementation(self, implementation_postfix: str):
         implementation = None
 
diff --git a/dpbench/infrastructure/benchmark_runner.py b/dpbench/infrastructure/benchmark_runner.py
@@ -15,7 +15,6 @@
 import dpbench.config as cfg
 from dpbench.infrastructure.benchmark import Benchmark
 from dpbench.infrastructure.benchmark_results import BenchmarkResults
-from dpbench.infrastructure.benchmark_validation import validate_results
 from dpbench.infrastructure.datamodel import store_results
 from dpbench.infrastructure.enums import ErrorCodes, ValidationStatusCodes
 from dpbench.infrastructure.frameworks import Framework
@@ -263,15 +262,24 @@ def run_benchmark(
 
         if rc.validate and results.error_state == ErrorCodes.SUCCESS:
             ref_framework = build_framework(rc.ref_framework)
+            # TODO: don't run it for every framework, but run it only once.
             ref_output = _exec_simple(
                 bench,
                 ref_framework,
                 rc.benchmark.reference_implementation_postfix,
                 rc.preset,
             )
-            if validate_results(ref_output, output):
-                results.validation_state = ValidationStatusCodes.SUCCESS
-            else:
+
+            if ref_output:
+                try:
+                    results.validation_state = ValidationStatusCodes.SUCCESS
+                    validate = bench.get_validation_func()
+                    validated = validate(ref_output, output)
+                except Exception as e:
+                    logging.error(f"Exception during validation {e.args}")
+                    validated = False
+
+            if not ref_output or not validated:
                 results.validation_state = ValidationStatusCodes.FAILURE
                 results.error_state = ErrorCodes.FAILED_VALIDATION
                 results.error_msg = "Validation failed"
diff --git a/dpbench/infrastructure/benchmark_validation.py b/dpbench/infrastructure/benchmark_validation.py
@@ -11,10 +11,12 @@
 import numpy as np
 
 
-def validate_results(
-    expected: dict[str, any], actual: dict[str, any], rel_error=1e-05
-) -> bool:
-    """Checks if expected equals actual with certain precision.
+def validate(
+    expected: dict[str, any],
+    actual: dict[str, any],
+    rel_error=1e-05,
+):
+    """Default validation function.
 
     Args:
         expected: expected values.
@@ -23,25 +25,20 @@ def validate_results(
 
     Returns: true, if provided data is equal.
     """
-    if not expected:
-        return False
-
-    try:
-        for key in expected.keys():
-            valid = validate_two_lists_of_array(
-                expected[key], actual[key], rel_error=rel_error
+    valid = True
+    for key in expected:
+        # Compare every key and log only the ones that really differ.
+        if not validate_two_lists_of_array(
+            expected[key], actual[key], rel_error=rel_error
+        ):
+            valid = False
+            logging.error(
+                (
+                    "Output did not match for {0}. Expected: {1} Actual: {2}"
+                ).format(key, expected[key], actual[key])
             )
-            if not valid:
-                logging.error(
-                    (
-                        "Output did not match for {0}. "
-                        + "Expected: {1} Actual: {2}"
-                    ).format(key, expected[key], actual[key])
-                )
-        return valid
-    except Exception as e:
-        logging.error(f"Exception during validation {e.args}")
-        return False
+
+    return valid
 
 
 def validate_two_lists_of_array(
@@ -93,6 +90,11 @@ def relative_error(
 
     Returns: relative error.
     """
-    if np.linalg.norm(ref) == 0.0:
-        return 0.0
-    return np.linalg.norm(ref - val) / np.linalg.norm(ref)
+    ref_norm = np.linalg.norm(ref)
+    # Zero reference: normalize by val instead; both zero means no error.
+    if ref_norm == 0:
+        val_norm = np.linalg.norm(val)
+        if val_norm == 0:
+            return 0.0
+        ref_norm = val_norm
+    return np.linalg.norm(ref - val) / ref_norm