Add detailed performance reporting. (#1090)

mshahneo · web-flow · commit 6bad6786922f · 2025-06-27T09:57:09.000-05:00
Add min, max, avg, median, std_deviation, variance, and middle_third average.

This provides more detailed performance information.
diff --git a/include/imex/ExecutionEngine/ExecutionEngineUtils.h b/include/imex/ExecutionEngine/ExecutionEngineUtils.h
@@ -0,0 +1,36 @@
+//===- ExecutionEngineUtils.h -  Utilities -----------------------===//
+//
+// Copyright 2022 Intel Corporation
+// Part of the IMEX Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file includes utility functions used by the runtime wrappers.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef IMEX_EXECUTIONENGINE_UTILS_H
+#define IMEX_EXECUTIONENGINE_UTILS_H
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <vector>
+
+// Statistics on a vector of floats; non-const vector arguments get reordered.
+float calculateMin(const std::vector<float> &values);
+float calculateMax(const std::vector<float> &values);
+float calculateAverage(const std::vector<float> &values);
+float calculateMedian(std::vector<float> &values);
+float calculateStdDev(const std::vector<float> &values,
+                      float mean = -std::numeric_limits<float>::max());
+float calculateVariance(const std::vector<float> &values,
+                        float mean = -std::numeric_limits<float>::max());
+float calculateP95(std::vector<float> &values);
+float calculateP5(std::vector<float> &values);
+float calculateMiddleThirdAverage(std::vector<float> &values);
+
+#endif // IMEX_EXECUTIONENGINE_UTILS_H
diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt
@@ -1,9 +1,11 @@
+add_subdirectory(ExecutionEngineUtils)
+
 if(IMEX_ENABLE_L0_RUNTIME)
-    add_subdirectory(LEVELZERORUNTIME)
+  add_subdirectory(LEVELZERORUNTIME)
 endif()
 
 if(IMEX_ENABLE_SYCL_RUNTIME)
-    add_subdirectory(SYCLRUNTIME)
+  add_subdirectory(SYCLRUNTIME)
 endif()
 
 add_mlir_library(imex_runner_utils
diff --git a/lib/ExecutionEngine/ExecutionEngineUtils/CMakeLists.txt b/lib/ExecutionEngine/ExecutionEngineUtils/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_mlir_library(imex_execution_engine_utils
+  ExecutionEngineUtils.cpp
+  EXCLUDE_FROM_LIBMLIR
+
+  ADDITIONAL_HEADER_DIRS
+  ${PROJECT_SOURCE_DIR}/imex/ExecutionEngine/
+)
diff --git a/lib/ExecutionEngine/ExecutionEngineUtils/ExecutionEngineUtils.cpp b/lib/ExecutionEngine/ExecutionEngineUtils/ExecutionEngineUtils.cpp
@@ -0,0 +1,150 @@
+//===- ExecutionEngineUtils.cpp -  Utilities -----------------------===//
+//
+// Copyright 2022 Intel Corporation
+// Part of the IMEX Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file includes utility functions used by the runtime wrappers.
+///
+//===----------------------------------------------------------------------===//
+
+#include "imex/ExecutionEngine/ExecutionEngineUtils.h"
+
+// Smallest sample in `values`. An empty vector has no minimum, so the largest
+// finite float is returned as a sentinel instead.
+float calculateMin(const std::vector<float> &values) {
+  const auto first = values.cbegin();
+  const auto last = values.cend();
+  if (first == last)
+    return std::numeric_limits<float>::max();
+  return *std::min_element(first, last);
+}
+
+// Calculate the maximum of a vector of floats. An empty vector yields the most
+// negative finite float; note that std::numeric_limits<float>::min() would be
+// the smallest positive normal value and therefore is not a valid lower bound.
+float calculateMax(const std::vector<float> &values) {
+  if (values.empty()) {
+    return std::numeric_limits<float>::lowest();
+  }
+  return *std::max_element(values.begin(), values.end());
+}
+
+// Arithmetic mean of `values`, accumulated in single precision in element
+// order. An empty vector yields 0.
+float calculateAverage(const std::vector<float> &values) {
+  const size_t count = values.size();
+  if (count == 0)
+    return 0.0f;
+  float total = 0.0f;
+  for (size_t i = 0; i < count; ++i)
+    total += values[i];
+  return total / count;
+}
+
+// Calculate the median of a vector of floats.
+//
+// Returns the middle element for an odd number of samples and the mean of the
+// two middle elements for an even number; an empty vector yields 0.
+// `values` is partially reordered in place: std::nth_element is used instead
+// of a full sort, so only the selected position is guaranteed to be ordered.
+float calculateMedian(std::vector<float> &values) {
+  if (values.empty()) {
+    return 0.0f;
+  }
+  const size_t n = values.size();
+  // 0-based index of the upper middle element (the only middle when n is odd).
+  const size_t mid = n / 2;
+  const auto midIt = values.begin() + mid;
+  std::nth_element(values.begin(), midIt, values.end());
+  const float upper = *midIt;
+  if (n % 2 == 1) {
+    return upper;
+  }
+  // After nth_element everything before `mid` is <= values[mid], so the lower
+  // middle element is simply the largest value of that prefix (n >= 2 here).
+  const float lower = *std::max_element(values.begin(), midIt);
+  return (lower + upper) / 2;
+}
+
+// Calculate the population standard deviation of a vector of floats.
+//
+// `mean` may carry a precomputed average of `values`; when it is left at the
+// default sentinel (-std::numeric_limits<float>::max()) the average is
+// computed from `values`. An empty vector yields 0.
+float calculateStdDev(const std::vector<float> &values, float mean) {
+  // The standard deviation is the square root of the variance, so delegate
+  // the accumulation (including the empty-vector and sentinel handling) to
+  // calculateVariance; this keeps both statistics consistent by construction.
+  // std::sqrt is spelled with its namespace because <cmath> only guarantees
+  // the std:: overloads, and the float overload avoids a round trip through
+  // double.
+  const float variance = calculateVariance(values, mean);
+  return std::sqrt(variance);
+}
+
+// Population variance of `values`: the mean of the squared deviations from
+// `mean`. Passing the default sentinel, -std::numeric_limits<float>::max(),
+// requests that the average be derived from `values` first. An empty vector
+// yields 0.
+float calculateVariance(const std::vector<float> &values, float mean) {
+  if (values.empty())
+    return 0.0f;
+  const float sentinel = -std::numeric_limits<float>::max();
+  const float center = (mean == sentinel) ? calculateAverage(values) : mean;
+  float squaredDeviations = 0.0f;
+  for (const float sample : values) {
+    squaredDeviations += (sample - center) * (sample - center);
+  }
+  return squaredDeviations / values.size();
+}
+
+// 95th percentile of `values`: the element that would sit at index
+// floor(0.95 * size), clamped to the last index, if the vector were sorted.
+// `values` is partially reordered in place; an empty vector yields 0.
+float calculateP95(std::vector<float> &values) {
+  const size_t count = values.size();
+  if (count == 0)
+    return 0.0f;
+  const size_t rank = std::min(static_cast<size_t>(0.95 * count), count - 1);
+  const auto target = values.begin() + rank;
+  std::nth_element(values.begin(), target, values.end());
+  return *target;
+}
+
+// 5th percentile of `values`: the element that would sit at index
+// floor(0.05 * size), clamped to the last index, if the vector were sorted.
+// `values` is partially reordered in place; an empty vector yields 0.
+float calculateP5(std::vector<float> &values) {
+  const size_t count = values.size();
+  if (count == 0)
+    return 0.0f;
+  const size_t rank = std::min(static_cast<size_t>(0.05 * count), count - 1);
+  const auto target = values.begin() + rank;
+  std::nth_element(values.begin(), target, values.end());
+  return *target;
+}
+
+// Mean of the central portion of `values`: the vector is sorted in place and
+// the lowest and highest size / 3 elements are left out of the average.
+// Vectors with fewer than three elements fall back to calculateMedian, and an
+// empty vector yields 0.
+float calculateMiddleThirdAverage(std::vector<float> &values) {
+  const size_t count = values.size();
+  if (count == 0)
+    return 0.0f;
+  if (count < 3)
+    return calculateMedian(values);
+  std::sort(values.begin(), values.end());
+  const size_t trimmed = count / 3; // elements dropped from each end
+  const auto first = values.cbegin() + trimmed;
+  const auto last = values.cend() - trimmed;
+  float total = 0.0f;
+  for (auto it = first; it != last; ++it)
+    total += *it;
+  return total / (count - 2 * trimmed);
+}
diff --git a/lib/ExecutionEngine/LEVELZERORUNTIME/CMakeLists.txt b/lib/ExecutionEngine/LEVELZERORUNTIME/CMakeLists.txt
@@ -4,7 +4,7 @@
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#      http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
@@ -15,17 +15,20 @@
 find_package(LevelZero)
 
 if(NOT LevelZero_FOUND)
-    message(FATAL_ERROR "LevelZero not found. Please set LEVEL_ZERO_DIR.")
+  message(FATAL_ERROR "LevelZero not found. Please set LEVEL_ZERO_DIR.")
 endif()
 
 add_mlir_library(level-zero-runtime
-    SHARED
-    LevelZeroRuntimeWrappers.cpp
+  SHARED
+  LevelZeroRuntimeWrappers.cpp
 
-    EXCLUDE_FROM_LIBMLIR
-  )
+  EXCLUDE_FROM_LIBMLIR
 
-target_compile_options (level-zero-runtime PUBLIC -fexceptions)
+  LINK_LIBS
+  imex_execution_engine_utils
+)
+
+target_compile_options(level-zero-runtime PUBLIC -fexceptions)
 
 target_link_libraries(level-zero-runtime PRIVATE LevelZero::LevelZero)
 
diff --git a/lib/ExecutionEngine/LEVELZERORUNTIME/LevelZeroRuntimeWrappers.cpp b/lib/ExecutionEngine/LEVELZERORUNTIME/LevelZeroRuntimeWrappers.cpp
@@ -12,9 +12,13 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include "imex/ExecutionEngine/ExecutionEngineUtils.h"
+
+#include <algorithm>
 #include <atomic>
 #include <cassert>
 #include <cfloat>
+#include <cmath>
 #include <cstdint>
 #include <cstdio>
 #include <cstdlib>
@@ -513,9 +517,6 @@ static ze_event_handle_t launchKernel(GPUL0QUEUE *queue,
   ze_group_count_t launchArgs = {castSz(gridX), castSz(gridY), castSz(gridZ)};
 
   if (getenv("IMEX_ENABLE_PROFILING")) {
-    auto executionTime = 0.0f;
-    auto maxTime = 0.0f;
-    auto minTime = FLT_MAX;
     auto rounds = 1000;
     auto warmups = 3;
 
@@ -547,6 +548,8 @@ static ze_event_handle_t launchKernel(GPUL0QUEUE *queue,
         warmups = runs;
     }
 
+    std::vector<float> executionTime(rounds, 0.0);
+
     // warmup
     for (int r = 0; r < warmups; r++) {
       enqueueKernel(queue->zeCommandList_, kernel, &launchArgs, params,
@@ -574,17 +577,21 @@ static ze_event_handle_t launchKernel(GPUL0QUEUE *queue,
       auto endTime =
           tstampEvent.get_profiling_info<imex::profiling::command_end>();
       auto duration = float(endTime - startTime) / 1000000.0f;
-      executionTime += duration;
-      if (duration > maxTime)
-        maxTime = duration;
-      if (duration < minTime)
-        minTime = duration;
+      executionTime[r] = duration;
     }
     deallocDeviceMemory(queue, cache);
+
+    // Print profiling results
     fprintf(stdout,
             "the kernel execution time is (ms, on L0 runtime):"
-            "avg: %.4f, min: %.4f, max: %.4f (over %d runs)\n",
-            executionTime / rounds, minTime, maxTime, rounds);
+            "avg: %.4f, min: %.4f, max: %.4f, median: %4f, std_deviation: %4f, "
+            "variance: %4f, P95: %4f, P5: %4f, median_one_third_avg: %4f (over "
+            "%d runs)\n",
+            calculateAverage(executionTime), calculateMin(executionTime),
+            calculateMax(executionTime), calculateMedian(executionTime),
+            calculateStdDev(executionTime), calculateVariance(executionTime),
+            calculateP95(executionTime), calculateP5(executionTime),
+            calculateMiddleThirdAverage(executionTime), rounds);
   }
 
   Event *event = new Event(queue->zeContext_, queue->zeDevice_);
diff --git a/lib/ExecutionEngine/SYCLRUNTIME/CMakeLists.txt b/lib/ExecutionEngine/SYCLRUNTIME/CMakeLists.txt
@@ -4,7 +4,7 @@
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#      http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
@@ -31,17 +31,22 @@ add_mlir_library(sycl-runtime
     SyclRuntimeWrappers.cpp
 
     EXCLUDE_FROM_LIBMLIR
-  )
+
+    LINK_LIBS
+    imex_execution_engine_utils
+)
 
 check_cxx_compiler_flag("-frtti" CXX_HAS_FRTTI_FLAG)
+
 if(NOT CXX_HAS_FRTTI_FLAG)
     message(FATAL_ERROR "CXX compiler does not accept flag -frtti")
 endif()
-target_compile_options (sycl-runtime PUBLIC -fexceptions -frtti)
+
+target_compile_options(sycl-runtime PUBLIC -fexceptions -frtti)
 
 target_include_directories(sycl-runtime PRIVATE
     ${MLIR_INCLUDE_DIRS}
-    )
+)
 
 target_link_libraries(sycl-runtime PRIVATE LevelZero::LevelZero SyclRuntime::SyclRuntime)
 
diff --git a/lib/ExecutionEngine/SYCLRUNTIME/SyclRuntimeWrappers.cpp b/lib/ExecutionEngine/SYCLRUNTIME/SyclRuntimeWrappers.cpp
diff --git a/lib/Utils/CMakeLists.txt b/lib/Utils/CMakeLists.txt