Skip to content

Commit 6bad678

Browse files
authored
Add detailed performance reporting. (#1090)
Add min, max, avg, median, std_deviation, variance, and middle_third average. This provides more detailed performance information.
1 parent b83c345 commit 6bad678

File tree

9 files changed

+261
-44
lines changed

9 files changed

+261
-44
lines changed
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
//===- ExecutionEngineUtils.h - Utilities -----------------------===//
2+
//
3+
// Copyright 2022 Intel Corporation
4+
// Part of the IMEX Project, under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
//
8+
//===----------------------------------------------------------------------===//
9+
///
10+
/// \file
11+
/// This file includes utility functions used by the runtime wrappers.
12+
///
13+
//===----------------------------------------------------------------------===//
14+
15+
#ifndef IMEX_EXECUTIONENGINE_UTILS_H
16+
#define IMEX_EXECUTIONENGINE_UTILS_H
17+
18+
#include <algorithm>
19+
#include <cmath>
20+
#include <limits>
21+
#include <vector>
22+
23+
// Utilities for calculating statistics on a vector of floats.
// The definitions live in ExecutionEngineUtils.cpp.
24+
// Smallest element of `values`.
float calculateMin(const std::vector<float> &values);
25+
// Largest element of `values`.
float calculateMax(const std::vector<float> &values);
26+
// Arithmetic mean of `values`; 0.0f for an empty vector.
float calculateAverage(const std::vector<float> &values);
27+
// Median of `values`; 0.0f for an empty vector. Takes a mutable reference
// because the implementation reorders the elements with std::nth_element.
float calculateMedian(std::vector<float> &values);
28+
// Standard deviation of `values` (squared deviations are divided by the
// element count); 0.0f for an empty vector. Leaving `mean` at its default
// sentinel makes the implementation compute the mean from `values`.
float calculateStdDev(const std::vector<float> &values,
29+
float mean = -std::numeric_limits<float>::max());
30+
// Variance of `values` (squared deviations are divided by the element
// count); 0.0f for an empty vector. Leaving `mean` at its default sentinel
// makes the implementation compute the mean from `values`.
float calculateVariance(const std::vector<float> &values,
31+
float mean = -std::numeric_limits<float>::max());
32+
// Element at index floor(0.95 * size) of the ordered values; 0.0f for an
// empty vector. Reorders `values` in place (std::nth_element).
float calculateP95(std::vector<float> &values);
33+
// Element at index floor(0.05 * size) of the ordered values; 0.0f for an
// empty vector. Reorders `values` in place (std::nth_element).
float calculateP5(std::vector<float> &values);
34+
// Average of the middle third of the sorted values; 0.0f for an empty
// vector. Sorts `values` in place when it holds three or more elements and
// falls back to calculateMedian for fewer.
float calculateMiddleThirdAverage(std::vector<float> &values);
35+
36+
#endif // IMEX_EXECUTIONENGINE_UTILS_H

lib/ExecutionEngine/CMakeLists.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1+
add_subdirectory(ExecutionEngineUtils)
2+
13
if(IMEX_ENABLE_L0_RUNTIME)
2-
add_subdirectory(LEVELZERORUNTIME)
4+
add_subdirectory(LEVELZERORUNTIME)
35
endif()
46

57
if(IMEX_ENABLE_SYCL_RUNTIME)
6-
add_subdirectory(SYCLRUNTIME)
8+
add_subdirectory(SYCLRUNTIME)
79
endif()
810

911
add_mlir_library(imex_runner_utils
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Builds the imex_execution_engine_utils library from ExecutionEngineUtils.cpp.
# The runtime wrapper libraries list this target under LINK_LIBS.
add_mlir_library(imex_execution_engine_utils
2+
ExecutionEngineUtils.cpp
3+
EXCLUDE_FROM_LIBMLIR
4+
5+
ADDITIONAL_HEADER_DIRS
6+
${PROJECT_SOURCE_DIR}/imex/ExecutionEngine/
7+
)
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
//===- ExecutionEngineUtils.cpp - Utilities -----------------------===//
2+
//
3+
// Copyright 2022 Intel Corporation
4+
// Part of the IMEX Project, under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
//
8+
//===----------------------------------------------------------------------===//
9+
///
10+
/// \file
11+
/// This file includes utility functions used by the runtime wrappers.
12+
///
13+
//===----------------------------------------------------------------------===//
14+
15+
#include "imex/ExecutionEngine/ExecutionEngineUtils.h"
16+
17+
// Returns the smallest element of `values`.
// An empty vector yields std::numeric_limits<float>::max() as a sentinel.
float calculateMin(const std::vector<float> &values) {
  // std::min_element returns the end iterator for an empty range, which
  // doubles as the emptiness check.
  const auto smallest = std::min_element(values.cbegin(), values.cend());
  return smallest == values.cend() ? std::numeric_limits<float>::max()
                                   : *smallest;
}
26+
27+
// Calculate the maximum of a vector of floats.
// An empty vector yields std::numeric_limits<float>::lowest(), the most
// negative finite float, mirroring calculateMin's use of max() as its
// empty-input sentinel.
float calculateMax(const std::vector<float> &values) {
  if (values.empty()) {
    // Note: numeric_limits<float>::min() is the smallest *positive* normal
    // value, so lowest() is the correct "below everything" sentinel.
    return std::numeric_limits<float>::lowest();
  }

  return *std::max_element(values.begin(), values.end());
}
36+
37+
// Calculate the average of a vector of floats
38+
float calculateAverage(const std::vector<float> &values) {
39+
if (values.empty()) {
40+
return 0.0f;
41+
}
42+
float sum = 0.0f;
43+
for (const auto &value : values) {
44+
sum += value;
45+
}
46+
return sum / values.size();
47+
}
48+
49+
// Calculate the median of a vector of floats.
// Returns 0.0f for an empty vector. For an odd element count the middle
// element is returned; for an even count the two middle elements are
// averaged. The elements of `values` are reordered in place.
float calculateMedian(std::vector<float> &values) {
  if (values.empty()) {
    return 0.0f;
  }

  const size_t n = values.size();
  // Zero-based index of the upper middle element (the middle one when n is
  // odd). This is always a valid index for a non-empty vector.
  const size_t mid = n / 2;
  const auto midIt = values.begin() + mid;

  // Partial ordering is enough: after this call *midIt is the element that
  // would sit at `mid` in a full sort and everything before it is <= *midIt.
  std::nth_element(values.begin(), midIt, values.end());
  const float upper = *midIt;
  if (n % 2 == 1) {
    return upper;
  }

  // n is even (hence mid >= 1): the lower middle element is the largest of
  // the elements partitioned in front of `mid`.
  const float lower = *std::max_element(values.begin(), midIt);
  return (lower + upper) / 2.0f;
}
73+
74+
// Calculate standard deviation of a vector of floats
75+
float calculateStdDev(const std::vector<float> &values, float mean) {
76+
if (values.empty()) {
77+
return 0.0f;
78+
}
79+
// If mean is -std::numeric_limits<float>::max(), calculate it from the values
80+
// This is useful for cases where the mean is not precomputed
81+
mean = (mean == -std::numeric_limits<float>::max()) ? calculateAverage(values)
82+
: mean;
83+
float sum = 0.0f;
84+
for (const auto &value : values) {
85+
sum += (value - mean) * (value - mean);
86+
}
87+
return sqrt(sum / values.size());
88+
}
89+
90+
// Calculate variance of a vector of floats
91+
float calculateVariance(const std::vector<float> &values, float mean) {
92+
if (values.empty()) {
93+
return 0.0f;
94+
}
95+
// If mean is -std::numeric_limits<float>::max(), calculate it from the values
96+
// This is useful for cases where the mean is not precomputed
97+
mean = (mean == -std::numeric_limits<float>::max()) ? calculateAverage(values)
98+
: mean;
99+
float sum = 0.0f;
100+
for (const auto &value : values) {
101+
sum += (value - mean) * (value - mean);
102+
}
103+
return sum / values.size();
104+
}
105+
106+
// Returns the element found at index floor(0.95 * size) once `values` is
// partially ordered around that position. An empty vector yields 0.0f.
// The elements of `values` are reordered in place.
float calculateP95(std::vector<float> &values) {
  const size_t count = values.size();
  if (count == 0)
    return 0.0f;

  // Truncate to an index and clamp it to the last valid position.
  const size_t rank =
      std::min(static_cast<size_t>(0.95 * count), count - 1);
  const auto target = values.begin() + rank;
  std::nth_element(values.begin(), target, values.end());
  return *target;
}
118+
119+
// Returns the element found at index floor(0.05 * size) once `values` is
// partially ordered around that position. An empty vector yields 0.0f.
// The elements of `values` are reordered in place.
float calculateP5(std::vector<float> &values) {
  const size_t count = values.size();
  if (count == 0)
    return 0.0f;

  // Truncate to an index and clamp it to the last valid position.
  const size_t rank =
      std::min(static_cast<size_t>(0.05 * count), count - 1);
  const auto target = values.begin() + rank;
  std::nth_element(values.begin(), target, values.end());
  return *target;
}
131+
132+
// Calculate average of middle 1/3 of a vector of floats, it ignores the first
133+
// and last third of the sorted values
134+
float calculateMiddleThirdAverage(std::vector<float> &values) {
135+
if (values.empty()) {
136+
return 0.0f;
137+
}
138+
size_t n = values.size();
139+
if (n < 3) {
140+
return calculateMedian(values);
141+
}
142+
std::sort(values.begin(), values.end());
143+
size_t start = n / 3;
144+
size_t end = n - start;
145+
float sum = 0.0f;
146+
for (size_t i = start; i < end; ++i) {
147+
sum += values[i];
148+
}
149+
return sum / (end - start);
150+
}

lib/ExecutionEngine/LEVELZERORUNTIME/CMakeLists.txt

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# you may not use this file except in compliance with the License.
55
# You may obtain a copy of the License at
66
#
7-
# http://www.apache.org/licenses/LICENSE-2.0
7+
# http://www.apache.org/licenses/LICENSE-2.0
88
#
99
# Unless required by applicable law or agreed to in writing, software
1010
# distributed under the License is distributed on an "AS IS" BASIS,
@@ -15,17 +15,20 @@
1515
find_package(LevelZero)
1616

1717
if(NOT LevelZero_FOUND)
18-
message(FATAL_ERROR "LevelZero not found. Please set LEVEL_ZERO_DIR.")
18+
message(FATAL_ERROR "LevelZero not found. Please set LEVEL_ZERO_DIR.")
1919
endif()
2020

2121
add_mlir_library(level-zero-runtime
22-
SHARED
23-
LevelZeroRuntimeWrappers.cpp
22+
SHARED
23+
LevelZeroRuntimeWrappers.cpp
2424

25-
EXCLUDE_FROM_LIBMLIR
26-
)
25+
EXCLUDE_FROM_LIBMLIR
2726

28-
target_compile_options (level-zero-runtime PUBLIC -fexceptions)
27+
LINK_LIBS
28+
imex_execution_engine_utils
29+
)
30+
31+
target_compile_options(level-zero-runtime PUBLIC -fexceptions)
2932

3033
target_link_libraries(level-zero-runtime PRIVATE LevelZero::LevelZero)
3134

lib/ExecutionEngine/LEVELZERORUNTIME/LevelZeroRuntimeWrappers.cpp

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,13 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
#include "imex/ExecutionEngine/ExecutionEngineUtils.h"
16+
17+
#include <algorithm>
1518
#include <atomic>
1619
#include <cassert>
1720
#include <cfloat>
21+
#include <cmath>
1822
#include <cstdint>
1923
#include <cstdio>
2024
#include <cstdlib>
@@ -513,9 +517,6 @@ static ze_event_handle_t launchKernel(GPUL0QUEUE *queue,
513517
ze_group_count_t launchArgs = {castSz(gridX), castSz(gridY), castSz(gridZ)};
514518

515519
if (getenv("IMEX_ENABLE_PROFILING")) {
516-
auto executionTime = 0.0f;
517-
auto maxTime = 0.0f;
518-
auto minTime = FLT_MAX;
519520
auto rounds = 1000;
520521
auto warmups = 3;
521522

@@ -547,6 +548,8 @@ static ze_event_handle_t launchKernel(GPUL0QUEUE *queue,
547548
warmups = runs;
548549
}
549550

551+
std::vector<float> executionTime(rounds, 0.0);
552+
550553
// warmup
551554
for (int r = 0; r < warmups; r++) {
552555
enqueueKernel(queue->zeCommandList_, kernel, &launchArgs, params,
@@ -574,17 +577,21 @@ static ze_event_handle_t launchKernel(GPUL0QUEUE *queue,
574577
auto endTime =
575578
tstampEvent.get_profiling_info<imex::profiling::command_end>();
576579
auto duration = float(endTime - startTime) / 1000000.0f;
577-
executionTime += duration;
578-
if (duration > maxTime)
579-
maxTime = duration;
580-
if (duration < minTime)
581-
minTime = duration;
580+
executionTime[r] = duration;
582581
}
583582
deallocDeviceMemory(queue, cache);
583+
584+
// Print profiling results
584585
fprintf(stdout,
585586
"the kernel execution time is (ms, on L0 runtime):"
586-
"avg: %.4f, min: %.4f, max: %.4f (over %d runs)\n",
587-
executionTime / rounds, minTime, maxTime, rounds);
587+
"avg: %.4f, min: %.4f, max: %.4f, median: %4f, std_deviation: %4f, "
588+
"variance: %4f, P95: %4f, P5: %4f, median_one_third_avg: %4f (over "
589+
"%d runs)\n",
590+
calculateAverage(executionTime), calculateMin(executionTime),
591+
calculateMax(executionTime), calculateMedian(executionTime),
592+
calculateStdDev(executionTime), calculateVariance(executionTime),
593+
calculateP95(executionTime), calculateP5(executionTime),
594+
calculateMiddleThirdAverage(executionTime), rounds);
588595
}
589596

590597
Event *event = new Event(queue->zeContext_, queue->zeDevice_);

lib/ExecutionEngine/SYCLRUNTIME/CMakeLists.txt

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# you may not use this file except in compliance with the License.
55
# You may obtain a copy of the License at
66
#
7-
# http://www.apache.org/licenses/LICENSE-2.0
7+
# http://www.apache.org/licenses/LICENSE-2.0
88
#
99
# Unless required by applicable law or agreed to in writing, software
1010
# distributed under the License is distributed on an "AS IS" BASIS,
@@ -31,17 +31,22 @@ add_mlir_library(sycl-runtime
3131
SyclRuntimeWrappers.cpp
3232

3333
EXCLUDE_FROM_LIBMLIR
34-
)
34+
35+
LINK_LIBS
36+
imex_execution_engine_utils
37+
)
3538

3639
check_cxx_compiler_flag("-frtti" CXX_HAS_FRTTI_FLAG)
40+
3741
if(NOT CXX_HAS_FRTTI_FLAG)
3842
message(FATAL_ERROR "CXX compiler does not accept flag -frtti")
3943
endif()
40-
target_compile_options (sycl-runtime PUBLIC -fexceptions -frtti)
44+
45+
target_compile_options(sycl-runtime PUBLIC -fexceptions -frtti)
4146

4247
target_include_directories(sycl-runtime PRIVATE
4348
${MLIR_INCLUDE_DIRS}
44-
)
49+
)
4550

4651
target_link_libraries(sycl-runtime PRIVATE LevelZero::LevelZero SyclRuntime::SyclRuntime)
4752

0 commit comments

Comments
 (0)