Skip to content

Commit ee91fd9

Browse files
committed
harness: basic warmup detection using CUSUM change point detection and thresholds set on avg relative duration stability (1) and slope grade between change points
1 parent 7062b23 commit ee91fd9

File tree

1 file changed

+97
-3
lines changed

1 file changed

+97
-3
lines changed

graalpython/benchmarks/src/harness.py

Lines changed: 97 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,93 @@
5555
ATTR_TEARDOWN = '__teardown__'
5656

5757

58+
# ----------------------------------------------------------------------------------------------------------------------
59+
#
60+
# the CUSUM method adapted for warmup detection within a given threshold (initial iterations)
61+
#
62+
# ----------------------------------------------------------------------------------------------------------------------
63+
def zeros(n):
    """
    create a list filled with integer zeros

    :param n: the number of elements; zero or a negative count yields an empty list
    :return: a new list of length n in which every element is 0
    """
    return [0] * n
65+
66+
67+
def append(arr, val):
    """
    build a new list holding the items of arr followed by val (arr itself is never modified)

    :param arr: the existing items; a list or any other iterable (e.g. a tuple)
    :param val: the item to place at the end
    :return: a new list with val as its last element
    """
    # copy first so the caller's sequence stays untouched, then append in place on the copy;
    # going through list() also makes non-list inputs work instead of failing on list + tuple
    result = list(arr)
    result.append(val)
    return result
72+
73+
74+
def cusum(values, threshold=1.0, drift=0.0):
    """
    detect change points in a series with a two-sided CUSUM (cumulative sum) scheme

    The differences between consecutive values are accumulated separately for upward and downward movement.
    Whenever one of the two sums exceeds the threshold, the current index is recorded as a change point and
    both sums are restarted from zero.

    :param values: the series to inspect (indexable, with a length)
    :param threshold: the accumulated change that must be exceeded for an index to be reported
    :param drift: an amount subtracted from both sums at every step, damping slow trends
    :return: the list of indexes (in increasing order) at which a change was detected; empty if there is none
    """
    change_points = []
    # only the previous value of each sum is ever needed, so plain scalars are enough
    csum_pos, csum_neg = 0, 0
    for i in range(1, len(values)):
        diff = values[i] - values[i - 1]
        csum_pos = csum_pos + diff - drift
        csum_neg = csum_neg - diff - drift

        # a sum never goes below zero: movement in the opposite direction simply restarts it
        if csum_pos < 0:
            csum_pos = 0
        if csum_neg < 0:
            csum_neg = 0

        if csum_pos > threshold or csum_neg > threshold:
            change_points.append(i)
            csum_pos, csum_neg = 0, 0

    return change_points
92+
93+
94+
def avg(values):
    """
    compute the arithmetic mean of a sequence of numbers

    :param values: a non-empty sequence of numbers (it must support len())
    :return: the mean as a float
    :raises ZeroDivisionError: if values is empty
    """
    # float() keeps the division a true division even for integer inputs
    return float(sum(values)) / len(values)
96+
97+
98+
def norm(values):
    """
    rescale the values linearly to the 0 .. 100 range (min-max normalization)

    :param values: a sequence of numbers
    :return: a new list of floats where the smallest input maps to 0.0 and the largest to 100.0; an empty input
             yields an empty list and an input without any spread (all values equal) yields all 0.0
    """
    if len(values) == 0:
        return []

    _max, _min = max(values), min(values)
    spread = _max - _min
    if spread == 0:
        # every value equals the minimum; dividing by the zero spread would raise ZeroDivisionError
        return [0.0 for _ in values]
    return [float(v - _min) / spread * 100.0 for v in values]
101+
102+
103+
def pairwise_slopes(values, cp):
    """
    compute the absolute slope of every segment joining two consecutive points

    :param values: the y coordinate of each point
    :param cp: the x coordinate of each point (at least as many entries as values)
    :return: a list with len(values) - 1 non-negative floats (empty for fewer than two points); a segment whose
             two points share the same x coordinate has no finite slope and is reported as infinity
    """
    slopes = []
    for i in range(len(values) - 1):
        rise = float(values[i + 1] - values[i])
        run = float(cp[i + 1] - cp[i])
        if run == 0:
            # a zero-length run would raise ZeroDivisionError; infinity never passes a "slope <= limit" check
            slopes.append(float('inf'))
        else:
            slopes.append(abs(rise / run))
    return slopes
105+
106+
107+
def detect_warmup(values, cp_threshold=0.03, stability_slope_grade=0.01):
    """
    detect the warmup point (iteration / run) in a series of run durations

    The durations are normalized to the 0 .. 100 range and split at the change points reported by cusum().
    The first change point after which the average of the remaining runs is within cp_threshold is taken as the
    end of the warmup. If no such point exists (noise in the data), the first change point after which the
    average stops changing by more than stability_slope_grade per run is used instead.

    :param values: the durations for each run
    :param cp_threshold: the difference in value for a point to be considered a change point, as a fraction of
                         the duration range (0.03 means 3%)
    :param stability_slope_grade: the slope grade as a fraction (0.01 means 1%, roughly 0.57 degrees)
    :return: the index of the first run after the warmup, or -1 if not detected; a returned index is always
             smaller than len(values), so values[index:] is never empty
    """
    # the durations are normalized to 0 .. 100, so both limits are turned into percentages as well
    stability_slope_grade *= 100.0
    cp_threshold *= 100

    try:
        # normalizing inside the try block: a failure on degenerate input is reported like any other
        # detection failure instead of propagating to the caller
        values = norm(values)
        run_count = len(values)

        cp = cusum(values, threshold=cp_threshold)
        rolling_avg = [avg(values[i:]) for i in cp]

        # find the point where the duration avg is below the cp threshold; a change point on the very last
        # run is skipped because it leaves no run after the warmup
        for i, d in enumerate(rolling_avg):
            if d <= cp_threshold and cp[i] + 1 < run_count:
                return cp[i] + 1

        # could not find something below the CP threshold (noise in the data), use the stabilisation of slopes;
        # the last 10% of the runs serve as reference points following the last change point
        tail_start = run_count - int(run_count * 0.1)
        if tail_start >= run_count:
            tail_start = run_count - 1
        if cp:
            # keep the x positions strictly increasing: never revisit or duplicate a change point
            tail_start = max(tail_start, cp[-1] + 1)
        tail_idx = list(range(max(tail_start, 0), run_count))

        slopes = pairwise_slopes(rolling_avg + [values[j] for j in tail_idx], cp + tail_idx)

        # only the segments that start at a change point can mark the end of the warmup
        for i, d in enumerate(slopes[:len(cp)]):
            if d <= stability_slope_grade:
                return cp[i] + 1

        return -1
    except Exception as e:
        # best effort: warmup detection must never abort the benchmark report
        print("exception occurred while detecting warmup: %s" % e)
        return -1
143+
144+
58145
def ccompile(name, code):
59146
from importlib import invalidate_caches
60147
from distutils.core import setup, Extension
@@ -178,11 +265,18 @@ def run(self):
178265
print(_HRULE)
179266
print("### teardown ... ")
180267
self._call_attr(ATTR_TEARDOWN)
268+
warmup_iter = detect_warmup(durations)
181269
print("### benchmark complete")
182270
print(_HRULE)
183-
print("### BEST duration: %.3f s" % min(durations))
184-
print("### WORST duration: %.3f s" % max(durations))
185-
print("### AVG duration: %.3f" % (sum(durations) / len(durations)))
271+
print("### BEST duration: %.3f s" % min(durations))
272+
print("### WORST duration: %.3f s" % max(durations))
273+
print("### AVG (with warmup) duration: %.3f s" % (sum(durations) / len(durations)))
274+
if warmup_iter > 0:
275+
print("### WARMUP detected at iteration: %d" % warmup_iter)
276+
no_warmup_durations = durations[warmup_iter:]
277+
print("### AVG (no warmup) duration: %.3f s" % (sum(no_warmup_durations) / len(no_warmup_durations)))
278+
else:
279+
print("### WARMUP could not be detected")
186280
print(_HRULE)
187281

188282

0 commit comments

Comments
 (0)