Skip to content

Commit da63f2b

Browse files
committed
[GR-26124] Add command for benchmark bisection
PullRequest: graalpython/1288
2 parents 859e7de + 4c49a53 commit da63f2b

File tree

4 files changed

+327
-1
lines changed

4 files changed

+327
-1
lines changed

bisect-benchmark.ini

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
[bisect-benchmark]
2+
# This is the configuration file for bisecting benchmark jobs in the CI.
3+
# Usage:
4+
# - Create a temporary branch based on master (or the bad commit)
5+
# - Fill in this configuration file, commit the changes and push it
6+
# - Execute the bisect-benchmark job for the commit. When you click the "create
7+
# PR" link, you can run jobs in commits UI (Actions). You need to wait a bit
8+
# for the job enumerator to populate the job list. You don't need to actually
9+
# create any PR.
10+
# - Wait for the jobs to finish. You should get an email when it's done
11+
12+
# The command to build particular configuration. You can copy paste this from
13+
# the benchmark job log. Don't worry about sforceimports, the script takes care
14+
# of that
15+
build_command = mx --dy /compiler build
16+
17+
# The command to run the particular benchmark on the configuration. You can
18+
# copy paste this from the benchmark job log. Make sure you replace the '*'
19+
# wildcard with a single benchmark, the script only supports bisecting a single
20+
# benchmark at a time
21+
benchmark_command = mx --dy /compiler benchmark micro:try-except-simple
22+
23+
# The first known "bad" merge commit for bisection. Try to use long commit
24+
# SHAs, the CI cache has higher probability of short SHAs being ambiguous
25+
bad = 1234deadbeef
26+
27+
# The last known "good" merge commit for bisection
28+
good = 1234cafebabe
29+
30+
# Whether to checkout graal-enterprise
31+
enterprise = true
32+
33+
# Which result parameter should be used for comparisons, one of BEST, WORST,
34+
# AVG (all runs), AVG (no warmup)
35+
benchmark_criterion = BEST

ci.jsonnet

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{ "overlay": "883cec58bc69d36f96f09c393804b65df020d67a" }
1+
{ "overlay": "5e162687796a6e3c6274904b8d7fdec55594760a" }

mx.graalpython/mx_graalpython.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
import mx_sdk
5454
import mx_subst
5555
import mx_urlrewrites
56+
import mx_graalpython_bisect
5657
from mx_gate import Task
5758
from mx_graalpython_bench_param import PATH_MESO, BENCHMARKS, JBENCHMARKS
5859
from mx_graalpython_benchmark import PythonBenchmarkSuite, python_vm_registry, CPythonVm, PyPyVm, JythonVm, GraalPythonVm, \
@@ -1985,4 +1986,5 @@ def import_files(from_dir, to_dir):
19851986
'graalpytest': [graalpytest, '[-h] [-v] [--python PYTHON] [-k TEST_PATTERN] [TESTS]'],
19861987
'clean': [python_clean, ''],
19871988
'python-update-hpy-import': [update_hpy_import_cmd, '[--no-pull] PATH_TO_HPY'],
1989+
'bisect-benchmark': [mx_graalpython_bisect.bisect_benchmark, ''],
19881990
})
Lines changed: 289 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,289 @@
1+
# Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
2+
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
3+
#
4+
# The Universal Permissive License (UPL), Version 1.0
5+
#
6+
# Subject to the condition set forth below, permission is hereby granted to any
7+
# person obtaining a copy of this software, associated documentation and/or
8+
# data (collectively the "Software"), free of charge and under any and all
9+
# copyright rights in the Software, and any and all patent rights owned or
10+
# freely licensable by each licensor hereunder covering either (i) the
11+
# unmodified Software as contributed to or provided by such licensor, or (ii)
12+
# the Larger Works (as defined below), to deal in both
13+
#
14+
# (a) the Software, and
15+
#
16+
# (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
17+
# one is included with the Software each a "Larger Work" to which the Software
18+
# is contributed by such licensors),
19+
#
20+
# without restriction, including without limitation the rights to copy, create
21+
# derivative works of, display, perform, and distribute the Software and make,
22+
# use, sell, offer for sale, import, export, have made, and have sold the
23+
# Software and the Larger Work(s), and to sublicense the foregoing rights on
24+
# either these or other terms.
25+
#
26+
# This license is subject to the following condition:
27+
#
28+
# The above copyright notice and either this complete permission notice or at a
29+
# minimum a reference to the UPL must be included in all copies or substantial
30+
# portions of the Software.
31+
#
32+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
33+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
34+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
35+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
36+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
37+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
38+
# SOFTWARE.
39+
40+
import argparse
41+
import configparser
42+
import os
43+
import re
44+
import shlex
45+
import sys
46+
import types
47+
48+
import mx
49+
50+
51+
def get_suite(name):
    """Resolve an mx suite by name, importing it into the primary suite if needed.

    A leading '/' in ``name`` marks a suite that lives in a subdirectory of its
    repository (mx's ``in_subdir`` convention); it is stripped for the lookup.
    """
    bare_name = name.lstrip('/')
    resolved = mx.suite(bare_name, fatalIfMissing=False)
    if not resolved:
        # Not loaded yet - pull it in as a dynamic import of the primary suite
        resolved = mx.primary_suite().import_suite(
            bare_name, version=None, urlinfos=None, in_subdir=name.startswith('/'))
    assert resolved
    return resolved
58+
59+
60+
def get_downstream_suite(suite):
    """Return the suite directly downstream of ``suite`` in the CI chain, or None.

    The chain mirrors the repository import hierarchy used by the benchmark
    jobs: graalpython-apptests/-extensions -> graalpython -> vm -> vm-enterprise.
    """
    chain = {
        'graalpython-apptests': 'graalpython',
        'graalpython-extensions': 'graalpython',
        'graalpython': '/vm',
        'vm': '/vm-enterprise',
    }
    next_name = chain.get(suite.name)
    return get_suite(next_name) if next_name else None
70+
71+
72+
def get_commit(suite, ref='HEAD'):
    """Resolve ``ref`` to a commit SHA in ``suite``'s repository.

    Returns None when no suite is given (e.g. there is no downstream suite).
    """
    if not suite:
        return None
    sha = suite.vc.git_command(suite.vc_dir, ['rev-parse', ref], abortOnError=True)
    return sha.strip()
76+
77+
78+
def get_message(suite, commit):
    """Return the one-line subject of ``commit`` in ``suite``'s repository."""
    log_args = ['log', '--format=%s', '-n', '1', commit]
    return suite.vc.git_command(suite.vc_dir, log_args).strip()
80+
81+
82+
def run_bisect_benchmark(suite, bad, good, callback, threshold=None):
    """Bisect the first-parent history of ``suite`` between ``good`` and ``bad``.

    ``callback(suite, commit)`` must build and benchmark the given commit and
    return the measured value (a duration in seconds; larger is worse). On the
    top-level call (``threshold is None``) both endpoints are measured first and
    the midpoint of their values becomes the good/bad classification threshold;
    recursive calls into downstream repositories reuse the caller's threshold.

    :return: a BisectResult describing the measured commits and the boundary.
    :raises RuntimeError: when the commit range is empty, or when the endpoints
        differ by less than 3% (no regression to bisect).
    """
    git_dir = suite.vc_dir
    # Newest-first list of merge commits: commits[0] == bad, commits[-1] == good
    commits = suite.vc.git_command(
        git_dir,
        ['log', '--first-parent', '--format=format:%H', '{}^..{}'.format(good, bad)],
        abortOnError=True,
    ).splitlines()
    if not commits:
        raise RuntimeError("No merge commits found in the range. Did you swap good and bad?")
    downstream_suite = get_downstream_suite(suite)
    values = [None] * len(commits)
    if threshold is None:
        # Top-level invocation: measure both endpoints to establish the threshold
        bad_index = 0
        good_index = len(commits) - 1
        values[bad_index] = callback(suite, bad)
        # Record the downstream import right after each endpoint checkout so we
        # can later recurse into the downstream repo if its import changed
        downstream_bad = get_commit(downstream_suite)
        values[good_index] = callback(suite, good)
        downstream_good = get_commit(downstream_suite)
        threshold = (values[bad_index] + values[good_index]) / 2
        if values[good_index] * 1.03 > values[bad_index]:
            raise RuntimeError(
                "Didn't detect a regression - less than 3% difference between good value "
                "{} and bad value {}".format(values[good_index], values[bad_index])
            )
    else:
        # Recursive (downstream) invocation: the endpoints were already
        # classified by the caller, so start with sentinel boundary indices
        bad_index = -1
        good_index = len(commits)
        downstream_bad = None
        downstream_good = None
    while True:
        # Standard binary search over the (newest-first) commit list
        index = bad_index + ((good_index - bad_index) // 2)
        if index == bad_index or index == good_index:
            assert good_index - bad_index == 1
            break
        commit = commits[index]
        values[index] = callback(suite, commit)
        if values[index] < threshold:
            good_index = index
            downstream_good = get_commit(downstream_suite)
        else:
            bad_index = index
            downstream_bad = get_commit(downstream_suite)
    subresults = {}
    if downstream_bad and downstream_good and downstream_bad != downstream_good:
        # The regression may originate in the downstream repository whose
        # import changed between the good and bad commits - recurse into it
        subresult = run_bisect_benchmark(downstream_suite, downstream_bad, downstream_good, callback, threshold)
        subresults[bad_index] = subresult
    return BisectResult(suite, commits, values, good_index, bad_index, subresults)
129+
130+
131+
class BisectResult:
    """Result of one (possibly recursive) benchmark bisection.

    ``commits`` is the newest-first list of candidate commits and ``values``
    holds the measured value per commit (None where a commit was not measured).
    ``good_index``/``bad_index`` point at the final good/bad boundary.
    ``subresults`` maps a bad commit's index to the BisectResult of the
    bisection performed in the downstream repository for that commit.
    """

    def __init__(self, suite, commits, values, good_index, bad_index, subresults):
        self.suite = suite
        self.commits = commits
        self.values = values
        self.good_index = good_index
        self.bad_index = bad_index
        self.subresults = subresults

    @property
    def repo_name(self):
        # The repository directory name, used as a human-readable label
        return os.path.basename(self.suite.vc_dir)

    @property
    def good_commit(self):
        # good_index may be len(commits) when no good commit was found in range
        if 0 <= self.good_index < len(self.commits):
            return self.commits[self.good_index]
        return None

    @property
    def bad_commit(self):
        # bad_index starts at -1 in downstream bisections; an explicit range
        # check avoids Python's negative indexing silently returning the last
        # (i.e. good) commit when no bad commit was identified
        if 0 <= self.bad_index < len(self.commits):
            return self.commits[self.bad_index]
        return None

    def visualize(self, level=1):
        """Render a nested, org-mode-style report of all measured commits."""
        level_marker = '=' * level
        out = ["{} {}".format(level_marker, self.repo_name)]
        for index, (commit, value) in enumerate(zip(self.commits, self.values)):
            if value is not None:
                out.append("{} {} {:6.6} s {}".format(level_marker, commit, value, get_message(self.suite, commit)))
            if self.subresults and index in self.subresults:
                out.append(self.subresults[index].visualize(level + 1))
        return '\n'.join(out)

    def summarize(self):
        """Return a short verdict naming the most specific bad commit found.

        Prefers a conclusive downstream subresult over this repository's own
        boundary; returns '' when this bisection was inconclusive.
        """
        if self.bad_commit and self.good_commit:
            for subresult in self.subresults.values():
                sub = subresult.summarize()
                if sub:
                    return sub
            return ("Detected bad commit in {} repository:\n{} {}"
                    .format(self.repo_name, self.bad_commit, get_message(self.suite, self.bad_commit)))
        return ''
177+
178+
179+
def _bisect_benchmark(argv, initial_branch, email_to):
    """Drive a full benchmark bisection and email the result on success.

    Configuration comes from the INI file named by the BISECT_BENCHMARK_CONFIG
    environment variable when set (the CI path), otherwise from command-line
    arguments in ``argv``. ``initial_branch`` and ``email_to`` are used only
    for the result email.
    """
    if 'BISECT_BENCHMARK_CONFIG' in os.environ:
        # CI path: read settings from the [bisect-benchmark] section of the
        # config file committed to the branch (see bisect-benchmark.ini)
        cp = configparser.ConfigParser()
        cp.read(os.environ['BISECT_BENCHMARK_CONFIG'])
        sec = cp['bisect-benchmark']
        # Mirror the argparse namespace shape so the rest of the code is agnostic
        args = types.SimpleNamespace()
        args.bad = sec['bad']
        args.good = sec['good']
        args.build_command = sec['build_command']
        args.benchmark_command = sec['benchmark_command']
        args.benchmark_criterion = sec.get('benchmark_criterion', 'BEST')
        args.enterprise = sec.getboolean('enterprise', False)
    else:
        # Interactive path: same settings as positional/optional CLI arguments
        parser = argparse.ArgumentParser()
        parser.add_argument('bad', help="Bad commit for bisection")
        parser.add_argument('good', help="Good commit for bisection")
        parser.add_argument('build_command', help="Command to run in order to build the configuration")
        parser.add_argument('benchmark_command',
                            help="Command to run in order to run the benchmark. Output needs to be in mx's format")
        parser.add_argument('--benchmark-criterion', default='BEST',
                            help="Which result parameter should be used for comparisons")
        parser.add_argument('--enterprise', action='store_true', help="Whether to checkout graal-enterprise")
        args = parser.parse_args(argv)

    primary_suite = mx.primary_suite()

    # One-element list so the nested callback can mutate it (py2-style closure)
    fetched_enterprise = [False]

    def benchmark_callback(suite, commit):
        # Check out the commit and synchronize suite imports before building
        suite.vc.update_to_branch(suite.vc_dir, commit)
        mx.run_mx(['sforceimports'], suite=suite)
        if args.enterprise and suite.name != 'vm-enterprise':
            checkout_args = ['--dynamicimports', '/vm-enterprise', 'checkout-downstream', 'vm', 'vm-enterprise']
            if fetched_enterprise[0]:
                # Only fetch the enterprise repo on the first checkout
                checkout_args.append('--no-fetch')
            mx.run_mx(checkout_args, out=mx.OutputCapture())
            mx.run_mx(['--env', 'ee', 'sforceimports'], suite=get_suite('/vm-enterprise'))
            fetched_enterprise[0] = True
        elif suite.name != 'vm':
            mx.run_mx(['--env', 'ce', 'sforceimports'], suite=get_suite('/vm'))
        # Re-checkout and re-sync: the downstream checkout above may have moved
        # this repository off the commit under test
        suite.vc.update_to_branch(suite.vc_dir, commit)
        mx.run_mx(['sforceimports'], suite=suite)
        env = os.environ.copy()
        if 'CI' not in os.environ:
            # Locally, keep a build per commit so re-runs don't rebuild
            env['MX_ALT_OUTPUT_ROOT'] = 'mxbuild-{}'.format(commit)
        retcode = mx.run(shlex.split(args.build_command), env=env, nonZeroIsFatal=False)
        if retcode:
            raise RuntimeError("Failed to execute the build command for {}".format(commit))
        output = mx.OutputCapture()
        retcode = mx.run(shlex.split(args.benchmark_command), env=env, out=mx.TeeOutputCapture(output),
                         nonZeroIsFatal=False)
        if retcode:
            raise RuntimeError("Failed to execute benchmark for {}".format(commit))
        # Extract the selected criterion's duration from mx's benchmark output
        match = re.search(r'{}.*duration: ([\d.]+)'.format(re.escape(args.benchmark_criterion)), output.data)
        if not match:
            raise RuntimeError("Failed to get result from the benchmark")
        return float(match.group(1))

    bad = get_commit(primary_suite, args.bad)
    good = get_commit(primary_suite, args.good)
    result = run_bisect_benchmark(primary_suite, bad, good, benchmark_callback)
    visualization = result.visualize()
    summary = result.summarize()

    print()
    print(visualization)
    print()
    print(summary)

    if 'CI' not in os.environ:
        print("You can rerun a benchmark for a particular commit using:\nMX_ALT_OUTPUT_ROOT=mxbuild-$commit {}".format(
            args.benchmark_command))

    send_email(
        initial_branch,
        email_to,
        "Bisection job has finished successfully.\n{}\n".format(summary)
        + "Note I'm just a script and I don't validate statistical significance of the above result.\n"
        + "Please take a moment to also inspect the detailed results below.\n\n{}\n\n".format(visualization)
        + os.environ.get('BUILD_URL', 'Unknown URL')
    )
260+
261+
262+
def bisect_benchmark(argv):
    """mx command entry point: bisect a benchmark regression, emailing the outcome."""
    primary = mx.primary_suite()
    git = primary.vc
    # Captured up front so a failure email can still be sent if the job breaks
    initial_branch = git.git_command(primary.vc_dir, ['rev-parse', '--abbrev-ref', 'HEAD']).strip()
    email_to = git.git_command(primary.vc_dir, ['log', '--format=%cE', '-n', '1']).strip()
    try:
        _bisect_benchmark(argv, initial_branch, email_to)
    except Exception:
        # Notify the committer about the failure, then let the error propagate
        send_email(initial_branch, email_to, "Job failed.\n {}".format(os.environ.get('BUILD_URL', 'Unknown URL')))
        raise
271+
272+
273+
def send_email(initial_branch, email_to, content):
    """Email ``content`` to ``email_to``, if SMTP is configured in the environment.

    No-op unless BISECT_EMAIL_SMTP_SERVER is set. The recipient must match the
    BISECT_EMAIL_TO_PATTERN regex, otherwise the process exits without sending.
    """
    if 'BISECT_EMAIL_SMTP_SERVER' not in os.environ:
        return
    import smtplib
    from email.message import EmailMessage

    msg = EmailMessage()
    msg['Subject'] = "Bisection result for {}".format(initial_branch)
    msg['From'] = os.environ['BISECT_EMAIL_FROM']
    # Refuse to mail arbitrary addresses - the recipient must match the
    # pattern configured for this CI instance
    allowed_pattern = os.environ['BISECT_EMAIL_TO_PATTERN']
    if not re.match(allowed_pattern, email_to):
        sys.exit("Email {} not allowed, aborting sending".format(email_to))
    msg['To'] = email_to
    msg.set_content(content)
    print(msg)
    smtp = smtplib.SMTP(os.environ['BISECT_EMAIL_SMTP_SERVER'])
    smtp.send_message(msg)
    smtp.quit()

0 commit comments

Comments
 (0)