Commit ca3306a

Add bisect-benchmark command
1 parent 3286cb6 commit ca3306a

File tree

3 files changed

+237 -0 lines changed

bisect-benchmark.ini

Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
[bisect-benchmark]
# This is the configuration file for bisecting benchmark jobs in the CI.
# Usage:
# - Create a temporary branch based on master (or the bad commit)
# - Fill in this configuration file, commit the changes and push the branch
# TODO job name
# - Execute the $name job for the commit. When you click the "create PR" link,
#   you can run jobs in the commits UI. You need to wait a bit for the job
#   enumerator to populate the job list. You don't need to actually create the
#   PR.
# - Wait for the jobs to finish. TODO

# The command to build a particular configuration. You can copy-paste this from
# the benchmark job log. Don't worry about sforceimports, the script takes care
# of that.
build_command = mx --dy /compiler build

# The command to run the particular benchmark on the configuration. You can
# copy-paste this from the benchmark job log. Make sure you replace the '*'
# wildcard with a single benchmark; the script only supports bisecting a single
# benchmark at a time.
benchmark_command = mx --dy /compiler benchmark micro:try-except-simple

# The first known "bad" merge commit for bisection
bad = 1234deadbeef

# The last known "good" merge commit for bisection
good = 1234cafebabe

# Whether to checkout graal-enterprise
enterprise = true
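
For context, the script added in this commit only reads this file when the BISECT_BENCHMARK_CONFIG environment variable points at it; otherwise it parses command-line arguments. Assuming the file above is saved as bisect-benchmark.ini, a local run could be kicked off with:

BISECT_BENCHMARK_CONFIG=bisect-benchmark.ini mx bisect-benchmark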

mx.graalpython/mx_graalpython.py

Lines changed: 2 additions & 0 deletions
@@ -53,6 +53,7 @@
 import mx_sdk
 import mx_subst
 import mx_urlrewrites
+import mx_graalpython_bisect
 from mx_gate import Task
 from mx_graalpython_bench_param import PATH_MESO, BENCHMARKS, JBENCHMARKS
 from mx_graalpython_benchmark import PythonBenchmarkSuite, python_vm_registry, CPythonVm, PyPyVm, JythonVm, GraalPythonVm, \
@@ -1985,4 +1986,5 @@ def import_files(from_dir, to_dir):
     'graalpytest': [graalpytest, '[-h] [-v] [--python PYTHON] [-k TEST_PATTERN] [TESTS]'],
     'clean': [python_clean, ''],
     'python-update-hpy-import': [update_hpy_import_cmd, '[--no-pull] PATH_TO_HPY'],
+    'bisect-benchmark': [mx_graalpython_bisect.bisect_benchmark, ''],
 })
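
The usage string registered above is empty, but the argparse path in mx_graalpython_bisect (below) accepts the bad commit, the good commit, the build command and the benchmark command as positional arguments, plus --benchmark-criterion and --enterprise flags. A hypothetical invocation mirroring the sample config would be:

mx bisect-benchmark 1234deadbeef 1234cafebabe 'mx --dy /compiler build' 'mx --dy /compiler benchmark micro:try-except-simple' --enterprise
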
mx.graalpython/mx_graalpython_bisect.py

Lines changed: 204 additions & 0 deletions
@@ -0,0 +1,204 @@
import re
import os
import sys
import argparse
import shlex
import types
import configparser

import mx


SUITE = mx.suite('graalpython')


def get_commit(suite, ref='HEAD'):
    if not suite:
        return None
    return suite.vc.git_command(suite.vc_dir, ['rev-parse', ref], abortOnError=True).strip()


def get_message(suite, commit):
    return suite.vc.git_command(suite.vc_dir, ['log', '--format=%s', '-n', '1', commit]).strip()
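

# Bisects over the merge commits between the known-bad and known-good commits.
# Note that `git log good^..bad` lists commits newest-first, so index 0 is the
# bad end of the range and the last index is the good end. On the top-level
# call the good/bad threshold is the midpoint of the values measured at the
# two endpoints, and the run aborts unless the bad commit is at least 3%
# slower; recursive calls into a downstream repository reuse that threshold.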
def run_bisect_benchmark(suite, bad, good, callback, downstreams, threshold=None):
    git_dir = suite.vc_dir
    commits = suite.vc.git_command(
        git_dir,
        ['log', '--merges', '--format=format:%H', f'{good}^..{bad}'],
        abortOnError=True,
    ).splitlines()
    if not commits:
        sys.exit("No merge commits found in the range. Did you swap good and bad?")
    downstream_suite = downstreams.get(suite)
    values = [None] * len(commits)
    if threshold is None:
        bad_index = 0
        good_index = len(commits) - 1
        values[bad_index] = callback(suite, bad)
        downstream_bad = get_commit(downstream_suite)
        values[good_index] = callback(suite, good)
        downstream_good = get_commit(downstream_suite)
        threshold = (values[bad_index] + values[good_index]) / 2
        if values[good_index] * 1.03 > values[bad_index]:
            sys.exit(
                "Didn't detect a regression - less than 3% difference between good value "
                f"{values[good_index]} and bad value {values[bad_index]}"
            )
    else:
        bad_index = -1
        good_index = len(commits)
        downstream_bad = None
        downstream_good = None
    while True:
        index = bad_index + ((good_index - bad_index) // 2)
        if index == bad_index or index == good_index:
            assert good_index - bad_index == 1
            break
        commit = commits[index]
        values[index] = callback(suite, commit)
        if values[index] < threshold:
            good_index = index
            downstream_good = get_commit(downstream_suite)
        else:
            bad_index = index
            downstream_bad = get_commit(downstream_suite)
    subresults = {}
    if downstream_bad and downstream_good and downstream_bad != downstream_good:
        subresult = run_bisect_benchmark(downstream_suite, downstream_bad, downstream_good, callback, downstreams, threshold)
        subresults[bad_index] = subresult
    return BisectResult(suite, commits, values, good_index, bad_index, subresults)
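

# Tree of bisection results. `subresults` maps an index into `commits` to the
# BisectResult of the downstream-repository bisection triggered at that commit.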
class BisectResult:
    def __init__(self, suite, commits, values, good_index, bad_index, subresults):
        self.suite = suite
        self.commits = commits
        self.values = values
        self.good_index = good_index
        self.bad_index = bad_index
        self.subresults = subresults

    @property
    def repo_name(self):
        return os.path.basename(self.suite.vc_dir)

    @property
    def good_commit(self):
        try:
            return self.commits[self.good_index]
        except IndexError:
            return None

    @property
    def bad_commit(self):
        try:
            return self.commits[self.bad_index]
        except IndexError:
            return None

    def visualize(self, level=1):
        level_marker = '=' * level
        print(f"{level_marker} {self.repo_name}")
        for index, (commit, value) in enumerate(zip(self.commits, self.values)):
            if value is not None:
                print(f"{level_marker} {commit} {value:6.6} {get_message(self.suite, commit)}")
            if self.subresults and index in self.subresults:
                self.subresults[index].visualize(level + 1)

    def summarize(self):
        if self.bad_commit and self.good_commit:
            for subresult in self.subresults.values():
                if subresult.summarize():
                    return True
            print(f"Detected bad commit in {self.repo_name} repository:\n{self.bad_commit} {get_message(self.suite, self.bad_commit)}")
            return True
        return False
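

# The matching commit of the downstream graal repository is not recorded in
# this repository directly; it is recovered from the version of the 'sulong'
# import in suite.py as it existed at the given commit.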
def get_suite_py(commit):
    suite_py = SUITE.vc.git_command(SUITE.vc_dir, ['show', f'{commit}:mx.graalpython/suite.py'], abortOnError=True)
    namespace = {}
    exec(suite_py, namespace, namespace)
    return namespace['suite']


def get_graal_commit(commit):
    suite_py = get_suite_py(commit)
    return [imp for imp in suite_py['imports'] if imp['name'] == 'sulong'][0]['version']
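

# Entry point for the `mx bisect-benchmark` command. The configuration comes
# either from the file named by the BISECT_BENCHMARK_CONFIG environment
# variable (the CI path, see bisect-benchmark.ini) or from command-line
# arguments.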
def bisect_benchmark(argv):
    if 'BISECT_BENCHMARK_CONFIG' in os.environ:
        cp = configparser.ConfigParser()
        cp.read(os.environ['BISECT_BENCHMARK_CONFIG'])
        sec = cp['bisect-benchmark']
        args = types.SimpleNamespace()
        args.bad = sec['bad']
        args.good = sec['good']
        args.build_command = sec['build_command']
        args.benchmark_command = sec['benchmark_command']
        args.benchmark_criterion = sec.get('benchmark_criterion', 'BEST')
        args.enterprise = sec.getboolean('enterprise', False)
    else:
        parser = argparse.ArgumentParser()
        parser.add_argument('bad')
        parser.add_argument('good')
        parser.add_argument('build_command')
        parser.add_argument('benchmark_command')
        parser.add_argument('--benchmark-criterion', default='BEST')
        parser.add_argument('--enterprise', action='store_true')
        args = parser.parse_args(argv)

    vm_suite = mx.suite('vm')
    downstreams = {
        SUITE: vm_suite,
    }
    if args.enterprise:
        downstreams[vm_suite] = mx.suite('vm-enterprise')

    fetched_enterprise = False
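
    # Measures a single commit: update the checkout, re-sync suite imports via
    # sforceimports, optionally check out the matching enterprise repositories,
    # run the build command, run the benchmark command, and parse the duration
    # reported for the chosen criterion out of the benchmark output.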
    def benchmark_callback(suite, commit):
        nonlocal fetched_enterprise
        suite.vc.update_to_branch(suite.vc_dir, commit)
        mx.run_mx(['sforceimports'], suite=suite)
        if args.enterprise and suite.name != 'vm-enterprise':
            checkout_args = ['--dynamicimports', '/vm-enterprise', 'checkout-downstream', 'vm', 'vm-enterprise']
            if fetched_enterprise:
                checkout_args.append('--no-fetch')
            mx.run_mx(checkout_args, out=mx.OutputCapture())
            mx.run_mx(['--env', 'ee', 'sforceimports'], suite=mx.suite('vm-enterprise'))
            fetched_enterprise = True
        elif suite.name != 'vm':
            mx.run_mx(['--env', 'ce', 'sforceimports'], suite=vm_suite)
        suite.vc.update_to_branch(suite.vc_dir, commit)
        mx.run_mx(['sforceimports'], suite=suite)
        env = os.environ.copy()
        if 'CI' not in os.environ:
            env['MX_ALT_OUTPUT_ROOT'] = f'mxbuild-{commit}'
        retcode = mx.run(shlex.split(args.build_command), env=env, nonZeroIsFatal=False)
        if retcode:
            sys.exit(f"Failed to execute the build command for {commit}")
        output = mx.OutputCapture()
        retcode = mx.run(shlex.split(args.benchmark_command), env=env, out=mx.TeeOutputCapture(output), nonZeroIsFatal=False)
        if retcode:
            sys.exit(f"Failed to execute benchmark for {commit}")
        match = re.search(rf'{re.escape(args.benchmark_criterion)}.*duration: ([\d.]+)', output.data)
        if not match:
            sys.exit("Failed to get result from the benchmark")
        return float(match.group(1))

    bad = get_commit(SUITE, args.bad)
    good = get_commit(SUITE, args.good)
    result = run_bisect_benchmark(SUITE, bad, good, benchmark_callback, downstreams)
    print()
    result.visualize()
    print()
    result.summarize()
    print()

    if 'CI' not in os.environ:
        print(f"You can rerun a benchmark for a particular commit using:\nMX_ALT_OUTPUT_ROOT=mxbuild-$commit {args.benchmark_command}")
