Skip to content

Commit a9b8dfe

Browse files
committed
[libc++] Add a script to find outliers and re-run candidates in LNT results
This allows selectively re-running benchmarks that are suspected to contain a lot of noise.
1 parent 91c35d6 commit a9b8dfe

File tree

2 files changed

+242
-17
lines changed

2 files changed

+242
-17
lines changed

libcxx/utils/find-rerun-candidates

Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import datetime
5+
import functools
6+
import os
7+
import pathlib
8+
import re
9+
import statistics
10+
import subprocess
11+
import sys
12+
13+
import git
14+
import pandas
15+
import tqdm
16+
17+
@functools.total_ordering
class Commit:
    """
    This class represents a commit inside a given Git repository.
    """

    def __init__(self, git_repo, sha):
        self._git_repo = git_repo
        self._sha = sha
        # Per-instance cache for `show()`. Decorating the method with
        # `functools.cache` would key a process-global cache on `self`,
        # keeping every Commit object alive for the lifetime of the
        # program (flake8-bugbear B019).
        self._show_cache = {}

    def __eq__(self, other):
        """
        Return whether two commits refer to the same commit.

        This doesn't take into account the content of the Git tree at those commits, only the
        'identity' of the commits themselves.
        """
        return self.fullrev == other.fullrev

    def __lt__(self, other):
        """
        Return whether a commit is an ancestor of another commit in the Git repository.
        """
        # Is self._sha an ancestor of other._sha?
        # `git merge-base --is-ancestor` exits with 0 (yes) or 1 (no); any
        # other exit code means the query itself failed (e.g. unknown SHA).
        res = subprocess.run(['git', '-C', self._git_repo, 'merge-base', '--is-ancestor', self._sha, other._sha])
        if res.returncode not in (0, 1):
            raise RuntimeError(f'Error when trying to obtain the commit order for {self._sha} and {other._sha}')
        return res.returncode == 0

    def __hash__(self):
        """
        Return a hash based on the full revision of this commit, so that equal
        commits (same full revision) hash identically.
        """
        return hash(self.fullrev)

    def show(self, include_diff=False):
        """
        Return the commit information equivalent to `git show` associated to this commit.

        The result is cached per (instance, include_diff) pair since the output
        for a given commit is immutable.
        """
        if include_diff not in self._show_cache:
            cmd = ['git', '-C', self._git_repo, 'show', self._sha]
            if not include_diff:
                cmd.append('--no-patch')
            self._show_cache[include_diff] = subprocess.check_output(cmd, text=True)
        return self._show_cache[include_diff]

    @functools.cached_property
    def shortrev(self):
        """
        Return the shortened version of the given SHA.
        """
        return subprocess.check_output(['git', '-C', self._git_repo, 'rev-parse', '--short', self._sha], text=True).strip()

    @functools.cached_property
    def fullrev(self):
        """
        Return the full SHA associated to this commit.
        """
        return subprocess.check_output(['git', '-C', self._git_repo, 'rev-parse', self._sha], text=True).strip()

    @functools.cached_property
    def commit_date(self):
        """
        Return the date of the commit as a `datetime.datetime` object.
        """
        repo = git.Repo(self._git_repo)
        return datetime.datetime.fromtimestamp(repo.commit(self._sha).committed_date)

    def prefetch(self):
        """
        Prefetch cached properties associated to this commit object.

        This makes it possible to control when time is spent recovering that information from Git for
        e.g. better reporting to the user.
        """
        self.commit_date
        self.fullrev
        self.shortrev
        self.show()

    def __str__(self):
        return self._sha
98+
99+
def directory_path(string):
    """
    Argparse-friendly type callback: return `string` as a `pathlib.Path` if it
    names an existing directory, and raise `NotADirectoryError` otherwise.
    """
    if not os.path.isdir(string):
        raise NotADirectoryError(string)
    return pathlib.Path(string)
104+
105+
def parse_lnt(lines, aggregate=statistics.median):
    """
    Parse lines in LNT format and return a list of dictionaries of the form:

    [
        {
            'benchmark': <benchmark1>,
            <metric1>: float,
            <metric2>: float,
            'data_points': int,
            ...
        },
        {
            'benchmark': <benchmark2>,
            <metric1>: float,
            <metric2>: float,
            'data_points': int,
            ...
        },
        ...
    ]

    If a metric has multiple values associated to it, they are aggregated into a single
    value using the provided aggregation function. Empty (or whitespace-only) lines
    are ignored.
    """
    results = {}
    for line in lines:
        line = line.strip()
        if not line:
            continue

        # Split on arbitrary whitespace (not a single space) so tabs and runs of
        # spaces between the identifier and the value are tolerated, and split
        # the metric off the *right* of the identifier so that benchmark names
        # may themselves contain dots.
        (identifier, value) = line.split(None, 1)
        (benchmark, metric) = identifier.rsplit('.', 1)
        entry = results.setdefault(benchmark, {'benchmark': benchmark})
        entry.setdefault(metric, []).append(float(value))

    for entry in results.values():
        metrics = [key for key in entry if isinstance(entry[key], list)]
        # A benchmark's sample count is that of its least-sampled metric.
        min_data_points = min(len(entry[metric]) for metric in metrics)
        for metric in metrics:
            entry[metric] = aggregate(entry[metric])
        entry['data_points'] = min_data_points

    return list(results.values())
154+
155+
def sorted_revlist(git_repo, commits):
    """
    Return the list of commits sorted by their chronological order (from oldest to newest) in the
    provided Git repository. Items earlier in the list are older than items later in the list.
    """
    cmd = ['git', '-C', git_repo, 'rev-list', '--no-walk', *commits]
    output = subprocess.check_output(cmd, text=True)
    # `git rev-list` emits commits newest-first; flip so the oldest comes first.
    return list(reversed(output.strip().splitlines()))
163+
164+
def main(argv):
    """
    Entry point: analyze per-commit benchmark results stored in LNT format and
    print the commits whose results look like outliers, i.e. good candidates
    for re-running to reduce noise.

    `argv` is the list of command-line arguments, excluding the program name.
    """
    parser = argparse.ArgumentParser(
        prog='find-rerun-candidates',
        description='Find benchmarking data points that are good candidates for additional runs, to reduce noise.')
    parser.add_argument('directory', type=directory_path,
        help='Path to a valid directory containing benchmark data in LNT format, each file being named <commit>.lnt. '
             'This is also the format generated by the `benchmark-historical` utility.')
    parser.add_argument('--metric', type=str, default='execution_time',
        help='The metric to analyze. LNT data may contain multiple metrics (e.g. code size, execution time, etc) -- '
             'this option allows selecting which metric is analyzed for rerun candidates. The default is "execution_time".')
    parser.add_argument('--filter', type=str, required=False,
        help='An optional regular expression used to filter the benchmarks included in the analysis. '
             'Only benchmarks whose names match the regular expression will be analyzed.')
    parser.add_argument('--outlier-threshold', metavar='FLOAT', type=float, default=0.1,
        help='Relative difference from the previous points for considering a data point as an outlier. This threshold is '
             'expressed as a floating point number, e.g. 0.25 will detect points that differ by more than 25%% from their '
             'previous result.')
    parser.add_argument('--data-points-threshold', type=int, required=False,
        help='Number of data points above which an outlier is not considered an outlier. If an outlier has more than '
             'that number of data points yet its relative difference is above the threshold, it is not considered an '
             'outlier. This can be used to re-run noisy data points until we have at least N samples, at which point '
             'we consider the data to be accurate, even if the result is beyond the threshold. By default, there is '
             'no limit on the number of data points.')
    parser.add_argument('--git-repo', type=directory_path, default=pathlib.Path(os.getcwd()),
        help='Path to the git repository to use for ordering commits in time. '
             'By default, the current working directory is used.')
    args = parser.parse_args(argv)

    # Extract benchmark data from the directory.
    data = {}
    files = [f for f in args.directory.glob('*.lnt')]
    for file in tqdm.tqdm(files, desc='Parsing LNT files'):
        rows = parse_lnt(file.read_text().splitlines())
        # The commit SHA is encoded in the file name, i.e. <sha>.lnt.
        (commit, _) = os.path.splitext(os.path.basename(file))
        commit = Commit(args.git_repo, commit)
        data[commit] = rows

    # Obtain commit information which is then cached throughout the program. Do this
    # eagerly so we can provide a progress bar.
    for commit in tqdm.tqdm(data.keys(), desc='Prefetching Git information'):
        commit.prefetch()

    # Create a dataframe from the raw data and add some columns to it:
    # - 'commit' represents the Commit object associated to the results in that row
    # - `revlist_order` represents the order of the commit within the Git repository.
    revlist = sorted_revlist(args.git_repo, [c.fullrev for c in data.keys()])
    data = pandas.DataFrame([row | {'commit': c} for (c, rows) in data.items() for row in rows])
    data = data.join(pandas.DataFrame([{'revlist_order': revlist.index(c.fullrev)} for c in data['commit']]))

    # Filter the benchmarks if needed.
    if args.filter is not None:
        keeplist = [b for b in data['benchmark'] if re.search(args.filter, b) is not None]
        data = data[data['benchmark'].isin(keeplist)]

    # Detect outliers by selecting all benchmarks whose change percentage is beyond the threshold.
    # If we have a max number of points, also take that into account.
    if args.data_points_threshold is not None:
        print(f'Generating outliers with more than {args.outlier_threshold * 100}% relative difference and less than {args.data_points_threshold} data points')
    else:
        print(f'Generating outliers with more than {args.outlier_threshold * 100}% relative difference')

    overall = set()
    for (benchmark, series) in data.sort_values(by='revlist_order').groupby('benchmark'):
        # pct_change() compares each row with the chronologically previous one,
        # since the series was sorted by revlist_order above.
        pct_change = series[args.metric].pct_change()
        outliers = series[pct_change.abs() > args.outlier_threshold]
        if args.data_points_threshold is not None:
            # An outlier with enough samples is considered trustworthy and kept.
            outliers = outliers[outliers['data_points'] < args.data_points_threshold]
        outliers = set(outliers['commit'])
        overall |= outliers
        if len(outliers) > 0:
            print(f'{benchmark}: {" ".join(c.shortrev for c in outliers)}')

    if len(overall) > 0:
        print(f'Summary: {" ".join(c.shortrev for c in overall)}')
    else:
        print(f'No outliers')

if __name__ == '__main__':
    main(sys.argv[1:])

libcxx/utils/visualize-historical

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -213,13 +213,6 @@ def main(argv):
213213
'Since the chart is interactive, it generally makes most sense to include all the benchmarks '
214214
'and to then filter them in the browser, but in some cases producing a chart with a reduced '
215215
'number of data series is useful.')
216-
parser.add_argument('--find-outliers', metavar='FLOAT', type=float, required=False,
217-
help='Instead of building a chart, detect commits that show a large spike (more than the given relative threshold) '
218-
'with the previous result and print those to standard output. This can be used to generate a list of '
219-
'potential outliers that we might want to re-generate the data for. The threshold is expressed as a '
220-
'floating point number, e.g. 0.25 will detect points that differ by more than 25%% from their previous '
221-
'result. This option respects --filter, i.e. only benchmarks that match the filter will be analyzed for '
222-
'outliers.')
223216
parser.add_argument('--subtitle', type=str, required=False,
224217
help='Optional subtitle for the chart. This can be used to help identify the contents of the chart.')
225218
parser.add_argument('--git-repo', type=directory_path, default=pathlib.Path(os.getcwd()),
@@ -258,16 +251,6 @@ def main(argv):
258251
keeplist = [b for b in data['benchmark'] if re.search(args.filter, b) is not None]
259252
data = data[data['benchmark'].isin(keeplist)]
260253

261-
# If requested, perform a basic pass to detect outliers.
262-
# Note that we consider a commit to be an outlier if any of the benchmarks for that commit is an outlier.
263-
if args.find_outliers is not None:
264-
threshold = args.find_outliers
265-
outliers = set()
266-
for (benchmark, series) in data.sort_values(by='revlist_order').groupby('benchmark'):
267-
outliers |= set(series[series[args.metric].pct_change() > threshold]['commit'])
268-
print(f'Outliers (more than {threshold * 100}%): {" ".join(c.shortrev for c in outliers)}')
269-
return
270-
271254
# Plot the data for all the required benchmarks.
272255
figure = create_plot(data, args.metric, subtitle=args.subtitle)
273256
do_open = args.output is None or args.open

0 commit comments

Comments
 (0)