@@ -42,6 +42,12 @@ class Commit:
4242 raise RuntimeError (f'Error when trying to obtain the commit order for { self ._sha } and { other ._sha } ' )
4343 return res .returncode == 0
4444
def __hash__(self):
    """
    Return a hash value for this commit, computed from its full revision.

    This makes commits usable as set members and dictionary keys.
    """
    revision = self.fullrev
    return hash(revision)
50+
4551 def show (self , include_diff = False ):
4652 """
4753 Return the commit information equivalent to `git show` associated to this commit.
@@ -153,6 +159,29 @@ def parse_lnt(lines):
153159 results [name ][metric ].append (float (value ))
154160 return results
155161
def find_outliers(xs, ys, threshold):
    """
    Given a list of x coordinates and a list of y coordinates, find (x, y) pairs where the y
    value differs from the previous y value by more than the given relative difference.

    The threshold is given as a floating point representing a percentage, e.g. 0.25 will result in
    detecting points that differ from their previous value by more than 25%. The difference is in
    absolute value, i.e. both positive and negative spikes are detected.

    Points whose y value is None are skipped entirely: they are never reported and they do not
    become the "previous" value, so the next valid point is compared against the last valid one.
    The first valid point has nothing to be compared against and is never reported.

    When the previous value is exactly 0, the relative difference is undefined. In that case, any
    non-zero value is reported as an outlier and a value of 0 is not.

    The result is a list of (x, y) tuples in the order in which they appear in the input.
    """
    outliers = []
    previous = None
    for (x, y) in zip(xs, ys):
        if y is None: # skip data points that don't contain values
            continue

        if previous is not None:
            if previous == 0:
                # Avoid dividing by zero: moving away from an exact 0 is an unbounded relative change.
                is_outlier = y != 0
            else:
                # Take the absolute value so that drops are detected as well as increases.
                is_outlier = abs((y - previous) / previous) > threshold
            if is_outlier:
                outliers.append((x, y))
        previous = y
    return outliers
183+
184+
156185def main (argv ):
157186 parser = argparse .ArgumentParser (
158187 prog = 'visualize-historical' ,
@@ -176,6 +205,13 @@ def main(argv):
176205 'Since the chart is interactive, it generally makes most sense to include all the benchmarks '
177206 'and to then filter them in the browser, but in some cases producing a chart with a reduced '
178207 'number of data series is useful.' )
208+ parser .add_argument ('--find-outliers' , metavar = 'FLOAT' , type = float , required = False ,
209+ help = 'When building the chart, detect commits that show a large spike (more than the given relative threshold) '
210+ 'with the previous result and print those to standard output. This can be used to generate a list of '
211+ 'potential outliers that we might want to re-generate the data for. The threshold is expressed as a '
212+ 'floating point number, e.g. 0.25 will detect points that differ by more than 25%% from their previous '
213+ 'result. This option respects --filter, i.e. only benchmarks that match the filter will be analyzed for '
214+ 'outliers.' )
179215 parser .add_argument ('--git-repo' , type = directory_path , default = pathlib .Path (os .getcwd ()),
180216 help = 'Path to the git repository to use for ordering commits in time. '
181217 'By default, the current working directory is used.' )
@@ -214,10 +250,20 @@ def main(argv):
214250 regex = re .compile (args .filter )
215251 benchmarks = {b for b in benchmarks if regex .search (b )}
216252
253+ # If requested, perform a basic pass to detect outliers
254+ if args .find_outliers is not None :
255+ threshold = args .find_outliers
256+ outliers = set ()
257+ for benchmark in benchmarks :
258+ commits = [commit for (commit , _ ) in historical_data ]
259+ series = [commit_data .get (benchmark , None ) for (_ , commit_data ) in historical_data ]
260+ outliers |= set (commit for (commit , _ ) in find_outliers (commits , series , threshold = threshold ))
261+ print (f'Outliers (more than { threshold * 100 } %): { " " .join (str (x ) for x in outliers )} ' )
262+
217263 # Plot the data for all the required benchmarks
218264 figure = create_plot ([commit for (commit , _ ) in historical_data ],
219265 sorted (list (benchmarks )),
220- [data for (_ , data ) in historical_data ])
266+ [commit_data for (_ , commit_data ) in historical_data ])
221267 do_open = args .output is None or args .open
222268 output = args .output if args .output is not None else tempfile .NamedTemporaryFile (suffix = '.html' ).name
223269 plotly .io .write_html (figure , file = output , auto_open = do_open )
0 commit comments