11#!/usr/bin/env python3
22
33import argparse
4+ import datetime
45import functools
56import os
67import pathlib
@@ -10,6 +11,7 @@ import subprocess
1011import sys
1112import tempfile
1213
14+ import git
1315import pandas
1416import plotly
1517import plotly .express
@@ -74,13 +76,22 @@ class Commit:
7476 """
7577 return subprocess .check_output (['git' , '-C' , self ._git_repo , 'rev-parse' , self ._sha ], text = True ).strip ()
7678
@functools.cached_property
def commit_date(self):
    """
    Return the date of the commit as a `datetime.datetime` object.

    The value is built from the commit's committer timestamp as reported by GitPython
    (`committed_date`) and converted with `datetime.datetime.fromtimestamp`, so the result
    is a naive datetime expressed in the local timezone of the machine running this script.
    The result is cached on the instance after the first access.
    """
    # Open the repository as a context manager so that whatever GitPython keeps open for
    # it is released as soon as the timestamp has been read, instead of lingering until
    # garbage collection. This matters because one repository object is created per commit.
    with git.Repo(self._git_repo) as repo:
        # Read the attribute while the repository is still open: the commit's data may
        # only be loaded from the object database on first attribute access.
        timestamp = repo.commit(self._sha).committed_date
    return datetime.datetime.fromtimestamp(timestamp)
86+
7787 def prefetch (self ):
7888 """
7989 Prefetch cached properties associated to this commit object.
8090
8191 This makes it possible to control when time is spent recovering that information from Git for
8292 e.g. better reporting to the user.
8393 """
94+ self .commit_date
8495 self .fullrev
8596 self .shortrev
8697 self .show ()
@@ -101,20 +112,21 @@ def truncate_lines(string, n, marker=None):
101112 assert len (truncated ) <= n , "broken post-condition"
102113 return '\n ' .join (truncated )
103114
def create_plot(data, metric, subtitle=None):
    """
    Build a scatter plot of `metric` against commit date, one series per benchmark.

    `data` is a dataframe that provides at least the 'date', 'benchmark' and 'commit' columns
    as well as a column named after `metric`. `subtitle`, when given, is forwarded to plotly
    as the subtitle of the figure. The title of the figure is made of the short revisions of
    the first and last commits once the rows have been ordered by date.
    """
    ordered = data.sort_values(by=['date', 'benchmark'])

    # Distinct commits, in the order in which they first appear in the date-sorted rows.
    commits = pandas.unique(ordered['commit'])
    oldest, newest = commits[0], commits[-1]

    # Compute the hover text once per commit (not once per row), then fan it out to the rows.
    hover_by_commit = {}
    for commit in commits:
        summary = truncate_lines(commit.show(), 30, marker='...')
        hover_by_commit[commit] = summary.replace('\n', '<br>')
    hover_names = [hover_by_commit[commit] for commit in ordered['commit']]

    return plotly.express.scatter(
        ordered,
        title=f"{oldest.shortrev} to {newest.shortrev}",
        subtitle=subtitle,
        x='date',
        y=metric,
        symbol='benchmark',
        color='benchmark',
        hover_name=hover_names,
        trendline="lowess",
    )
119131
120132def directory_path (string ):
@@ -184,7 +196,7 @@ def main(argv):
184196 description = 'Visualize historical data in LNT format. This program generates a HTML file that embeds an '
185197 'interactive plot with the provided data. The HTML file can then be opened in a browser to '
186198 'visualize the data as a chart.' ,
187- epilog = 'This script depends on the `plotly` and the `tqdm` Python modules .' )
199+ epilog = 'This script depends on the modules listed in `libcxx/utils/requirements.txt` .' )
188200 parser .add_argument ('directory' , type = directory_path ,
189201 help = 'Path to a valid directory containing benchmark data in LNT format, each file being named <commit>.lnt. '
190202 'This is also the format generated by the `benchmark-historical` utility.' )
@@ -208,6 +220,8 @@ def main(argv):
208220 'floating point number, e.g. 0.25 will detect points that differ by more than 25%% from their previous '
209221 'result. This option respects --filter, i.e. only benchmarks that match the filter will be analyzed for '
210222 'outliers.' )
223+ parser .add_argument ('--subtitle' , type = str , required = False ,
224+ help = 'Optional subtitle for the chart. This can be used to help identify the contents of the chart.' )
211225 parser .add_argument ('--git-repo' , type = directory_path , default = pathlib .Path (os .getcwd ()),
212226 help = 'Path to the git repository to use for ordering commits in time. '
213227 'By default, the current working directory is used.' )
@@ -217,26 +231,27 @@ def main(argv):
217231 args = parser .parse_args (argv )
218232
219233 # Extract benchmark data from the directory.
220- data = []
234+ data = {}
221235 files = [f for f in args .directory .glob ('*.lnt' )]
222236 for file in tqdm .tqdm (files , desc = 'Parsing LNT files' ):
237+ rows = parse_lnt (file .read_text ().splitlines ())
223238 (commit , _ ) = os .path .splitext (os .path .basename (file ))
224239 commit = Commit (args .git_repo , commit )
225- with open (file , 'r' ) as f :
226- rows = parse_lnt (f .readlines ())
227- data .extend ((commit , row ) for row in rows )
240+ data [commit ] = rows
228241
229242 # Obtain commit information which is then cached throughout the program. Do this
230243 # eagerly so we can provide a progress bar.
231- for ( commit , _ ) in tqdm .tqdm (data , desc = 'Prefetching Git information' ):
244+ for commit in tqdm .tqdm (data . keys () , desc = 'Prefetching Git information' ):
232245 commit .prefetch ()
233246
234247 # Create a dataframe from the raw data and add some columns to it:
235248 # - 'commit' represents the Commit object associated to the results in that row
236249 # - `revlist_order` represents the order of the commit within the Git repository.
237- data = pandas .DataFrame ([row | {'commit' : commit } for (commit , row ) in data ])
238- revlist = sorted_revlist (args .git_repo , [c .fullrev for c in set (data ['commit' ])])
250+ # - `date` represents the commit date
251+ revlist = sorted_revlist (args .git_repo , [c .fullrev for c in data .keys ()])
252+ data = pandas .DataFrame ([row | {'commit' : c } for (c , rows ) in data .items () for row in rows ])
239253 data = data .join (pandas .DataFrame ([{'revlist_order' : revlist .index (c .fullrev )} for c in data ['commit' ]]))
254+ data = data .join (pandas .DataFrame ([{'date' : c .commit_date } for c in data ['commit' ]]))
240255
241256 # Filter the benchmarks if needed.
242257 if args .filter is not None :
@@ -254,7 +269,7 @@ def main(argv):
254269 return
255270
256271 # Plot the data for all the required benchmarks.
257- figure = create_plot (data , args .metric )
272+ figure = create_plot (data , args .metric , subtitle = args . subtitle )
258273 do_open = args .output is None or args .open
259274 output = args .output if args .output is not None else tempfile .NamedTemporaryFile (suffix = '.html' ).name
260275 plotly .io .write_html (figure , file = output , auto_open = do_open )
0 commit comments