1
1
#!/usr/bin/env python3
2
2
3
3
import argparse
4
+ import datetime
4
5
import functools
5
6
import os
6
7
import pathlib
@@ -10,6 +11,7 @@ import subprocess
10
11
import sys
11
12
import tempfile
12
13
14
+ import git
13
15
import pandas
14
16
import plotly
15
17
import plotly .express
@@ -74,13 +76,22 @@ class Commit:
74
76
"""
75
77
return subprocess .check_output (['git' , '-C' , self ._git_repo , 'rev-parse' , self ._sha ], text = True ).strip ()
76
78
79
@functools.cached_property
def commit_date(self):
    """
    Return the date of the commit as a `datetime.datetime` object.

    The commit timestamp is read from the repository at `self._git_repo` through GitPython
    and converted with `datetime.datetime.fromtimestamp`, so the result is a naive datetime
    expressed in the local timezone of the machine running the script. The value is computed
    on first access and cached on the instance afterwards.
    """
    # Open the repository as a context manager so that whatever resources GitPython holds
    # for it are released as soon as the timestamp has been read. This property is evaluated
    # once per commit, so an unclosed `Repo` would otherwise linger for every commit.
    with git.Repo(self._git_repo) as repo:
        timestamp = repo.commit(self._sha).committed_date
    return datetime.datetime.fromtimestamp(timestamp)
86
+
77
87
def prefetch (self ):
78
88
"""
79
89
Prefetch cached properties associated to this commit object.
80
90
81
91
This makes it possible to control when time is spent recovering that information from Git for
82
92
e.g. better reporting to the user.
83
93
"""
94
+ self .commit_date
84
95
self .fullrev
85
96
self .shortrev
86
97
self .show ()
@@ -101,20 +112,21 @@ def truncate_lines(string, n, marker=None):
101
112
assert len (truncated ) <= n , "broken post-condition"
102
113
return '\n ' .join (truncated )
103
114
104
def create_plot(data, metric, subtitle=None):
    """
    Create a plot object showing the evolution of each benchmark throughout the given commits for
    the given metric.

    Parameters:
      data     -- pandas DataFrame with (at least) the columns 'date', 'benchmark', 'commit' and
                  the column named by `metric`. Entries of the 'commit' column must provide
                  `show()` and `shortrev`, which are used for the hover text and the title.
      metric   -- name of the column to plot on the y axis.
      subtitle -- optional subtitle displayed under the chart title.

    Returns the figure created by `plotly.express.scatter`. The data must contain at least one
    row, since the title is built from the first and last commits.
    """
    # Order rows by commit date, then by benchmark name within the same date.
    data = data.sort_values(by=['date', 'benchmark'])
    revlist = pandas.unique(data['commit'])  # list of all commits in chronological order

    # Build the hover text once per commit rather than once per row.
    hover_info = {c: truncate_lines(c.show(), 30, marker='...').replace('\n', '<br>') for c in revlist}

    # Only forward `subtitle` when one was requested, so that plotly releases that do not know
    # this keyword keep working for callers that never ask for a subtitle.
    optional_args = {} if subtitle is None else {'subtitle': subtitle}

    figure = plotly.express.scatter(data, title=f"{revlist[0].shortrev} to {revlist[-1].shortrev}",
                                    x='date', y=metric,
                                    symbol='benchmark',
                                    color='benchmark',
                                    hover_name=[hover_info[c] for c in data['commit']],
                                    trendline="lowess",
                                    **optional_args)
    return figure
119
131
120
132
def directory_path (string ):
@@ -184,7 +196,7 @@ def main(argv):
184
196
description = 'Visualize historical data in LNT format. This program generates a HTML file that embeds an '
185
197
'interactive plot with the provided data. The HTML file can then be opened in a browser to '
186
198
'visualize the data as a chart.' ,
187
- epilog = 'This script depends on the `plotly` and the `tqdm` Python modules .' )
199
+ epilog = 'This script depends on the modules listed in `libcxx/utils/requirements.txt` .' )
188
200
parser .add_argument ('directory' , type = directory_path ,
189
201
help = 'Path to a valid directory containing benchmark data in LNT format, each file being named <commit>.lnt. '
190
202
'This is also the format generated by the `benchmark-historical` utility.' )
@@ -208,6 +220,8 @@ def main(argv):
208
220
'floating point number, e.g. 0.25 will detect points that differ by more than 25%% from their previous '
209
221
'result. This option respects --filter, i.e. only benchmarks that match the filter will be analyzed for '
210
222
'outliers.' )
223
+ parser .add_argument ('--subtitle' , type = str , required = False ,
224
+ help = 'Optional subtitle for the chart. This can be used to help identify the contents of the chart.' )
211
225
parser .add_argument ('--git-repo' , type = directory_path , default = pathlib .Path (os .getcwd ()),
212
226
help = 'Path to the git repository to use for ordering commits in time. '
213
227
'By default, the current working directory is used.' )
@@ -217,26 +231,27 @@ def main(argv):
217
231
args = parser .parse_args (argv )
218
232
219
233
# Extract benchmark data from the directory.
220
- data = []
234
+ data = {}
221
235
files = [f for f in args .directory .glob ('*.lnt' )]
222
236
for file in tqdm .tqdm (files , desc = 'Parsing LNT files' ):
237
+ rows = parse_lnt (file .read_text ().splitlines ())
223
238
(commit , _ ) = os .path .splitext (os .path .basename (file ))
224
239
commit = Commit (args .git_repo , commit )
225
- with open (file , 'r' ) as f :
226
- rows = parse_lnt (f .readlines ())
227
- data .extend ((commit , row ) for row in rows )
240
+ data [commit ] = rows
228
241
229
242
# Obtain commit information which is then cached throughout the program. Do this
230
243
# eagerly so we can provide a progress bar.
231
- for ( commit , _ ) in tqdm .tqdm (data , desc = 'Prefetching Git information' ):
244
+ for commit in tqdm .tqdm (data . keys () , desc = 'Prefetching Git information' ):
232
245
commit .prefetch ()
233
246
234
247
# Create a dataframe from the raw data and add some columns to it:
235
248
# - 'commit' represents the Commit object associated to the results in that row
236
249
# - `revlist_order` represents the order of the commit within the Git repository.
237
- data = pandas .DataFrame ([row | {'commit' : commit } for (commit , row ) in data ])
238
- revlist = sorted_revlist (args .git_repo , [c .fullrev for c in set (data ['commit' ])])
250
+ # - `date` represents the commit date
251
+ revlist = sorted_revlist (args .git_repo , [c .fullrev for c in data .keys ()])
252
+ data = pandas .DataFrame ([row | {'commit' : c } for (c , rows ) in data .items () for row in rows ])
239
253
data = data .join (pandas .DataFrame ([{'revlist_order' : revlist .index (c .fullrev )} for c in data ['commit' ]]))
254
+ data = data .join (pandas .DataFrame ([{'date' : c .commit_date } for c in data ['commit' ]]))
240
255
241
256
# Filter the benchmarks if needed.
242
257
if args .filter is not None :
@@ -254,7 +269,7 @@ def main(argv):
254
269
return
255
270
256
271
# Plot the data for all the required benchmarks.
257
- figure = create_plot (data , args .metric )
272
+ figure = create_plot (data , args .metric , subtitle = args . subtitle )
258
273
do_open = args .output is None or args .open
259
274
output = args .output if args .output is not None else tempfile .NamedTemporaryFile (suffix = '.html' ).name
260
275
plotly .io .write_html (figure , file = output , auto_open = do_open )
0 commit comments