Skip to content

Commit 0ee69b8

Browse files
committed
Add geomean summary row to output from utils/compare.py.
This change adds a summary row with the geometric mean of the selected metric to the output of utils/compare.py. The 'rhs' values are normalized to the 'lhs' values and the geometric mean of the resulting ratios is computed. This should be similar to the geomean row in the LNT HTML UI and makes it possible to compare two sets of runs. Reviewers: anemet, MatzeB, cmatthews, serge-sans-paille Reviewed By: anemet Differential Revision: https://reviews.llvm.org/D57828 llvm-svn: 356545
1 parent 1b240a7 commit 0ee69b8

File tree

1 file changed

+38
-12
lines changed

1 file changed

+38
-12
lines changed

utils/compare.py

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from __future__ import print_function
77

88
import pandas as pd
9+
from scipy import stats
910
import sys
1011
import os.path
1112
import re
@@ -108,28 +109,40 @@ def readmulti(filenames):
108109
d = pd.concat(datasets, axis=0, names=['run'], keys=datasetnames)
109110
return d
110111

111-
def add_diff_column(d, absolute_diff=False):
112-
values = d.unstack(level=0)
113-
114-
has_two_runs = d.index.get_level_values(0).nunique() == 2
112+
def get_values(values):
113+
# Create data view without diff column.
114+
if 'diff' in values.columns:
115+
values = values[[c for c in values.columns if c != 'diff']]
116+
has_two_runs = len(values.columns) == 2
115117
if has_two_runs:
116-
values0 = values.iloc[:,0]
117-
values1 = values.iloc[:,1]
118+
return (values.iloc[:,0], values.iloc[:,1])
118119
else:
119-
values0 = values.min(axis=1)
120-
values1 = values.max(axis=1)
120+
return (values.min(axis=1), values.max(axis=1))
121121

122+
def add_diff_column(values, absolute_diff=False):
123+
values0, values1 = get_values(values)
122124
# Quotient or absolute difference?
123125
if absolute_diff:
124126
values['diff'] = values1 - values0
125127
else:
126128
values['diff'] = values1 / values0
127129
values['diff'] -= 1.0
128-
# unstack() gave us a complicated multiindex for the columns, simplify
129-
# things by renaming to a simple index.
130-
values.columns = [(c[1] if c[1] else c[0]) for c in values.columns.values]
131130
return values
132131

132+
def add_geomean_row(data, dataout):
    """
    Normalize values1 over values0, compute geomean difference and add a
    summary row to dataout.

    data is passed to get_values() to obtain the (values0, values1) pair;
    dataout is the table being printed.  Returns a new frame consisting of
    dataout plus one trailing row whose 'Program' cell is
    'Geomean difference', whose 'diff' cell is the geomean of
    values1 / values0 minus 1.0, and whose other cells are empty strings.
    The result carries a fresh integer index; dataout itself is not
    modified.
    """
    values0, values1 = get_values(data)
    relative = values1 / values0
    # A program missing from one run yields NaN and a zero baseline yields
    # an infinite ratio; either one would turn the whole geomean into
    # NaN/inf, so only finite ratios take part in the summary.
    infinity = float('inf')
    relative = relative.replace([infinity, -infinity], float('nan')).dropna()
    if len(relative) > 0:
        gm_diff = stats.gmean(relative) - 1.0
    else:
        # Nothing comparable is left, so there is no meaningful geomean.
        gm_diff = float('nan')

    gm_row = {c: '' for c in dataout.columns}
    gm_row['diff'] = gm_diff
    gm_row['Program'] = 'Geomean difference'
    # DataFrame.append() was removed in pandas 2.0; concatenating a one-row
    # frame is the equivalent that works on old and new pandas alike.
    summary = pd.DataFrame([gm_row])
    return pd.concat([dataout, summary], ignore_index=True, sort=False)
145+
133146
def filter_failed(data, key='Exec'):
    """Return only the rows of data whose `key` column equals "pass"."""
    passed = data[key] == "pass"
    return data.loc[passed]
135148

@@ -209,6 +222,9 @@ def print_result(d, limit_output=True, shorten_names=True,
209222
# Turn index into a column so we can format it...
210223
dataout.insert(0, 'Program', dataout.index)
211224

225+
if show_diff_column:
226+
dataout = add_geomean_row(d, dataout)
227+
212228
formatters = dict()
213229
formatters['diff'] = format_diff
214230
if shorten_names:
@@ -220,7 +236,11 @@ def format_name(name, common_prefix, common_suffix):
220236
return "%-45s" % truncate(name, 10, 30)
221237

222238
formatters['Program'] = lambda name: format_name(name, drop_prefix, drop_suffix)
223-
float_format = lambda x: "%6.2f" % (x,)
239+
def float_format(x):
240+
if x == '':
241+
return ''
242+
return "%6.2f" % (x,)
243+
224244
pd.set_option("display.max_colwidth", 0)
225245
out = dataout.to_string(index=False, justify='left',
226246
float_format=float_format, formatters=formatters)
@@ -334,6 +354,12 @@ def format_name(name, common_prefix, common_suffix):
334354
print("Metric: %s" % (",".join(metrics),))
335355
if len(metrics) > 0:
336356
data = data[metrics]
357+
358+
data = data.unstack(level=0)
359+
# unstack() gave us a complicated multiindex for the columns, simplify
360+
# things by renaming to a simple index.
361+
data.columns = [(c[1] if c[1] else c[0]) for c in data.columns.values]
362+
337363
data = add_diff_column(data)
338364

339365
sortkey = 'diff'

0 commit comments

Comments
 (0)