6
6
from __future__ import print_function
7
7
8
8
import pandas as pd
9
+ from scipy import stats
9
10
import sys
10
11
import os .path
11
12
import re
@@ -108,28 +109,40 @@ def readmulti(filenames):
108
109
d = pd .concat (datasets , axis = 0 , names = ['run' ], keys = datasetnames )
109
110
return d
110
111
111
- def add_diff_column (d , absolute_diff = False ):
112
- values = d .unstack (level = 0 )
113
-
114
- has_two_runs = d .index .get_level_values (0 ).nunique () == 2
112
def get_values(values):
    """
    Return a (values0, values1) pair of columns to compare.

    A 'diff' column, if present, is ignored.  When exactly two columns
    remain they are returned in positional order (first, second); for any
    other column count the row-wise minimum and maximum are returned.
    """
    # Work on a view of the data that leaves out a 'diff' column.
    if 'diff' in values.columns:
        run_columns = [name for name in values.columns if name != 'diff']
        values = values[run_columns]

    if len(values.columns) != 2:
        return (values.min(axis=1), values.max(axis=1))
    return (values.iloc[:, 0], values.iloc[:, 1])
121
121
122
def add_diff_column(values, absolute_diff=False):
    """
    Store a 'diff' column in values and return values.

    The two columns being compared come from get_values().  With
    absolute_diff the column holds values1 - values0, otherwise the
    relative change values1 / values0 - 1.0.  The table passed in is
    modified in place.
    """
    before, after = get_values(values)
    # Quotient or absolute difference?
    if absolute_diff:
        change = after - before
    else:
        change = after / before
        change -= 1.0
    values['diff'] = change
    return values
132
131
132
def add_geomean_row(data, dataout):
    """
    Normalize values1 over values0, compute geomean difference and add a
    summary row to dataout.

    data is passed to get_values() to obtain the two columns to compare.
    The result is a new table made of the rows of dataout followed by one
    row whose 'Program' cell is 'Geomean difference', whose 'diff' cell is
    the geometric mean of values1 / values0 minus 1.0, and whose remaining
    cells are ''.  The index of the result is renumbered.
    """
    values0, values1 = get_values(data)
    # A zero baseline would give an infinite ratio and a missing measurement
    # a NaN; either one would poison the whole geomean, so skip those rows.
    relative = (values1 / values0.where(values0 != 0)).dropna()
    if len(relative) > 0:
        gm_diff = stats.gmean(relative) - 1.0
    else:
        # Nothing comparable is left; report "no value" rather than fail.
        gm_diff = float('nan')

    gm_row = {c: '' for c in dataout.columns}
    gm_row['diff'] = gm_diff
    gm_row['Program'] = 'Geomean difference'
    # DataFrame.append() was removed in pandas 2.0; concat() with a one-row
    # frame is the equivalent operation.
    return pd.concat([dataout, pd.DataFrame([gm_row])], ignore_index=True)
145
+
133
146
def filter_failed(data, key='Exec'):
    """Return only the rows of data whose key column equals "pass"."""
    passed = data[key] == "pass"
    return data.loc[passed]
135
148
@@ -209,6 +222,9 @@ def print_result(d, limit_output=True, shorten_names=True,
209
222
# Turn index into a column so we can format it...
210
223
dataout .insert (0 , 'Program' , dataout .index )
211
224
225
+ if show_diff_column :
226
+ dataout = add_geomean_row (d , dataout )
227
+
212
228
formatters = dict ()
213
229
formatters ['diff' ] = format_diff
214
230
if shorten_names :
@@ -220,7 +236,11 @@ def format_name(name, common_prefix, common_suffix):
220
236
return "%-45s" % truncate (name , 10 , 30 )
221
237
222
238
formatters ['Program' ] = lambda name : format_name (name , drop_prefix , drop_suffix )
223
- float_format = lambda x : "%6.2f" % (x ,)
239
def float_format(x):
    """Format x as a fixed-width number with two decimals; '' stays ''."""
    # Empty-string cells (placeholders for "no value") pass through as-is.
    if x != '':
        return "%6.2f" % (x,)
    return ''
243
+
224
244
pd .set_option ("display.max_colwidth" , 0 )
225
245
out = dataout .to_string (index = False , justify = 'left' ,
226
246
float_format = float_format , formatters = formatters )
@@ -334,6 +354,12 @@ def format_name(name, common_prefix, common_suffix):
334
354
print ("Metric: %s" % ("," .join (metrics ),))
335
355
if len (metrics ) > 0 :
336
356
data = data [metrics ]
357
+
358
+ data = data .unstack (level = 0 )
359
+ # unstack() gave us a complicated multiindex for the columns, simplify
360
+ # things by renaming to a simple index.
361
+ data .columns = [(c [1 ] if c [1 ] else c [0 ]) for c in data .columns .values ]
362
+
337
363
data = add_diff_column (data )
338
364
339
365
sortkey = 'diff'
0 commit comments