@@ -87,17 +87,25 @@ def get_accuracies(confusions):
     assert len(accs) == 10, len(accs)
     return pandas.Series(accs)

-def plot_accuracy_comparison(experiments, ylim=(0.60, 0.80), figsize=(12, 4)):
+def plot_accuracy_comparison(experiments, ylim=(0.0, 1.0), figsize=(12, 4)):

     df = experiments.copy()
     df.index = experiments.nickname
     acc = df.confusions_test.apply(get_accuracies).T
-    fig, ax = plt.subplots(1, figsize=figsize)
+    fig, ax = plt.subplots(1, figsize=figsize, dpi=300)

     acc.boxplot(ax=ax)

+    # Mark SOTA models
+    ax.axhline(0.79, linestyle='dotted', color='green')
+    ax.axhline(0.83, linestyle='dotted', color='green')
+
+    # FIXME: better no-information rate
+    ax.axhline(0.10, linestyle='dotted', color='black')
+
     ax.set_ylabel('Accuracy')
     ax.set_ylim(ylim)
+    ax.set_yticks(numpy.arange(ylim[0], ylim[1], 0.1))

     #ax.set_xticks(experiments.nickname)
     #ax.set_xlabel('Model')
@@ -106,7 +114,6 @@ def plot_accuracy_comparison(experiments, ylim=(0.60, 0.80), figsize=(12, 4)):

 def plot_accuracy_vs_compute(experiments, ylim=(0.60, 0.80),
         perf_metric='utilization', figsize=(12, 8)):
-    # TODO: color experiment groups
     # TODO: add error bars?

     acc = experiments.confusions_test.apply(get_accuracies).T
@@ -115,12 +122,23 @@ def plot_accuracy_vs_compute(experiments, ylim=(0.60, 0.80),
     numpy.testing.assert_allclose(df.test_acc_mean, df.accuracy)
     df['experiment'] = df.index

-    fig, ax = plt.subplots(1, figsize=figsize)
-    df.plot.scatter(ax=ax, x=perf_metric, y='accuracy', logx=True)
+    fig, ax = plt.subplots(1, figsize=figsize, dpi=300)
+    def get_color(idx, nick):
+        if nick.startswith('Stride-DS-') and not nick.endswith('3x3'):
+            return 'C0'
+        return 'C{}'.format(1 + idx)
+
+    colors = [ get_color(i, n) for i, n in enumerate(df.nickname) ]
+    df.plot.scatter(ax=ax, x=perf_metric, y='accuracy', c=colors, logx=True)

     # Y axis
     ax.set_ylim(ylim)
     ax.set_ylabel('Accuracy')
+    ax.grid(True)
+    ax.tick_params(axis='y', grid_alpha=0.2, grid_color='black')
+
+    # X axis
+    ax.tick_params(axis='x', grid_alpha=0.0)

     if perf_metric == 'utilization':
         # mark feasible regions
@@ -135,20 +153,26 @@ def format_utilization(tick_val, tick_pos):
             return '{:d}%'.format(int(tick_val * 100))

         ax.xaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(format_utilization))
-        ax.set_xlabel('CPU utilization')
+        ax.set_xlabel('CPU usage')

     # Add markers
     def add_labels(row):
         xy = row[perf_metric], row.accuracy
         label = "{}".format(row.nickname)
+        label = label.replace('Stride-DS-', 'S-DS-')
+        label = label.replace('Stride-', 'S-')
+
         ax.annotate(label, xy,
-            xytext=(5, 20),
+            xytext=(2, 5),
             textcoords='offset points',
-            size=10,
-            rotation=25,
+            rotation_mode='anchor',
+            size=7,
+            rotation=80,
             color='darkslategrey')
     df.apply(add_labels, axis=1)

+    fig.tight_layout()
+
     return fig


@@ -231,12 +255,16 @@ def main():

     df['val_acc_mean'] = df.confusions_val.apply(get_accuracies).mean(axis=1)
     df['test_acc_mean'] = df.confusions_test.apply(get_accuracies).mean(axis=1)
+    df['test_acc_std'] = df.confusions_test.apply(get_accuracies).std(axis=1)
     df = df.sort_index()

     # TODO: add std-dev
     df['foreground_val_acc_mean'] = df.confusions_val_foreground.apply(get_accuracies).mean(axis=1)
     df['foreground_test_acc_mean'] = df.confusions_test_foreground.apply(get_accuracies).mean(axis=1)
     df['background_test_acc_mean'] = df.confusions_test_background.apply(get_accuracies).mean(axis=1)
+    df['foreground_val_acc_std'] = df.confusions_val_foreground.apply(get_accuracies).std(axis=1)
+    df['foreground_test_acc_std'] = df.confusions_test_foreground.apply(get_accuracies).std(axis=1)
+    df['background_test_acc_std'] = df.confusions_test_background.apply(get_accuracies).std(axis=1)


     #df['grouped_test_acc_mean'] = grouped_confusion(df.confusions_test, groups).apply(get_accuracies).mean(axis=1)
@@ -271,12 +299,12 @@ def save(fig, name):


     # Split the variations from all models
-    width_variations = df.nickname.str.startswith('Stride-DS-5x5-')
-    fig = plot_accuracy_comparison(df[width_variations != True])
+    width_variations = df.nickname.str.startswith('Stride-DS-')
+    fig = plot_accuracy_comparison(df[width_variations != True], ylim=(0.0, 1.0), figsize=(7, 3))
     save(fig, 'models_accuracy.png')

     perf_metric = 'maccs_frame' if args.skip_device else 'utilization'
-    fig = plot_accuracy_vs_compute(df, perf_metric=perf_metric)
+    fig = plot_accuracy_vs_compute(df, perf_metric=perf_metric, figsize=(7, 4), ylim=(0.5, 0.8))
     save(fig, 'models_efficiency.png')


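Note: get_accuracies itself is not touched by this diff, but every changed line depends on it mapping the stored confusion matrices to one accuracy value per cross-validation fold. The following is only a minimal sketch consistent with the assert len(accs) == 10 shown above (10 is assumed to be the fold count); the actual implementation in the repository may differ.

    import numpy
    import pandas

    def get_accuracies(confusions):
        # Assumed: one confusion matrix per cross-validation fold.
        # Accuracy = correctly classified samples (diagonal) / all samples.
        accs = [numpy.trace(c) / numpy.sum(c) for c in confusions]
        assert len(accs) == 10, len(accs)
        return pandas.Series(accs)

The mean(axis=1) / std(axis=1) calls added in main() then aggregate these per-fold accuracies into per-experiment mean and standard deviation columns.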