@@ -10,6 +10,21 @@ def parse_args():
1010 return parser .parse_args ()
1111
1212
def clean_up_duplicates(combined_df):
    """Return one row per (suite, mode, dtype, name), preferring non-timeout rows.

    When several rows share the same key, the rows whose ``accuracy`` is not
    ``'timeout'`` are ordered first and the first row of the group is kept
    whole. Every group that had more than one row is printed so the dropped
    results are visible in the log.

    Args:
        combined_df: DataFrame with at least the columns ``suite``, ``mode``,
            ``dtype``, ``name`` and ``accuracy``. It is not modified.

    Returns:
        A new DataFrame sorted by the key columns, with the key columns first,
        a fresh 0..n-1 index, and an extra boolean ``is_timeout`` column.
    """
    key_columns = ['suite', 'mode', 'dtype', 'name']

    # Work on a copy so the caller's frame is neither re-sorted nor extended
    # with the helper column.
    df = combined_df.copy()
    df['is_timeout'] = df['accuracy'].eq('timeout')
    # Prioritize non-timeout results: False sorts before True within a key.
    df = df.sort_values(key_columns + ['is_timeout'])

    # Report only the groups that actually contain more than one row.
    duplicated_rows = df[df.duplicated(subset=key_columns, keep=False)]
    for _, group in duplicated_rows.groupby(key_columns, dropna=False):
        print('Group with duplicates:')
        print(group.T)
        print('Only the first result will be picked!')

    # Keep the first *row* of every key. GroupBy.first() is not used here
    # because it takes the first non-null value per column, which can stitch
    # values from a discarded timeout row into the selected result.
    # Unlike a default groupby, drop_duplicates also keeps rows whose key
    # columns contain missing values.
    deduplicated = df.drop_duplicates(subset=key_columns, keep='first')

    # Key columns lead the result, followed by the remaining columns in their
    # original order.
    other_columns = [col for col in df.columns if col not in key_columns]
    return deduplicated[key_columns + other_columns].reset_index(drop=True)
26+
27+
1328def build_suite_report (combined_df , output_path ):
1429 print ('=======================================' )
1530 print ('= SUMMARY REPORT =' )
@@ -72,8 +87,6 @@ def build_pytorch_report(combined_df, output_path):
7287 for suite , mode in combined_df [['suite' , 'mode' ]].drop_duplicates ().values :
7388 df_subset = combined_df [combined_df ['suite' ].eq (suite )
7489 & combined_df ['mode' ].eq (mode )][['dtype' , 'name' , 'accuracy' ]]
75-
76- df_subset = drop_duplicates (df_subset , suite , mode )
7790 pivoted_df = df_subset .pivot (index = 'name' , columns = 'dtype' , values = 'accuracy' )
7891
7992 # Reset index to make 'name' a regular column
@@ -157,6 +170,10 @@ def main(input_dir, output_dir):
157170 # Artifacts
158171 # 1. Simple concat of all with added suite, mode, dtype
159172 combined_df .to_csv (output_path / 'combined_results.csv' , index = False )
173+ # Clean up duplicates: due to a possible race condition between the subprocess and the main process, we can get duplicates
174+ # when the main process writes a timeout result and the subprocess writes its own result later.
175+ # https://github.com/pytorch/pytorch/blob/06d86e58d0309aa2c217256f88d1990a22ec6e4f/benchmarks/dynamo/common.py#L4298
176+ combined_df = clean_up_duplicates (combined_df )
160177 # 2. torch format report, 9 items (suite, mode), dtype stored as column
161178 build_pytorch_report (combined_df , output_path = output_path )
162179 # 3. Agg report with 45 rows (suite, mode, dtype, passed, failed_REASON, failed_REASON model list)
0 commit comments