Skip to content

Commit 47f16b6

Browse files
authored
[E2E accuracy] Added workaround for duplicate cases (#5279)
The duplication can appear because the benchmarking script runs each benchmark in a subprocess. If the subprocess is too slow, the main script writes a timeout record; at the same time, the child process can still write a row with its own result. https://github.com/pytorch/pytorch/blob/06d86e58d0309aa2c217256f88d1990a22ec6e4f/benchmarks/dynamo/common.py#L4298 I added selection of a single result in case of duplication, along with a warning.
1 parent 979146e commit 47f16b6

File tree

1 file changed

+19
-2
lines changed

1 file changed

+19
-2
lines changed

scripts/e2e_checks/aggregate_e2e_results.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,21 @@ def parse_args():
1010
return parser.parse_args()
1111

1212

13+
def clean_up_duplicates(combined_df):
    """Collapse duplicate benchmark rows to one row per test case.

    Duplicates can appear because each benchmark runs in a subprocess: when
    the subprocess is too slow, the main process writes a 'timeout' record,
    and the child process may still write its own result row afterwards,
    producing two rows for the same (suite, mode, dtype, name) key.

    For each duplicated key the non-timeout row is preferred; remaining ties
    are broken by keeping the first row. Every duplicated group is printed
    as a warning. The input DataFrame is not modified.

    Args:
        combined_df: DataFrame with at least the columns
            'suite', 'mode', 'dtype', 'name', 'accuracy'.

    Returns:
        A new DataFrame with exactly one row per (suite, mode, dtype, name)
        and the original columns only (no helper columns).
    """
    keys = ['suite', 'mode', 'dtype', 'name']

    # Work on a copy so the caller's DataFrame is left untouched.
    df = combined_df.copy()

    # 'timeout' rows sort after real results within each key group, so the
    # real result wins when we keep the first row below.
    df['is_timeout'] = df['accuracy'].eq('timeout')
    df.sort_values(keys + ['is_timeout'], inplace=True)

    # Warn about every duplicated group before collapsing it. A plain loop
    # is used instead of groupby().filter() because filter() expects a bool
    # return value and we only want the print side effect.
    for _, group in df.groupby(keys):
        if len(group) > 1:
            print('Group with duplicates:')
            print(group.T)
            print('Only the first result will be picked!')

    # drop_duplicates(keep='first') keeps whole rows, unlike groupby().first()
    # which picks the first non-NA value per column and can mix rows.
    deduped = df.drop_duplicates(subset=keys, keep='first')

    # Drop the helper column so it does not leak into downstream reports.
    return deduped.drop(columns=['is_timeout']).reset_index(drop=True)
1328
def build_suite_report(combined_df, output_path):
1429
print('=======================================')
1530
print('= SUMMARY REPORT =')
@@ -72,8 +87,6 @@ def build_pytorch_report(combined_df, output_path):
7287
for suite, mode in combined_df[['suite', 'mode']].drop_duplicates().values:
7388
df_subset = combined_df[combined_df['suite'].eq(suite)
7489
& combined_df['mode'].eq(mode)][['dtype', 'name', 'accuracy']]
75-
76-
df_subset = drop_duplicates(df_subset, suite, mode)
7790
pivoted_df = df_subset.pivot(index='name', columns='dtype', values='accuracy')
7891

7992
# Reset index to make 'name' a regular column
@@ -157,6 +170,10 @@ def main(input_dir, output_dir):
157170
# Artifacts
158171
# 1. Simple concat of all with added suite, mode, dtype
159172
combined_df.to_csv(output_path / 'combined_results.csv', index=False)
173+
# Clean up duplicates, due to possible race conditions between subprocess and main process we could have duplicates
174+
# when the main process writes a timeout result and the subprocess writes its own result later.
175+
# https://github.com/pytorch/pytorch/blob/06d86e58d0309aa2c217256f88d1990a22ec6e4f/benchmarks/dynamo/common.py#L4298
176+
combined_df = clean_up_duplicates(combined_df)
160177
# 2. torch format report, 9 items (suite, mode), dtype stored as column
161178
build_pytorch_report(combined_df, output_path=output_path)
162179
# 3. Agg report with 45 rows (suite, mode, dtype, passed, failed_REASON, failed_REASON model list)

0 commit comments

Comments
 (0)