Skip to content

Commit f2115fc

Browse files
enhance the perf comparison
1 parent 490abb6 commit f2115fc

File tree

1 file changed

+176
-104
lines changed

1 file changed

+176
-104
lines changed

.github/scripts/op_perf_comparison.py

Lines changed: 176 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
To compare the op perf diff
33
# usage
44
python op_perf_comparison.py --xpu_file /path/to/xpu/performance/result/dir/forward.csv --baseline_file /path/to/baseline/dir/baseline.csv
5-
5+
--profile-only: Only compare record['time(us)']
6+
--e2e-only: Only compare record['E2E total time(us)']
7+
Default: Compare both record['time(us)'] and record['E2E total time(us)'] in same table
68
"""
79

810
import pandas as pd
@@ -48,7 +50,7 @@ def format_parameters(record):
4850
params.append(f"{key}: {value}")
4951
return "<br>".join(params)
5052

51-
def display_comparison(results, threshold, xpu_file):
53+
def display_comparison(results, threshold, xpu_file, compare_both):
5254
if 'forward' in xpu_file.lower():
5355
direction = "Forward"
5456
elif 'backward' in xpu_file.lower():
@@ -61,97 +63,96 @@ def display_comparison(results, threshold, xpu_file):
6163
write_to_github_summary(f"## {direction} No outlier exceeding ({threshold:.0%})")
6264
return
6365

64-
results['diff_float'] = results['difference'].str.rstrip('%').astype(float)
65-
regression = results[results['change'] == '↓'].sort_values('diff_float', ascending=False)
66-
improvement = results[results['change'] == '↑'].sort_values('diff_float')
66+
# Prepare display records - always include both metrics when available
67+
display_records = []
68+
for _, row in results.iterrows():
69+
record = display_row(row)
70+
display_record = {
71+
'Case Name': record['case_name'],
72+
'Op Name': record['op_name'],
73+
'Datatype': record['datatype'],
74+
'Parameters': format_parameters(record)
75+
}
6776

68-
if not regression.empty:
69-
print("\n🔴 Regression:")
70-
display_records = []
71-
for _, row in regression.iterrows():
72-
record = display_row(row)
73-
display_records.append({
74-
'Case Name': record['case_name'],
75-
'Op Name': record['op_name'],
76-
'Datatype': record['datatype'],
77-
'Parameters': format_parameters(record),
78-
'Current Time(us)': record['time_xpu_file'],
79-
'Baseline Time(us)': record['time_baseline_file'],
80-
'Difference': record['difference']
77+
# Always try to include profile time if it exists in the data
78+
if 'profile_time_xpu' in record or 'profile_time_base' in record:
79+
display_record.update({
80+
'Profile Current(us)': record.get('profile_time_xpu', 'N/A'),
81+
'Profile Baseline(us)': record.get('profile_time_base', 'N/A'),
82+
'Profile Diff': record.get('profile_diff', 'N/A'),
83+
'Profile Change': record.get('profile_change', '')
8184
})
8285

86+
# Always try to include E2E time if it exists in the data
87+
if 'e2e_time_xpu' in record or 'e2e_time_base' in record:
88+
display_record.update({
89+
'E2E Current(us)': record.get('e2e_time_xpu', 'N/A'),
90+
'E2E Baseline(us)': record.get('e2e_time_base', 'N/A'),
91+
'E2E Diff': record.get('e2e_diff', 'N/A'),
92+
'E2E Change': record.get('e2e_change', '')
93+
})
94+
95+
display_records.append(display_record)
96+
97+
# Classify records based on changes
98+
regression_records = []
99+
improvement_records = []
100+
101+
for record in results.to_dict('records'):
102+
has_profile_change = 'profile_change' in record and record['profile_change'] in ('↑', '↓')
103+
has_e2e_change = 'e2e_change' in record and record['e2e_change'] in ('↑', '↓')
104+
105+
# If either metric shows regression, count as regression
106+
if (has_profile_change and record['profile_change'] == '↓') or \
107+
(has_e2e_change and record['e2e_change'] == '↓'):
108+
regression_records.append(record)
109+
# If either metric shows improvement, count as improvement
110+
elif (has_profile_change and record['profile_change'] == '↑') or \
111+
(has_e2e_change and record['e2e_change'] == '↑'):
112+
improvement_records.append(record)
113+
114+
# Print results
115+
if regression_records:
116+
print("\n🔴 Regression:")
117+
regression_display = [r for r in display_records
118+
if r['Case Name'] in [x['case_name'] for x in regression_records]]
83119
print(tabulate(
84-
display_records,
120+
regression_display,
85121
headers="keys",
86122
tablefmt='grid',
87123
showindex=False,
88124
floatfmt=".2f"
89125
))
90126

91-
if not improvement.empty:
127+
if improvement_records:
92128
print("\n🟢 Improvement:")
93-
display_records = []
94-
for _, row in improvement.iterrows():
95-
record = display_row(row)
96-
display_records.append({
97-
'Case Name': record['case_name'],
98-
'Op Name': record['op_name'],
99-
'Datatype': record['datatype'],
100-
'Parameters': format_parameters(record),
101-
'Current Time(us)': record['time_xpu_file'],
102-
'Baseline Time(us)': record['time_baseline_file'],
103-
'Difference': record['difference']
104-
})
105-
129+
improvement_display = [r for r in display_records
130+
if r['Case Name'] in [x['case_name'] for x in improvement_records]]
106131
print(tabulate(
107-
display_records,
132+
improvement_display,
108133
headers="keys",
109134
tablefmt='grid',
110135
showindex=False,
111136
floatfmt=".2f"
112137
))
113-
# Print Summary on Github Action Summary
138+
139+
# Generate GitHub summary
114140
summary_output = f"## {direction} Performance Comparison Results\n"
115-
if not regression.empty:
116-
summary_output += f"\n### 🔴 {direction} Regression\n"
117-
display_records = []
118-
for _, row in regression.iterrows():
119-
record = display_row(row)
120-
display_records.append({
121-
'Case Name': record['case_name'],
122-
'Op Name': record['op_name'],
123-
'Datatype': record['datatype'],
124-
'Parameters': format_parameters(record),
125-
'Current Time(us)': record['time_xpu_file'],
126-
'Baseline Time(us)': record['time_baseline_file'],
127-
'Difference': record['difference']
128-
})
129141

142+
if regression_records:
143+
summary_output += "\n### 🔴 Regression\n"
130144
summary_output += tabulate(
131-
display_records,
145+
[r for r in display_records if r['Case Name'] in [x['case_name'] for x in regression_records]],
132146
headers="keys",
133147
tablefmt='github',
134148
showindex=False,
135149
floatfmt=".2f"
136150
) + "\n"
137151

138-
if not improvement.empty:
139-
summary_output += f"\n### 🟢 {direction} Improvement\n"
140-
display_records = []
141-
for _, row in improvement.iterrows():
142-
record = display_row(row)
143-
display_records.append({
144-
'Case Name': record['case_name'],
145-
'Op Name': record['op_name'],
146-
'Datatype': record['datatype'],
147-
'Parameters': format_parameters(record),
148-
'Current Time(us)': record['time_xpu_file'],
149-
'Baseline Time(us)': record['time_baseline_file'],
150-
'Difference': record['difference']
151-
})
152-
152+
if improvement_records:
153+
summary_output += "\n### 🟢 Improvement\n"
153154
summary_output += tabulate(
154-
display_records,
155+
[r for r in display_records if r['Case Name'] in [x['case_name'] for x in improvement_records]],
155156
headers="keys",
156157
tablefmt='github',
157158
showindex=False,
@@ -160,70 +161,141 @@ def display_comparison(results, threshold, xpu_file):
160161

161162
write_to_github_summary(summary_output)
162163

163-
# Measurement columns compared by this script, keyed by the prefix used for the
# matching result fields (e.g. 'profile_time_xpu', 'e2e_diff').
_METRIC_COLUMNS = {'profile': 'time(us)', 'e2e': 'E2E total time(us)'}
# Columns that never take part in the key identifying a test case.
_NON_KEY_COLUMNS = ('time(us)', 'E2E total time(us)', 'E2E forward time(us)')


def _load_perf_csv(path):
    """Read a ';'-separated result CSV and normalise its timing columns.

    Column names are stripped of surrounding whitespace. A timing column that
    is absent is created filled with NaN, and both timing columns are coerced
    to numeric (unparseable cells become NaN).
    """
    df = pd.read_csv(path, sep=';')
    df.columns = df.columns.str.strip()
    for col in _METRIC_COLUMNS.values():
        if col not in df.columns:
            df[col] = float('nan')
        df[col] = pd.to_numeric(df[col], errors='coerce')
    return df


def _collect_times(records):
    """Index valid timings by case key, one mapping per metric.

    The key is the tuple of ``(column, str(value))`` pairs for every column
    that is not a timing column. A later record with the same key overwrites
    an earlier one. NaN and non-numeric timings are left out.
    """
    times = {metric: {} for metric in _METRIC_COLUMNS}
    for rec in records:
        key = tuple((k, str(v)) for k, v in rec.items() if k not in _NON_KEY_COLUMNS)
        for metric, col in _METRIC_COLUMNS.items():
            if col not in rec:
                continue
            try:
                value = float(rec[col])
            except (ValueError, TypeError):
                continue
            if not pd.isna(value):
                times[metric][key] = value
    return times


def _metric_fields(metric, xpu_time, base_time, threshold):
    """Build the result fields for one metric and report if it is an outlier.

    ``diff`` is relative to the current (xpu) time; lower time is better, so a
    positive diff beyond the threshold is an improvement ("↑") and a negative
    one a regression ("↓"). Returns ``(fields, exceeds_threshold)``.
    """
    diff = (base_time - xpu_time) / xpu_time
    if diff > threshold:
        change = "↑"
    elif diff < -threshold:
        change = "↓"
    else:
        change = ""
    fields = {
        f'{metric}_time_xpu': xpu_time,
        f'{metric}_time_base': base_time,
        f'{metric}_diff': f"{diff:.2%}",
        f'{metric}_change': change,
    }
    return fields, bool(change)


def compare_time_values(xpu_file, baseline_file, threshold=0.05, profile_only=False, e2e_only=False):
    """Compare op timings of two result CSVs and display the outliers.

    Args:
        xpu_file: Path to the current ';'-separated result CSV.
        baseline_file: Path to the baseline ';'-separated result CSV.
        threshold: Relative difference above which a case is reported.
        profile_only: Compare only the 'time(us)' column.
        e2e_only: Compare only the 'E2E total time(us)' column.

    A case is reported when, for at least one selected metric, both files have
    a non-zero timing and the relative difference exceeds ``threshold``. The
    collected rows are handed to ``display_comparison``; nothing is returned.
    """
    df_xpu = _load_perf_csv(xpu_file)
    df_baseline = _load_perf_csv(baseline_file)

    # NOTE(review): preprocess_row is defined elsewhere in this file; it is
    # assumed to return a dict-like record (supports .items() and `in`).
    xpu_times = _collect_times(preprocess_row(row) for _, row in df_xpu.iterrows())
    base_times = _collect_times(preprocess_row(row) for _, row in df_baseline.iterrows())

    compare_both = not profile_only and not e2e_only
    metrics = [
        metric
        for metric, excluded in (('profile', e2e_only), ('e2e', profile_only))
        if not excluded
    ]

    # Only cases present in the xpu file can ever be compared, so walk those
    # keys in file order; this keeps the report order stable between runs
    # (iterating a set of string tuples is not).
    ordered_keys = dict.fromkeys(key for metric in metrics for key in xpu_times[metric])

    results = []
    for key in ordered_keys:
        record = dict(key)
        exceeds = False
        for metric in metrics:
            xpu_time = xpu_times[metric].get(key)
            base_time = base_times[metric].get(key)
            if xpu_time is None or base_time is None:
                continue
            # A zero timing makes the relative difference meaningless.
            if xpu_time == 0 or base_time == 0:
                continue
            fields, is_outlier = _metric_fields(metric, xpu_time, base_time, threshold)
            record.update(fields)
            exceeds = exceeds or is_outlier
        if exceeds:
            results.append(record)

    result_df = pd.DataFrame(results) if results else pd.DataFrame()
    display_comparison(result_df, threshold, xpu_file, compare_both)
207258

208259
def main():
    """Parse the command line and run the performance comparison.

    ``--profile-only`` and ``--e2e-only`` are mutually exclusive; passing both
    makes argparse exit with a usage error. The chosen settings are echoed to
    stdout and to the GitHub summary before the comparison runs.
    """
    parser = argparse.ArgumentParser(description='Compare time values between two CSV files')
    parser.add_argument('-x', '--xpu_file', required=True, help='XPU OP performance result csv files dir')
    parser.add_argument('-b', '--baseline_file', required=True, help="XPU OP baseline result csv files dir")
    # argparse %-formats help strings, so a literal percent sign must be
    # written as '%%' or `--help` fails with a format error.
    parser.add_argument('-t', '--threshold', type=float, default=0.10,
                        help='Threshold for time difference (default: 0.10 for 10%%)')
    mode_group = parser.add_mutually_exclusive_group()
    mode_group.add_argument('--profile-only', action='store_true',
                            help='Only compare profile time (time(us))')
    mode_group.add_argument('--e2e-only', action='store_true',
                            help='Only compare E2E time (E2E total time(us))')
    args = parser.parse_args()

    if args.profile_only:
        mode = "Comparing only profile time (time(us))"
    elif args.e2e_only:
        mode = "Comparing only E2E time (E2E total time(us))"
    else:
        mode = "Comparing both profile time and E2E time in same table"

    print(f" Compared file: {args.xpu_file} and {args.baseline_file}")
    print(f" Threshold: {args.threshold:.0%}")
    print(f" {mode}")

    write_to_github_summary("## Performance Comparison Set")
    write_to_github_summary(f"- Threshold: {args.threshold:.0%}")
    write_to_github_summary(f"- {mode}")

    compare_time_values(
        xpu_file=args.xpu_file,
        baseline_file=args.baseline_file,
        threshold=args.threshold,
        profile_only=args.profile_only,
        e2e_only=args.e2e_only
    )
226299

227-
228300
if __name__ == "__main__":
229301
main()

0 commit comments

Comments
 (0)