Commit ae3022f

[Nightly] Add op performance regression check (#1622)
- For op performance tracking, add a regression check to the OP nightly performance test. - Baseline artifacts are saved in a GitHub Actions issue (#1689) and are updated automatically whenever a new baseline is produced.
1 parent 7b4ff01 commit ae3022f
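
As context (not part of the diff below), a rough sketch of how a nightly job could chain the two new scripts; the result/baseline paths and the .github/scripts/ location of op_calculate_best_perf.py are assumptions:

# Hypothetical nightly glue, for illustration only; paths are placeholders.
import subprocess

results = "./results/forward.csv"      # today's op summary (assumed path)
baseline = "./baseline/baseline.csv"   # baseline restored from the issue artifact (assumed path)

# Report regressions/improvements beyond the threshold to stdout and, when
# GITHUB_STEP_SUMMARY is set, to the GitHub Actions step summary.
subprocess.run(["python", ".github/scripts/op_perf_comparison.py",
                "--xpu_file", results, "--baseline_file", baseline,
                "--threshold", "0.10"], check=True)

# Keep the best (lowest) time per case; -r drops cases no longer produced.
subprocess.run(["python", ".github/scripts/op_calculate_best_perf.py",
                "--xpu", results, "--baseline", baseline, "-r"], check=True)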

4 files changed (+403 -1 lines changed)

op_calculate_best_perf.py

Lines changed: 153 additions & 0 deletions
@@ -0,0 +1,153 @@
"""
To update the op perf baseline with the better (lower) performance value.
# usage
python op_calculate_best_perf.py --xpu /path/to/xpu/performance/result/dir/forward.csv --baseline /path/to/baseline/dir/new_baseline.csv -r

"""

import csv
import argparse
from pathlib import Path

updated_rows = []
added_cases = []
updated_cases = []
removed_cases = []

def update_baseline(xpu_file, baseline_file, remove_missing=False):
    with open(xpu_file) as f:
        xpu_reader = csv.DictReader(f, delimiter=';')
        xpu_rows = list(xpu_reader)
        xpu_fieldnames = xpu_reader.fieldnames  # Keep original field order
    fieldnames = [f for f in xpu_fieldnames if f != 'time(us)']
    xpu_data = {make_key(row, fieldnames): (float(row['time(us)']), row) for row in xpu_rows}

    with open(baseline_file) as f:
        baseline_reader = csv.DictReader(f, delimiter=';')
        baseline_rows = list(baseline_reader)
        baseline_fieldnames = baseline_reader.fieldnames

    # Add new parameter columns of new ops into the baseline file
    all_fieldnames = xpu_fieldnames + [f for f in baseline_fieldnames if f not in xpu_fieldnames]
    fieldnames = [f for f in all_fieldnames if f != 'time(us)']

    baseline_keys = {make_key(row, fieldnames) for row in baseline_rows}
    xpu_keys = set(xpu_data.keys())

    # Resolve existing cases
    for row in baseline_rows:
        key = make_key(row, fieldnames)
        if key in xpu_data:
            xpu_time, xpu_row = xpu_data[key]
            baseline_time = float(row['time(us)'])

            if xpu_time < baseline_time:
                updated_row = {}
                for field in all_fieldnames:
                    updated_row[field] = xpu_row.get(field, row.get(field, ''))
                updated_row['time(us)'] = str(xpu_time)
                updated_cases.append((key, baseline_time, xpu_time, updated_row))
                updated_rows.append(updated_row)
            else:
                ordered_row = {}
                for field in all_fieldnames:
                    ordered_row[field] = row.get(field, '')
                updated_rows.append(ordered_row)
        elif not remove_missing:
            ordered_row = {}
            for field in all_fieldnames:
                ordered_row[field] = row.get(field, '')
            updated_rows.append(ordered_row)

    # Add new cases
    for key in xpu_keys - baseline_keys:
        xpu_time, xpu_row = xpu_data[key]
        new_row = {}
        for field in all_fieldnames:
            new_row[field] = xpu_row.get(field, '')
        new_row['time(us)'] = str(xpu_time)
        updated_rows.append(new_row)
        added_cases.append((key, xpu_time, new_row))

    # Resolve removed cases
    if remove_missing:
        for key in baseline_keys - xpu_keys:
            removed_case = next(row for row in baseline_rows if make_key(row, fieldnames) == key)
            removed_cases.append((key, float(removed_case['time(us)']), removed_case))

    if added_cases:
        print(f"\nAdded {len(added_cases)} new case(s):")
        for key, time, row in added_cases:
            print(f"\n[New Case] {format_case(key)}")
            print(f"Time: {time} us")
            print("Parameters:")
            for k, v in row.items():
                if k != 'time(us)':
                    print(f"  {k}: {v}")
            print("-" * 60)

    if updated_cases:
        print(f"\nUpdated {len(updated_cases)} case(s):")
        for key, old_time, new_time, row in updated_cases:
            print(f"\n[Updated] {format_case(key)}")
            print(f"Time: {old_time} us → {new_time} us")
            print("Parameters:")
            for k, v in row.items():
                if k != 'time(us)':
                    print(f"  {k}: {v}")
            print("-" * 60)

    if remove_missing and removed_cases:
        print(f"\nRemoved {len(removed_cases)} case(s):")
        for key, time, row in removed_cases:
            print(f"\n[Removed] {format_case(key)}")
            print(f"Time: {time} us")
            print("Parameters:")
            for k, v in row.items():
                if k != 'time(us)':
                    print(f"  {k}: {v}")
            print("-" * 60)

    if not (added_cases or updated_cases or (remove_missing and removed_cases)):
        print("\nNo changes detected between files.")

    backup_file = baseline_file.replace('.csv', '_backup.csv')
    Path(baseline_file).rename(backup_file)

    with open(baseline_file, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=all_fieldnames, delimiter=';')
        writer.writeheader()
        writer.writerows(updated_rows)

    print("\n" + "-" * 80)
    print(f"Update complete! Total cases in new baseline: {len(updated_rows)}")
    print(f"Updated baseline saved to {baseline_file}")
    print(f"Original backup created at {backup_file}")

def make_key(row, fieldnames):
    return tuple(str(row.get(field, '')) for field in fieldnames)

def format_case(key):
    return f"{key[0]} | {key[1]} | {key[2]} (shape: {key[3]})"

def main():
    parser = argparse.ArgumentParser(description='Compare and synchronize operation performance data')
    parser.add_argument('-x', '--xpu', required=True, help='Path to xpu_op_summary.csv')
    parser.add_argument('-b', '--baseline', required=True, help='Path to baseline_op_summary.csv')
    parser.add_argument('-r', '--remove-missing', action='store_true',
                        help='Remove cases not present in xpu file')

    args = parser.parse_args()

    if not Path(args.xpu).exists():
        print(f"Error: XPU file not found at {args.xpu}")
        return
    if not Path(args.baseline).exists():
        print(f"Error: Baseline file not found at {args.baseline}")
        return

    update_baseline(args.xpu, args.baseline, args.remove_missing)


if __name__ == "__main__":
    main()
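
For reference, a minimal sketch (not part of this commit) that exercises update_baseline() on two tiny semicolon-delimited summaries; the op/dtype/mode/shape column names are hypothetical, since the script only requires a 'time(us)' column plus whatever parameter columns identify a case:

# Hypothetical smoke test for update_baseline(); run from the directory that
# contains op_calculate_best_perf.py. Column names besides 'time(us)' are
# made up for illustration.
import csv
from op_calculate_best_perf import update_baseline

fields = ["op", "dtype", "mode", "shape", "time(us)"]
xpu_rows = [
    {"op": "add", "dtype": "float32", "mode": "forward", "shape": "(1024, 1024)", "time(us)": "12.5"},
    {"op": "mul", "dtype": "float32", "mode": "forward", "shape": "(1024, 1024)", "time(us)": "20.0"},
]
baseline_rows = [
    {"op": "add", "dtype": "float32", "mode": "forward", "shape": "(1024, 1024)", "time(us)": "15.0"},
]

for path, rows in [("forward.csv", xpu_rows), ("baseline.csv", baseline_rows)]:
    with open(path, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fields, delimiter=";")
        writer.writeheader()
        writer.writerows(rows)

# 'add' got faster (12.5 us < 15.0 us), so its baseline time is lowered;
# 'mul' is a new case and is appended. The old baseline.csv is kept as
# baseline_backup.csv and the merged result is written back to baseline.csv.
update_baseline("forward.csv", "baseline.csv", remove_missing=False)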

.github/scripts/op_perf_comparison.py

Lines changed: 201 additions & 0 deletions
@@ -0,0 +1,201 @@
"""
To compare the op perf diff between a new result and the baseline.
# usage
python op_perf_comparison.py --xpu_file /path/to/xpu/performance/result/dir/forward.csv --baseline_file /path/to/baseline/dir/baseline.csv

"""

import pandas as pd
import argparse
import os
from ast import literal_eval
from tabulate import tabulate

def preprocess_row(row):
    processed = {}
    for col, val in row.items():
        if pd.isna(val):
            processed[col] = "NULL"
        else:
            try:
                processed[col] = literal_eval(str(val))
            except (ValueError, SyntaxError):
                processed[col] = val
    return processed

def display_row(record):
    formatted = {}
    for key, value in record.items():
        if isinstance(value, (list, tuple, dict)):
            formatted[key] = str(value)
        elif value == "NULL":
            formatted[key] = "NULL"
        else:
            formatted[key] = value
    return formatted

def write_to_github_summary(content):
    github_step_summary = os.getenv('GITHUB_STEP_SUMMARY')
    if github_step_summary:
        with open(github_step_summary, 'a') as f:
            f.write(content + "\n")

def display_comparison(results, threshold):
    if results.empty:
        print(f"\n No outlier exceeding ({threshold:.0%})")
        write_to_github_summary(f"## No outlier exceeding ({threshold:.0%})")
        return

    regression = results[results['change'] == '↓']
    improvement = results[results['change'] == '↑']

    if not regression.empty:
        print("\n🔴 Regression:")
        display_records = []
        for _, row in regression.iterrows():
            record = display_row(row)
            display_records.append({
                **{k: v for k, v in record.items() if k not in ['time_xpu_file', 'time_baseline_file', 'difference', 'change']},
                'Current Time(us)': record['time_xpu_file'],
                'Baseline Time(us)': record['time_baseline_file'],
                'Difference': record['difference']
            })

        print(tabulate(
            display_records,
            headers="keys",
            tablefmt='grid',
            showindex=False,
            floatfmt=".2f"
        ))

    if not improvement.empty:
        print("\n🟢 Improvement:")
        display_records = []
        for _, row in improvement.iterrows():
            record = display_row(row)
            display_records.append({
                **{k: v for k, v in record.items() if k not in ['time_xpu_file', 'time_baseline_file', 'difference', 'change']},
                'Current Time(us)': record['time_xpu_file'],
                'Baseline Time(us)': record['time_baseline_file'],
                'Difference': record['difference']
            })

        print(tabulate(
            display_records,
            headers="keys",
            tablefmt='grid',
            showindex=False,
            floatfmt=".2f"
        ))

    # Print summary on the GitHub Actions step summary
    summary_output = "## Performance Comparison Results\n"
    if not regression.empty:
        summary_output += "\n### 🔴 Regression\n"
        display_records = []
        for _, row in regression.iterrows():
            record = display_row(row)
            display_records.append({
                **{k: v for k, v in record.items() if k not in ['time_xpu_file', 'time_baseline_file', 'difference', 'change']},
                'Current Time(us)': record['time_xpu_file'],
                'Baseline Time(us)': record['time_baseline_file'],
                'Difference': record['difference']
            })

        summary_output += tabulate(
            display_records,
            headers="keys",
            tablefmt='github',
            showindex=False,
            floatfmt=".2f"
        ) + "\n"

    if not improvement.empty:
        summary_output += "\n### 🟢 Improvement\n"
        display_records = []
        for _, row in improvement.iterrows():
            record = display_row(row)
            display_records.append({
                **{k: v for k, v in record.items() if k not in ['time_xpu_file', 'time_baseline_file', 'difference', 'change']},
                'Current Time(us)': record['time_xpu_file'],
                'Baseline Time(us)': record['time_baseline_file'],
                'Difference': record['difference']
            })

        summary_output += tabulate(
            display_records,
            headers="keys",
            tablefmt='github',
            showindex=False,
            floatfmt=".2f"
        ) + "\n"

    write_to_github_summary(summary_output)

def compare_op_time_values(xpu_file, baseline_file, threshold=0.05, output_file=None):
    df_xpu = pd.read_csv(xpu_file, sep=';')
    df_baseline = pd.read_csv(baseline_file, sep=';')

    records_xpu = [preprocess_row(row) for _, row in df_xpu.iterrows()]
    records_baseline = [preprocess_row(row) for _, row in df_baseline.iterrows()]

    dict_xpu = {
        tuple((k, str(v)) for k, v in record.items() if k != 'time(us)'):
            record['time(us)']
        for record in records_xpu
    }
    dict_baseline = {
        tuple((k, str(v)) for k, v in record.items() if k != 'time(us)'):
            record['time(us)']
        for record in records_baseline
    }
    common_keys = set(dict_xpu.keys()) & set(dict_baseline.keys())
    results = []

    for key in common_keys:
        time_xpu = dict_xpu[key]
        time_baseline = dict_baseline[key]

        # Skip comparison if time_xpu is 0
        if time_xpu == 0:
            continue

        diff = (time_baseline - time_xpu) / time_xpu
        # Compare time, lower is better
        if abs(diff) > threshold:
            record = dict(key)
            print(record)
            record.update({
                'time_xpu_file': time_xpu,
                'time_baseline_file': time_baseline,
                'difference': f"{diff:.2%}",
                'change': "↑" if diff > 0 else "↓"
            })
            results.append(record)

    result_df = pd.DataFrame(results) if results else pd.DataFrame()
    display_comparison(result_df, threshold)


def main():
    parser = argparse.ArgumentParser(description='Compare time values between two CSV files')
    parser.add_argument('-x', '--xpu_file', required=True, help='Path to the XPU OP performance result CSV')
    parser.add_argument('-b', '--baseline_file', required=True, help="Path to the XPU OP baseline result CSV")
    parser.add_argument('-t', '--threshold', type=float, default=0.10,
                        help='Threshold for time difference (default: 0.10 for 10%%)')
    args = parser.parse_args()

    print(f" Compared files: {args.xpu_file} vs {args.baseline_file}")
    print(f" Threshold: {args.threshold:.0%}")
    write_to_github_summary("## Performance Comparison Set")
    write_to_github_summary(f"- Threshold: {args.threshold:.0%}")

    compare_op_time_values(
        xpu_file=args.xpu_file,
        baseline_file=args.baseline_file,
        threshold=args.threshold,
    )


if __name__ == "__main__":
    main()
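
The comparison rule above is diff = (time_baseline - time_xpu) / time_xpu, so a positive diff means the current run is faster (↑, improvement) and a negative diff means it is slower (↓, regression); only cases with |diff| above the threshold are reported. A small worked example with made-up numbers:

# Worked example of the thresholding in compare_op_time_values() (illustrative values).
threshold = 0.10

time_baseline, time_xpu = 15.0, 12.5
diff = (time_baseline - time_xpu) / time_xpu      # +0.20: baseline time is 20% higher than the current run
print(f"{diff:.2%}", "↑" if diff > 0 else "↓")    # 20.00% ↑ -> improvement, reported since 0.20 > 0.10

time_baseline, time_xpu = 15.0, 18.0
diff = (time_baseline - time_xpu) / time_xpu      # -0.1667: current run is slower than baseline
print(f"{diff:.2%}", "↑" if diff > 0 else "↓")    # -16.67% ↓ -> regression, reported since |diff| > 0.10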

0 commit comments
