2
2
To compare the op perf diff
3
3
# usage
4
4
python op_perf_comparison.py --xpu_file /path/to/xpu/performance/result/dir/forward.csv --baseline_file /path/to/baseline/dir/baseline.csv
5
-
5
+ --profile-only: Only compare record['time(us)']
6
+ --e2e-only: Only compare record['E2E total time(us)']
7
+ Default: Compare both record['time(us)'] and record['E2E total time(us)'] in same table
6
8
"""
7
9
8
10
import pandas as pd
@@ -48,7 +50,7 @@ def format_parameters(record):
48
50
params .append (f"{ key } : { value } " )
49
51
return "<br>" .join (params )
50
52
51
- def display_comparison (results , threshold , xpu_file ):
53
+ def display_comparison (results , threshold , xpu_file , compare_both ):
52
54
if 'forward' in xpu_file .lower ():
53
55
direction = "Forward"
54
56
elif 'backward' in xpu_file .lower ():
@@ -61,97 +63,96 @@ def display_comparison(results, threshold, xpu_file):
61
63
write_to_github_summary (f"## { direction } No outlier exceeding ({ threshold :.0%} )" )
62
64
return
63
65
64
- results ['diff_float' ] = results ['difference' ].str .rstrip ('%' ).astype (float )
65
- regression = results [results ['change' ] == '↓' ].sort_values ('diff_float' , ascending = False )
66
- improvement = results [results ['change' ] == '↑' ].sort_values ('diff_float' )
66
+ # Prepare display records - always include both metrics when available
67
+ display_records = []
68
+ for _ , row in results .iterrows ():
69
+ record = display_row (row )
70
+ display_record = {
71
+ 'Case Name' : record ['case_name' ],
72
+ 'Op Name' : record ['op_name' ],
73
+ 'Datatype' : record ['datatype' ],
74
+ 'Parameters' : format_parameters (record )
75
+ }
67
76
68
- if not regression .empty :
69
- print ("\n 🔴 Regression:" )
70
- display_records = []
71
- for _ , row in regression .iterrows ():
72
- record = display_row (row )
73
- display_records .append ({
74
- 'Case Name' : record ['case_name' ],
75
- 'Op Name' : record ['op_name' ],
76
- 'Datatype' : record ['datatype' ],
77
- 'Parameters' : format_parameters (record ),
78
- 'Current Time(us)' : record ['time_xpu_file' ],
79
- 'Baseline Time(us)' : record ['time_baseline_file' ],
80
- 'Difference' : record ['difference' ]
77
+ # Always try to include profile time if it exists in the data
78
+ if 'profile_time_xpu' in record or 'profile_time_base' in record :
79
+ display_record .update ({
80
+ 'Profile Current(us)' : record .get ('profile_time_xpu' , 'N/A' ),
81
+ 'Profile Baseline(us)' : record .get ('profile_time_base' , 'N/A' ),
82
+ 'Profile Diff' : record .get ('profile_diff' , 'N/A' ),
83
+ 'Profile Change' : record .get ('profile_change' , '' )
81
84
})
82
85
86
+ # Always try to include E2E time if it exists in the data
87
+ if 'e2e_time_xpu' in record or 'e2e_time_base' in record :
88
+ display_record .update ({
89
+ 'E2E Current(us)' : record .get ('e2e_time_xpu' , 'N/A' ),
90
+ 'E2E Baseline(us)' : record .get ('e2e_time_base' , 'N/A' ),
91
+ 'E2E Diff' : record .get ('e2e_diff' , 'N/A' ),
92
+ 'E2E Change' : record .get ('e2e_change' , '' )
93
+ })
94
+
95
+ display_records .append (display_record )
96
+
97
+ # Classify records based on changes
98
+ regression_records = []
99
+ improvement_records = []
100
+
101
+ for record in results .to_dict ('records' ):
102
+ has_profile_change = 'profile_change' in record and record ['profile_change' ] in ('↑' , '↓' )
103
+ has_e2e_change = 'e2e_change' in record and record ['e2e_change' ] in ('↑' , '↓' )
104
+
105
+ # If either metric shows regression, count as regression
106
+ if (has_profile_change and record ['profile_change' ] == '↓' ) or \
107
+ (has_e2e_change and record ['e2e_change' ] == '↓' ):
108
+ regression_records .append (record )
109
+ # If either metric shows improvement, count as improvement
110
+ elif (has_profile_change and record ['profile_change' ] == '↑' ) or \
111
+ (has_e2e_change and record ['e2e_change' ] == '↑' ):
112
+ improvement_records .append (record )
113
+
114
+ # Print results
115
+ if regression_records :
116
+ print ("\n 🔴 Regression:" )
117
+ regression_display = [r for r in display_records
118
+ if r ['Case Name' ] in [x ['case_name' ] for x in regression_records ]]
83
119
print (tabulate (
84
- display_records ,
120
+ regression_display ,
85
121
headers = "keys" ,
86
122
tablefmt = 'grid' ,
87
123
showindex = False ,
88
124
floatfmt = ".2f"
89
125
))
90
126
91
- if not improvement . empty :
127
+ if improvement_records :
92
128
print ("\n 🟢 Improvement:" )
93
- display_records = []
94
- for _ , row in improvement .iterrows ():
95
- record = display_row (row )
96
- display_records .append ({
97
- 'Case Name' : record ['case_name' ],
98
- 'Op Name' : record ['op_name' ],
99
- 'Datatype' : record ['datatype' ],
100
- 'Parameters' : format_parameters (record ),
101
- 'Current Time(us)' : record ['time_xpu_file' ],
102
- 'Baseline Time(us)' : record ['time_baseline_file' ],
103
- 'Difference' : record ['difference' ]
104
- })
105
-
129
+ improvement_display = [r for r in display_records
130
+ if r ['Case Name' ] in [x ['case_name' ] for x in improvement_records ]]
106
131
print (tabulate (
107
- display_records ,
132
+ improvement_display ,
108
133
headers = "keys" ,
109
134
tablefmt = 'grid' ,
110
135
showindex = False ,
111
136
floatfmt = ".2f"
112
137
))
113
- # Print Summary on Github Action Summary
138
+
139
+ # Generate GitHub summary
114
140
summary_output = f"## { direction } Performance Comparison Results\n "
115
- if not regression .empty :
116
- summary_output += f"\n ### 🔴 { direction } Regression\n "
117
- display_records = []
118
- for _ , row in regression .iterrows ():
119
- record = display_row (row )
120
- display_records .append ({
121
- 'Case Name' : record ['case_name' ],
122
- 'Op Name' : record ['op_name' ],
123
- 'Datatype' : record ['datatype' ],
124
- 'Parameters' : format_parameters (record ),
125
- 'Current Time(us)' : record ['time_xpu_file' ],
126
- 'Baseline Time(us)' : record ['time_baseline_file' ],
127
- 'Difference' : record ['difference' ]
128
- })
129
141
142
+ if regression_records :
143
+ summary_output += "\n ### 🔴 Regression\n "
130
144
summary_output += tabulate (
131
- display_records ,
145
+ [ r for r in display_records if r [ 'Case Name' ] in [ x [ 'case_name' ] for x in regression_records ]] ,
132
146
headers = "keys" ,
133
147
tablefmt = 'github' ,
134
148
showindex = False ,
135
149
floatfmt = ".2f"
136
150
) + "\n "
137
151
138
- if not improvement .empty :
139
- summary_output += f"\n ### 🟢 { direction } Improvement\n "
140
- display_records = []
141
- for _ , row in improvement .iterrows ():
142
- record = display_row (row )
143
- display_records .append ({
144
- 'Case Name' : record ['case_name' ],
145
- 'Op Name' : record ['op_name' ],
146
- 'Datatype' : record ['datatype' ],
147
- 'Parameters' : format_parameters (record ),
148
- 'Current Time(us)' : record ['time_xpu_file' ],
149
- 'Baseline Time(us)' : record ['time_baseline_file' ],
150
- 'Difference' : record ['difference' ]
151
- })
152
-
152
+ if improvement_records :
153
+ summary_output += "\n ### 🟢 Improvement\n "
153
154
summary_output += tabulate (
154
- display_records ,
155
+ [ r for r in display_records if r [ 'Case Name' ] in [ x [ 'case_name' ] for x in improvement_records ]] ,
155
156
headers = "keys" ,
156
157
tablefmt = 'github' ,
157
158
showindex = False ,
@@ -160,70 +161,141 @@ def display_comparison(results, threshold, xpu_file):
160
161
161
162
write_to_github_summary (summary_output )
162
163
163
# Metric name -> CSV column that holds its timing.
_TIME_COLUMNS = {'profile': 'time(us)', 'e2e': 'E2E total time(us)'}
# Columns that carry measurements and therefore must not be part of a record's identity key.
_NON_KEY_COLUMNS = ('time(us)', 'E2E total time(us)', 'E2E forward time(us)')


def _prepare_df(df):
    """Strip header whitespace and ensure both timing columns exist as numeric columns."""
    df.columns = df.columns.str.strip()
    for col in _TIME_COLUMNS.values():
        if col not in df.columns:
            df[col] = float('nan')
        # Unparseable cells become NaN and are dropped later by _collect_times.
        df[col] = pd.to_numeric(df[col], errors='coerce')
    return df


def _collect_times(records):
    """Map each record's identity key to its usable (non-NaN) timing, per metric."""
    times = {metric: {} for metric in _TIME_COLUMNS}
    for rec in records:
        key = tuple((k, str(v)) for k, v in rec.items() if k not in _NON_KEY_COLUMNS)
        for metric, col in _TIME_COLUMNS.items():
            if col not in rec:
                continue
            try:
                value = float(rec[col])
            except (ValueError, TypeError):
                continue
            if not pd.isna(value):
                # A later row with the same key overwrites an earlier one.
                times[metric][key] = value
    return times


def _compare_metric(prefix, xpu_time, base_time, threshold):
    """Build the result fields for one metric and report whether it exceeds the threshold."""
    # Lower time is better: a positive diff means the XPU run is faster than baseline.
    diff = (base_time - xpu_time) / xpu_time
    if diff > threshold:
        change = "↑"
    elif diff < -threshold:
        change = "↓"
    else:
        change = ""
    fields = {
        f'{prefix}_time_xpu': xpu_time,
        f'{prefix}_time_base': base_time,
        f'{prefix}_diff': f"{diff:.2%}",
        f'{prefix}_change': change,
    }
    return fields, abs(diff) > threshold


def compare_time_values(xpu_file, baseline_file, threshold=0.05, profile_only=False, e2e_only=False):
    """Compare per-op timings of two ';'-separated CSV files and report the outliers.

    Rows are matched on every column except the timing columns. For each
    matched row the relative difference ``(baseline - xpu) / xpu`` is computed
    for the profile time (``time(us)``) and/or the E2E time
    (``E2E total time(us)``). A row is reported when at least one compared
    metric differs by more than ``threshold``. Timings that are missing,
    non-numeric or zero are not compared.

    Args:
        xpu_file: Path to the CSV with the current XPU results.
        baseline_file: Path to the CSV with the baseline results.
        threshold: Relative difference above which a row counts as an outlier.
        profile_only: Compare only ``time(us)``.
        e2e_only: Compare only ``E2E total time(us)``.

    Returns:
        None. The collected rows are handed to ``display_comparison`` (defined
        elsewhere in this file) together with ``threshold``, ``xpu_file`` and
        a flag telling whether both metrics were compared.
    """
    df_xpu = _prepare_df(pd.read_csv(xpu_file, sep=';'))
    df_baseline = _prepare_df(pd.read_csv(baseline_file, sep=';'))

    # preprocess_row is defined elsewhere in this file; it is assumed to return
    # a mapping-like object (it must support .items() and `in`).
    xpu_times = _collect_times(preprocess_row(row) for _, row in df_xpu.iterrows())
    base_times = _collect_times(preprocess_row(row) for _, row in df_baseline.iterrows())

    compare_both = not profile_only and not e2e_only
    metrics = [metric
               for metric, disabled in (('profile', e2e_only), ('e2e', profile_only))
               if not disabled]

    # Only keys present in the XPU file can ever be compared. dict.fromkeys
    # de-duplicates while keeping first-seen order, so the report order is
    # reproducible from run to run (iterating a set of str tuples is not).
    candidate_keys = dict.fromkeys(
        key for metric in _TIME_COLUMNS for key in xpu_times[metric]
    )

    results = []
    for key in candidate_keys:
        record = dict(key)
        is_outlier = False
        for metric in metrics:
            xpu_time = xpu_times[metric].get(key)
            base_time = base_times[metric].get(key)
            if xpu_time is None or base_time is None:
                continue
            # A zero timing gives no meaningful ratio (and would divide by zero).
            if xpu_time == 0 or base_time == 0:
                continue
            fields, exceeded = _compare_metric(metric, xpu_time, base_time, threshold)
            record.update(fields)
            is_outlier = is_outlier or exceeded
        if is_outlier:
            results.append(record)

    result_df = pd.DataFrame(results) if results else pd.DataFrame()
    display_comparison(result_df, threshold, xpu_file, compare_both)
207
258
208
259
def main():
    """Parse the command line, echo the chosen settings and run the comparison.

    Options:
        -x/--xpu_file:      CSV with the current XPU results (required).
        -b/--baseline_file: CSV with the baseline results (required).
        -t/--threshold:     Relative difference that counts as an outlier.
        --profile-only:     Compare only ``time(us)``.
        --e2e-only:         Compare only ``E2E total time(us)``.

    ``--profile-only`` and ``--e2e-only`` are mutually exclusive; passing both
    makes argparse print a usage error and exit with status 2.
    """
    parser = argparse.ArgumentParser(description='Compare time values between two CSV files')
    parser.add_argument('-x', '--xpu_file', required=True, help='XPU OP performance result csv files dir')
    parser.add_argument('-b', '--baseline_file', required=True, help="XPU OP baseline result csv files dir")
    # argparse %-formats help strings, so a literal percent sign must be written
    # as '%%'; a bare '%' makes --help fail with ValueError.
    parser.add_argument('-t', '--threshold', type=float, default=0.10,
                        help='Threshold for time difference (default: 0.10 for 10%%)')
    mode = parser.add_mutually_exclusive_group()
    mode.add_argument('--profile-only', action='store_true',
                      help='Only compare profile time (time(us))')
    mode.add_argument('--e2e-only', action='store_true',
                      help='Only compare E2E time (E2E total time(us))')
    args = parser.parse_args()

    # One description shared by the console output and the GitHub summary.
    if args.profile_only:
        mode_description = "Comparing only profile time (time(us))"
    elif args.e2e_only:
        mode_description = "Comparing only E2E time (E2E total time(us))"
    else:
        mode_description = "Comparing both profile time and E2E time in same table"

    print(f" Compared file: {args.xpu_file} and {args.baseline_file}")
    print(f" Threshold: {args.threshold:.0%}")
    print(f" {mode_description}")

    write_to_github_summary("## Performance Comparison Set")
    write_to_github_summary(f"- Threshold: {args.threshold:.0%}")
    write_to_github_summary(f"- {mode_description}")

    compare_time_values(
        xpu_file=args.xpu_file,
        baseline_file=args.baseline_file,
        threshold=args.threshold,
        profile_only=args.profile_only,
        e2e_only=args.e2e_only,
    )
226
299
227
-
228
300
if __name__ == "__main__" :
229
301
main ()
0 commit comments