@@ -74,10 +74,9 @@ def get_raw_data(args: argparse.Namespace) -> tuple[Optional[Path], Optional[Pat
7474 return num_dir , denom_dir
7575
7676
77- def parse_data (config : str , df : pd .DataFrame , file : Path ) -> pd .DataFrame :
78- """Parse data from a single CSV file into the dataframe."""
77+ def parse_pytorch_benchmark_data (config : str , df : pd .DataFrame , file : Path ) -> pd .DataFrame :
78+ """Parse pytorch benchmark data from a single CSV file into the dataframe."""
7979 path = Path (file ).absolute ()
80-
8180 datatype = path .parts [- 2 ]
8281 suite = path .parts [- 3 ]
8382
@@ -97,25 +96,59 @@ def parse_data(config: str, df: pd.DataFrame, file: Path) -> pd.DataFrame:
9796 return pd .concat ([df , raw_data ], ignore_index = True )
9897
9998
100- def parse_directory (config : str , previous : pd .DataFrame , directory : Path ) -> pd .DataFrame :
101- """Parse all CSV files for a configuration in a directory, merging with
102- the previous dataframe if present."""
103- df = pd .DataFrame (columns = ["dev" , "name" , "batch_size" , f"speedup { config } " , "suite" , "datatype" , "mode" ])
104- for file in Path (directory ).rglob ("*performance.csv" ):
105- df = parse_data (config , df , file )
def merge_triton_xetla_reports_data(config: str, triton_file: Path, xetla_file: Path) -> pd.DataFrame:
    """Merge triton and xetla raw data.

    Reads the two per-backend report CSVs, renames their "tflops" columns to
    backend- and config-specific names, and outer-merges them on
    ("params", "benchmark"). Returns an empty dataframe (with a warning) when
    either file is missing, so callers can concat the result unconditionally.
    """
    wanted_columns = ["params", "tflops", "benchmark"]
    try:
        triton_raw_data = pd.read_csv(triton_file, header=0, usecols=wanted_columns)
        xetla_raw_data = pd.read_csv(xetla_file, header=0, usecols=wanted_columns)
    except FileNotFoundError:
        print(f"Warning: One or both files not found: {triton_file} or {xetla_file}")
        return pd.DataFrame()

    # Disambiguate the tflops columns before the merge so both survive.
    triton_raw_data = triton_raw_data.rename(columns={"tflops": f"Triton-TFlops-{config}"})
    xetla_raw_data = xetla_raw_data.rename(columns={"tflops": f"XeTLA-TFlops-{config}"})
    # Outer merge keeps benchmarks that only one backend reported.
    return triton_raw_data.merge(xetla_raw_data, how="outer", on=["params", "benchmark"])
106112
107- if previous is not None :
108- df = df .merge (previous , how = "outer" , on = ["suite" , "datatype" , "mode" , "name" , "dev" ])
109- return df
110113
def build_triton_benchmark_reports_path(directory: Path, report_name: str) -> str:
    """Construct the full file path for a given report name.

    Args:
        directory: Root directory of the downloaded CI artifacts.
        report_name: Base name of the report, e.g. "gemm-triton".

    Returns:
        "<directory>/benchmark-reports/<report_name>-report.csv" as a string.
    """
    # Use pathlib for consistency with the rest of the file; keep the str
    # return type so existing callers are unaffected.
    return str(Path(directory) / "benchmark-reports" / f"{report_name}-report.csv")
111117
112- def eval_data (df : pd .DataFrame , numerator : str , denominator : str , plot : bool ):
113- """Evaluate the data, print a summary and plot if enabled."""
114- num_col = f"speedup { numerator } "
115- denom_col = f"speedup { denominator } "
116118
117- df .drop (columns = ["batch_size_x" , "batch_size_y" ], inplace = True )
def parse_triton_benchmark_data(config: str, directory: Path) -> pd.DataFrame:
    """Parse triton benchmark data from a merged dataframe into the dataframe.
    Now focus on dft path for softmax, gemm and attention
    which include both xetla and triton data with regular name."""
    # One merged frame per benchmark report; missing files yield empty frames
    # that concat simply ignores.
    merged_frames = [
        merge_triton_xetla_reports_data(
            config,
            build_triton_benchmark_reports_path(directory, f"{report}-triton"),
            build_triton_benchmark_reports_path(directory, f"{report}-xetla"),
        )
        for report in ("softmax", "gemm", "attn")
    ]
    return pd.concat(merged_frames, ignore_index=True)
135+
136+
def parse_directory(triton_benchmark: bool, config: str, directory: Path) -> pd.DataFrame:
    """Parse all CSV files for a configuration in a directory."""
    if triton_benchmark:
        # Triton benchmark mode reads the fixed set of benchmark-reports CSVs.
        return parse_triton_benchmark_data(config, directory)

    # Pytorch benchmark mode: accumulate every *performance.csv under the tree.
    columns = ["dev", "name", "batch_size", f"speedup {config}", "suite", "datatype", "mode"]
    result = pd.DataFrame(columns=columns)
    for csv_path in Path(directory).rglob("*performance.csv"):
        result = parse_pytorch_benchmark_data(config, result, csv_path)
    return result
147+
148+
149+ def summarize_diff (triton_benchmark : bool , perf_index : str , plot : bool , df : pd .DataFrame , num_col : str , denom_col : str ,
150+ numerator : str , denominator : str ):
151+ """Summarize data difference of numerator and denominator."""
119152 both_failed = df .loc [(df [num_col ] == 0.0 ) & (df [denom_col ] == 0.0 )]
120153 print (f"Both failed ({ both_failed .shape [0 ]} configurations):" )
121154 print (both_failed .to_string ())
@@ -142,24 +175,24 @@ def eval_data(df: pd.DataFrame, numerator: str, denominator: str, plot: bool):
142175
143176 df ["relative difference" ] = (df [num_col ] - df [denom_col ]) / df [denom_col ]
144177
145- print ("Overview of relative difference in speedup .\n "
178+ print (f "Overview of relative difference in { perf_index } .\n "
146179 "Relative difference 0.0 means both perform identically,"
147180 f" relative difference > 0.0 means { numerator } performs better,"
148181 f" relative difference < 0.0 means { denominator } performs better" )
149182
150183 print (df ["relative difference" ].describe ())
151- print (f"Mean speedup for denominator: { df [denom_col ].mean ()} " )
184+ print (f"Mean { perf_index } for denominator: { df [denom_col ].mean ()} " )
152185 print ("\n " * 2 )
153186
154187 df .sort_values (by = ["relative difference" ], inplace = True , ignore_index = True , ascending = True )
155188 print_cfgs = 10
156- print (f"{ print_cfgs } fastest configurations ({ denominator } faster than "
157- f"{ numerator } , showing relative difference in speedup )" )
189+ print (f"{ print_cfgs } best configurations ({ denominator } better than "
190+ f"{ numerator } , showing relative difference in { perf_index } )" )
158191 print (df .head (print_cfgs ))
159192 print ("\n " * 2 )
160193 df .sort_values (by = ["relative difference" ], inplace = True , ignore_index = True , ascending = False )
161- print (f"{ print_cfgs } slowest configurations ({ denominator } slower than "
162- f"{ numerator } , showing relative difference in speedup )" )
194+ print (f"{ print_cfgs } worst configurations ({ denominator } worse than "
195+ f"{ numerator } , showing relative difference in { perf_index } )" )
163196 print (df .head (print_cfgs ))
164197 print ("\n " * 2 )
165198
@@ -169,12 +202,13 @@ def eval_data(df: pd.DataFrame, numerator: str, denominator: str, plot: bool):
169202 import matplotlib .pyplot as plt
170203 from matplotlib .backends .backend_pdf import PdfPages
171204
172- df ["xlabel" ] = df [["suite" , "mode" , "datatype" ]].agg (", " .join , axis = 1 )
205+ keys = ["params" , "benchmark" ] if triton_benchmark else ["suite" , "mode" , "datatype" ]
206+ df ["xlabel" ] = df [keys ].agg (", " .join , axis = 1 )
173207
174208 # Sort by configuration
175209 order = list (df ["xlabel" ].unique ())
176210 order .sort ()
177- filename = f"performance-plot-{ numerator } -{ denominator } .pdf"
211+ filename = f"performance-plot-{ num_col } -{ denom_col } .pdf" . lower ()
178212 with PdfPages (filename ) as pdf :
179213 fig = plt .figure ()
180214 plt .xticks (rotation = 85 )
@@ -188,12 +222,29 @@ def eval_data(df: pd.DataFrame, numerator: str, denominator: str, plot: bool):
188222
189223 ax = sns .boxplot (df , x = "xlabel" , y = "relative difference" , order = order )
190224
191- ax .set (xlabel = None , ylabel = "Relative difference in speedup " )
225+ ax .set (xlabel = None , ylabel = f "Relative difference in { perf_index } " )
192226
193227 pdf .savefig (fig , bbox_inches = "tight" )
194228 print (f"Saved performance plot to { filename } " )
195229
196230
def eval_data(triton_benchmark: bool, plot: bool, df: pd.DataFrame, numerator: str, denominator: str):
    """Evaluate the data, print a summary and plot if enabled."""
    if triton_benchmark:
        ratio_num_col = f"Tri2Xe-{numerator}"
        ratio_denom_col = f"Tri2Xe-{denominator}"
        # Keep only the identifier columns plus the two ratio columns.
        subset = df[["params", "benchmark", ratio_num_col, ratio_denom_col]]
        summarize_diff(triton_benchmark, "tri2xe", plot, subset, ratio_num_col, ratio_denom_col, numerator,
                       denominator)
        return

    speedup_num_col = f"speedup {numerator}"
    speedup_denom_col = f"speedup {denominator}"
    # Batch-size columns are not part of the comparison keys or metrics.
    df.drop(columns=["batch_size_x", "batch_size_y"], inplace=True)
    summarize_diff(triton_benchmark, "speedup", plot, df, speedup_num_col, speedup_denom_col, numerator, denominator)
246+
247+
197248def main ():
198249 """Main entry point."""
199250 parser = argparse .ArgumentParser (prog = "compare-runs" , description = "Compare performance of two CI runs" )
@@ -206,6 +257,8 @@ def main():
206257 action = "store_true" )
207258 parser .add_argument ("-e" , "--eval-only" , help = "Use existing preprocessed data" , action = "store_true" )
208259 parser .add_argument ("--no-plot" , help = "Do not plot, no requirement on seaborn and matplotlib" , action = "store_true" )
260+ parser .add_argument ("--triton-benchmark" , help = "Compare triton benchmark performance of two CI runs" ,
261+ action = "store_true" )
209262
210263 args = parser .parse_args ()
211264
@@ -230,19 +283,31 @@ def main():
230283 print ("Failed to obtain raw data" )
231284 sys .exit (1 )
232285
233- df = parse_directory (num_cfg , None , num_dir )
234- df = parse_directory (denom_cfg , df , denom_dir )
286+ num_df = parse_directory (args .triton_benchmark , num_cfg , num_dir )
287+ denom_df = parse_directory (args .triton_benchmark , denom_cfg , denom_dir )
288+ on = ["params" , "benchmark" ] if args .triton_benchmark else ["suite" , "datatype" , "mode" , "name" , "dev" ]
289+ df = denom_df .merge (num_df , how = "outer" , on = on )
290+
291+ if args .triton_benchmark :
292+ cols = [
293+ "params" , "benchmark" , f"Triton-TFlops-{ num_cfg } " , f"XeTLA-TFlops-{ num_cfg } " ,
294+ f"Triton-TFlops-{ denom_cfg } " , f"XeTLA-TFlops-{ denom_cfg } "
295+ ]
296+ else :
297+ cols = [
298+ "dev" , "suite" , "name" , "mode" , "datatype" , "batch_size_x" , "batch_size_y" , f"speedup { num_cfg } " ,
299+ f"speedup { denom_cfg } "
300+ ]
235301
236- cols = [
237- "dev" , "suite" , "name" , "mode" , "datatype" , "batch_size_x" , "batch_size_y" , f"speedup { num_cfg } " ,
238- f"speedup { denom_cfg } "
239- ]
240302 df = df [cols ]
303+ if args .triton_benchmark :
304+ df [f"Tri2Xe-{ num_cfg } " ] = df [f"Triton-TFlops-{ num_cfg } " ] / df [f"XeTLA-TFlops-{ num_cfg } " ]
305+ df [f"Tri2Xe-{ denom_cfg } " ] = df [f"Triton-TFlops-{ denom_cfg } " ] / df [f"XeTLA-TFlops-{ denom_cfg } " ]
241306
242307 print (f"Storing preprocessed data to { csv_file } " )
243308 df .to_csv (csv_file , index = False )
244309
245- eval_data (df , num_cfg , denom_cfg , (not args .no_plot ))
310+ eval_data (args . triton_benchmark , (not args .no_plot ), df , num_cfg , denom_cfg )
246311
247312
248313if __name__ == "__main__" :
0 commit comments