
Commit 3f81c35

yudongsi and victor-eds authored
Add Triton benchmark support in compare script (#2060)
Co-authored-by: Victor Perez <[email protected]>
1 parent 320e2c5 commit 3f81c35

File tree

2 files changed: +99 −34 lines

scripts/compare-ci-runs/README.md

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 # Comparing CI Runs
 
 This script can be used to compare the results of two runs of the
-"Performance E2E" CI workflow.
+"Performance E2E" or "Triton benchmarks" CI workflow.
 
 ## Prerequisites
 
scripts/compare-ci-runs/compare_runs.py

Lines changed: 98 additions & 33 deletions
@@ -74,10 +74,9 @@ def get_raw_data(args: argparse.Namespace) -> tuple[Optional[Path], Optional[Path]]:
     return num_dir, denom_dir
 
 
-def parse_data(config: str, df: pd.DataFrame, file: Path) -> pd.DataFrame:
-    """Parse data from a single CSV file into the dataframe."""
+def parse_pytorch_benchmark_data(config: str, df: pd.DataFrame, file: Path) -> pd.DataFrame:
+    """Parse pytorch benchmark data from a single CSV file into the dataframe."""
     path = Path(file).absolute()
-
     datatype = path.parts[-2]
     suite = path.parts[-3]
 
@@ -97,25 +96,59 @@ def parse_data(config: str, df: pd.DataFrame, file: Path) -> pd.DataFrame:
     return pd.concat([df, raw_data], ignore_index=True)
 
 
-def parse_directory(config: str, previous: pd.DataFrame, directory: Path) -> pd.DataFrame:
-    """Parse all CSV files for a configuration in a directory, merging with
-    the previous dataframe if present."""
-    df = pd.DataFrame(columns=["dev", "name", "batch_size", f"speedup {config}", "suite", "datatype", "mode"])
-    for file in Path(directory).rglob("*performance.csv"):
-        df = parse_data(config, df, file)
+def merge_triton_xetla_reports_data(config: str, triton_file: Path, xetla_file: Path) -> pd.DataFrame:
+    """Merge triton and xetla raw data."""
+    try:
+        triton_raw_data, xetla_raw_data = [
+            pd.read_csv(file, header=0, usecols=["params", "tflops", "benchmark"])
+            for file in [triton_file, xetla_file]
+        ]
+        triton_raw_data.rename(columns={"tflops": f"Triton-TFlops-{config}"}, inplace=True)
+        xetla_raw_data.rename(columns={"tflops": f"XeTLA-TFlops-{config}"}, inplace=True)
+        return triton_raw_data.merge(xetla_raw_data, how="outer", on=["params", "benchmark"])
+    except FileNotFoundError:
+        print(f"Warning: One or both files not found: {triton_file} or {xetla_file}")
+        return pd.DataFrame()
 
-    if previous is not None:
-        df = df.merge(previous, how="outer", on=["suite", "datatype", "mode", "name", "dev"])
-    return df
 
+def build_triton_benchmark_reports_path(directory: Path, report_name: str) -> str:
+    """Construct the full file path for a given report name."""
+    return os.path.join(directory, "benchmark-reports", f"{report_name}-report.csv")
 
-def eval_data(df: pd.DataFrame, numerator: str, denominator: str, plot: bool):
-    """Evaluate the data, print a summary and plot if enabled."""
-    num_col = f"speedup {numerator}"
-    denom_col = f"speedup {denominator}"
 
-    df.drop(columns=["batch_size_x", "batch_size_y"], inplace=True)
+def parse_triton_benchmark_data(config: str, directory: Path) -> pd.DataFrame:
+    """Parse triton benchmark data from the report CSV files into a merged dataframe.
+    Currently focuses on the default paths for the softmax, gemm and attention
+    reports, which include both XeTLA and Triton data under regular names."""
+
+    reports = ["softmax", "gemm", "attn"]
+
+    reports_list = []
+    for report in reports:
+        triton_file = f"{report}-triton"
+        xetla_file = f"{report}-xetla"
+        triton_path = build_triton_benchmark_reports_path(directory, triton_file)
+        xetla_path = build_triton_benchmark_reports_path(directory, xetla_file)
+        reports_list.append(merge_triton_xetla_reports_data(config, triton_path, xetla_path))
 
+    return pd.concat(reports_list, ignore_index=True)
+
+
+def parse_directory(triton_benchmark: bool, config: str, directory: Path) -> pd.DataFrame:
+    """Parse all CSV files for a configuration in a directory."""
+    if triton_benchmark:
+        df = parse_triton_benchmark_data(config, directory)
+    else:
+        df = pd.DataFrame(columns=["dev", "name", "batch_size", f"speedup {config}", "suite", "datatype", "mode"])
+        for file in Path(directory).rglob("*performance.csv"):
+            df = parse_pytorch_benchmark_data(config, df, file)
+
+    return df
+
+
+def summarize_diff(triton_benchmark: bool, perf_index: str, plot: bool, df: pd.DataFrame, num_col: str, denom_col: str,
+                   numerator: str, denominator: str):
+    """Summarize the difference between the numerator and denominator data."""
     both_failed = df.loc[(df[num_col] == 0.0) & (df[denom_col] == 0.0)]
     print(f"Both failed ({both_failed.shape[0]} configurations):")
     print(both_failed.to_string())
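For illustration, here is a minimal sketch (not part of the commit) of the outer merge that `merge_triton_xetla_reports_data` performs, using in-memory frames in place of the `*-report.csv` files; the `params` values and the `a` config suffix are invented:

```python
import pandas as pd

# Hypothetical Triton and XeTLA report contents for one config "a".
triton = pd.DataFrame({
    "params": ["4096x4096x4096", "512x512x512"],
    "tflops": [95.2, 61.8],
    "benchmark": ["gemm", "gemm"],
}).rename(columns={"tflops": "Triton-TFlops-a"})

xetla = pd.DataFrame({
    "params": ["4096x4096x4096"],
    "tflops": [101.4],
    "benchmark": ["gemm"],
}).rename(columns={"tflops": "XeTLA-TFlops-a"})

# how="outer" keeps configurations measured by only one backend,
# leaving NaN in the other backend's TFlops column.
merged = triton.merge(xetla, how="outer", on=["params", "benchmark"])
print(merged)
# Roughly:
#            params benchmark  Triton-TFlops-a  XeTLA-TFlops-a
# 0  4096x4096x4096      gemm             95.2           101.4
# 1     512x512x512      gemm             61.8             NaN
```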
@@ -142,24 +175,24 @@ def eval_data(df: pd.DataFrame, numerator: str, denominator: str, plot: bool):
 
     df["relative difference"] = (df[num_col] - df[denom_col]) / df[denom_col]
 
-    print("Overview of relative difference in speedup.\n"
+    print(f"Overview of relative difference in {perf_index}.\n"
           "Relative difference 0.0 means both perform identically,"
           f" relative difference > 0.0 means {numerator} performs better,"
           f" relative difference < 0.0 means {denominator} performs better")
 
     print(df["relative difference"].describe())
-    print(f"Mean speedup for denominator: {df[denom_col].mean()}")
+    print(f"Mean {perf_index} for denominator: {df[denom_col].mean()}")
     print("\n" * 2)
 
     df.sort_values(by=["relative difference"], inplace=True, ignore_index=True, ascending=True)
     print_cfgs = 10
-    print(f"{print_cfgs} fastest configurations ({denominator} faster than "
-          f"{numerator}, showing relative difference in speedup)")
+    print(f"{print_cfgs} best configurations ({denominator} better than "
+          f"{numerator}, showing relative difference in {perf_index})")
     print(df.head(print_cfgs))
     print("\n" * 2)
     df.sort_values(by=["relative difference"], inplace=True, ignore_index=True, ascending=False)
-    print(f"{print_cfgs} slowest configurations ({denominator} slower than "
-          f"{numerator}, showing relative difference in speedup)")
+    print(f"{print_cfgs} worst configurations ({denominator} worse than "
+          f"{numerator}, showing relative difference in {perf_index})")
     print(df.head(print_cfgs))
     print("\n" * 2)
 
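The summary logic itself is unchanged apart from the `{perf_index}` label; the sign convention of the relative difference can be checked with a toy frame (values invented):

```python
import pandas as pd

# Same formula as in summarize_diff: (num - denom) / denom.
df = pd.DataFrame({"speedup a": [1.30, 0.90], "speedup b": [1.20, 1.00]})
df["relative difference"] = (df["speedup a"] - df["speedup b"]) / df["speedup b"]
print(df["relative difference"].tolist())
# [0.0833..., -0.1]: first config, run "a" performs better;
# second config, run "b" performs better.
```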
@@ -169,12 +202,13 @@ def eval_data(df: pd.DataFrame, numerator: str, denominator: str, plot: bool):
         import matplotlib.pyplot as plt
         from matplotlib.backends.backend_pdf import PdfPages
 
-        df["xlabel"] = df[["suite", "mode", "datatype"]].agg(", ".join, axis=1)
+        keys = ["params", "benchmark"] if triton_benchmark else ["suite", "mode", "datatype"]
+        df["xlabel"] = df[keys].agg(", ".join, axis=1)
 
         # Sort by configuration
         order = list(df["xlabel"].unique())
         order.sort()
-        filename = f"performance-plot-{numerator}-{denominator}.pdf"
+        filename = f"performance-plot-{num_col}-{denom_col}.pdf".lower()
         with PdfPages(filename) as pdf:
             fig = plt.figure()
             plt.xticks(rotation=85)
@@ -188,12 +222,29 @@ def eval_data(df: pd.DataFrame, numerator: str, denominator: str, plot: bool):
 
             ax = sns.boxplot(df, x="xlabel", y="relative difference", order=order)
 
-            ax.set(xlabel=None, ylabel="Relative difference in speedup")
+            ax.set(xlabel=None, ylabel=f"Relative difference in {perf_index}")
 
             pdf.savefig(fig, bbox_inches="tight")
         print(f"Saved performance plot to {filename}")
 
 
+def eval_data(triton_benchmark: bool, plot: bool, df: pd.DataFrame, numerator: str, denominator: str):
+    """Evaluate the data, print a summary and plot if enabled."""
+    if triton_benchmark:
+        num_tri2xe_col = f"Tri2Xe-{numerator}"
+        dem_tri2xe_col = f"Tri2Xe-{denominator}"
+
+        df_ratio = df[["params", "benchmark", num_tri2xe_col, dem_tri2xe_col]]
+        summarize_diff(triton_benchmark, "tri2xe", plot, df_ratio, num_tri2xe_col, dem_tri2xe_col, numerator,
+                       denominator)
+    else:
+        num_col = f"speedup {numerator}"
+        denom_col = f"speedup {denominator}"
+
+        df.drop(columns=["batch_size_x", "batch_size_y"], inplace=True)
+        summarize_diff(triton_benchmark, "speedup", plot, df, num_col, denom_col, numerator, denominator)
+
+
 def main():
     """Main entry point."""
     parser = argparse.ArgumentParser(prog="compare-runs", description="Compare performance of two CI runs")
@@ -206,6 +257,8 @@ def main():
                         action="store_true")
     parser.add_argument("-e", "--eval-only", help="Use existing preprocessed data", action="store_true")
     parser.add_argument("--no-plot", help="Do not plot, no requirement on seaborn and matplotlib", action="store_true")
+    parser.add_argument("--triton-benchmark", help="Compare triton benchmark performance of two CI runs",
+                        action="store_true")
 
     args = parser.parse_args()
 
@@ -230,19 +283,31 @@ def main():
         print("Failed to obtain raw data")
         sys.exit(1)
 
-    df = parse_directory(num_cfg, None, num_dir)
-    df = parse_directory(denom_cfg, df, denom_dir)
+    num_df = parse_directory(args.triton_benchmark, num_cfg, num_dir)
+    denom_df = parse_directory(args.triton_benchmark, denom_cfg, denom_dir)
+    on = ["params", "benchmark"] if args.triton_benchmark else ["suite", "datatype", "mode", "name", "dev"]
+    df = denom_df.merge(num_df, how="outer", on=on)
+
+    if args.triton_benchmark:
+        cols = [
+            "params", "benchmark", f"Triton-TFlops-{num_cfg}", f"XeTLA-TFlops-{num_cfg}",
+            f"Triton-TFlops-{denom_cfg}", f"XeTLA-TFlops-{denom_cfg}"
+        ]
+    else:
+        cols = [
+            "dev", "suite", "name", "mode", "datatype", "batch_size_x", "batch_size_y", f"speedup {num_cfg}",
+            f"speedup {denom_cfg}"
+        ]
 
-    cols = [
-        "dev", "suite", "name", "mode", "datatype", "batch_size_x", "batch_size_y", f"speedup {num_cfg}",
-        f"speedup {denom_cfg}"
-    ]
     df = df[cols]
+    if args.triton_benchmark:
+        df[f"Tri2Xe-{num_cfg}"] = df[f"Triton-TFlops-{num_cfg}"] / df[f"XeTLA-TFlops-{num_cfg}"]
+        df[f"Tri2Xe-{denom_cfg}"] = df[f"Triton-TFlops-{denom_cfg}"] / df[f"XeTLA-TFlops-{denom_cfg}"]
 
     print(f"Storing preprocessed data to {csv_file}")
     df.to_csv(csv_file, index=False)
 
-    eval_data(df, num_cfg, denom_cfg, (not args.no_plot))
+    eval_data(args.triton_benchmark, (not args.no_plot), df, num_cfg, denom_cfg)
 
 
 if __name__ == "__main__":
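Putting the pieces together: in `--triton-benchmark` mode the script derives a per-run Triton-to-XeTLA ratio (`Tri2Xe`) and then compares those ratios across the two runs. A hypothetical end-to-end sketch with invented run labels `a` (numerator) and `b` (denominator):

```python
import pandas as pd

# One merged benchmark row after parse_directory for both runs (values invented).
df = pd.DataFrame({
    "params": ["4096x4096x4096"],
    "benchmark": ["gemm"],
    "Triton-TFlops-a": [95.2], "XeTLA-TFlops-a": [101.4],
    "Triton-TFlops-b": [88.7], "XeTLA-TFlops-b": [100.9],
})

# Ratio columns added in main() when --triton-benchmark is set.
df["Tri2Xe-a"] = df["Triton-TFlops-a"] / df["XeTLA-TFlops-a"]
df["Tri2Xe-b"] = df["Triton-TFlops-b"] / df["XeTLA-TFlops-b"]

# eval_data then hands the two ratio columns to summarize_diff:
rel = (df["Tri2Xe-a"] - df["Tri2Xe-b"]) / df["Tri2Xe-b"]
print(rel.iloc[0])  # ~0.068 > 0: run "a" is closer to XeTLA than run "b"
```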
