Commit 0b7b39d

[Refactor] Rewrite scripts using pylint too-many-* suggestions (#3839)
Signed-off-by: Anatoly Myachev <[email protected]>
1 parent: 8e81e26

File tree: 5 files changed, +105 −66 lines


.pre-commit-config.yaml

Lines changed: 0 additions & 6 deletions
@@ -79,11 +79,6 @@ repos:
           - --disable=missing-module-docstring
           - --disable=missing-function-docstring
           - --disable=missing-class-docstring
-          - --disable=duplicate-code
-          - --disable=too-many-locals
-          - --disable=too-many-statements
-          - --disable=too-many-arguments
-          - --disable=too-many-positional-arguments
         stages: [pre-commit, pre-push, manual]

       - id: pylint
@@ -106,7 +101,6 @@ repos:
           - --disable=unnecessary-lambda-assignment
           # FIXME: revisit these checks later
           - --disable=too-few-public-methods
-          - --disable=consider-using-generator
           - --disable=missing-module-docstring
           - --disable=missing-function-docstring
           - --disable=missing-class-docstring
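With these disables removed, pylint's complexity checks apply to the scripts again. As a rough sketch of what the re-enabled too-many-arguments check (R0913, default limit of five parameters) flags, and the shape of the fix used throughout this commit (all names below are invented for illustration, not taken from the repo):

from dataclasses import dataclass

# Before: six positional parameters trip too-many-arguments (R0913).
def report_old(source, target, param_cols, benchmark, compiler, tag):
    return (source, target, param_cols, benchmark, compiler, tag)

# After: related parameters travel together in one typed container,
# so functions take a single argument.
@dataclass
class ReportConfig:
    source: str
    target: str
    param_cols: str
    benchmark: str
    compiler: str
    tag: str

def report_new(cfg: ReportConfig):
    return cfg.source, cfg.target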

benchmarks/triton_kernels_benchmark/benchmark_testing.py

Lines changed: 1 addition & 1 deletion
@@ -159,7 +159,7 @@ def extract_kernels(funcs):
                 f"the profiling number not match; {n_repeat=}, {kernels=}, \n" +
                 f"top functions by xpu_time:\n {prof.key_averages(group_by_stack_n=5).table(sort_by='xpu_time')}")
         # Make the time to the milliseconds.
-        times = torch.tensor([sum([k.duration for k in ks]) * 1e-3 for ks in kernels], dtype=torch.float)
+        times = torch.tensor([sum((k.duration for k in ks)) * 1e-3 for ks in kernels], dtype=torch.float)
         return _summarize_statistics(times, quantiles, return_mode)
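This one-line change is what pylint's consider-using-generator check (re-enabled in the config above) asks for: sum() can consume a generator directly, so the inner list comprehension only allocated a throwaway list. A standalone illustration with made-up values:

durations = [1200.0, 800.0, 500.0]  # hypothetical kernel durations

total_list = sum([d for d in durations])  # builds an intermediate list first
total_gen = sum(d for d in durations)     # feeds sum() lazily, no temporary list

assert total_list == total_gen == 2500.0
print(total_gen * 1e-3)  # same 1e-3 scaling as the line above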
scripts/build_report.py

Lines changed: 27 additions & 12 deletions
@@ -4,11 +4,25 @@
 import uuid
 import json
 import datetime
+from dataclasses import dataclass

 import pandas as pd


-def parse_args():
+@dataclass
+class PassedArgs:  # pylint: disable=too-many-instance-attributes
+    source: str
+    target: str
+    param_cols: str
+    benchmark: str
+    compiler: str
+    tflops_col: str
+    hbm_col: str
+    tag: str
+    mask: bool
+
+
+def parse_args() -> PassedArgs:
     parser = argparse.ArgumentParser(description="Build report based on triton-benchmark run")
     parser.add_argument("source", help="Path to source csv file with benchmark results")
     parser.add_argument(
@@ -26,7 +40,8 @@ def parse_args():
     parser.add_argument("--hbm_col", help="Column name with HBM results.", required=False, default=None)
     parser.add_argument("--tag", help="How to tag results", required=False, default="")
     parser.add_argument("--mask", help="Mask identifiers among the params", required=False, action="store_true")
-    return parser.parse_args()
+    parsed_args = parser.parse_args()
+    return PassedArgs(**vars(parsed_args))


 def check_cols(target_cols, all_cols):
@@ -35,24 +50,26 @@ def check_cols(target_cols, all_cols):
         raise ValueError(f"Couldn't find required columns: '{diff}' among available '{all_cols}'")


-def transform_df(df, param_cols, tflops_col, hbm_col, benchmark, compiler, tag, mask):
+def transform_df(df, args: PassedArgs) -> pd.DataFrame:
+    param_cols = args.param_cols.split(",")
+    hbm_col = args.hbm_col
     check_cols(param_cols, df.columns)
-    check_cols([tflops_col] + [] if hbm_col is None else [hbm_col], df.columns)
+    check_cols([args.tflops_col] + [] if hbm_col is None else [hbm_col], df.columns)
     # Build json with parameters
     df_results = pd.DataFrame()
     # Type conversion to int is important here, because dashboards expect
     # int values.
     # Changing it without changing dashboards and database will
     # break comparison of old and new results
-    if mask:
+    if args.mask:
         df_results["MASK"] = df[param_cols[-1]]
         param_cols = param_cols[:-1]
         for p in param_cols:
             df[p] = df[p].astype(int)
         df_results["params"] = [json.dumps(j) for j in df[[*param_cols, "MASK"]].to_dict("records")]
     else:
         df_results["params"] = [json.dumps(j) for j in df[param_cols].astype(int).to_dict("records")]
-    df_results["tflops"] = df[tflops_col]
+    df_results["tflops"] = df[args.tflops_col]
     if hbm_col is not None:
         df_results["hbm_gbs"] = df[hbm_col]

@@ -70,9 +87,9 @@ def transform_df(df, param_cols, tflops_col, hbm_col, benchmark, compiler, tag, mask):
         df_results["datetime"] = datetime.datetime.now()
     else:
         df_results["datetime"] = df["datetime"]
-    df_results["benchmark"] = benchmark
-    df_results["compiler"] = compiler
-    df_results["tag"] = tag
+    df_results["benchmark"] = args.benchmark
+    df_results["compiler"] = args.compiler
+    df_results["tag"] = args.tag

     host_info = {
         n: os.getenv(n.upper(), default="")
@@ -96,10 +113,8 @@ def transform_df(df, param_cols, tflops_col, hbm_col, benchmark, compiler, tag, mask):

 def main():
     args = parse_args()
-    param_cols = args.param_cols.split(",")
     df = pd.read_csv(args.source)
-    result_df = transform_df(df, param_cols=param_cols, tflops_col=args.tflops_col, hbm_col=args.hbm_col,
-                             benchmark=args.benchmark, compiler=args.compiler, tag=args.tag, mask=args.mask)
+    result_df = transform_df(df, args)
     result_df.to_csv(args.target, index=False)
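The PassedArgs(**vars(parsed_args)) idiom works because vars() on an argparse.Namespace returns its attribute dictionary, which can be unpacked straight into the dataclass constructor whenever the field names match the argparse destinations. A self-contained sketch of the pattern (argument names here are illustrative):

import argparse
from dataclasses import dataclass


@dataclass
class Args:
    source: str
    tag: str
    mask: bool


def parse(argv=None) -> Args:
    parser = argparse.ArgumentParser()
    parser.add_argument("source")
    parser.add_argument("--tag", default="")
    parser.add_argument("--mask", action="store_true")
    # vars() yields {"source": ..., "tag": ..., "mask": ...}
    return Args(**vars(parser.parse_args(argv)))


args = parse(["results.csv", "--tag", "nightly"])
print(args.source, args.tag, args.mask)  # results.csv nightly False

Beyond silencing too-many-arguments, the typed container turns a misspelled attribute into an immediate error and gives transform_df a single, documented input.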
scripts/check_inductor_report.py

Lines changed: 26 additions & 9 deletions
@@ -3,18 +3,35 @@
 from pathlib import Path
 import csv
 import sys
+from dataclasses import dataclass


-def check_report(suite, dtype, mode, test_mode, device, models_file, inductor_log_dir):
-    inductor_log_dir_leaf = Path(inductor_log_dir) / suite / dtype
-    inductor_report_filename = f"inductor_{suite}_{dtype}_{mode}_{device}_{test_mode}.csv"
-    inductor_report_path = Path(inductor_log_dir_leaf / inductor_report_filename)
+@dataclass
+class PassedArgs:
+    suite: str
+    dtype: str
+    mode: str
+    test_mode: str
+    device: str
+    models_file: str
+    inductor_log_dir: str
+
+
+def get_inductor_report_path(args: PassedArgs) -> Path:
+    inductor_log_dir_leaf = Path(args.inductor_log_dir) / args.suite / args.dtype
+    inductor_report_filename = f"inductor_{args.suite}_{args.dtype}_{args.mode}_{args.device}_{args.test_mode}.csv"
+    return Path(inductor_log_dir_leaf / inductor_report_filename)
+
+
+def check_report(args: PassedArgs) -> int:
+    test_mode = args.test_mode
+    inductor_report_path = get_inductor_report_path(args)

     subset = []
     report = []
     exitcode = 0

-    with open(models_file, encoding="utf-8") as f:
+    with open(args.models_file, encoding="utf-8") as f:
         subset = f.read().splitlines()

     with open(inductor_report_path, encoding="utf-8") as f:
@@ -23,7 +40,7 @@ def check_report(suite, dtype, mode, test_mode, device, models_file, inductor_log_dir):
         for l in reader:
             report_with_header.append(l)
     for r in report_with_header[1:]:
-        if r[0] == device:
+        if r[0] == args.device:
             report.append(r)

     test_list = [r[1] for r in report]
@@ -58,9 +75,9 @@ def main():
     argparser.add_argument("--device", help="i.e. xpu", required=True)
     argparser.add_argument("--models-file", help="Subset of models list", required=True)
     argparser.add_argument("--inductor-log-dir", help="Inductor test log directory", default="inductor_log")
-    args = argparser.parse_args()
-    exitcode = check_report(args.suite, args.dtype, args.mode, args.test_mode, args.device, args.models_file,
-                            args.inductor_log_dir)
+    parsed_args = argparser.parse_args()
+    passed_args = PassedArgs(**vars(parsed_args))
+    exitcode = check_report(passed_args)
     print(f"Report check result: {'SUCCESS' if exitcode == 0 else 'FAIL'}")
     sys.exit(exitcode)
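One detail that makes PassedArgs(**vars(parsed_args)) line up here: argparse converts hyphens in option names to underscores when deriving attribute names, so --models-file and --inductor-log-dir land on the models_file and inductor_log_dir fields. A quick standalone check, using the same option names as the script:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--models-file", required=True)
parser.add_argument("--inductor-log-dir", default="inductor_log")
ns = parser.parse_args(["--models-file", "models.txt"])

# Hyphenated flags become underscore attributes, matching the dataclass fields.
print(vars(ns))  # {'models_file': 'models.txt', 'inductor_log_dir': 'inductor_log'}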
scripts/compare-ci-runs/compare_runs.py

Lines changed: 51 additions & 38 deletions
@@ -91,7 +91,7 @@ def parse_pytorch_benchmark_data(config: str, df: pd.DataFrame, file: Path) -> pd.DataFrame:
     raw_data["suite"] = suite
     raw_data["datatype"] = datatype
     raw_data["mode"] = mode
-    raw_data.rename(columns={"speedup": f"speedup {config}"}, inplace=True)
+    raw_data.rename(columns={"speedup": f"speedup-{config}"}, inplace=True)

     return pd.concat([df, raw_data], ignore_index=True)

@@ -146,9 +146,16 @@ def parse_directory(triton_benchmark: bool, config: str, directory: Path) -> pd.DataFrame:
     return df


-def summarize_diff(triton_benchmark: bool, perf_index: str, plot: bool, df: pd.DataFrame, num_col: str, denom_col: str,
-                   numerator: str, denominator: str):
+def get_column_names(perf_index: str, numerator: str, denominator: str):
+    num_col = f"{perf_index}-{numerator}"
+    denom_col = f"{perf_index}-{denominator}"
+    return num_col, denom_col
+
+
+def summarize_diff(perf_index: str, df: pd.DataFrame, numerator: str, denominator: str):
     """Summarize data difference of numerator and denominator."""
+    num_col, denom_col = get_column_names(perf_index, numerator, denominator)
+
     both_failed = df.loc[(df[num_col] == 0.0) & (df[denom_col] == 0.0)]
     print(f"Both failed ({both_failed.shape[0]} configurations):")
     print(both_failed.to_string())
@@ -195,54 +202,60 @@ def summarize_diff(triton_benchmark: bool, perf_index: str, plot: bool, df: pd.DataFrame, num_col: str, denom_col: str,
           f"{numerator}, showing relative difference in {perf_index})")
     print(df.head(print_cfgs))
     print("\n" * 2)
+    return df

-    if plot:
-        # pylint: disable=import-outside-toplevel
-        import seaborn as sns
-        import matplotlib.pyplot as plt
-        from matplotlib.backends.backend_pdf import PdfPages

-        keys = ["params", "benchmark"] if triton_benchmark else ["suite", "mode", "datatype"]
-        df["xlabel"] = df[keys].agg(", ".join, axis=1)
+def get_filename(perf_index, numerator, denominator) -> str:
+    num_col, denom_col = get_column_names(perf_index, numerator, denominator)
+    return f"performance-plot-{num_col}-{denom_col}.pdf".lower()

-        # Sort by configuration
-        order = list(df["xlabel"].unique())
-        order.sort()
-        filename = f"performance-plot-{num_col}-{denom_col}.pdf".lower()
-        with PdfPages(filename) as pdf:
-            fig = plt.figure()
-            plt.xticks(rotation=85)
+def plot_diff_df(df, triton_benchmark: bool, perf_index: str, numerator: str, denominator: str):
+    # pylint: disable=import-outside-toplevel
+    import seaborn as sns
+    import matplotlib.pyplot as plt
+    from matplotlib.backends.backend_pdf import PdfPages

-            title = ("Relative difference 0.0 means both perform identically,\n"
-                     f"relative difference > 0.0 means {numerator} performs better,\n"
-                     f"relative difference < 0.0 means {denominator} performs better")
-            plt.title(f"Comparison {numerator} vs {denominator}.")
+    keys = ["params", "benchmark"] if triton_benchmark else ["suite", "mode", "datatype"]
+    df["xlabel"] = df[keys].agg(", ".join, axis=1)

-            plt.figtext(1, 0.5, title)
+    # Sort by configuration
+    order = list(df["xlabel"].unique())
+    order.sort()

-            ax = sns.boxplot(df, x="xlabel", y="relative difference", order=order)
+    filename = get_filename(perf_index, numerator, denominator)
+    with PdfPages(filename) as pdf:
+        fig = plt.figure()
+        plt.xticks(rotation=85)

-            ax.set(xlabel=None, ylabel=f"Relative difference in {perf_index}")
+        title = ("Relative difference 0.0 means both perform identically,\n"
+                 f"relative difference > 0.0 means {numerator} performs better,\n"
+                 f"relative difference < 0.0 means {denominator} performs better")
+        plt.title(f"Comparison {numerator} vs {denominator}.")

-            pdf.savefig(fig, bbox_inches="tight")
-            print(f"Saved performance plot to {filename}")
+        plt.figtext(1, 0.5, title)
+
+        ax = sns.boxplot(df, x="xlabel", y="relative difference", order=order)
+
+        ax.set(xlabel=None, ylabel=f"Relative difference in {perf_index}")
+
+        pdf.savefig(fig, bbox_inches="tight")
+        print(f"Saved performance plot to {filename}")


 def eval_data(triton_benchmark: bool, plot: bool, df: pd.DataFrame, numerator: str, denominator: str):
     """Evaluate the data, print a summary and plot if enabled."""
     if triton_benchmark:
-        num_tri2xe_col = f"Tri2Xe-{numerator}"
-        dem_tri2xe_col = f"Tri2Xe-{denominator}"
-
-        df_ratio = df[["params", "benchmark", num_tri2xe_col, dem_tri2xe_col]]
-        summarize_diff(triton_benchmark, "tri2xe", plot, df_ratio, num_tri2xe_col, dem_tri2xe_col, numerator,
-                       denominator)
+        perf_index = "Tri2Xe"
+        num_col, denom_col = get_column_names(perf_index, numerator, denominator)
+        df_ratio = df[["params", "benchmark", num_col, denom_col]]
+        diff_df = summarize_diff(perf_index, df_ratio, numerator, denominator)
     else:
-        num_col = f"speedup {numerator}"
-        denom_col = f"speedup {denominator}"
-
+        perf_index = "speedup"
         df.drop(columns=["batch_size_x", "batch_size_y"], inplace=True)
-        summarize_diff(triton_benchmark, "speedup", plot, df, num_col, denom_col, numerator, denominator)
+        diff_df = summarize_diff(perf_index, df, numerator, denominator)
+    if plot:
+        plot_diff_df(diff_df, triton_benchmark, perf_index, numerator, denominator)


 def main():
@@ -295,8 +308,8 @@ def main():
         ]
     else:
         cols = [
-            "dev", "suite", "name", "mode", "datatype", "batch_size_x", "batch_size_y", f"speedup {num_cfg}",
-            f"speedup {denom_cfg}"
+            "dev", "suite", "name", "mode", "datatype", "batch_size_x", "batch_size_y", f"speedup-{num_cfg}",
+            f"speedup-{denom_cfg}"
         ]

     df = df[cols]
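The rename from "speedup {config}" to "speedup-{config}" is what lets both report flavors share a single get_column_names helper: every comparison column now follows one {perf_index}-{config} scheme, matching the existing Tri2Xe-{config} columns. A tiny standalone sketch of the convention (config names invented):

def get_column_names(perf_index: str, numerator: str, denominator: str):
    # One naming scheme for both the Triton ("Tri2Xe") and PyTorch ("speedup") paths.
    return f"{perf_index}-{numerator}", f"{perf_index}-{denominator}"


print(get_column_names("speedup", "run-a", "run-b"))  # ('speedup-run-a', 'speedup-run-b')
print(get_column_names("Tri2Xe", "run-a", "run-b"))   # ('Tri2Xe-run-a', 'Tri2Xe-run-b')

Splitting summarize_diff (which now returns the processed frame) from plot_diff_df also drops the plot flag and the two precomputed column arguments from the summary path, which is the too-many-arguments reduction pylint was after.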