Commit 490abb6

align the lint check
1 parent: 4e7733a


73 files changed (+2763, −1353 lines)

test/microbench/avg_pool2d.py

Lines changed: 44 additions & 19 deletions
@@ -1,5 +1,6 @@
-import time
 import argparse
+import time
+
 import torch
 from torch.profiler import profile, ProfilerActivity

@@ -52,28 +53,33 @@ def AVGPool2d(shape, dtype, channels_last, backward, device):
     if backward:
         output[0].backward(grad)

+
 def run_profile(shape, dtype, channels_last, backward, device, num_iter):
     with profile(
-        activities=[ProfilerActivity.CPU,
-                    ProfilerActivity.XPU if device == 'xpu' else ProfilerActivity.CUDA],
+        activities=[
+            ProfilerActivity.CPU,
+            ProfilerActivity.XPU if device == "xpu" else ProfilerActivity.CUDA,
+        ],
         record_shapes=True,
     ) as prof:
         for i in range(num_iter):
             AVGPool2d(shape, dtype, channels_last, backward, device)
-    print(prof.key_averages().table(sort_by="{}_time_total".format(device)))
+    print(prof.key_averages().table(sort_by=f"{device}_time_total"))
+

 def run_e2e(shape, dtype, channels_last, backward, device, num_iter):
-    if device in ['xpu', 'cuda']:
-        torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
+    if device in ["xpu", "cuda"]:
+        torch.xpu.synchronize() if device == "xpu" else torch.cuda.synchronize()
     t1 = time.time()
     for i in range(num_iter):
         AVGPool2d(shape, dtype, channels_last, backward, device)
-    if device in ['xpu', 'cuda']:
-        torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
+    if device in ["xpu", "cuda"]:
+        torch.xpu.synchronize() if device == "xpu" else torch.cuda.synchronize()
     t2 = time.time()
     e2e_time = (t2 - t1) / num_iter
     print("E2E total time:", f"{float(e2e_time):.20f}")

+
 def benchmark(args):
     for shape in shape_list:
         for dtype in [torch.bfloat16, torch.float16, torch.float32]:
@@ -98,24 +104,43 @@ def benchmark(args):
                 )

                 if not args.e2e_only:
-                    run_profile(shape, dtype, channels_last, backward, args.device, args.num_iter)
+                    run_profile(
+                        shape,
+                        dtype,
+                        channels_last,
+                        backward,
+                        args.device,
+                        args.num_iter,
+                    )

                 if not args.profile_only:
-                    run_e2e(shape, dtype, channels_last, backward, args.device, args.num_iter)
+                    run_e2e(
+                        shape,
+                        dtype,
+                        channels_last,
+                        backward,
+                        args.device,
+                        args.num_iter,
+                    )
+

 def parse_args():
-    parser = argparse.ArgumentParser(description='OP Benchmark')
-    parser.add_argument('--device', type=str, default='xpu',
-                        help='Device to run on (e.g., "cpu", "cuda", "xpu")')
+    parser = argparse.ArgumentParser(description="OP Benchmark")
+    parser.add_argument(
+        "--device",
+        type=str,
+        default="xpu",
+        help='Device to run on (e.g., "cpu", "cuda", "xpu")',
+    )
     group = parser.add_mutually_exclusive_group()
-    group.add_argument('--profile-only', action='store_true',
-                       help='Only Run profile timing')
-    group.add_argument('--e2e-only', action='store_true',
-                       help='Only Run E2E timing')
-    parser.add_argument('--num-iter', type=int, default=20,
-                        help='Number of iterations')
+    group.add_argument(
+        "--profile-only", action="store_true", help="Only Run profile timing"
+    )
+    group.add_argument("--e2e-only", action="store_true", help="Only Run E2E timing")
+    parser.add_argument("--num-iter", type=int, default=20, help="Number of iterations")
     return parser.parse_args()

+
 if __name__ == "__main__":
     args = parse_args()
     benchmark(args)
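
Note: every file in this commit shares the same run_profile structure; only the op under test changes. The following is a minimal, self-contained sketch of that pattern, reduced to CPU-only profiling of a stand-in avg_pool2d call so it runs without an XPU or CUDA device; the tensor shape and iteration count are placeholders, not the benchmark's own.

import torch
from torch.profiler import profile, ProfilerActivity


def run_profile(num_iter=5):
    # CPU-only profiling of a stand-in op; the real scripts also add an
    # XPU or CUDA activity depending on --device.
    x = torch.randn(8, 16, 128, 128)
    with profile(activities=[ProfilerActivity.CPU], record_shapes=True) as prof:
        for _ in range(num_iter):
            torch.nn.functional.avg_pool2d(x, kernel_size=3)
    print(prof.key_averages().table(sort_by="cpu_time_total"))


if __name__ == "__main__":
    run_profile()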

test/microbench/avg_pool3d.py

Lines changed: 44 additions & 19 deletions
@@ -1,5 +1,6 @@
-import time
 import argparse
+import time
+
 import torch
 from torch.profiler import profile, ProfilerActivity

@@ -52,28 +53,33 @@ def AVGPool3d(shape, dtype, channels_last, backward, device):
     if backward:
         output[0].backward(grad)

+
 def run_profile(shape, dtype, channels_last, backward, device, num_iter):
     with profile(
-        activities=[ProfilerActivity.CPU,
-                    ProfilerActivity.XPU if device == 'xpu' else ProfilerActivity.CUDA],
+        activities=[
+            ProfilerActivity.CPU,
+            ProfilerActivity.XPU if device == "xpu" else ProfilerActivity.CUDA,
+        ],
         record_shapes=True,
     ) as prof:
         for i in range(num_iter):
             AVGPool3d(shape, dtype, channels_last, backward, device)
-    print(prof.key_averages().table(sort_by="{}_time_total".format(device)))
+    print(prof.key_averages().table(sort_by=f"{device}_time_total"))
+

 def run_e2e(shape, dtype, channels_last, backward, device, num_iter):
-    if device in ['xpu', 'cuda']:
-        torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
+    if device in ["xpu", "cuda"]:
+        torch.xpu.synchronize() if device == "xpu" else torch.cuda.synchronize()
     t1 = time.time()
     for i in range(num_iter):
         AVGPool3d(shape, dtype, channels_last, backward, device)
-    if device in ['xpu', 'cuda']:
-        torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
+    if device in ["xpu", "cuda"]:
+        torch.xpu.synchronize() if device == "xpu" else torch.cuda.synchronize()
     t2 = time.time()
     e2e_time = (t2 - t1) / num_iter
     print("E2E total time:", f"{float(e2e_time):.20f}")

+
 def benchmark(args):
     for shape in shape_list:
         for dtype in [torch.bfloat16, torch.float16, torch.float32]:
@@ -98,24 +104,43 @@ def benchmark(args):
                 )

                 if not args.e2e_only:
-                    run_profile(shape, dtype, channels_last, backward, args.device, args.num_iter)
+                    run_profile(
+                        shape,
+                        dtype,
+                        channels_last,
+                        backward,
+                        args.device,
+                        args.num_iter,
+                    )

                 if not args.profile_only:
-                    run_e2e(shape, dtype, channels_last, backward, args.device, args.num_iter)
+                    run_e2e(
+                        shape,
+                        dtype,
+                        channels_last,
+                        backward,
+                        args.device,
+                        args.num_iter,
+                    )
+

 def parse_args():
-    parser = argparse.ArgumentParser(description='OP Benchmark')
-    parser.add_argument('--device', type=str, default='xpu',
-                        help='Device to run on (e.g., "cpu", "cuda", "xpu")')
+    parser = argparse.ArgumentParser(description="OP Benchmark")
+    parser.add_argument(
+        "--device",
+        type=str,
+        default="xpu",
+        help='Device to run on (e.g., "cpu", "cuda", "xpu")',
+    )
     group = parser.add_mutually_exclusive_group()
-    group.add_argument('--profile-only', action='store_true',
-                       help='Only Run profile timing')
-    group.add_argument('--e2e-only', action='store_true',
-                       help='Only Run E2E timing')
-    parser.add_argument('--num-iter', type=int, default=20,
-                        help='Number of iterations')
+    group.add_argument(
+        "--profile-only", action="store_true", help="Only Run profile timing"
+    )
+    group.add_argument("--e2e-only", action="store_true", help="Only Run E2E timing")
+    parser.add_argument("--num-iter", type=int, default=20, help="Number of iterations")
     return parser.parse_args()

+
 if __name__ == "__main__":
     args = parse_args()
     benchmark(args)
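
The E2E half of each script follows the same shape: synchronize, time a loop, synchronize again, report the per-iteration average. A compact sketch under the assumption of a stand-in avg_pool3d call and a CPU default device; the synchronize branches only run when an accelerator device is selected.

import time

import torch


def run_e2e(device="cpu", num_iter=20):
    # Stand-in op: avg_pool3d on a small 5D tensor; the real scripts call AVGPool3d.
    x = torch.randn(4, 8, 16, 32, 32, device=device)
    # Drain any pending accelerator work before starting the clock.
    if device in ["xpu", "cuda"]:
        torch.xpu.synchronize() if device == "xpu" else torch.cuda.synchronize()
    t1 = time.time()
    for _ in range(num_iter):
        torch.nn.functional.avg_pool3d(x, kernel_size=3)
    # Wait for the last kernel to finish so the elapsed time is meaningful.
    if device in ["xpu", "cuda"]:
        torch.xpu.synchronize() if device == "xpu" else torch.cuda.synchronize()
    t2 = time.time()
    print("E2E total time:", f"{float((t2 - t1) / num_iter):.20f}")


if __name__ == "__main__":
    run_e2e()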

test/microbench/batch_norm_1d.py

Lines changed: 28 additions & 17 deletions
@@ -1,5 +1,6 @@
-import time
 import argparse
+import time
+
 import torch
 from torch.profiler import profile, ProfilerActivity

@@ -13,28 +14,33 @@ def BTN1d(m, input, backward, device):
         gy = torch.empty_like(output)
         output.backward(gy)

+
 def run_profile(m, input, backward, device, num_iter):
     with profile(
-        activities=[ProfilerActivity.CPU,
-                    ProfilerActivity.XPU if device == 'xpu' else ProfilerActivity.CUDA],
+        activities=[
+            ProfilerActivity.CPU,
+            ProfilerActivity.XPU if device == "xpu" else ProfilerActivity.CUDA,
+        ],
         record_shapes=True,
     ) as prof:
         for i in range(num_iter):
             BTN1d(m, input, backward, device)
-    print(prof.key_averages().table(sort_by="{}_time_total".format(device)))
+    print(prof.key_averages().table(sort_by=f"{device}_time_total"))
+

 def run_e2e(m, input, backward, device, num_iter):
-    if device in ['xpu', 'cuda']:
-        torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
+    if device in ["xpu", "cuda"]:
+        torch.xpu.synchronize() if device == "xpu" else torch.cuda.synchronize()
     t1 = time.time()
     for i in range(num_iter):
         BTN1d(m, input, backward, device)
-    if device in ['xpu', 'cuda']:
-        torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
+    if device in ["xpu", "cuda"]:
+        torch.xpu.synchronize() if device == "xpu" else torch.cuda.synchronize()
     t2 = time.time()
     e2e_time = (t2 - t1) / num_iter
     print("E2E total time:", f"{float(e2e_time):.20f}")

+
 def benchmark(args):
     for shape in shape_list:
         for dtype in [torch.bfloat16, torch.float16, torch.float32]:
@@ -63,19 +69,24 @@ def benchmark(args):
             if not args.profile_only:
                 run_e2e(m, input, backward, args.device, args.num_iter)

+
 def parse_args():
-    parser = argparse.ArgumentParser(description='OP Benchmark')
-    parser.add_argument('--device', type=str, default='xpu',
-                        help='Device to run on (e.g., "cpu", "cuda", "xpu")')
+    parser = argparse.ArgumentParser(description="OP Benchmark")
+    parser.add_argument(
+        "--device",
+        type=str,
+        default="xpu",
+        help='Device to run on (e.g., "cpu", "cuda", "xpu")',
+    )
     group = parser.add_mutually_exclusive_group()
-    group.add_argument('--profile-only', action='store_true',
-                       help='Only Run profile timing')
-    group.add_argument('--e2e-only', action='store_true',
-                       help='Only Run E2E timing')
-    parser.add_argument('--num-iter', type=int, default=20,
-                        help='Number of iterations')
+    group.add_argument(
+        "--profile-only", action="store_true", help="Only Run profile timing"
+    )
+    group.add_argument("--e2e-only", action="store_true", help="Only Run E2E timing")
+    parser.add_argument("--num-iter", type=int, default=20, help="Number of iterations")
     return parser.parse_args()

+
 if __name__ == "__main__":
     args = parse_args()
     benchmark(args)
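
For readers skimming the per-line diffs: every one of these scripts converges on the same parse_args block after the reformat. Assembled in one piece from the + lines above (only the import and the __main__ guard are added here so the snippet runs on its own):

import argparse


def parse_args():
    # Same CLI surface as the benchmark scripts in this commit.
    parser = argparse.ArgumentParser(description="OP Benchmark")
    parser.add_argument(
        "--device",
        type=str,
        default="xpu",
        help='Device to run on (e.g., "cpu", "cuda", "xpu")',
    )
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        "--profile-only", action="store_true", help="Only Run profile timing"
    )
    group.add_argument("--e2e-only", action="store_true", help="Only Run E2E timing")
    parser.add_argument("--num-iter", type=int, default=20, help="Number of iterations")
    return parser.parse_args()


if __name__ == "__main__":
    print(parse_args())  # e.g. Namespace(device='xpu', profile_only=False, ...)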

test/microbench/batch_norm_2d.py

Lines changed: 44 additions & 19 deletions
@@ -1,5 +1,6 @@
-import time
 import argparse
+import time
+
 import torch
 from torch.profiler import profile, ProfilerActivity

@@ -37,28 +38,33 @@ def BTN2d(shape, dtype, channels_last, backward, device):
     if backward:
         output[0].backward(grad)

+
 def run_profile(shape, dtype, channels_last, backward, device, num_iter):
     with profile(
-        activities=[ProfilerActivity.CPU,
-                    ProfilerActivity.XPU if device == 'xpu' else ProfilerActivity.CUDA],
+        activities=[
+            ProfilerActivity.CPU,
+            ProfilerActivity.XPU if device == "xpu" else ProfilerActivity.CUDA,
+        ],
         record_shapes=True,
     ) as prof:
         for i in range(num_iter):
             BTN2d(shape, dtype, channels_last, backward, device)
-    print(prof.key_averages().table(sort_by="{}_time_total".format(device)))
+    print(prof.key_averages().table(sort_by=f"{device}_time_total"))
+

 def run_e2e(shape, dtype, channels_last, backward, device, num_iter):
-    if device in ['xpu', 'cuda']:
-        torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
+    if device in ["xpu", "cuda"]:
+        torch.xpu.synchronize() if device == "xpu" else torch.cuda.synchronize()
     t1 = time.time()
     for i in range(num_iter):
         BTN2d(shape, dtype, channels_last, backward, device)
-    if device in ['xpu', 'cuda']:
-        torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
+    if device in ["xpu", "cuda"]:
+        torch.xpu.synchronize() if device == "xpu" else torch.cuda.synchronize()
     t2 = time.time()
     e2e_time = (t2 - t1) / num_iter
     print("E2E total time:", f"{float(e2e_time):.20f}")

+
 def benchmark(args):
     for shape in shape_list:
         for dtype in [torch.bfloat16, torch.float16, torch.float32]:
@@ -81,24 +87,43 @@ def benchmark(args):
                 )

                 if not args.e2e_only:
-                    run_profile(shape, dtype, channels_last, backward, args.device, args.num_iter)
+                    run_profile(
+                        shape,
+                        dtype,
+                        channels_last,
+                        backward,
+                        args.device,
+                        args.num_iter,
+                    )

                 if not args.profile_only:
-                    run_e2e(shape, dtype, channels_last, backward, args.device, args.num_iter)
+                    run_e2e(
+                        shape,
+                        dtype,
+                        channels_last,
+                        backward,
+                        args.device,
+                        args.num_iter,
+                    )
+

 def parse_args():
-    parser = argparse.ArgumentParser(description='OP Benchmark')
-    parser.add_argument('--device', type=str, default='xpu',
-                        help='Device to run on (e.g., "cpu", "cuda", "xpu")')
+    parser = argparse.ArgumentParser(description="OP Benchmark")
+    parser.add_argument(
+        "--device",
+        type=str,
+        default="xpu",
+        help='Device to run on (e.g., "cpu", "cuda", "xpu")',
+    )
     group = parser.add_mutually_exclusive_group()
-    group.add_argument('--profile-only', action='store_true',
-                       help='Only Run profile timing')
-    group.add_argument('--e2e-only', action='store_true',
-                       help='Only Run E2E timing')
-    parser.add_argument('--num-iter', type=int, default=20,
-                        help='Number of iterations')
+    group.add_argument(
+        "--profile-only", action="store_true", help="Only Run profile timing"
+    )
+    group.add_argument("--e2e-only", action="store_true", help="Only Run E2E timing")
+    parser.add_argument("--num-iter", type=int, default=20, help="Number of iterations")
     return parser.parse_args()

+
 if __name__ == "__main__":
     args = parse_args()
     benchmark(args)
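
The BTN2d body itself is outside the hunks shown above, so the following is only a hypothetical reconstruction of the kind of call batch_norm_2d.py times: a BatchNorm2d forward in an optional channels_last layout followed by a backward pass. The function name, placeholder shape, and gradient handling here are assumptions for illustration, not the script's own code.

import torch


def btn2d_step(dtype=torch.float32, channels_last=False, backward=True, device="cpu"):
    # Hypothetical stand-in for one benchmarked iteration (not the script's BTN2d).
    shape = (16, 32, 64, 64)  # placeholder shape; the real script loops over shape_list
    x = torch.randn(shape, dtype=dtype, device=device, requires_grad=True)
    if channels_last:
        # Switch to NHWC memory layout, as the channels_last loop variable suggests.
        x = x.to(memory_format=torch.channels_last)
    bn = torch.nn.BatchNorm2d(shape[1], device=device, dtype=dtype)
    output = bn(x)
    if backward:
        grad = torch.randn_like(output)
        output.backward(grad)


if __name__ == "__main__":
    btn2d_step()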
