Commit 103de7f

[Nightly] Set different time measure for microbench
1 parent 30a820f · commit 103de7f

38 files changed: +2509 -1576 lines changed

test/microbench/adaptive_avg_pool2d.py

Lines changed: 47 additions & 24 deletions
@@ -1,18 +1,16 @@
 import time
-
+import argparse
 import torch
 from torch.profiler import profile, ProfilerActivity
 
-device = "xpu"
-
 shape_list = [
     (8, 512, 32, 32, (7, 7)),
     (8, 256, 56, 56, (14, 14)),
 ]
-num_iter = 20
+backward = True
 
 
-def Adaptive_AVGPool2d(shape, dtype, channels_last, backward):
+def Adaptive_AVGPool2d(shape, dtype, channels_last, backward, device):
     N, C, H, W, output_size = (
         shape[0],
         shape[1],
@@ -47,14 +45,34 @@ def Adaptive_AVGPool2d(shape, dtype, channels_last, backward):
     if backward:
         output[0].backward(grad)
 
+def run_profile(shape, dtype, channels_last, backward, device, num_iter):
+    with profile(
+        activities=[ProfilerActivity.CPU,
+            ProfilerActivity.XPU if device == 'xpu' else ProfilerActivity.CUDA],
+        record_shapes=True,
+    ) as prof:
+        for _ in range(num_iter):
+            Adaptive_AVGPool2d(shape, dtype, channels_last, backward, device)
+    print(prof.key_averages().table(sort_by="{}_time_total".format(device)))
 
-if __name__ == "__main__":
-    backward = True
+def run_e2e(shape, dtype, channels_last, backward, device, num_iter):
+    if device in ['xpu', 'cuda']:
+        torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
+    t1 = time.time()
+    for _ in range(num_iter):
+        Adaptive_AVGPool2d(shape, dtype, channels_last, backward, device)
+    if device in ['xpu', 'cuda']:
+        torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
+    t2 = time.time()
+    e2e_time = (t2 - t1) / num_iter
+    print("E2E total time:", f"{float(e2e_time):.20f}")
+
+def benchmark(args):
     for shape in shape_list:
         for dtype in [torch.bfloat16, torch.float16, torch.float32]:
             for channels_last in [False, True]:
                 # warm up
-                Adaptive_AVGPool2d(shape, dtype, channels_last, backward)
+                Adaptive_AVGPool2d(shape, dtype, channels_last, backward, args.device)
 
                 # go
                 print(
@@ -69,20 +87,25 @@ def Adaptive_AVGPool2d(shape, dtype, channels_last, backward):
                     "; backward:",
                     backward,
                 )
-                with profile(
-                    activities=[ProfilerActivity.CPU, ProfilerActivity.XPU],
-                    record_shapes=True,
-                ) as prof:
-                    for i in range(num_iter):
-                        Adaptive_AVGPool2d(shape, dtype, channels_last, backward)
-                print(prof.key_averages().table(sort_by="xpu_time_total"))
+                if not args.e2e_only:
+                    run_profile(shape, dtype, channels_last, backward, args.device, args.num_iter)
 
-                # E2E time
-                torch.xpu.synchronize()
-                t1 = time.time()
-                for i in range(num_iter):
-                    Adaptive_AVGPool2d(shape, dtype, channels_last, backward)
-                torch.xpu.synchronize()
-                t2 = time.time()
-                e2e_time = (t2 - t1) / num_iter
-                print("E2E total time:", f"{float(e2e_time):.20f}")
+                if not args.profile_only:
+                    run_e2e(shape, dtype, channels_last, backward, args.device, args.num_iter)
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='OP Benchmark')
+    parser.add_argument('--device', type=str, default='xpu',
+                        help='Device to run on (e.g., "cpu", "cuda", "xpu")')
+    group = parser.add_mutually_exclusive_group()
+    group.add_argument('--profile-only', action='store_true',
+                       help='Only Run profile timing')
+    group.add_argument('--e2e-only', action='store_true',
+                       help='Only Run E2E timing')
+    parser.add_argument('--num-iter', type=int, default=20,
+                        help='Number of iterations')
+    return parser.parse_args()
+
+if __name__ == "__main__":
+    args = parse_args()
+    benchmark(args)
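
Note: the heart of this change is the split into run_profile and run_e2e, with the E2E loop bracketed by device-conditional synchronize calls. Below is a minimal standalone sketch of that timing pattern, using a hypothetical sync_device helper and torch.nn.functional.adaptive_avg_pool2d as a stand-in workload; it illustrates the pattern and is not code from this commit.

import time
import torch
import torch.nn.functional as F

def sync_device(device):
    # Hypothetical helper mirroring the diff's conditional synchronize calls:
    # only asynchronous GPU-like devices need a barrier before reading the clock.
    if device == "xpu":
        torch.xpu.synchronize()
    elif device == "cuda":
        torch.cuda.synchronize()

def e2e_time(device="cpu", num_iter=20):
    x = torch.randn(8, 512, 32, 32, device=device)
    F.adaptive_avg_pool2d(x, (7, 7))      # warm up outside the timed region
    sync_device(device)
    t1 = time.time()
    for _ in range(num_iter):
        F.adaptive_avg_pool2d(x, (7, 7))
    sync_device(device)
    return (time.time() - t1) / num_iter  # average seconds per iteration

print(f"E2E per-iter time: {e2e_time():.6f}s")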

test/microbench/avg_pool2d.py

Lines changed: 49 additions & 25 deletions
@@ -1,20 +1,18 @@
 import time
-
+import argparse
 import torch
 from torch.profiler import profile, ProfilerActivity
 
-device = "xpu"
-
 shape_list = [
     (16, 24, 112, 112, (3), (2)),
     (16, 1984, 7, 7, (3, 2), (2, 1)),
     (64, 1024, 112, 112, (6), (4)),
     (16, 2048, 224, 224, (3), (2)),
 ]
-num_iter = 20
+backward = True
 
 
-def AVGPool2d(shape, dtype, channels_last, backward):
+def AVGPool2d(shape, dtype, channels_last, backward, device):
     N, C, H, W, kernel_size, stride = (
         shape[0],
         shape[1],
@@ -54,14 +52,34 @@ def AVGPool2d(shape, dtype, channels_last, backward):
     if backward:
         output[0].backward(grad)
 
+def run_profile(shape, dtype, channels_last, backward, device, num_iter):
+    with profile(
+        activities=[ProfilerActivity.CPU,
+            ProfilerActivity.XPU if device == 'xpu' else ProfilerActivity.CUDA],
+        record_shapes=True,
+    ) as prof:
+        for _ in range(num_iter):
+            AVGPool2d(shape, dtype, channels_last, backward, device)
+    print(prof.key_averages().table(sort_by="{}_time_total".format(device)))
 
-if __name__ == "__main__":
-    backward = True
+def run_e2e(shape, dtype, channels_last, backward, device, num_iter):
+    if device in ['xpu', 'cuda']:
+        torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
+    t1 = time.time()
+    for _ in range(num_iter):
+        AVGPool2d(shape, dtype, channels_last, backward, device)
+    if device in ['xpu', 'cuda']:
+        torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
+    t2 = time.time()
+    e2e_time = (t2 - t1) / num_iter
+    print("E2E total time:", f"{float(e2e_time):.20f}")
+
+def benchmark(args):
     for shape in shape_list:
         for dtype in [torch.bfloat16, torch.float16, torch.float32]:
             for channels_last in [False, True]:
                 # warm up
-                AVGPool2d(shape, dtype, channels_last, backward)
+                AVGPool2d(shape, dtype, channels_last, backward, args.device)
 
                 # go
                 print(
@@ -78,20 +96,26 @@ def AVGPool2d(shape, dtype, channels_last, backward):
                     "; backward:",
                     backward,
                 )
-                with profile(
-                    activities=[ProfilerActivity.CPU, ProfilerActivity.XPU],
-                    record_shapes=True,
-                ) as prof:
-                    for i in range(num_iter):
-                        AVGPool2d(shape, dtype, channels_last, backward)
-                print(prof.key_averages().table(sort_by="xpu_time_total"))
-
-                # E2E time
-                torch.xpu.synchronize()
-                t1 = time.time()
-                for i in range(num_iter):
-                    AVGPool2d(shape, dtype, channels_last, backward)
-                torch.xpu.synchronize()
-                t2 = time.time()
-                e2e_time = (t2 - t1) / num_iter
-                print("E2E total time:", f"{float(e2e_time):.20f}")
+
+                if not args.e2e_only:
+                    run_profile(shape, dtype, channels_last, backward, args.device, args.num_iter)
+
+                if not args.profile_only:
+                    run_e2e(shape, dtype, channels_last, backward, args.device, args.num_iter)
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='OP Benchmark')
+    parser.add_argument('--device', type=str, default='xpu',
+                        help='Device to run on (e.g., "cpu", "cuda", "xpu")')
+    group = parser.add_mutually_exclusive_group()
+    group.add_argument('--profile-only', action='store_true',
+                       help='Only Run profile timing')
+    group.add_argument('--e2e-only', action='store_true',
+                       help='Only Run E2E timing')
+    parser.add_argument('--num-iter', type=int, default=20,
+                        help='Number of iterations')
+    return parser.parse_args()
+
+if __name__ == "__main__":
+    args = parse_args()
+    benchmark(args)
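
Note: the profiler path keys both its activity list and its sort column off the device string. A small sketch of that selection, under the assumption that ProfilerActivity.XPU exists in the installed PyTorch build; unlike the two-way conditional in the diff, the pick_activities helper here (a hypothetical name) also tolerates a plain 'cpu' run.

import torch
from torch.profiler import profile, ProfilerActivity

def pick_activities(device):
    # CPU activity is always collected; add the matching GPU activity if any.
    activities = [ProfilerActivity.CPU]
    if device == "xpu":
        activities.append(ProfilerActivity.XPU)
    elif device == "cuda":
        activities.append(ProfilerActivity.CUDA)
    return activities

def profile_pool(device="cpu", num_iter=5):
    x = torch.randn(16, 24, 112, 112, device=device)
    pool = torch.nn.AvgPool2d(kernel_size=3, stride=2)
    with profile(activities=pick_activities(device), record_shapes=True) as prof:
        for _ in range(num_iter):
            pool(x)
    # Sort key follows the diff's "{device}_time_total" convention, falling back to CPU.
    sort_key = f"{device}_time_total" if device in ("xpu", "cuda") else "cpu_time_total"
    print(prof.key_averages().table(sort_by=sort_key))

profile_pool()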

test/microbench/avg_pool3d.py

Lines changed: 49 additions & 25 deletions
@@ -1,19 +1,17 @@
 import time
-
+import argparse
 import torch
 from torch.profiler import profile, ProfilerActivity
 
-device = "xpu"
-
 shape_list = [
     (16, 24, 28, 19, 19, (3), (2)),
     (16, 1984, 7, 7, 7, (3, 2, 2), (2, 1, 2)),
     (64, 1024, 14, 14, 14, (6), (4)),
 ]
-num_iter = 20
+backward = True
 
 
-def AVGPool3d(shape, dtype, channels_last, backward):
+def AVGPool3d(shape, dtype, channels_last, backward, device):
     N, C, D, H, W, kernel_size, stride = (
         shape[0],
         shape[1],
@@ -54,14 +52,34 @@ def AVGPool3d(shape, dtype, channels_last, backward):
     if backward:
         output[0].backward(grad)
 
+def run_profile(shape, dtype, channels_last, backward, device, num_iter):
+    with profile(
+        activities=[ProfilerActivity.CPU,
+            ProfilerActivity.XPU if device == 'xpu' else ProfilerActivity.CUDA],
+        record_shapes=True,
+    ) as prof:
+        for _ in range(num_iter):
+            AVGPool3d(shape, dtype, channels_last, backward, device)
+    print(prof.key_averages().table(sort_by="{}_time_total".format(device)))
 
-if __name__ == "__main__":
-    backward = True
+def run_e2e(shape, dtype, channels_last, backward, device, num_iter):
+    if device in ['xpu', 'cuda']:
+        torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
+    t1 = time.time()
+    for _ in range(num_iter):
+        AVGPool3d(shape, dtype, channels_last, backward, device)
+    if device in ['xpu', 'cuda']:
+        torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
+    t2 = time.time()
+    e2e_time = (t2 - t1) / num_iter
+    print("E2E total time:", f"{float(e2e_time):.20f}")
+
+def benchmark(args):
     for shape in shape_list:
         for dtype in [torch.bfloat16, torch.float16, torch.float32]:
             for channels_last in [False, True]:
                 # warm up
-                AVGPool3d(shape, dtype, channels_last, backward)
+                AVGPool3d(shape, dtype, channels_last, backward, args.device)
 
                 # go
                 print(
@@ -78,20 +96,26 @@ def AVGPool3d(shape, dtype, channels_last, backward):
                     "; backward:",
                     backward,
                 )
-                with profile(
-                    activities=[ProfilerActivity.CPU, ProfilerActivity.XPU],
-                    record_shapes=True,
-                ) as prof:
-                    for i in range(num_iter):
-                        AVGPool3d(shape, dtype, channels_last, backward=True)
-                print(prof.key_averages().table(sort_by="xpu_time_total"))
-
-                # E2E time
-                torch.xpu.synchronize()
-                t1 = time.time()
-                for i in range(num_iter):
-                    AVGPool3d(shape, dtype, channels_last, backward=True)
-                torch.xpu.synchronize()
-                t2 = time.time()
-                e2e_time = (t2 - t1) / num_iter
-                print("E2E total time:", f"{float(e2e_time):.20f}")
+
+                if not args.e2e_only:
+                    run_profile(shape, dtype, channels_last, backward, args.device, args.num_iter)
+
+                if not args.profile_only:
+                    run_e2e(shape, dtype, channels_last, backward, args.device, args.num_iter)
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='OP Benchmark')
+    parser.add_argument('--device', type=str, default='xpu',
+                        help='Device to run on (e.g., "cpu", "cuda", "xpu")')
+    group = parser.add_mutually_exclusive_group()
+    group.add_argument('--profile-only', action='store_true',
+                       help='Only Run profile timing')
+    group.add_argument('--e2e-only', action='store_true',
+                       help='Only Run E2E timing')
+    parser.add_argument('--num-iter', type=int, default=20,
+                        help='Number of iterations')
+    return parser.parse_args()
+
+if __name__ == "__main__":
+    args = parse_args()
+    benchmark(args)
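
Note: all three scripts now expose the same CLI surface (--device, --profile-only, --e2e-only, --num-iter), so a run can also be driven programmatically by handing benchmark() a pre-built namespace. A hypothetical snippet, assumed to live inside one of the refactored scripts such as avg_pool3d.py:

import argparse

# Equivalent to: python avg_pool3d.py --device xpu --e2e-only --num-iter 50
args = argparse.Namespace(device="xpu", profile_only=False, e2e_only=True, num_iter=50)
benchmark(args)  # benchmark() as defined in the surrounding script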
