Skip to content

Commit dbf0a9c

Browse files
fix the typo issues
1 parent 9de9f14 commit dbf0a9c

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

52 files changed

+644
-657
lines changed

test/microbench/distance.pdist.py

Lines changed: 18 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -7,32 +7,28 @@
77
backward_shape_list = [(256, 256), (256, 8192), (16, 8192 * 4)]
88

99

10-
def Pdist(shape, dtype, backward, device):
11-
input = torch.rand(shape, device=device, dtype=dtype)
12-
if backward:
13-
input.requires_grad_(True)
14-
10+
def Pdist(input, backward, device):
1511
b = torch.nn.functional.pdist(input, 2)
1612
if backward:
1713
gy = torch.empty_like(b)
1814
b.backward(gy)
1915

20-
def run_profile(shape, dtype, backward, device, num_iter):
16+
def run_profile(input, backward, device, num_iter):
2117
with profile(
22-
activities=[ProfilerActivity.CPU,
18+
activities=[ProfilerActivity.CPU,
2319
ProfilerActivity.XPU if device == 'xpu' else ProfilerActivity.CUDA],
2420
record_shapes=True,
2521
) as prof:
26-
for _ in range(num_iter):
27-
Pdist(shape, dtype, backward, device)
22+
for i in range(num_iter):
23+
Pdist(input, backward, device)
2824
print(prof.key_averages().table(sort_by="{}_time_total".format(device)))
2925

30-
def run_e2e(shape, dtype, backward, device, num_iter):
26+
def run_e2e(input, backward, device, num_iter):
3127
if device in ['xpu', 'cuda']:
3228
torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
3329
t1 = time.time()
34-
for _ in range(num_iter):
35-
Pdist(shape, dtype, backward, device)
30+
for i in range(num_iter):
31+
Pdist(input, backward, device)
3632
if device in ['xpu', 'cuda']:
3733
torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
3834
t2 = time.time()
@@ -44,27 +40,30 @@ def benchmark(args):
4440
shape_list = backward_shape_list if backward else forward_shape_list
4541
for shape in shape_list:
4642
for dtype in [torch.float32]:
43+
input = torch.rand(shape, device=args.device, dtype=dtype)
44+
if backward:
45+
input.requires_grad_(True)
4746
# warm up
48-
Pdist(shape, dtype, backward, args.device)
47+
Pdist(input, backward, args.device)
4948

5049
# go
5150
print("shape:", shape, "; datatype:", dtype, "; backward:", backward)
5251
if not args.e2e_only:
53-
run_profile(shape, dtype, backward, args.device, args.num_iter)
52+
run_profile(input, backward, args.device, args.num_iter)
5453

5554
if not args.profile_only:
56-
run_e2e(shape, dtype, backward, args.device, args.num_iter)
55+
run_e2e(input, backward, args.device, args.num_iter)
5756

5857
def parse_args():
5958
parser = argparse.ArgumentParser(description='OP Benchmark')
60-
parser.add_argument('--device', type=str, default='xpu',
59+
parser.add_argument('--device', type=str, default='xpu',
6160
help='Device to run on (e.g., "cpu", "cuda", "xpu")')
6261
group = parser.add_mutually_exclusive_group()
63-
group.add_argument('--profile-only', action='store_true',
62+
group.add_argument('--profile-only', action='store_true',
6463
help='Only Run profile timing')
65-
group.add_argument('--e2e-only', action='store_true',
64+
group.add_argument('--e2e-only', action='store_true',
6665
help='Only Run E2E timing')
67-
parser.add_argument('--num-iter', type=int, default=20,
66+
parser.add_argument('--num-iter', type=int, default=20,
6867
help='Number of iterations')
6968
return parser.parse_args()
7069

test/microbench/distribution.bernoulli.py

Lines changed: 18 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -7,28 +7,25 @@
77
backward = False
88

99

10-
def Bernoulli(shape, dtype, backward, p, device):
11-
input = torch.zeros(
12-
shape, dtype=torch.bfloat16, device=device
13-
)
10+
def Bernoulli(input, p, device):
1411
input.bernoulli_(p)
1512

16-
def run_profile(shape, dtype, backward, p, device, num_iter):
13+
def run_profile(input, p, device, num_iter):
1714
with profile(
18-
activities=[ProfilerActivity.CPU,
15+
activities=[ProfilerActivity.CPU,
1916
ProfilerActivity.XPU if device == 'xpu' else ProfilerActivity.CUDA],
2017
record_shapes=True,
2118
) as prof:
22-
for _ in range(num_iter):
23-
Bernoulli(shape, dtype, backward, p, device)
19+
for i in range(num_iter):
20+
Bernoulli(input, p, device)
2421
print(prof.key_averages().table(sort_by="{}_time_total".format(device)))
2522

26-
def run_e2e(shape, dtype, backward, p, device, num_iter):
23+
def run_e2e(input, p, device, num_iter):
2724
if device in ['xpu', 'cuda']:
2825
torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
2926
t1 = time.time()
30-
for _ in range(num_iter):
31-
Bernoulli(shape, dtype, backward, p, device)
27+
for i in range(num_iter):
28+
Bernoulli(input, p, device)
3229
if device in ['xpu', 'cuda']:
3330
torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
3431
t2 = time.time()
@@ -39,8 +36,11 @@ def benchmark(args):
3936
for shape in shape_list:
4037
for dtype in [torch.bfloat16, torch.float16, torch.float32]:
4138
for p in [0.5, torch.tensor(0.5)]:
39+
input = torch.zeros(
40+
shape, dtype=dtype, device=args.device
41+
)
4242
# warm up
43-
Bernoulli(shape, dtype, backward, p, args.device)
43+
Bernoulli(input, p, args.device)
4444

4545
# go
4646
print(
@@ -54,21 +54,21 @@ def benchmark(args):
5454
backward,
5555
)
5656
if not args.e2e_only:
57-
run_profile(shape, dtype, backward, p, args.device, args.num_iter)
57+
run_profile(input, p, args.device, args.num_iter)
5858

5959
if not args.profile_only:
60-
run_e2e(shape, dtype, backward, p, args.device, args.num_iter)
60+
run_e2e(input, p, args.device, args.num_iter)
6161

6262
def parse_args():
6363
parser = argparse.ArgumentParser(description='OP Benchmark')
64-
parser.add_argument('--device', type=str, default='xpu',
64+
parser.add_argument('--device', type=str, default='xpu',
6565
help='Device to run on (e.g., "cpu", "cuda", "xpu")')
6666
group = parser.add_mutually_exclusive_group()
67-
group.add_argument('--profile-only', action='store_true',
67+
group.add_argument('--profile-only', action='store_true',
6868
help='Only Run profile timing')
69-
group.add_argument('--e2e-only', action='store_true',
69+
group.add_argument('--e2e-only', action='store_true',
7070
help='Only Run E2E timing')
71-
parser.add_argument('--num-iter', type=int, default=20,
71+
parser.add_argument('--num-iter', type=int, default=20,
7272
help='Number of iterations')
7373
return parser.parse_args()
7474

test/microbench/distribution.cauchy.py

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -7,26 +7,25 @@
77
backward = False
88

99

10-
def Cauchy(shape, dtype, backward, device):
11-
input = torch.randn(shape, dtype=dtype, device=device)
10+
def Cauchy(input, device):
1211
input.cauchy_()
1312

14-
def run_profile(shape, dtype, backward, device, num_iter):
13+
def run_profile(input, device, num_iter):
1514
with profile(
16-
activities=[ProfilerActivity.CPU,
15+
activities=[ProfilerActivity.CPU,
1716
ProfilerActivity.XPU if device == 'xpu' else ProfilerActivity.CUDA],
1817
record_shapes=True,
1918
) as prof:
20-
for _ in range(num_iter):
21-
Cauchy(shape, dtype, backward, device)
19+
for i in range(num_iter):
20+
Cauchy(input, device)
2221
print(prof.key_averages().table(sort_by="{}_time_total".format(device)))
2322

24-
def run_e2e(shape, dtype, backward, device, num_iter):
23+
def run_e2e(input, device, num_iter):
2524
if device in ['xpu', 'cuda']:
2625
torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
2726
t1 = time.time()
28-
for _ in range(num_iter):
29-
Cauchy(shape, dtype, backward, device)
27+
for i in range(num_iter):
28+
Cauchy(input, device)
3029
if device in ['xpu', 'cuda']:
3130
torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
3231
t2 = time.time()
@@ -36,27 +35,28 @@ def run_e2e(shape, dtype, backward, device, num_iter):
3635
def benchmark(args):
3736
for shape in shape_list:
3837
for dtype in [torch.bfloat16, torch.float16, torch.float32]:
38+
input = torch.randn(shape, dtype=dtype, device=args.device)
3939
# warm up
40-
Cauchy(shape, dtype, backward, args.device)
40+
Cauchy(input, args.device)
4141

4242
# go
4343
print("shape:", (shape), "; datatype:", dtype, "; backward:", backward)
4444
if not args.e2e_only:
45-
run_profile(shape, dtype, backward, args.device, args.num_iter)
45+
run_profile(input, args.device, args.num_iter)
4646

4747
if not args.profile_only:
48-
run_e2e(shape, dtype, backward, args.device, args.num_iter)
48+
run_e2e(input, args.device, args.num_iter)
4949

5050
def parse_args():
5151
parser = argparse.ArgumentParser(description='OP Benchmark')
52-
parser.add_argument('--device', type=str, default='xpu',
52+
parser.add_argument('--device', type=str, default='xpu',
5353
help='Device to run on (e.g., "cpu", "cuda", "xpu")')
5454
group = parser.add_mutually_exclusive_group()
55-
group.add_argument('--profile-only', action='store_true',
55+
group.add_argument('--profile-only', action='store_true',
5656
help='Only Run profile timing')
57-
group.add_argument('--e2e-only', action='store_true',
57+
group.add_argument('--e2e-only', action='store_true',
5858
help='Only Run E2E timing')
59-
parser.add_argument('--num-iter', type=int, default=20,
59+
parser.add_argument('--num-iter', type=int, default=20,
6060
help='Number of iterations')
6161
return parser.parse_args()
6262

test/microbench/distribution.exponential.py

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -7,26 +7,25 @@
77
backward = False
88

99

10-
def Exponential(shape, dtype, backward, device):
11-
input = torch.randn(shape, dtype=dtype, device=device)
10+
def Exponential(input, device):
1211
input.exponential_(0.5)
1312

14-
def run_profile(shape, dtype, backward, device, num_iter):
13+
def run_profile(input, device, num_iter):
1514
with profile(
16-
activities=[ProfilerActivity.CPU,
15+
activities=[ProfilerActivity.CPU,
1716
ProfilerActivity.XPU if device == 'xpu' else ProfilerActivity.CUDA],
1817
record_shapes=True,
1918
) as prof:
20-
for _ in range(num_iter):
21-
Exponential(shape, dtype, backward, device)
19+
for i in range(num_iter):
20+
Exponential(input, device)
2221
print(prof.key_averages().table(sort_by="{}_time_total".format(device)))
2322

24-
def run_e2e(shape, dtype, backward, device, num_iter):
23+
def run_e2e(input, device, num_iter):
2524
if device in ['xpu', 'cuda']:
2625
torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
2726
t1 = time.time()
28-
for _ in range(num_iter):
29-
Exponential(shape, dtype, backward, device)
27+
for i in range(num_iter):
28+
Exponential(input, device)
3029
if device in ['xpu', 'cuda']:
3130
torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
3231
t2 = time.time()
@@ -36,27 +35,28 @@ def run_e2e(shape, dtype, backward, device, num_iter):
3635
def benchmark(args):
3736
for shape in shape_list:
3837
for dtype in [torch.bfloat16, torch.float16, torch.float32]:
38+
input = torch.randn(shape, dtype=dtype, device=args.device)
3939
# warm up
40-
Exponential(shape, dtype, backward, args.device)
40+
Exponential(input, args.device)
4141

4242
# go
4343
print("shape:", (shape), "; datatype:", dtype, "; backward:", backward)
4444
if not args.e2e_only:
45-
run_profile(shape, dtype, backward, args.device, args.num_iter)
45+
run_profile(input, args.device, args.num_iter)
4646

4747
if not args.profile_only:
48-
run_e2e(shape, dtype, backward, args.device, args.num_iter)
48+
run_e2e(input, args.device, args.num_iter)
4949

5050
def parse_args():
5151
parser = argparse.ArgumentParser(description='OP Benchmark')
52-
parser.add_argument('--device', type=str, default='xpu',
52+
parser.add_argument('--device', type=str, default='xpu',
5353
help='Device to run on (e.g., "cpu", "cuda", "xpu")')
5454
group = parser.add_mutually_exclusive_group()
55-
group.add_argument('--profile-only', action='store_true',
55+
group.add_argument('--profile-only', action='store_true',
5656
help='Only Run profile timing')
57-
group.add_argument('--e2e-only', action='store_true',
57+
group.add_argument('--e2e-only', action='store_true',
5858
help='Only Run E2E timing')
59-
parser.add_argument('--num-iter', type=int, default=20,
59+
parser.add_argument('--num-iter', type=int, default=20,
6060
help='Number of iterations')
6161
return parser.parse_args()
6262

test/microbench/distribution.geometric.py

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -7,26 +7,25 @@
77
backward = False
88

99

10-
def Geometric(shape, dtype, backward, device):
11-
input = torch.randn(shape, dtype=torch.bfloat16, device=device)
10+
def Geometric(input, device):
1211
input.geometric_(0.5)
1312

14-
def run_profile(shape, dtype, backward, device, num_iter):
13+
def run_profile(input, device, num_iter):
1514
with profile(
16-
activities=[ProfilerActivity.CPU,
15+
activities=[ProfilerActivity.CPU,
1716
ProfilerActivity.XPU if device == 'xpu' else ProfilerActivity.CUDA],
1817
record_shapes=True,
1918
) as prof:
20-
for _ in range(num_iter):
21-
Geometric(shape, dtype, backward, device)
19+
for i in range(num_iter):
20+
Geometric(input, device)
2221
print(prof.key_averages().table(sort_by="{}_time_total".format(device)))
2322

24-
def run_e2e(shape, dtype, backward, device, num_iter):
23+
def run_e2e(input, device, num_iter):
2524
if device in ['xpu', 'cuda']:
2625
torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
2726
t1 = time.time()
28-
for _ in range(num_iter):
29-
Geometric(shape, dtype, backward, device)
27+
for i in range(num_iter):
28+
Geometric(input, device)
3029
if device in ['xpu', 'cuda']:
3130
torch.xpu.synchronize() if device == 'xpu' else torch.cuda.synchronize()
3231
t2 = time.time()
@@ -36,8 +35,9 @@ def run_e2e(shape, dtype, backward, device, num_iter):
3635
def benchmark(args):
3736
for shape in shape_list:
3837
for dtype in [torch.bfloat16, torch.float16, torch.float32]:
38+
input = torch.randn(shape, dtype=torch.bfloat16, device=args.device)
3939
# warm up
40-
Geometric(shape, dtype, backward, args.device)
40+
Geometric(input, args.device)
4141

4242
# go
4343
print(
@@ -51,21 +51,21 @@ def benchmark(args):
5151
backward,
5252
)
5353
if not args.e2e_only:
54-
run_profile(shape, dtype, backward, args.device, args.num_iter)
54+
run_profile(input, args.device, args.num_iter)
5555

5656
if not args.profile_only:
57-
run_e2e(shape, dtype, backward, args.device, args.num_iter)
57+
run_e2e(input, args.device, args.num_iter)
5858

5959
def parse_args():
6060
parser = argparse.ArgumentParser(description='OP Benchmark')
61-
parser.add_argument('--device', type=str, default='xpu',
61+
parser.add_argument('--device', type=str, default='xpu',
6262
help='Device to run on (e.g., "cpu", "cuda", "xpu")')
6363
group = parser.add_mutually_exclusive_group()
64-
group.add_argument('--profile-only', action='store_true',
64+
group.add_argument('--profile-only', action='store_true',
6565
help='Only Run profile timing')
66-
group.add_argument('--e2e-only', action='store_true',
66+
group.add_argument('--e2e-only', action='store_true',
6767
help='Only Run E2E timing')
68-
parser.add_argument('--num-iter', type=int, default=20,
68+
parser.add_argument('--num-iter', type=int, default=20,
6969
help='Number of iterations')
7070
return parser.parse_args()
7171

0 commit comments

Comments
 (0)