1
1
import time
2
2
import argparse
3
+
3
4
import torch
4
5
from torch .profiler import profile , ProfilerActivity
5
6
@@ -48,21 +49,22 @@ def Adaptive_AVGPool2d(shape, dtype, channels_last, backward, device):
48
49
def run_profile(shape, dtype, channels_last, backward, device, num_iter):
    """Profile repeated ``Adaptive_AVGPool2d`` calls and print the summary table.

    Args:
        shape, dtype, channels_last, backward: Forwarded unchanged to
            ``Adaptive_AVGPool2d``.
        device: Device name. It is forwarded to ``Adaptive_AVGPool2d``, selects
            the accelerator profiler activity, and forms the sort key
            ``"<device>_time_total"`` for the printed table.
        num_iter: Number of times the op is invoked under the profiler.
    """
    activities = [ProfilerActivity.CPU]
    # Request an accelerator activity only when that accelerator is the
    # target; a plain "cpu" run should not ask the profiler to trace CUDA.
    if device == "xpu":
        activities.append(ProfilerActivity.XPU)
    elif device == "cuda":
        activities.append(ProfilerActivity.CUDA)

    with profile(activities=activities, record_shapes=True) as prof:
        for _ in range(num_iter):
            Adaptive_AVGPool2d(shape, dtype, channels_last, backward, device)

    # Results are read after the profiler context has exited.
    print(prof.key_averages().table(sort_by=f"{device}_time_total"))
57
59
58
60
def run_e2e(shape, dtype, channels_last, backward, device, num_iter):
    """Time ``num_iter`` calls of ``Adaptive_AVGPool2d`` and print the mean per call.

    Args:
        shape, dtype, channels_last, backward: Forwarded unchanged to
            ``Adaptive_AVGPool2d``.
        device: Device name. For ``"xpu"`` and ``"cuda"`` the matching
            ``torch.<device>.synchronize()`` is called before the timer starts
            and again before it stops; other values are not synchronized.
        num_iter: Number of timed invocations; also the divisor of the
            reported average, so it must be non-zero.
    """

    def _synchronize():
        # Called on both sides of the timed loop for "xpu" and "cuda".
        if device == "xpu":
            torch.xpu.synchronize()
        elif device == "cuda":
            torch.cuda.synchronize()

    _synchronize()
    # perf_counter is monotonic and high resolution, unlike time.time(),
    # which can jump if the system clock is adjusted mid-benchmark.
    start = time.perf_counter()
    for _ in range(num_iter):
        Adaptive_AVGPool2d(shape, dtype, channels_last, backward, device)
    _synchronize()
    elapsed = time.perf_counter() - start

    e2e_time = elapsed / num_iter
    print("E2E total time:", f"{float(e2e_time):.20f}")
@@ -88,22 +90,39 @@ def benchmark(args):
88
90
backward ,
89
91
)
90
92
if not args .e2e_only :
91
- run_profile (shape , dtype , channels_last , backward , args .device , args .num_iter )
93
+ run_profile (
94
+ shape ,
95
+ dtype ,
96
+ channels_last ,
97
+ backward ,
98
+ args .device ,
99
+ args .num_iter ,
100
+ )
92
101
93
102
if not args .profile_only :
94
- run_e2e (shape , dtype , channels_last , backward , args .device , args .num_iter )
103
+ run_e2e (
104
+ shape ,
105
+ dtype ,
106
+ channels_last ,
107
+ backward ,
108
+ args .device ,
109
+ args .num_iter ,
110
+ )
95
111
96
112
def parse_args(argv=None):
    """Build the benchmark CLI parser and parse the command line.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            argparse reads ``sys.argv[1:]``, so existing ``parse_args()``
            callers behave as before.

    Returns:
        The parsed ``argparse.Namespace`` with ``device``, ``profile_only``,
        ``e2e_only`` and ``num_iter`` attributes.
    """
    parser = argparse.ArgumentParser(description="OP Benchmark")
    parser.add_argument(
        "--device",
        type=str,
        default="xpu",
        help='Device to run on (e.g., "cpu", "cuda", "xpu")',
    )
    # The two "only" switches are mutually exclusive.
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        "--profile-only", action="store_true", help="Only Run profile timing"
    )
    group.add_argument("--e2e-only", action="store_true", help="Only Run E2E timing")
    parser.add_argument("--num-iter", type=int, default=20, help="Number of iterations")
    return parser.parse_args(argv)
108
127
109
128
if __name__ == "__main__" :
0 commit comments