5
5
import torch .utils .benchmark as benchmark
6
6
7
7
import torchcodec
8
- import torchvision .transforms . v2 . functional as F
8
+ from torchvision .transforms import Resize
9
9
10
- RESIZED_WIDTH = 256
11
- RESIZED_HEIGHT = 256
12
10
13
-
14
def transfer_and_resize_frame(frame, device, size=(256, 256)):
    """Move ``frame`` to ``device`` and resize it to ``size``.

    Args:
        frame: Frame tensor to process; it must support ``.to(device)`` and
            be accepted by ``Resize``.
        device: Target device handed to ``frame.to``, e.g. ``"cuda:0"``.
        size: ``(height, width)`` target handed to ``Resize``. Defaults to
            the 256x256 output this benchmark has always used.

    Returns:
        The resized frame, located on ``device``.
    """
    # This should be a no-op if the frame is already on the device.
    frame = frame.to(device)
    return Resize(size)(frame)
19
16
20
17
21
def decode_full_video(video_path, decode_device):
    """Decode every frame of a video once and report the throughput.

    Args:
        video_path: Path of the video file handed to ``create_from_file``.
        decode_device: Device string used for decoding, e.g. ``"cpu"`` or
            ``"cuda:0"``.

    Returns:
        A ``(frame_count, elapsed_seconds)`` tuple covering the decode loop
        only (decoder creation and stream setup are not timed).
    """
    decoder = torchcodec.decoders._core.create_from_file(video_path)
    # CUDA decoding is pinned to a single thread; otherwise ``None`` is
    # passed through so the decoder picks its own thread count.
    num_threads = 1 if "cuda" in decode_device else None
    torchcodec.decoders._core.add_video_stream(
        decoder,
        stream_index=0,
        device_string=decode_device,
        num_threads=num_threads,
    )

    # perf_counter() is monotonic and high resolution, so the measured
    # interval cannot be skewed by system clock adjustments.
    start_time = time.perf_counter()
    frame_count = 0
    while True:
        try:
            frame, *_ = torchcodec.decoders._core.get_next_frame(decoder)
            # You can do a resize to simulate extra preproc work that happens
            # on the GPU by uncommenting the following line:
            # frame = transfer_and_resize_frame(frame, decode_device)
            frame_count += 1
        except Exception as e:
            # The loop only terminates through this path. Presumably
            # get_next_frame raises once the stream is exhausted -- confirm
            # the exact exception type before narrowing this handler.
            print("EXCEPTION", e)
            break
    elapsed = time.perf_counter() - start_time

    # Guard against a zero-length interval (e.g. failure on the first frame).
    fps = frame_count / elapsed if elapsed > 0 else 0.0
    print(
        f"****** DECODED full video {decode_device=} {frame_count=} {elapsed=} {fps=}"
    )
    return frame_count, elapsed
63
47
@@ -70,12 +54,6 @@ def main():
70
54
type = str ,
71
55
help = "Comma-separated devices to test decoding on." ,
72
56
)
73
- parser .add_argument (
74
- "--resize_devices" ,
75
- default = "cuda:0,cpu,native,none" ,
76
- type = str ,
77
- help = "Comma-separated devices to test preroc (resize) on. Use 'none' to specify no resize." ,
78
- )
79
57
parser .add_argument (
80
58
"--video" ,
81
59
type = str ,
@@ -100,44 +78,23 @@ def main():
100
78
decode_full_video (video_path , device )
101
79
return
102
80
103
- resize_devices = args .resize_devices .split ("," )
104
- resize_devices = [d for d in resize_devices if d != "" ]
105
- if len (resize_devices ) == 0 :
106
- resize_devices .append ("none" )
107
-
108
- label = "Decode+Resize Time"
109
-
110
81
results = []
111
- for decode_device_string in args .devices .split ("," ):
112
- for resize_device_string in resize_devices :
113
- decode_label = decode_device_string
114
- if "cuda" in decode_label :
115
- # Shorten "cuda:0" to "cuda"
116
- decode_label = "cuda"
117
- resize_label = resize_device_string
118
- if "cuda" in resize_device_string :
119
- # Shorten "cuda:0" to "cuda"
120
- resize_label = "cuda"
121
- print ("decode_device" , decode_device_string )
122
- print ("resize_device" , resize_device_string )
123
- t = benchmark .Timer (
124
- stmt = "decode_full_video(video_path, decode_device_string, resize_device_string)" ,
125
- globals = {
126
- "decode_device_string" : decode_device_string ,
127
- "video_path" : video_path ,
128
- "decode_full_video" : decode_full_video ,
129
- "resize_device_string" : resize_device_string ,
130
- },
131
- label = label ,
132
- description = f"video={ os .path .basename (video_path )} " ,
133
- sub_label = f"D={ decode_label } R={ resize_label } " ,
134
- ).blocked_autorange ()
135
- results .append (t )
82
+ for device in args .devices .split ("," ):
83
+ print ("device" , device )
84
+ t = benchmark .Timer (
85
+ stmt = "decode_full_video(video_path, device)" ,
86
+ globals = {
87
+ "device" : device ,
88
+ "video_path" : video_path ,
89
+ "decode_full_video" : decode_full_video ,
90
+ },
91
+ label = "Decode+Resize Time" ,
92
+ sub_label = f"video={ os .path .basename (video_path )} " ,
93
+ description = f"decode_device={ device } " ,
94
+ ).blocked_autorange ()
95
+ results .append (t )
136
96
compare = benchmark .Compare (results )
137
97
compare .print ()
138
- print ("Key: D=Decode, R=Resize" )
139
- print ("Native resize is done as part of the decode step" )
140
- print ("none resize means there is no resize step -- native or otherwise" )
141
98
142
99
143
100
if __name__ == "__main__" :
0 commit comments