@@ -84,45 +84,41 @@ def report_stats(
8484 min_time = unit_times .min ().item ()
8585 max_time = unit_times .max ().item ()
8686 print (
87- f"\n { prefix } : { med = :.2f} , { mean = :.2f} +- { std :.2f} , { min_time = :.2f} , { max_time = :.2f} - in { unit } "
87+ f"\n { prefix } { med = :.2f} , { mean = :.2f} +- { std :.2f} , { min_time = :.2f} , { max_time = :.2f} - in { unit } "
8888 )
89- fps = num_frames / (times * 1e-9 )
90- std = fps .std ().item ()
91- med = fps .median ().item ()
92- max_fps = fps .max ().item ()
93- print (f"{ med = :.1f} fps +- { std :.1f} , { max_fps = :.1f} " )
94-
9589 if cpu_utils is not None :
9690 cpu_avg = cpu_utils .mean ().item ()
9791 cpu_peak = cpu_utils .max ().item ()
98- print (f"CPU utilization: avg = { cpu_avg :.1f} %, peak = { cpu_peak :.1f} %" )
92+ print (f"CPU utilization: avg = { cpu_avg :.1f} %, peak = { cpu_peak :.1f} %" )
9993
10094 if gpu_utils is not None and gpu_utils .numel () > 0 :
10195 gpu_avg = gpu_utils .mean ().item ()
10296 gpu_peak = gpu_utils .max ().item ()
103- print (f"GPU utilization: avg = { gpu_avg :.1f} %, peak = { gpu_peak :.1f} %" )
97+ print (f"GPU utilization: avg = { gpu_avg :.1f} %, peak = { gpu_peak :.1f} %" )
10498
10599
def encode_torchcodec(frames, output_path, device="cpu"):
    """Encode ``frames`` to ``output_path`` using ``VideoEncoder``.

    The encoder is built from ``frames`` as given, with a frame rate of 30;
    no device transfer happens in this function.

    Args:
        frames: Frame data handed straight to ``VideoEncoder``.
        output_path: Destination passed to ``VideoEncoder.to_file`` as ``dest``.
        device: ``"cuda"`` selects the ``h264_nvenc`` codec with ``qp=1``;
            any other value selects ``libx264`` with ``crf=0``.
    """
    # Choose the codec-specific arguments first, then issue one to_file call.
    if device == "cuda":
        to_file_kwargs = {"codec": "h264_nvenc", "extra_options": {"qp": 1}}
    else:
        to_file_kwargs = {"codec": "libx264", "crf": 0}

    VideoEncoder(frames=frames, frame_rate=30).to_file(
        dest=output_path, **to_file_kwargs
    )
115106
116107
def write_raw_frames(frames, num_frames, raw_path):
    """Write the leading ``num_frames`` frames to ``raw_path`` as raw bytes.

    Args:
        frames: 4-D tensor, treated as NCHW by the layout conversion below.
        num_frames: Upper bound on how many leading frames are written
            (standard slice semantics: a value larger than the number of
            available frames writes all of them).
        raw_path: Path of the file to create or overwrite.
    """
    # Slice first so that only the frames actually written are permuted and
    # copied; the resulting bytes are identical to slicing after the copy.
    # Convert NCHW to NHWC for raw video format.
    raw_frames = frames[:num_frames].permute(0, 2, 3, 1).contiguous()
    with open(raw_path, "wb") as f:
        f.write(raw_frames.cpu().numpy().tobytes())
122113
123114
124- def encode_ffmpeg_cli (raw_path , frames_shape , output_path , device = "cpu" , codec = None ):
125- height , width = frames_shape [2 ], frames_shape [3 ]
115+ def write_and_encode_ffmpeg_cli (
116+ frames , num_frames , raw_path , output_path , device = "cpu" , write_frames = False
117+ ):
118+ # When write_frames is set, re-write the raw frames inside the benchmarked function so the write time is included in the measurement
119+ if write_frames :
120+ write_raw_frames (frames , num_frames , raw_path )
121+ height , width = frames .shape [2 ], frames .shape [3 ]
126122
127123 if device == "cuda" :
128124 codec = "h264_nvenc"
@@ -152,6 +148,7 @@ def encode_ffmpeg_cli(raw_path, frames_shape, output_path, device="cpu", codec=N
152148 ffmpeg_cmd .extend (quality_params )
153149 # By not setting threads, allow FFmpeg to choose.
154150 # ffmpeg_cmd.extend(["-threads", "1"])
151+ # try setting threads on VideoEncoder too?
155152 ffmpeg_cmd .extend ([str (output_path )])
156153
157154 subprocess .run (ffmpeg_cmd , check = True , capture_output = True )
@@ -174,6 +171,11 @@ def main():
174171 default = DEFAULT_MAX_FRAMES ,
175172 help = "Maximum number of frames to decode for benchmarking" ,
176173 )
174+ parser .add_argument (
175+ "--write-frames" ,
176+ action = "store_true" ,
177+ help = "Include raw frame writing time in FFmpeg CLI benchmarks for fairer comparison with tensor-based workflows" ,
178+ )
177179
178180 args = parser .parse_args ()
179181
@@ -189,21 +191,22 @@ def main():
189191 frames = decoder .get_frames_in_range (
190192 start = 0 , stop = min (args .max_frames , len (decoder ))
191193 ).data
194+ gpu_frames = frames .cuda ()
192195 print (
193196 f"Loaded { frames .shape [0 ]} frames of size { frames .shape [2 ]} x{ frames .shape [3 ]} "
194197 )
195198
196199 with tempfile .TemporaryDirectory () as temp_dir :
197200 temp_dir = Path (temp_dir )
198201 raw_frames_path = temp_dir / "input_frames.raw"
199- write_raw_frames (frames , str (raw_frames_path ))
202+ write_raw_frames (frames , args . max_frames , str (raw_frames_path ))
200203
201204 # Benchmark torchcodec on GPU
202205 if cuda_available :
203206 gpu_output = temp_dir / "torchcodec_gpu.mp4"
204207 times , _cpu_utils , gpu_utils = bench (
205208 encode_torchcodec ,
206- frames = frames ,
209+ frames = gpu_frames ,
207210 output_path = str (gpu_output ),
208211 device = "cuda" ,
209212 average_over = args .average_over ,
@@ -219,17 +222,18 @@ def main():
219222 if cuda_available :
220223 ffmpeg_gpu_output = temp_dir / "ffmpeg_gpu.mp4"
221224 times , _cpu_utils , gpu_utils = bench (
222- encode_ffmpeg_cli ,
225+ write_and_encode_ffmpeg_cli ,
226+ frames = gpu_frames ,
227+ num_frames = args .max_frames ,
223228 raw_path = str (raw_frames_path ),
224- frames_shape = frames .shape ,
225229 output_path = str (ffmpeg_gpu_output ),
226230 device = "cuda" ,
231+ write_frames = args .write_frames ,
227232 average_over = args .average_over ,
228233 warmup = 1 ,
229234 )
230- report_stats (
231- times , frames .shape [0 ], None , gpu_utils , prefix = "FFmpeg CLI on GPU"
232- )
235+ prefix = "FFmpeg CLI on GPU "
236+ report_stats (times , frames .shape [0 ], None , gpu_utils , prefix = prefix )
233237 else :
234238 print ("Skipping FFmpeg CLI GPU benchmark (CUDA not available)" )
235239
@@ -250,17 +254,18 @@ def main():
250254 # Benchmark FFmpeg CLI on CPU
251255 ffmpeg_cpu_output = temp_dir / "ffmpeg_cpu.mp4"
252256 times , cpu_utils , _gpu_utils = bench (
253- encode_ffmpeg_cli ,
257+ write_and_encode_ffmpeg_cli ,
258+ frames = frames ,
259+ num_frames = args .max_frames ,
254260 raw_path = str (raw_frames_path ),
255- frames_shape = frames .shape ,
256261 output_path = str (ffmpeg_cpu_output ),
257262 device = "cpu" ,
263+ write_frames = args .write_frames ,
258264 average_over = args .average_over ,
259265 warmup = 1 ,
260266 )
261- report_stats (
262- times , frames .shape [0 ], cpu_utils , None , prefix = "FFmpeg CLI on CPU"
263- )
267+ prefix = "FFmpeg CLI on CPU "
268+ report_stats (times , frames .shape [0 ], cpu_utils , None , prefix = prefix )
264269
265270
266271if __name__ == "__main__" :
0 commit comments