1212
1313import torch
1414import torch .utils .benchmark as benchmark
15- from torchcodec .decoders import VideoDecoder
15+ from torchcodec .decoders import VideoDecoder , VideoStreamMetadata
1616
1717from torchcodec .decoders ._core import (
1818 _add_video_stream ,
@@ -78,7 +78,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
7878 return frames
7979
8080
81- class TVNewAPIDecoderWithBackend (AbstractDecoder ):
81+ class TorchVision (AbstractDecoder ):
8282 def __init__ (self , backend ):
8383 self ._backend = backend
8484 self ._print_each_iteration_time = False
@@ -125,7 +125,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
125125 return frames
126126
127127
128- class TorchcodecNonCompiledWithOptions (AbstractDecoder ):
128+ class TorchCodecCore (AbstractDecoder ):
129129 def __init__ (self , num_threads = None , color_conversion_library = None , device = "cpu" ):
130130 self ._print_each_iteration_time = False
131131 self ._num_threads = int (num_threads ) if num_threads else None
@@ -186,7 +186,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
186186 return frames
187187
188188
189- class TorchCodecNonCompiledBatch (AbstractDecoder ):
189+ class TorchCodecCoreBatch (AbstractDecoder ):
190190 def __init__ (self , num_threads = None , color_conversion_library = None ):
191191 self ._print_each_iteration_time = False
192192 self ._num_threads = int (num_threads ) if num_threads else None
@@ -227,6 +227,24 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
227227 )
228228 return frames
229229
class TorchCodecPublic(AbstractDecoder):
    """Benchmark adapter that decodes through the public ``VideoDecoder`` API.

    A new ``VideoDecoder`` is constructed inside every method call, so the
    cost of opening the video is included in whatever the caller times.
    """

    def __init__(self, num_ffmpeg_threads=None):
        """Remember the FFmpeg thread count to forward to ``VideoDecoder``.

        Args:
            num_ffmpeg_threads: Thread count as an int or a numeric string.
                Any falsy value (``None``, ``0``, ``""``) is stored as
                ``None``.
        """
        self._num_ffmpeg_threads = (
            int(num_ffmpeg_threads) if num_ffmpeg_threads else None
        )

    def get_frames_from_video(self, video_file, pts_list):
        """Decode the frames played at each timestamp in ``pts_list``.

        Args:
            video_file: Path of the video to open.
            pts_list: Timestamps forwarded unchanged to
                ``VideoDecoder.get_frames_played_at``.

        Returns:
            Whatever ``VideoDecoder.get_frames_played_at`` returns.
        """
        decoder = VideoDecoder(
            video_file, num_ffmpeg_threads=self._num_ffmpeg_threads
        )
        return decoder.get_frames_played_at(pts_list)

    def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
        """Decode up to ``numFramesToDecode`` frames from the start of the video.

        Args:
            video_file: Path of the video to open.
            numFramesToDecode: Maximum number of frames to return. A value of
                zero or less yields an empty list; ``None`` decodes every
                frame.

        Returns:
            A list with the decoded frames, in iteration order. It is shorter
            than requested when the video has fewer frames.
        """
        # Local import keeps this block self-contained without touching the
        # module-level import section.
        from itertools import islice

        # The previous append-then-compare loop never matched its stop
        # condition for a non-positive request and decoded the whole video.
        if numFramesToDecode is not None and numFramesToDecode <= 0:
            return []

        decoder = VideoDecoder(
            video_file, num_ffmpeg_threads=self._num_ffmpeg_threads
        )
        # islice stops pulling from the decoder as soon as the limit is hit,
        # so no frame beyond the requested count is decoded.
        return list(islice(decoder, numFramesToDecode))
230248
231249@torch .compile (fullgraph = True , backend = "eager" )
232250def compiled_seek_and_next (decoder , pts ):
@@ -239,7 +257,7 @@ def compiled_next(decoder):
239257 return get_next_frame (decoder )
240258
241259
242- class TorchcodecCompiled (AbstractDecoder ):
260+ class TorchCodecCoreCompiled (AbstractDecoder ):
243261 def __init__ (self ):
244262 pass
245263
@@ -444,11 +462,13 @@ def plot_data(df_data, plot_path):
444462 plot_path ,
445463 )
446464
def get_metadata(video_file_path: str) -> VideoStreamMetadata:
    """Open ``video_file_path`` with ``VideoDecoder`` and return its ``metadata``."""
    probe = VideoDecoder(video_file_path)
    stream_metadata = probe.metadata
    return stream_metadata
447467
448468def run_benchmarks (
449469 decoder_dict ,
450470 video_files_paths ,
451- num_uniform_samples ,
471+ num_samples ,
452472 min_runtime_seconds ,
453473 benchmark_video_creation ,
454474) -> list [dict [str , str | float | int ]]:
@@ -459,55 +479,57 @@ def run_benchmarks(
459479 for decoder_name , decoder in decoder_dict .items ():
460480 for video_file_path in video_files_paths :
461481 print (f"video={ video_file_path } , decoder={ decoder_name } " )
462- # We only use the VideoDecoder to get the metadata and get
463- # the list of PTS values to seek to.
464- simple_decoder = VideoDecoder ( video_file_path )
465- duration = simple_decoder . metadata .duration_seconds
466- pts_list = [
467- i * duration / num_uniform_samples for i in range (num_uniform_samples )
482+ metadata = get_metadata ( video_file_path )
483+ metadata_label = f" { metadata . codec } { metadata . width } x { metadata . height } , { metadata . duration_seconds } s { metadata . average_fps } fps"
484+
485+ duration = metadata .duration_seconds
486+ uniform_pts_list = [
487+ i * duration / num_samples for i in range (num_samples )
468488 ]
469- metadata = simple_decoder .metadata
470- metadata_string = f"{ metadata .codec } { metadata .width } x{ metadata .height } , { metadata .duration_seconds } s { metadata .average_fps } fps"
471- if verbose :
472- print (
473- f"video={ video_file_path } , decoder={ decoder_name } , pts_list={ pts_list } "
489+
490+ random_pts_list = (torch .rand (num_samples ) * duration ).tolist ()
491+
492+ for kind , pts_list in [("uniform" , uniform_pts_list ), ("random" , random_pts_list )]:
493+ if verbose :
494+ print (
495+ f"video={ video_file_path } , decoder={ decoder_name } , pts_list={ pts_list } "
496+ )
497+ seeked_result = benchmark .Timer (
498+ stmt = "decoder.get_frames_from_video(video_file, pts_list)" ,
499+ globals = {
500+ "video_file" : video_file_path ,
501+ "pts_list" : pts_list ,
502+ "decoder" : decoder ,
503+ },
504+ label = f"video={ video_file_path } { metadata_label } " ,
505+ sub_label = decoder_name ,
506+ description = f"{ kind } { num_samples } seek()+next()" ,
474507 )
475- seeked_result = benchmark .Timer (
476- stmt = "decoder.get_frames_from_video(video_file, pts_list)" ,
477- globals = {
478- "video_file" : video_file_path ,
479- "pts_list" : pts_list ,
480- "decoder" : decoder ,
481- },
482- label = f"video={ video_file_path } { metadata_string } " ,
483- sub_label = decoder_name ,
484- description = f"{ num_uniform_samples } seek()+next()" ,
485- )
486- results .append (
487- seeked_result .blocked_autorange (min_run_time = min_runtime_seconds )
488- )
489- df_item = {}
490- df_item ["decoder" ] = decoder_name
491- df_item ["video" ] = video_file_path
492- df_item ["description" ] = results [- 1 ].description
493- df_item ["frame_count" ] = num_uniform_samples
494- df_item ["median" ] = results [- 1 ].median
495- df_item ["iqr" ] = results [- 1 ].iqr
496- df_item ["type" ] = "seek()+next()"
497- df_item ["fps" ] = 1.0 * num_uniform_samples / results [- 1 ].median
498- df_item ["fps_p75" ] = 1.0 * num_uniform_samples / results [- 1 ]._p75
499- df_item ["fps_p25" ] = 1.0 * num_uniform_samples / results [- 1 ]._p25
500- df_data .append (df_item )
501-
502- for num_consecutive_nexts in [1 , 10 ]:
508+ results .append (
509+ seeked_result .blocked_autorange (min_run_time = min_runtime_seconds )
510+ )
511+ df_item = {}
512+ df_item ["decoder" ] = decoder_name
513+ df_item ["video" ] = video_file_path
514+ df_item ["description" ] = results [- 1 ].description
515+ df_item ["frame_count" ] = num_samples
516+ df_item ["median" ] = results [- 1 ].median
517+ df_item ["iqr" ] = results [- 1 ].iqr
518+ df_item ["type" ] = f"{ kind } :seek()+next()"
519+ df_item ["fps" ] = 1.0 * num_samples / results [- 1 ].median
520+ df_item ["fps_p75" ] = 1.0 * num_samples / results [- 1 ]._p75
521+ df_item ["fps_p25" ] = 1.0 * num_samples / results [- 1 ]._p25
522+ df_data .append (df_item )
523+
524+ for num_consecutive_nexts in [100 ]:
503525 consecutive_frames_result = benchmark .Timer (
504526 stmt = "decoder.get_consecutive_frames_from_video(video_file, consecutive_frames_to_extract)" ,
505527 globals = {
506528 "video_file" : video_file_path ,
507529 "consecutive_frames_to_extract" : num_consecutive_nexts ,
508530 "decoder" : decoder ,
509531 },
510- label = f"video={ video_file_path } { metadata_string } " ,
532+ label = f"video={ video_file_path } { metadata_label } " ,
511533 sub_label = decoder_name ,
512534 description = f"{ num_consecutive_nexts } next()" ,
513535 )
@@ -531,17 +553,16 @@ def run_benchmarks(
531553
532554 first_video_file_path = video_files_paths [0 ]
533555 if benchmark_video_creation :
534- simple_decoder = VideoDecoder (first_video_file_path )
535- metadata = simple_decoder .metadata
536- metadata_string = f"{ metadata .codec } { metadata .width } x{ metadata .height } , { metadata .duration_seconds } s { metadata .average_fps } fps"
556+ metadata = get_metadata (video_file_path )
557+ metadata_label = f"{ metadata .codec } { metadata .width } x{ metadata .height } , { metadata .duration_seconds } s { metadata .average_fps } fps"
537558 creation_result = benchmark .Timer (
538559 stmt = "create_torchcodec_decoder_from_file(video_file)" ,
539560 globals = {
540561 "video_file" : first_video_file_path ,
541562 "create_torchcodec_decoder_from_file" : create_torchcodec_decoder_from_file ,
542563 },
543- label = f"video={ first_video_file_path } { metadata_string } " ,
544- sub_label = "TorchcodecNonCompiled " ,
564+ label = f"video={ first_video_file_path } { metadata_label } " ,
565+ sub_label = "TorchCodecCore: " ,
545566 description = "create()+next()" ,
546567 )
547568 results .append (
0 commit comments