@@ -12,7 +12,7 @@
 
 import torch
 import torch.utils.benchmark as benchmark
-from torchcodec.decoders import VideoDecoder
+from torchcodec.decoders import VideoDecoder, VideoStreamMetadata
 
 from torchcodec.decoders._core import (
     _add_video_stream,
@@ -78,7 +78,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
         return frames
 
 
-class TVNewAPIDecoderWithBackend(AbstractDecoder):
+class TorchVision(AbstractDecoder):
     def __init__(self, backend):
         self._backend = backend
         self._print_each_iteration_time = False
@@ -125,7 +125,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
         return frames
 
 
-class TorchcodecNonCompiledWithOptions(AbstractDecoder):
+class TorchCodecCore(AbstractDecoder):
     def __init__(self, num_threads=None, color_conversion_library=None, device="cpu"):
         self._print_each_iteration_time = False
         self._num_threads = int(num_threads) if num_threads else None
@@ -186,7 +186,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
         return frames
 
 
-class TorchCodecNonCompiledBatch(AbstractDecoder):
+class TorchCodecCoreBatch(AbstractDecoder):
     def __init__(self, num_threads=None, color_conversion_library=None):
         self._print_each_iteration_time = False
         self._num_threads = int(num_threads) if num_threads else None
@@ -227,6 +227,24 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
         )
         return frames
 
+class TorchCodecPublic(AbstractDecoder):
+    def __init__(self, num_ffmpeg_threads=None):
+        self._num_ffmpeg_threads = int(num_ffmpeg_threads) if num_ffmpeg_threads else None
+
+    def get_frames_from_video(self, video_file, pts_list):
+        decoder = VideoDecoder(video_file, num_ffmpeg_threads=self._num_ffmpeg_threads)
+        return decoder.get_frames_played_at(pts_list)
+
+    def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
+        decoder = VideoDecoder(video_file, num_ffmpeg_threads=self._num_ffmpeg_threads)
+        frames = []
+        count = 0
+        for frame in decoder:
+            frames.append(frame)
+            count += 1
+            if count == numFramesToDecode:
+                break
+        return frames
 
 @torch.compile(fullgraph=True, backend="eager")
 def compiled_seek_and_next(decoder, pts):
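The new TorchCodecPublic wrapper above drives torchcodec's public VideoDecoder API as used in this diff: the num_ffmpeg_threads constructor argument, batch lookup via get_frames_played_at, and plain iteration for sequential decoding. A rough standalone sketch of that access pattern (the file name below is a placeholder, not part of the benchmark):

from torchcodec.decoders import VideoDecoder

# Placeholder path; any local video file works here.
decoder = VideoDecoder("my_video.mp4", num_ffmpeg_threads=1)

# Batch retrieval of the frames played at the given timestamps (in seconds),
# mirroring TorchCodecPublic.get_frames_from_video.
frames = decoder.get_frames_played_at([0.0, 1.5, 3.0])

# Sequential decoding through plain iteration,
# mirroring TorchCodecPublic.get_consecutive_frames_from_video.
first_frames = []
for i, frame in enumerate(decoder):
    first_frames.append(frame)
    if i == 9:
        break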
@@ -239,7 +257,7 @@ def compiled_next(decoder):
     return get_next_frame(decoder)
 
 
-class TorchcodecCompiled(AbstractDecoder):
+class TorchCodecCoreCompiled(AbstractDecoder):
     def __init__(self):
         pass
 
@@ -414,28 +432,34 @@ def plot_data(df_data, plot_path):
                 color=[colors(i) for i in range(len(group))],
                 align="center",
                 capsize=5,
+                label=group["decoder"],
             )
 
             # Set the labels
             ax.set_xlabel("FPS")
-            ax.set_ylabel("Decoder")
-
-            # Reverse the order of the handles and labels to match the order of the bars
-            handles = [
-                plt.Rectangle((0, 0), 1, 1, color=colors(i)) for i in range(len(group))
-            ]
-            ax.legend(
-                handles[::-1],
-                group["decoder"][::-1],
-                title="Decoder",
-                loc="upper right",
-            )
+
+            # No need for y-axis label past the plot on the far left
+            if col == 0:
+                ax.set_ylabel("Decoder")
 
     # Remove any empty subplots for videos with fewer combinations
     for row in range(len(unique_videos)):
         for col in range(video_type_combinations[unique_videos[row]], max_combinations):
             fig.delaxes(axes[row, col])
 
+    # If we just call fig.legend, we'll get duplicate labels, as each label appears on
+    # each subplot. We take advantage of dicts having unique keys to de-dupe.
+    handles, labels = plt.gca().get_legend_handles_labels()
+    unique_labels = dict(zip(labels, handles))
+
+    # Reverse the order of the handles and labels to match the order of the bars
+    fig.legend(
+        handles=reversed(unique_labels.values()),
+        labels=reversed(unique_labels.keys()),
+        frameon=True,
+        loc="right",
+    )
+
     # Adjust layout to avoid overlap
     plt.tight_layout()
 
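The legend change above leans on a common matplotlib de-duplication trick: every subplot registers the same decoder labels, so building a dict from (label, handle) pairs keeps exactly one handle per label before calling fig.legend once for the whole figure. A small self-contained sketch of that trick, independent of the benchmark's DataFrame (the output file name is a placeholder):

import matplotlib.pyplot as plt

fig, axes = plt.subplots(1, 2)
for ax in axes:
    # Each subplot registers the same two labels, so they repeat across axes.
    ax.barh([0], [1.0], label="decoder_x")
    ax.barh([1], [2.0], label="decoder_y")

# Gather handles/labels from all subplots, then de-dupe by label:
# dict keys are unique, so one handle survives per label.
handles, labels = [], []
for ax in axes:
    h, l = ax.get_legend_handles_labels()
    handles.extend(h)
    labels.extend(l)
unique = dict(zip(labels, handles))

fig.legend(handles=list(unique.values()), labels=list(unique.keys()), loc="right")
fig.savefig("legend_demo.png")  # placeholder output path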
@@ -444,70 +468,83 @@ def plot_data(df_data, plot_path):
         plot_path,
     )
 
+def get_metadata(video_file_path: str) -> VideoStreamMetadata:
+    return VideoDecoder(video_file_path).metadata
 
 def run_benchmarks(
-    decoder_dict,
-    video_files_paths,
-    num_uniform_samples,
-    min_runtime_seconds,
-    benchmark_video_creation,
+    decoder_dict: dict[str, AbstractDecoder],
+    video_files_paths: list[str],
+    num_samples: int,
+    num_sequential_frames_from_start: list[int],
+    min_runtime_seconds: float,
+    benchmark_video_creation: bool,
 ) -> list[dict[str, str | float | int]]:
+    # Ensure that we have the same seed across benchmark runs.
+    torch.manual_seed(0)
+
+    print(f"video_files_paths={video_files_paths}")
+
     results = []
     df_data = []
-    print(f"video_files_paths={video_files_paths}")
     verbose = False
-    for decoder_name, decoder in decoder_dict.items():
-        for video_file_path in video_files_paths:
+    for video_file_path in video_files_paths:
+        metadata = get_metadata(video_file_path)
+        metadata_label = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps"
+
+        duration = metadata.duration_seconds
+        uniform_pts_list = [
+            i * duration / num_samples for i in range(num_samples)
+        ]
+
+        # Note that we are using the same random pts values for all decoders for the same
+        # video. However, because we use the duration as part of this calculation, we
+        # are using different random pts values across videos.
+        random_pts_list = (torch.rand(num_samples) * duration).tolist()
+
+        for decoder_name, decoder in decoder_dict.items():
             print(f"video={video_file_path}, decoder={decoder_name}")
-            # We only use the VideoDecoder to get the metadata and get
-            # the list of PTS values to seek to.
-            simple_decoder = VideoDecoder(video_file_path)
-            duration = simple_decoder.metadata.duration_seconds
-            pts_list = [
-                i * duration / num_uniform_samples for i in range(num_uniform_samples)
-            ]
-            metadata = simple_decoder.metadata
-            metadata_string = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps"
-            if verbose:
-                print(
-                    f"video={video_file_path}, decoder={decoder_name}, pts_list={pts_list}"
+
+            for kind, pts_list in [("uniform", uniform_pts_list), ("random", random_pts_list)]:
+                if verbose:
+                    print(
+                        f"video={video_file_path}, decoder={decoder_name}, pts_list={pts_list}"
+                    )
+                seeked_result = benchmark.Timer(
+                    stmt="decoder.get_frames_from_video(video_file, pts_list)",
+                    globals={
+                        "video_file": video_file_path,
+                        "pts_list": pts_list,
+                        "decoder": decoder,
+                    },
+                    label=f"video={video_file_path} {metadata_label}",
+                    sub_label=decoder_name,
+                    description=f"{kind} {num_samples} seek()+next()",
                 )
-            seeked_result = benchmark.Timer(
-                stmt="decoder.get_frames_from_video(video_file, pts_list)",
-                globals={
-                    "video_file": video_file_path,
-                    "pts_list": pts_list,
-                    "decoder": decoder,
-                },
-                label=f"video={video_file_path} {metadata_string}",
-                sub_label=decoder_name,
-                description=f"{num_uniform_samples} seek()+next()",
-            )
-            results.append(
-                seeked_result.blocked_autorange(min_run_time=min_runtime_seconds)
-            )
-            df_item = {}
-            df_item["decoder"] = decoder_name
-            df_item["video"] = video_file_path
-            df_item["description"] = results[-1].description
-            df_item["frame_count"] = num_uniform_samples
-            df_item["median"] = results[-1].median
-            df_item["iqr"] = results[-1].iqr
-            df_item["type"] = "seek()+next()"
-            df_item["fps"] = 1.0 * num_uniform_samples / results[-1].median
-            df_item["fps_p75"] = 1.0 * num_uniform_samples / results[-1]._p75
-            df_item["fps_p25"] = 1.0 * num_uniform_samples / results[-1]._p25
-            df_data.append(df_item)
-
-            for num_consecutive_nexts in [1, 10]:
+                results.append(
+                    seeked_result.blocked_autorange(min_run_time=min_runtime_seconds)
+                )
+                df_item = {}
+                df_item["decoder"] = decoder_name
+                df_item["video"] = video_file_path
+                df_item["description"] = results[-1].description
+                df_item["frame_count"] = num_samples
+                df_item["median"] = results[-1].median
+                df_item["iqr"] = results[-1].iqr
+                df_item["type"] = f"{kind}:seek()+next()"
+                df_item["fps"] = 1.0 * num_samples / results[-1].median
+                df_item["fps_p75"] = 1.0 * num_samples / results[-1]._p75
+                df_item["fps_p25"] = 1.0 * num_samples / results[-1]._p25
+                df_data.append(df_item)
+
+            for num_consecutive_nexts in num_sequential_frames_from_start:
                 consecutive_frames_result = benchmark.Timer(
                     stmt="decoder.get_consecutive_frames_from_video(video_file, consecutive_frames_to_extract)",
                     globals={
                         "video_file": video_file_path,
                         "consecutive_frames_to_extract": num_consecutive_nexts,
                         "decoder": decoder,
                     },
-                    label=f"video={video_file_path} {metadata_string}",
+                    label=f"video={video_file_path} {metadata_label}",
                     sub_label=decoder_name,
                     description=f"{num_consecutive_nexts} next()",
                 )
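The reorganized loop above computes one uniform and one random timestamp list per video; torch.manual_seed(0) at the top of run_benchmarks makes the random list reproducible across runs, and the same list is reused for every decoder on a given video. A minimal sketch of just that sampling step, assuming a 10-second clip and 5 samples:

import torch

torch.manual_seed(0)  # same seed on every run -> same "random" timestamps

duration = 10.0   # assumed clip length in seconds
num_samples = 5   # assumed sample count

# Evenly spaced timestamps: 0.0, 2.0, 4.0, 6.0, 8.0
uniform_pts_list = [i * duration / num_samples for i in range(num_samples)]

# Uniformly random timestamps in [0, duration), shared by all decoders for this video.
random_pts_list = (torch.rand(num_samples) * duration).tolist()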
@@ -531,17 +568,16 @@ def run_benchmarks(
 
     first_video_file_path = video_files_paths[0]
    if benchmark_video_creation:
-        simple_decoder = VideoDecoder(first_video_file_path)
-        metadata = simple_decoder.metadata
-        metadata_string = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps"
+        metadata = get_metadata(first_video_file_path)
+        metadata_label = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps"
         creation_result = benchmark.Timer(
             stmt="create_torchcodec_decoder_from_file(video_file)",
             globals={
                 "video_file": first_video_file_path,
                 "create_torchcodec_decoder_from_file": create_torchcodec_decoder_from_file,
             },
-            label=f"video={first_video_file_path} {metadata_string}",
-            sub_label="TorchcodecNonCompiled ",
+            label=f"video={first_video_file_path} {metadata_label}",
+            sub_label="TorchCodecCore: ",
             description="create()+next()",
         )
         results.append(