1212
1313import torch
1414import torch .utils .benchmark as benchmark
15- from torchcodec .decoders import VideoDecoder
15+ from torchcodec .decoders import VideoDecoder , VideoStreamMetadata
1616
1717from torchcodec .decoders ._core import (
1818 _add_video_stream ,
@@ -78,7 +78,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
7878 return frames
7979
8080
81- class TVNewAPIDecoderWithBackend (AbstractDecoder ):
81+ class TorchVision (AbstractDecoder ):
8282 def __init__ (self , backend ):
8383 self ._backend = backend
8484 self ._print_each_iteration_time = False
@@ -125,7 +125,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
125125 return frames
126126
127127
128- class TorchcodecNonCompiledWithOptions (AbstractDecoder ):
128+ class TorchCodecCore (AbstractDecoder ):
129129 def __init__ (self , num_threads = None , color_conversion_library = None , device = "cpu" ):
130130 self ._print_each_iteration_time = False
131131 self ._num_threads = int (num_threads ) if num_threads else None
@@ -186,7 +186,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
186186 return frames
187187
188188
189- class TorchCodecNonCompiledBatch (AbstractDecoder ):
189+ class TorchCodecCoreBatch (AbstractDecoder ):
190190 def __init__ (self , num_threads = None , color_conversion_library = None ):
191191 self ._print_each_iteration_time = False
192192 self ._num_threads = int (num_threads ) if num_threads else None
@@ -227,6 +227,24 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
227227 )
228228 return frames
229229
class TorchCodecPublic(AbstractDecoder):
    """Benchmark adapter that decodes through the public ``VideoDecoder`` class.

    Every method constructs its own ``VideoDecoder``, so the cost of opening
    the video is included in each call.
    """

    def __init__(self, num_ffmpeg_threads=None):
        """Store the thread count that is forwarded to ``VideoDecoder``.

        Args:
            num_ffmpeg_threads: Value convertible with ``int()``. Any falsy
                value (``None``, ``0``, ``""``) is stored as ``None``.
        """
        self._num_ffmpeg_threads = (
            int(num_ffmpeg_threads) if num_ffmpeg_threads else None
        )

    def get_frames_from_video(self, video_file, pts_list):
        """Return ``VideoDecoder.get_frames_played_at(pts_list)`` for ``video_file``.

        Args:
            video_file: Video source passed straight to ``VideoDecoder``.
            pts_list: Timestamps handed to ``get_frames_played_at``
                (presumably seconds -- confirm against the VideoDecoder docs).
        """
        decoder = VideoDecoder(
            video_file, num_ffmpeg_threads=self._num_ffmpeg_threads
        )
        return decoder.get_frames_played_at(pts_list)

    def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
        """Return up to ``numFramesToDecode`` frames, iterating from the start.

        Args:
            video_file: Video source passed straight to ``VideoDecoder``.
            numFramesToDecode: Maximum number of frames to collect.

        Returns:
            A list of the frames yielded by iterating the decoder; shorter
            than requested if the decoder is exhausted first, and empty when
            ``numFramesToDecode`` is zero or negative.
        """
        # The decoder is still constructed for a non-positive request so the
        # per-call setup work stays the same regardless of the frame count.
        decoder = VideoDecoder(
            video_file, num_ffmpeg_threads=self._num_ffmpeg_threads
        )
        # An equality-only stop condition never fires for a request of 0 (or
        # less) and would decode the entire video; bail out explicitly.
        if numFramesToDecode <= 0:
            return []

        frames = []
        for frame in decoder:
            frames.append(frame)
            # Stop right after the last wanted frame so that no additional
            # frame is pulled from the decoder.
            if len(frames) >= numFramesToDecode:
                break
        return frames
230248
231249@torch .compile (fullgraph = True , backend = "eager" )
232250def compiled_seek_and_next (decoder , pts ):
@@ -239,7 +257,7 @@ def compiled_next(decoder):
239257 return get_next_frame (decoder )
240258
241259
242- class TorchcodecCompiled (AbstractDecoder ):
260+ class TorchCodecCoreCompiled (AbstractDecoder ):
243261 def __init__ (self ):
244262 pass
245263
@@ -450,70 +468,83 @@ def plot_data(df_data, plot_path):
450468 plot_path ,
451469 )
452470
def get_metadata(video_file_path: str) -> VideoStreamMetadata:
    """Open ``video_file_path`` with ``VideoDecoder`` and return its ``metadata``."""
    decoder = VideoDecoder(video_file_path)
    return decoder.metadata
453473
454474def run_benchmarks (
455- decoder_dict ,
456- video_files_paths ,
457- num_uniform_samples ,
458- min_runtime_seconds ,
459- benchmark_video_creation ,
475+ decoder_dict : dict [str , AbstractDecoder ],
476+ video_files_paths : list [str ],
477+ num_samples : int ,
478+ num_sequential_frames_from_start : list [int ],
479+ min_runtime_seconds : float ,
480+ benchmark_video_creation : bool ,
460481) -> list [dict [str , str | float | int ]]:
482+ # Ensure that we have the same seed across benchmark runs.
483+ torch .manual_seed (0 )
484+
485+ print (f"video_files_paths={ video_files_paths } " )
486+
461487 results = []
462488 df_data = []
463- print (f"video_files_paths={ video_files_paths } " )
464489 verbose = False
465- for decoder_name , decoder in decoder_dict .items ():
466- for video_file_path in video_files_paths :
490+ for video_file_path in video_files_paths :
491+ metadata = get_metadata (video_file_path )
492+ metadata_label = f"{ metadata .codec } { metadata .width } x{ metadata .height } , { metadata .duration_seconds } s { metadata .average_fps } fps"
493+
494+ duration = metadata .duration_seconds
495+ uniform_pts_list = [
496+ i * duration / num_samples for i in range (num_samples )
497+ ]
498+
499+ # Note that we are using the same random pts values for all decoders for the same
500+ # video. However, because we use the duration as part of this calculation, we
501+ # are using different random pts values across videos.
502+ random_pts_list = (torch .rand (num_samples ) * duration ).tolist ()
503+
504+ for decoder_name , decoder in decoder_dict .items ():
467505 print (f"video={ video_file_path } , decoder={ decoder_name } " )
468- # We only use the VideoDecoder to get the metadata and get
469- # the list of PTS values to seek to.
470- simple_decoder = VideoDecoder (video_file_path )
471- duration = simple_decoder .metadata .duration_seconds
472- pts_list = [
473- i * duration / num_uniform_samples for i in range (num_uniform_samples )
474- ]
475- metadata = simple_decoder .metadata
476- metadata_string = f"{ metadata .codec } { metadata .width } x{ metadata .height } , { metadata .duration_seconds } s { metadata .average_fps } fps"
477- if verbose :
478- print (
479- f"video={ video_file_path } , decoder={ decoder_name } , pts_list={ pts_list } "
506+
507+ for kind , pts_list in [("uniform" , uniform_pts_list ), ("random" , random_pts_list )]:
508+ if verbose :
509+ print (
510+ f"video={ video_file_path } , decoder={ decoder_name } , pts_list={ pts_list } "
511+ )
512+ seeked_result = benchmark .Timer (
513+ stmt = "decoder.get_frames_from_video(video_file, pts_list)" ,
514+ globals = {
515+ "video_file" : video_file_path ,
516+ "pts_list" : pts_list ,
517+ "decoder" : decoder ,
518+ },
519+ label = f"video={ video_file_path } { metadata_label } " ,
520+ sub_label = decoder_name ,
521+ description = f"{ kind } { num_samples } seek()+next()" ,
480522 )
481- seeked_result = benchmark .Timer (
482- stmt = "decoder.get_frames_from_video(video_file, pts_list)" ,
483- globals = {
484- "video_file" : video_file_path ,
485- "pts_list" : pts_list ,
486- "decoder" : decoder ,
487- },
488- label = f"video={ video_file_path } { metadata_string } " ,
489- sub_label = decoder_name ,
490- description = f"{ num_uniform_samples } seek()+next()" ,
491- )
492- results .append (
493- seeked_result .blocked_autorange (min_run_time = min_runtime_seconds )
494- )
495- df_item = {}
496- df_item ["decoder" ] = decoder_name
497- df_item ["video" ] = video_file_path
498- df_item ["description" ] = results [- 1 ].description
499- df_item ["frame_count" ] = num_uniform_samples
500- df_item ["median" ] = results [- 1 ].median
501- df_item ["iqr" ] = results [- 1 ].iqr
502- df_item ["type" ] = "seek()+next()"
503- df_item ["fps" ] = 1.0 * num_uniform_samples / results [- 1 ].median
504- df_item ["fps_p75" ] = 1.0 * num_uniform_samples / results [- 1 ]._p75
505- df_item ["fps_p25" ] = 1.0 * num_uniform_samples / results [- 1 ]._p25
506- df_data .append (df_item )
507-
508- for num_consecutive_nexts in [1 , 10 ]:
523+ results .append (
524+ seeked_result .blocked_autorange (min_run_time = min_runtime_seconds )
525+ )
526+ df_item = {}
527+ df_item ["decoder" ] = decoder_name
528+ df_item ["video" ] = video_file_path
529+ df_item ["description" ] = results [- 1 ].description
530+ df_item ["frame_count" ] = num_samples
531+ df_item ["median" ] = results [- 1 ].median
532+ df_item ["iqr" ] = results [- 1 ].iqr
533+ df_item ["type" ] = f"{ kind } :seek()+next()"
534+ df_item ["fps" ] = 1.0 * num_samples / results [- 1 ].median
535+ df_item ["fps_p75" ] = 1.0 * num_samples / results [- 1 ]._p75
536+ df_item ["fps_p25" ] = 1.0 * num_samples / results [- 1 ]._p25
537+ df_data .append (df_item )
538+
539+ for num_consecutive_nexts in num_sequential_frames_from_start :
509540 consecutive_frames_result = benchmark .Timer (
510541 stmt = "decoder.get_consecutive_frames_from_video(video_file, consecutive_frames_to_extract)" ,
511542 globals = {
512543 "video_file" : video_file_path ,
513544 "consecutive_frames_to_extract" : num_consecutive_nexts ,
514545 "decoder" : decoder ,
515546 },
516- label = f"video={ video_file_path } { metadata_string } " ,
547+ label = f"video={ video_file_path } { metadata_label } " ,
517548 sub_label = decoder_name ,
518549 description = f"{ num_consecutive_nexts } next()" ,
519550 )
@@ -537,17 +568,16 @@ def run_benchmarks(
537568
538569 first_video_file_path = video_files_paths [0 ]
539570 if benchmark_video_creation :
540- simple_decoder = VideoDecoder (first_video_file_path )
541- metadata = simple_decoder .metadata
542- metadata_string = f"{ metadata .codec } { metadata .width } x{ metadata .height } , { metadata .duration_seconds } s { metadata .average_fps } fps"
# Read the metadata of the file that is actually benchmarked and labelled
# here (`first_video_file_path`); the leftover loop variable
# `video_file_path` refers to the last video iterated instead.
metadata = get_metadata(first_video_file_path)
572+ metadata_label = f"{ metadata .codec } { metadata .width } x{ metadata .height } , { metadata .duration_seconds } s { metadata .average_fps } fps"
543573 creation_result = benchmark .Timer (
544574 stmt = "create_torchcodec_decoder_from_file(video_file)" ,
545575 globals = {
546576 "video_file" : first_video_file_path ,
547577 "create_torchcodec_decoder_from_file" : create_torchcodec_decoder_from_file ,
548578 },
549- label = f"video={ first_video_file_path } { metadata_string } " ,
550- sub_label = "TorchcodecNonCompiled " ,
579+ label = f"video={ first_video_file_path } { metadata_label } " ,
580+ sub_label = "TorchCodecCore: " ,
551581 description = "create()+next()" ,
552582 )
553583 results .append (
0 commit comments