meta-pytorch
diff --git a/‎benchmarks/decoders/benchmark_decoders.py‎
Lines changed: 53 additions & 40 deletions b/‎benchmarks/decoders/benchmark_decoders.py‎
Lines changed: 53 additions & 40 deletions
diff --git a/‎benchmarks/decoders/benchmark_decoders_library.py‎
Lines changed: 62 additions & 66 deletions b/‎benchmarks/decoders/benchmark_decoders_library.py‎
Lines changed: 62 additions & 66 deletions
diff --git a/‎benchmarks/decoders/benchmark_readme_chart.png‎
1.8 KB b/‎benchmarks/decoders/benchmark_readme_chart.png‎
1.8 KB
@@ -7,20 +7,42 @@
 import argparse
 import importlib.resources
 import os
+import typing
 from pathlib import Path
+from dataclasses import dataclass, field
 
 from benchmark_decoders_library import (
-    DecordNonBatchDecoderAccurateSeek,
+    AbstractDecoder,
+    DecordAccurate,
+    DecordAccurateBatch,
     plot_data,
     run_benchmarks,
     TorchAudioDecoder,
     TorchCodecCore,
     TorchCodecCoreBatch,
+    TorchCodecCoreNonBatch,
     TorchCodecCoreCompiled,
     TorchCodecPublic,
     TorchVision,
 )
 
+@dataclass
+class DecoderKind:
+    """Registry entry describing one decoder that can be benchmarked."""
+
+    # Label under which the decoder is stored in the dict of decoders to run.
+    display_name: str
+    # AbstractDecoder subclass that gets instantiated for the benchmark.
+    kind: typing.Type[AbstractDecoder]
+    # Default keyword arguments for constructing ``kind``.
+    default_options: dict = field(default_factory=dict)
+
+# Maps every name accepted by --decoders to the decoder it selects. The keys
+# are joined, in insertion order, into the --decoders help text.
+# NOTE(review): option strings are appended directly to display_name, which is
+# why "TorchCodecCore:" carries a trailing ':' -- confirm the other labels are
+# meant to be used without a separator.
+decoder_registry = {
+    "decord": DecoderKind(
+        display_name="DecordAccurate",
+        kind=DecordAccurate,
+    ),
+    "decord_batch": DecoderKind(
+        display_name="DecordAccurateBatch",
+        kind=DecordAccurateBatch,
+    ),
+    "torchcodec_core": DecoderKind(
+        display_name="TorchCodecCore:",
+        kind=TorchCodecCore,
+    ),
+    "torchcodec_core_batch": DecoderKind(
+        display_name="TorchCodecCoreBatch",
+        kind=TorchCodecCoreBatch,
+    ),
+    "torchcodec_core_nonbatch": DecoderKind(
+        display_name="TorchCodecCoreNonBatch",
+        kind=TorchCodecCoreNonBatch,
+    ),
+    "torchcodec_core_compiled": DecoderKind(
+        display_name="TorchCodecCoreCompiled",
+        kind=TorchCodecCoreCompiled,
+    ),
+    "torchcodec_public": DecoderKind(
+        display_name="TorchCodecPublic",
+        kind=TorchCodecPublic,
+    ),
+    "torchvision": DecoderKind(
+        display_name="TorchVision[backend=video_reader]",
+        kind=TorchVision,
+        # TorchVision's "pyav" backend is not compared because it doesn't
+        # support accurate seeks.
+        default_options={"backend": "video_reader"},
+    ),
+    "torchaudio": DecoderKind(
+        display_name="TorchAudio",
+        kind=TorchAudioDecoder,
+    ),
+}
 
 def in_fbcode() -> bool:
+    """Return True when ``FB_PAR_RUNTIME_FILES`` is set in the environment.
+
+    NOTE(review): presumably that variable marks a run from inside fbcode,
+    as the function name suggests -- confirm.
+    """
     return "FB_PAR_RUNTIME_FILES" in os.environ
@@ -67,11 +89,18 @@ def main() -> None:
         "--decoders",
         help=(
             "Comma-separated list of decoders to benchmark. "
-            "Choices are torchcodec, torchaudio, torchvision, decord, tcoptions:num_threads=1+color_conversion_library=filtergraph, torchcodec_compiled"
-            "For torchcodec, you can specify options with tcoptions:<plus-separated-options>. "
+            "Choices are: " + ", ".join(decoder_registry.keys()) + ". " +
+            "To specify options, append a ':' and then key=value pairs separated by a '+'. "
+            "For example, torchcodec_core:num_threads=1+color_conversion_library=filtergraph."
         ),
         type=str,
-        default="decord,tcoptions:,torchvision,torchaudio,torchcodec_compiled,torchcodec_public,tcoptions:num_threads=1,tcbatchoptions:",
+        default=(
+            "decord,decord_batch," +
+            "torchvision," +
+            "torchaudio," +
+            "torchcodec_core,torchcodec_core:num_threads=1,torchcodec_core_batch,torchcodec_core_nonbatch," +
+            "torchcodec_public"
+        ),
     )
     parser.add_argument(
         "--bm_video_dir",
@@ -87,51 +116,35 @@ def main() -> None:
     )
 
     args = parser.parse_args()
-    decoders = set(args.decoders.split(","))
+    specified_decoders = set(args.decoders.split(","))
 
     # These are the PTS values we want to extract from the small video.
     num_uniform_samples = 10
 
-    decoder_dict = {}
-    for decoder in decoders:
-        if decoder == "decord":
-            decoder_dict["DecordNonBatchDecoderAccurateSeek"] = (
-                DecordNonBatchDecoderAccurateSeek()
-            )
-        elif decoder == "torchcodec":
-            decoder_dict["TorchCodecCore:"] = TorchCodecCore()
-        elif decoder == "torchcodec_compiled":
-            decoder_dict["TorchCodecCoreCompiled"] = TorchCodecCoreCompiled()
-        elif decoder == "torchcodec_public":
-            decoder_dict["TorchCodecPublic"] = TorchCodecPublic()
-        elif decoder == "torchvision":
-            decoder_dict["TorchVision[backend=video_reader]"] = (
-                # We don't compare TorchVision's "pyav" backend because it doesn't support
-                # accurate seeks.
-                TorchVision("video_reader")
-            )
-        elif decoder == "torchaudio":
-            decoder_dict["TorchAudioDecoder"] = TorchAudioDecoder()
-        elif decoder.startswith("tcbatchoptions:"):
-            options = decoder[len("tcbatchoptions:") :]
-            kwargs_dict = {}
-            for item in options.split("+"):
-                if item.strip() == "":
-                    continue
-                k, v = item.split("=")
-                kwargs_dict[k] = v
-            decoder_dict["TorchCodecCoreBatch" + options] = TorchCodecCoreBatch(
-                **kwargs_dict
-            )
-        elif decoder.startswith("tcoptions:"):
-            options = decoder[len("tcoptions:") :]
+    # Resolve every requested spec ("name" or "name:key=value+key=value") into
+    # a constructed decoder, keyed by its display name plus any option string.
+    decoders_to_run = {}
+    for decoder in specified_decoders:
+        if ":" in decoder:
+            decoder_name, _, options = decoder.partition(":")
+            # Raise rather than assert: asserts are stripped under ``python -O``,
+            # and an unknown name without options already raises ValueError.
+            if decoder_name not in decoder_registry:
+                raise ValueError(f"Unknown decoder: {decoder_name}")
+
             kwargs_dict = {}
             for item in options.split("+"):
                 if item.strip() == "":
                     continue
                 k, v = item.split("=")
                 kwargs_dict[k] = v
-            decoder_dict["TorchCodecCore:" + options] = TorchCodecCore(**kwargs_dict)
+
+            entry = decoder_registry[decoder_name]
+            # Explicit options are layered over the registry defaults so that a
+            # decoder whose constructor needs a default (TorchVision's backend)
+            # can still be built when only other options are supplied.
+            init_kwargs = {**entry.default_options, **kwargs_dict}
+            decoders_to_run[entry.display_name + options] = entry.kind(**init_kwargs)
+        elif decoder in decoder_registry:
+            entry = decoder_registry[decoder]
+            decoders_to_run[entry.display_name] = entry.kind(**entry.default_options)
+        else:
+            raise ValueError(f"Unknown decoder: {decoder}")
+
     video_paths = args.bm_video_paths.split(",")
     if args.bm_video_dir:
         video_paths = []
@@ -140,7 +153,7 @@ def main() -> None:
                 video_paths.append(entry.path)
 
     df_data = run_benchmarks(
-        decoder_dict,
+        decoders_to_run,
         video_paths,
         num_uniform_samples,
         num_sequential_frames_from_start=[1, 10, 100],
 
@@ -18,6 +18,7 @@
     _add_video_stream,
     create_from_file,
     get_frames_at_indices,
+    get_frames_by_pts,
     get_json_metadata,
     get_next_frame,
     scan_all_streams_to_update_metadata,
@@ -37,47 +38,51 @@ def get_frames_from_video(self, video_file, pts_list):
         pass
 
 
-class DecordNonBatchDecoderAccurateSeek(AbstractDecoder):
+class DecordAccurate(AbstractDecoder):
+    """Decord-based decoder that fetches frames one at a time."""
+
     def __init__(self):
         import decord  # noqa: F401
 
         self.decord = decord
-
-        self._print_each_iteration_time = False
+        # Select decord's "torch" bridge once, at construction time.
+        self.decord.bridge.set_bridge("torch")
 
     def get_frames_from_video(self, video_file, pts_list):
+        """Return one frame per entry of ``pts_list``.
+
+        Each timestamp is converted to a frame index as
+        ``int(pts * average_fps)``; the reader then does an accurate seek to
+        that index and reads the next frame.
+        """
-        self.decord.bridge.set_bridge("torch")
         decord_vr = self.decord.VideoReader(video_file, ctx=self.decord.cpu())
         frames = []
-        times = []
         fps = decord_vr.get_avg_fps()
         for pts in pts_list:
-            start = timeit.default_timer()
             decord_vr.seek_accurate(int(pts * fps))
             frame = decord_vr.next()
-            end = timeit.default_timer()
-            times.append(round(end - start, 3))
             frames.append(frame)
-        if self._print_each_iteration_time:
-            print("decord times=", times, sum(times))
         return frames
 
     def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
+        """Return ``numFramesToDecode`` frames read with successive ``next()`` calls on a newly opened reader."""
-        self.decord.bridge.set_bridge("torch")
         decord_vr = self.decord.VideoReader(video_file, ctx=self.decord.cpu())
         frames = []
-        times = []
         for _ in range(numFramesToDecode):
-            start = timeit.default_timer()
             frame = decord_vr.next()
-            end = timeit.default_timer()
-            times.append(round(end - start, 3))
             frames.append(frame)
-        if self._print_each_iteration_time:
-            print("decord times=", times, sum(times))
         return frames
 
 
+class DecordAccurateBatch(AbstractDecoder):
+    """Decord-based decoder that retrieves all requested frames with one ``get_batch`` call."""
+
+    def __init__(self):
+        # decord is imported at construction time, not at module import time.
+        import decord  # noqa: F401
+
+        decord.bridge.set_bridge("torch")
+        self.decord = decord
+
+    def _open_reader(self, video_file):
+        """Open ``video_file`` with a decord ``VideoReader`` using ``decord.cpu()`` as context."""
+        cpu_ctx = self.decord.cpu()
+        return self.decord.VideoReader(video_file, ctx=cpu_ctx)
+
+    def get_frames_from_video(self, video_file, pts_list):
+        """Return the batch of frames for the timestamps in ``pts_list``.
+
+        Each timestamp is mapped to a frame index by multiplying it by the
+        reader's average FPS and truncating with ``int``.
+        """
+        reader = self._open_reader(video_file)
+        fps = reader.get_avg_fps()
+        frame_indices = [int(timestamp * fps) for timestamp in pts_list]
+        return reader.get_batch(frame_indices)
+
+    def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
+        """Return frames ``0 .. numFramesToDecode - 1`` as one batch."""
+        reader = self._open_reader(video_file)
+        return reader.get_batch([*range(numFramesToDecode)])
+
+
 class TorchVision(AbstractDecoder):
     def __init__(self, backend):
         self._backend = backend
@@ -87,47 +92,63 @@ def __init__(self, backend):
         self.torchvision = torchvision
 
     def get_frames_from_video(self, video_file, pts_list):
+        """Seek to each timestamp in ``pts_list`` and return the frame read after each seek.
+
+        The configured backend is applied before the reader is created.
+        """
-        start = timeit.default_timer()
         self.torchvision.set_video_backend(self._backend)
         reader = self.torchvision.io.VideoReader(video_file, "video")
-        create_done = timeit.default_timer()
         frames = []
         for pts in pts_list:
             reader.seek(pts)
             frame = next(reader)
+            # NOTE(review): permute(1, 2, 0) presumably turns CHW into HWC -- confirm.
             frames.append(frame["data"].permute(1, 2, 0))
-        frames_done = timeit.default_timer()
-        if self._print_each_iteration_time:
-            create_duration = 1000 * round(create_done - start, 3)
-            frames_duration = 1000 * round(frames_done - create_done, 3)
-            total_duration = 1000 * round(frames_done - start, 3)
-            print(f"TV: {create_duration=} {frames_duration=} {total_duration=}")
         return frames
 
     def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
+        """Return ``numFramesToDecode`` frames read with successive ``next()`` calls on a newly created reader."""
-        start = timeit.default_timer()
         self.torchvision.set_video_backend(self._backend)
         reader = self.torchvision.io.VideoReader(video_file, "video")
-        create_done = timeit.default_timer()
         frames = []
         for _ in range(numFramesToDecode):
             frame = next(reader)
+            # NOTE(review): permute(1, 2, 0) presumably turns CHW into HWC -- confirm.
             frames.append(frame["data"].permute(1, 2, 0))
-        frames_done = timeit.default_timer()
-
-        if self._print_each_iteration_time:
-            create_duration = 1000 * round(create_done - start, 3)
-            frames_duration = 1000 * round(frames_done - create_done, 3)
-            total_duration = 1000 * round(frames_done - start, 3)
-            print(
-                f"TV: consecutive: {create_duration=} {frames_duration=} {total_duration=} {frames[0].shape=}"
-            )
         return frames
 
 
 class TorchCodecCore(AbstractDecoder):
+    """TorchCodec core-API decoder that fetches all requested timestamps with one ``get_frames_by_pts`` call."""
+
     def __init__(self, num_threads=None, color_conversion_library=None, device="cpu"):
-        self._print_each_iteration_time = False
+        # Falsy values (None, "", 0) become None; anything else is coerced
+        # with int(), so a string such as "1" is accepted.
+        self._num_threads = int(num_threads) if num_threads else None
+        self._color_conversion_library = color_conversion_library
+        # NOTE(review): stored but not passed to _add_video_stream by either
+        # method of this class -- confirm whether that is intended.
+        self._device = device
+
+    def get_frames_from_video(self, video_file, pts_list):
+        """Return the frames for the timestamps in ``pts_list`` from the stream reported as ``bestVideoStreamIndex``."""
+        decoder = create_from_file(video_file)
+        scan_all_streams_to_update_metadata(decoder)
+        _add_video_stream(
+            decoder,
+            num_threads=self._num_threads,
+            color_conversion_library=self._color_conversion_library,
+        )
+        metadata = json.loads(get_json_metadata(decoder))
+        best_video_stream = metadata["bestVideoStreamIndex"]
+        # Only the first element of the returned sequence is kept.
+        frames, *_ = get_frames_by_pts(
+            decoder, stream_index=best_video_stream, timestamps=pts_list
+        )
+        return frames
+
+    def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
+        """Return a list with the results of ``numFramesToDecode`` successive ``get_next_frame`` calls."""
+        decoder = create_from_file(video_file)
+        _add_video_stream(
+            decoder,
+            num_threads=self._num_threads,
+            color_conversion_library=self._color_conversion_library,
+        )
+
+        frames = []
+        for _ in range(numFramesToDecode):
+            frame = get_next_frame(decoder)
+            frames.append(frame)
+
+        return frames
+
+class TorchCodecCoreNonBatch(AbstractDecoder):
+    def __init__(self, num_threads=None, color_conversion_library=None, device="cpu"):
         self._num_threads = int(num_threads) if num_threads else None
         self._color_conversion_library = color_conversion_library
         self._device = device
@@ -140,49 +161,28 @@ def get_frames_from_video(self, video_file, pts_list):
             color_conversion_library=self._color_conversion_library,
             device=self._device,
         )
+
         frames = []
-        times = []
         for pts in pts_list:
-            start = timeit.default_timer()
             seek_to_pts(decoder, pts)
             frame = get_next_frame(decoder)
-            end = timeit.default_timer()
-            times.append(round(end - start, 3))
             frames.append(frame)
 
-        if self._print_each_iteration_time:
-            print("torchcodec times=", times, sum(times))
         return frames
 
     def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
+        """Return a list with the results of ``numFramesToDecode`` successive ``get_next_frame`` calls."""
-        create_time = timeit.default_timer()
         decoder = create_from_file(video_file)
-        add_stream_time = timeit.default_timer()
         _add_video_stream(
             decoder,
             num_threads=self._num_threads,
             color_conversion_library=self._color_conversion_library,
         )
+
         frames = []
-        times = []
-        frames_time = timeit.default_timer()
         for _ in range(numFramesToDecode):
-            start = timeit.default_timer()
             frame = get_next_frame(decoder)
-            end = timeit.default_timer()
-            times.append(round(end - start, 3))
             frames.append(frame)
 
-        if self._print_each_iteration_time:
-            done_time = timeit.default_timer()
-            create_duration = 1000 * round(add_stream_time - create_time, 3)
-            add_stream_duration = 1000 * round(frames_time - add_stream_time, 3)
-            frames_duration = 1000 * round(done_time - frames_time, 3)
-            total_duration = 1000 * round(done_time - create_time, 3)
-            print(
-                f"{numFramesToDecode=} {create_duration=} {add_stream_duration=} {frames_duration=} {total_duration=} {frames[0][0].shape=}"
-            )
-            print("torchcodec times=", times, sum(times))
         return frames
 
 
@@ -201,12 +201,9 @@ def get_frames_from_video(self, video_file, pts_list):
             color_conversion_library=self._color_conversion_library,
         )
         metadata = json.loads(get_json_metadata(decoder))
-        average_fps = metadata["averageFps"]
         best_video_stream = metadata["bestVideoStreamIndex"]
-        indices_list = [int(pts * average_fps) for pts in pts_list]
-        frames = []
-        frames, *_ = get_frames_at_indices(
-            decoder, stream_index=best_video_stream, frame_indices=indices_list
+        frames, *_ = get_frames_by_pts(
+            decoder, stream_index=best_video_stream, timestamps=pts_list
         )
         return frames
 
@@ -220,7 +217,6 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
         )
         metadata = json.loads(get_json_metadata(decoder))
         best_video_stream = metadata["bestVideoStreamIndex"]
-        frames = []
         indices_list = list(range(numFramesToDecode))
         frames, *_ = get_frames_at_indices(
             decoder, stream_index=best_video_stream, frame_indices=indices_list