meta-pytorch
diff --git a/‎README.md‎
Lines changed: 6 additions & 0 deletions b/‎README.md‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎benchmarks/decoders/benchmark_decoders_library.py‎
Lines changed: 53 additions & 47 deletions b/‎benchmarks/decoders/benchmark_decoders_library.py‎
Lines changed: 53 additions & 47 deletions
diff --git a/‎benchmarks/decoders/benchmark_readme_chart.png‎
27.7 KB b/‎benchmarks/decoders/benchmark_readme_chart.png‎
27.7 KB
@@ -124,6 +124,12 @@ The following was generated by running [our benchmark script](./benchmarks/decod
 
 ![benchmark_results](./benchmarks/decoders/benchmark_readme_chart.png)
 
+The top row is a [Mandelbrot](https://ffmpeg.org/ffmpeg-filters.html#mandelbrot) video
+generated from FFmpeg that has a resolution of 1280x720 at 60 fps and is 120 seconds long.
+The bottom row is [promotional video from NASA](https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small.mp4)
+that has a resolution of 960x540 at 29.7 fps and is 206 seconds long. Both videos were
+encoded with libx264 and yuv420p pixel format.
+
 ## Planned future work
 
 We are actively working on the following features:
 
@@ -1,9 +1,10 @@
 import abc
 import json
-import os
 import subprocess
+import urllib.request
 from concurrent.futures import ThreadPoolExecutor, wait
 from itertools import product
+from pathlib import Path
 
 import matplotlib.pyplot as plt
 import numpy as np
@@ -123,6 +124,7 @@ def get_frames_from_video(self, video_file, pts_list):
             decoder,
             num_threads=self._num_threads,
             color_conversion_library=self._color_conversion_library,
+            device=self._device,
         )
         metadata = json.loads(get_json_metadata(decoder))
         best_video_stream = metadata["bestVideoStreamIndex"]
@@ -137,6 +139,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
             decoder,
             num_threads=self._num_threads,
             color_conversion_library=self._color_conversion_library,
+            device=self._device,
         )
 
         frames = []
@@ -176,6 +179,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
             decoder,
             num_threads=self._num_threads,
             color_conversion_library=self._color_conversion_library,
+            device=self._device,
         )
 
         frames = []
@@ -187,10 +191,11 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
 
 
 class TorchCodecCoreBatch(AbstractDecoder):
-    def __init__(self, num_threads=None, color_conversion_library=None):
+    def __init__(self, num_threads=None, color_conversion_library=None, device="cpu"):
         self._print_each_iteration_time = False
         self._num_threads = int(num_threads) if num_threads else None
         self._color_conversion_library = color_conversion_library
+        self._device = device
 
     def get_frames_from_video(self, video_file, pts_list):
         decoder = create_from_file(video_file)
@@ -199,6 +204,7 @@ def get_frames_from_video(self, video_file, pts_list):
             decoder,
             num_threads=self._num_threads,
             color_conversion_library=self._color_conversion_library,
+            device=self._device,
         )
         metadata = json.loads(get_json_metadata(decoder))
         best_video_stream = metadata["bestVideoStreamIndex"]
@@ -214,6 +220,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
             decoder,
             num_threads=self._num_threads,
             color_conversion_library=self._color_conversion_library,
+            device=self._device,
         )
         metadata = json.loads(get_json_metadata(decoder))
         best_video_stream = metadata["bestVideoStreamIndex"]
@@ -225,17 +232,22 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
 
 
 class TorchCodecPublic(AbstractDecoder):
-    def __init__(self, num_ffmpeg_threads=None):
+    def __init__(self, num_ffmpeg_threads=None, device="cpu"):
         self._num_ffmpeg_threads = (
             int(num_ffmpeg_threads) if num_ffmpeg_threads else None
         )
+        self._device = device
 
     def get_frames_from_video(self, video_file, pts_list):
-        decoder = VideoDecoder(video_file, num_ffmpeg_threads=self._num_ffmpeg_threads)
+        decoder = VideoDecoder(
+            video_file, num_ffmpeg_threads=self._num_ffmpeg_threads, device=self._device
+        )
         return decoder.get_frames_played_at(pts_list)
 
     def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
-        decoder = VideoDecoder(video_file, num_ffmpeg_threads=self._num_ffmpeg_threads)
+        decoder = VideoDecoder(
+            video_file, num_ffmpeg_threads=self._num_ffmpeg_threads, device=self._device
+        )
         frames = []
         count = 0
         for frame in decoder:
@@ -330,6 +342,7 @@ def generate_video(command):
 def generate_videos(
     resolutions,
     encodings,
+    patterns,
     fpses,
     gop_sizes,
     durations,
@@ -341,23 +354,25 @@ def generate_videos(
     video_count = 0
 
     futures = []
-    for resolution, duration, fps, gop_size, encoding, pix_fmt in product(
-        resolutions, durations, fpses, gop_sizes, encodings, pix_fmts
+    for resolution, duration, fps, gop_size, encoding, pattern, pix_fmt in product(
+        resolutions, durations, fpses, gop_sizes, encodings, patterns, pix_fmts
     ):
-        outfile = f"{output_dir}/{resolution}_{duration}s_{fps}fps_{gop_size}gop_{encoding}_{pix_fmt}.mp4"
+        outfile = f"{output_dir}/{pattern}_{resolution}_{duration}s_{fps}fps_{gop_size}gop_{encoding}_{pix_fmt}.mp4"
         command = [
             ffmpeg_cli,
             "-y",
             "-f",
             "lavfi",
             "-i",
-            f"color=c=blue:s={resolution}:d={duration}",
+            f"{pattern}=s={resolution}",
+            "-t",
+            str(duration),
             "-c:v",
             encoding,
             "-r",
-            f"{fps}",
+            str(fps),
             "-g",
-            f"{gop_size}",
+            str(gop_size),
             "-pix_fmt",
             pix_fmt,
             outfile,
@@ -372,7 +387,14 @@ def generate_videos(
     print(f"Generated {video_count} videos")
 
 
+def retrieve_videos(urls_and_dest_paths):
+    for url, path in urls_and_dest_paths:
+        urllib.request.urlretrieve(url, path)
+
+
 def plot_data(df_data, plot_path):
+    plt.rcParams["font.size"] = 18
+
     # Creating the DataFrame
     df = pd.DataFrame(df_data)
 
@@ -400,7 +422,7 @@ def plot_data(df_data, plot_path):
         nrows=len(unique_videos),
         ncols=max_combinations,
         figsize=(max_combinations * 6, len(unique_videos) * 4),
-        sharex=True,
+        sharex=False,
         sharey=True,
     )
 
@@ -419,16 +441,17 @@ def plot_data(df_data, plot_path):
             ax = axes[row, col]  # Select the appropriate axis
 
             # Set the title for the subplot
-            base_video = os.path.basename(video)
-            ax.set_title(
-                f"video={base_video}\ndecode_pattern={vcount} x {vtype}", fontsize=12
-            )
+            base_video = Path(video).name.removesuffix(".mp4")
+            ax.set_title(f"{base_video}\n{vcount} x {vtype}", fontsize=11)
 
             # Plot bars with error bars
             ax.barh(
                 group["decoder"],
-                group["fps"],
-                xerr=[group["fps"] - group["fps_p75"], group["fps_p25"] - group["fps"]],
+                group["fps_median"],
+                xerr=[
+                    group["fps_median"] - group["fps_p75"],
+                    group["fps_p25"] - group["fps_median"],
+                ],
                 color=[colors(i) for i in range(len(group))],
                 align="center",
                 capsize=5,
@@ -438,28 +461,11 @@ def plot_data(df_data, plot_path):
             # Set the labels
             ax.set_xlabel("FPS")
 
-            # No need for y-axis label past the plot on the far left
-            if col == 0:
-                ax.set_ylabel("Decoder")
-
     # Remove any empty subplots for videos with fewer combinations
     for row in range(len(unique_videos)):
         for col in range(video_type_combinations[unique_videos[row]], max_combinations):
             fig.delaxes(axes[row, col])
 
-    # If we just call fig.legend, we'll get duplicate labels, as each label appears on
-    # each subplot. We take advantage of dicts having unique keys to de-dupe.
-    handles, labels = plt.gca().get_legend_handles_labels()
-    unique_labels = dict(zip(labels, handles))
-
-    # Reverse the order of the handles and labels to match the order of the bars
-    fig.legend(
-        handles=reversed(unique_labels.values()),
-        labels=reversed(unique_labels.keys()),
-        frameon=True,
-        loc="right",
-    )
-
     # Adjust layout to avoid overlap
     plt.tight_layout()
 
@@ -475,7 +481,7 @@ def get_metadata(video_file_path: str) -> VideoStreamMetadata:
 
 def run_benchmarks(
     decoder_dict: dict[str, AbstractDecoder],
-    video_files_paths: list[str],
+    video_files_paths: list[Path],
     num_samples: int,
     num_sequential_frames_from_start: list[int],
     min_runtime_seconds: float,
@@ -515,7 +521,7 @@ def run_benchmarks(
                 seeked_result = benchmark.Timer(
                     stmt="decoder.get_frames_from_video(video_file, pts_list)",
                     globals={
-                        "video_file": video_file_path,
+                        "video_file": str(video_file_path),
                         "pts_list": pts_list,
                         "decoder": decoder,
                     },
@@ -528,22 +534,22 @@ def run_benchmarks(
                 )
                 df_item = {}
                 df_item["decoder"] = decoder_name
-                df_item["video"] = video_file_path
+                df_item["video"] = str(video_file_path)
                 df_item["description"] = results[-1].description
                 df_item["frame_count"] = num_samples
                 df_item["median"] = results[-1].median
                 df_item["iqr"] = results[-1].iqr
                 df_item["type"] = f"{kind}:seek()+next()"
-                df_item["fps"] = 1.0 * num_samples / results[-1].median
-                df_item["fps_p75"] = 1.0 * num_samples / results[-1]._p75
-                df_item["fps_p25"] = 1.0 * num_samples / results[-1]._p25
+                df_item["fps_median"] = num_samples / results[-1].median
+                df_item["fps_p75"] = num_samples / results[-1]._p75
+                df_item["fps_p25"] = num_samples / results[-1]._p25
                 df_data.append(df_item)
 
             for num_consecutive_nexts in num_sequential_frames_from_start:
                 consecutive_frames_result = benchmark.Timer(
                     stmt="decoder.get_consecutive_frames_from_video(video_file, consecutive_frames_to_extract)",
                     globals={
-                        "video_file": video_file_path,
+                        "video_file": str(video_file_path),
                         "consecutive_frames_to_extract": num_consecutive_nexts,
                         "decoder": decoder,
                     },
@@ -558,15 +564,15 @@ def run_benchmarks(
                 )
                 df_item = {}
                 df_item["decoder"] = decoder_name
-                df_item["video"] = video_file_path
+                df_item["video"] = str(video_file_path)
                 df_item["description"] = results[-1].description
                 df_item["frame_count"] = num_consecutive_nexts
                 df_item["median"] = results[-1].median
                 df_item["iqr"] = results[-1].iqr
                 df_item["type"] = "next()"
-                df_item["fps"] = 1.0 * num_consecutive_nexts / results[-1].median
-                df_item["fps_p75"] = 1.0 * num_consecutive_nexts / results[-1]._p75
-                df_item["fps_p25"] = 1.0 * num_consecutive_nexts / results[-1]._p25
+                df_item["fps_median"] = num_consecutive_nexts / results[-1].median
+                df_item["fps_p75"] = num_consecutive_nexts / results[-1]._p75
+                df_item["fps_p25"] = num_consecutive_nexts / results[-1]._p25
                 df_data.append(df_item)
 
         first_video_file_path = video_files_paths[0]
@@ -576,7 +582,7 @@ def run_benchmarks(
             creation_result = benchmark.Timer(
                 stmt="create_torchcodec_decoder_from_file(video_file)",
                 globals={
-                    "video_file": first_video_file_path,
+                    "video_file": str(first_video_file_path),
                     "create_torchcodec_decoder_from_file": create_torchcodec_decoder_from_file,
                 },
                 label=f"video={first_video_file_path} {metadata_label}",