meta-pytorch
diff --git a/‎.github/workflows/build_ffmpeg.yaml‎
Lines changed: 4 additions & 2 deletions b/‎.github/workflows/build_ffmpeg.yaml‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎.github/workflows/linux_cuda_wheel.yaml‎
Lines changed: 1 addition & 2 deletions b/‎.github/workflows/linux_cuda_wheel.yaml‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎.github/workflows/reference_resources.yaml‎
Lines changed: 52 additions & 0 deletions b/‎.github/workflows/reference_resources.yaml‎
Lines changed: 52 additions & 0 deletions
diff --git a/‎benchmarks/decoders/benchmark_decoders.py‎
Lines changed: 23 additions & 2 deletions b/‎benchmarks/decoders/benchmark_decoders.py‎
Lines changed: 23 additions & 2 deletions
diff --git a/‎benchmarks/decoders/benchmark_decoders_library.py‎
Lines changed: 132 additions & 6 deletions b/‎benchmarks/decoders/benchmark_decoders_library.py‎
Lines changed: 132 additions & 6 deletions
diff --git a/‎src/torchcodec/_frame.py‎
Lines changed: 10 additions & 22 deletions b/‎src/torchcodec/_frame.py‎
Lines changed: 10 additions & 22 deletions
@@ -11,6 +11,9 @@ name: Build non-GPL FFmpeg from source
 
 on:
   workflow_dispatch:
+  pull_request:
+    paths:
+      - packaging/build_ffmpeg.sh
   schedule:
     - cron: '0 0 * * 0'  # on sunday
 
@@ -46,13 +49,12 @@ jobs:
       fail-fast: false
       matrix:
         ffmpeg-version: ["4.4.4", "5.1.4", "6.1.1", "7.0.1"]
-        runner: ["macos-m1-stable"]
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     with:
       job-name: Build
       upload-artifact: ffmpeg-lgpl
       repository: pytorch/torchcodec
-      runner: "${{ matrix.runner }}"
+      runner: macos-14-xlarge
       script: |
         export FFMPEG_VERSION="${{ matrix.ffmpeg-version }}"
         export FFMPEG_ROOT="${PWD}/ffmpeg"
 
@@ -135,8 +135,7 @@ jobs:
           ${CONDA_RUN} python test/decoders/manual_smoke_test.py
       - name: Run Python tests
         run: |
-          # We skip test_get_ffmpeg_version because it may not have a micro version.
-          ${CONDA_RUN} FAIL_WITHOUT_CUDA=1 pytest test -k "not test_get_ffmpeg_version" -vvv
+          ${CONDA_RUN} FAIL_WITHOUT_CUDA=1 pytest test -vvv
       - name: Run Python benchmark
         run: |
           ${CONDA_RUN} time python benchmarks/decoders/gpu_benchmark.py --devices=cuda:0,cpu --resize_devices=none
@@ -0,0 +1,52 @@
+name: Reference resource generation tests
+
+on:
+  workflow_dispatch:
+  pull_request:
+    paths:
+      - test/generate_reference_resources.sh
+  schedule:
+    - cron: '0 0 * * 0'  # on sunday
+
+defaults:
+  run:
+    shell: bash -l -eo pipefail {0}
+
+jobs:
+  test-reference-resource-generation:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ['3.9']
+        ffmpeg-version-for-tests: ['4.4.2', '5.1.2', '6.1.1', '7.0.1']
+    steps:
+      - name: Setup conda env
+        uses: conda-incubator/setup-miniconda@v2
+        with:
+          auto-update-conda: true
+          miniconda-version: "latest"
+          activate-environment: test
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install ffmpeg
+        run: |
+          conda install "ffmpeg=${{ matrix.ffmpeg-version-for-tests }}" -c conda-forge
+          ffmpeg -version
+
+      - name: Update pip
+        run: python -m pip install --upgrade pip
+
+      - name: Instal generation dependencies
+        run: |
+          # Note that we're installing stable - this is for running a script where we're a normal PyTorch
+          # user, not for building TorchCodec.
+          python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+          python -m pip install numpy pillow
+
+      - name: Check out repo
+        uses: actions/checkout@v3
+
+      - name: Run generation reference resources
+        run: |
+          test/generate_reference_resources.sh
@@ -11,6 +11,7 @@
 
 from benchmark_decoders_library import (
     DecordNonBatchDecoderAccurateSeek,
+    plot_data,
     run_benchmarks,
     TorchAudioDecoder,
     TorchcodecCompiled,
@@ -71,6 +72,18 @@ def main() -> None:
         type=str,
         default="decord,tcoptions:,torchvision,torchaudio,torchcodec_compiled,tcoptions:num_threads=1",
     )
+    parser.add_argument(
+        "--bm_video_dir",
+        help="Directory where video files reside. We will run benchmarks on all .mp4 files in this directory.",
+        type=str,
+        default="",
+    )
+    parser.add_argument(
+        "--plot_path",
+        help="Path where the generated plot is stored, if non-empty",
+        type=str,
+        default="",
+    )
 
     args = parser.parse_args()
     decoders = set(args.decoders.split(","))
@@ -118,13 +131,21 @@ def main() -> None:
             decoder_dict["TorchcodecNonCompiled:" + options] = (
                 TorchcodecNonCompiledWithOptions(**kwargs_dict)
             )
-    run_benchmarks(
+    video_paths = args.bm_video_paths.split(",")
+    if args.bm_video_dir:
+        video_paths = []
+        for entry in os.scandir(args.bm_video_dir):
+            if entry.is_file() and entry.name.endswith(".mp4"):
+                video_paths.append(entry.path)
+
+    df_data = run_benchmarks(
         decoder_dict,
-        args.bm_video_paths,
+        video_paths,
         num_uniform_samples,
         args.bm_video_speed_min_run_seconds,
         args.bm_video_creation,
     )
+    plot_data(df_data, args.plot_path)
 
 
 if __name__ == "__main__":
 
@@ -1,7 +1,12 @@
 import abc
 import json
+import os
 import timeit
 
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+
 import torch
 import torch.utils.benchmark as benchmark
 from torchcodec.decoders import VideoDecoder
@@ -118,17 +123,19 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
 
 
 class TorchcodecNonCompiledWithOptions(AbstractDecoder):
-    def __init__(self, num_threads=None, color_conversion_library=None):
+    def __init__(self, num_threads=None, color_conversion_library=None, device="cpu"):
         self._print_each_iteration_time = False
         self._num_threads = int(num_threads) if num_threads else None
         self._color_conversion_library = color_conversion_library
+        self._device = device
 
     def get_frames_from_video(self, video_file, pts_list):
         decoder = create_from_file(video_file)
         _add_video_stream(
             decoder,
             num_threads=self._num_threads,
             color_conversion_library=self._color_conversion_library,
+            device=self._device,
         )
         frames = []
         times = []
@@ -292,6 +299,97 @@ def create_torchcodec_decoder_from_file(video_file):
     return video_decoder
 
 
+def plot_data(df_data, plot_path):
+    """Plot benchmark FPS per decoder as a grid of bar charts and save it.
+
+    The grid has one row per video and one column per (type, frame_count)
+    combination found for that video. Each subplot is a horizontal bar chart
+    of FPS per decoder with error bars.
+
+    Args:
+        df_data: Rows accepted by ``pd.DataFrame``. The columns read here are
+            "video", "type", "frame_count", "decoder", "fps", "fps_p25" and
+            "fps_p75".
+        plot_path: Destination handed to ``savefig``. When it is empty, no
+            plot is generated.
+    """
+    # An empty path means no plot was requested, and without rows there is
+    # nothing to draw; skip instead of handing an empty filename to savefig.
+    if not plot_path or len(df_data) == 0:
+        return
+
+    # Creating the DataFrame
+    df = pd.DataFrame(df_data)
+
+    # Sorting by video, type, and frame_count
+    df_sorted = df.sort_values(by=["video", "type", "frame_count"])
+
+    # Group by video first
+    grouped_by_video = df_sorted.groupby("video")
+
+    # Define colors (assigned by bar position within each subplot)
+    colors = plt.get_cmap("tab10")
+
+    # Find the unique combinations of (type, frame_count) per video
+    video_type_combinations = {
+        video: video_group.groupby(["type", "frame_count"]).ngroups
+        for video, video_group in grouped_by_video
+    }
+
+    # Get the unique videos and the maximum number of (type, frame_count) combinations per video
+    unique_videos = list(video_type_combinations.keys())
+    max_combinations = max(video_type_combinations.values())
+
+    # Create subplots: each row is a video, and each column is for a unique (type, frame_count).
+    # squeeze=False keeps `axes` two-dimensional even for a single row or
+    # column, so it can always be indexed as axes[row, col].
+    fig, axes = plt.subplots(
+        nrows=len(unique_videos),
+        ncols=max_combinations,
+        figsize=(max_combinations * 6, len(unique_videos) * 4),
+        sharex=True,
+        sharey=True,
+        squeeze=False,
+    )
+
+    # Loop through each video and its sub-groups
+    for row, (video, video_group) in enumerate(grouped_by_video):
+        # Only the file name is shown in titles; it is the same for the whole row.
+        base_video = os.path.basename(video)
+        sub_group = video_group.groupby(["type", "frame_count"])
+
+        # Loop through each (type, frame_count) group for this video
+        for col, ((vtype, vcount), group) in enumerate(sub_group):
+            ax = axes[row, col]  # Select the appropriate axis
+
+            # Set the title for the subplot
+            ax.set_title(
+                f"video={base_video}\ndecode_pattern={vcount} x {vtype}", fontsize=12
+            )
+
+            # Plot bars with error bars. The first xerr row is the extent
+            # below each bar's value and the second the extent above it.
+            # NOTE(review): assumes fps_p75 <= fps <= fps_p25 - confirm
+            # against the code that fills these columns.
+            ax.barh(
+                group["decoder"],
+                group["fps"],
+                xerr=[group["fps"] - group["fps_p75"], group["fps_p25"] - group["fps"]],
+                color=[colors(i) for i in range(len(group))],
+                align="center",
+                capsize=5,
+            )
+
+            # Set the labels
+            ax.set_xlabel("FPS")
+            ax.set_ylabel("Decoder")
+
+            # Reverse the order of the handles and labels to match the order of the bars
+            handles = [
+                plt.Rectangle((0, 0), 1, 1, color=colors(i)) for i in range(len(group))
+            ]
+            ax.legend(
+                handles[::-1],
+                list(group["decoder"])[::-1],
+                title="Decoder",
+                loc="upper right",
+            )
+
+    # Remove any empty subplots for videos with fewer combinations
+    for row, video in enumerate(unique_videos):
+        for col in range(video_type_combinations[video], max_combinations):
+            fig.delaxes(axes[row, col])
+
+    # Adjust layout to avoid overlap
+    plt.tight_layout()
+
+    # Write the figure to disk
+    plt.savefig(
+        plot_path,
+    )
+
+
 def run_benchmarks(
     decoder_dict,
     video_paths,
@@ -300,9 +398,11 @@ def run_benchmarks(
     benchmark_video_creation,
 ):
     results = []
+    df_data = []
+    print(f"video_paths={video_paths}")
     verbose = False
     for decoder_name, decoder in decoder_dict.items():
-        for video_path in video_paths.split(","):
+        for video_path in video_paths:
             print(f"video={video_path}, decoder={decoder_name}")
             # We only use the VideoDecoder to get the metadata and get
             # the list of PTS values to seek to.
@@ -331,6 +431,19 @@ def run_benchmarks(
             results.append(
                 seeked_result.blocked_autorange(min_run_time=min_runtime_seconds)
             )
+            df_item = {}
+            df_item["decoder"] = decoder_name
+            df_item["video"] = video_path
+            df_item["description"] = results[-1].description
+            df_item["frame_count"] = num_uniform_samples
+            df_item["median"] = results[-1].median
+            df_item["iqr"] = results[-1].iqr
+            df_item["type"] = "seek()+next()"
+            df_item["fps"] = 1.0 * num_uniform_samples / results[-1].median
+            df_item["fps_p75"] = 1.0 * num_uniform_samples / results[-1]._p75
+            df_item["fps_p25"] = 1.0 * num_uniform_samples / results[-1]._p25
+            df_data.append(df_item)
+
             for num_consecutive_nexts in [1, 10]:
                 consecutive_frames_result = benchmark.Timer(
                     stmt="decoder.get_consecutive_frames_from_video(video_file, consecutive_frames_to_extract)",
@@ -348,8 +461,20 @@ def run_benchmarks(
                         min_run_time=min_runtime_seconds
                     )
                 )
-
-        first_video_path = video_paths.split(",")[0]
+                df_item = {}
+                df_item["decoder"] = decoder_name
+                df_item["video"] = video_path
+                df_item["description"] = results[-1].description
+                df_item["frame_count"] = num_consecutive_nexts
+                df_item["median"] = results[-1].median
+                df_item["iqr"] = results[-1].iqr
+                df_item["type"] = "next()"
+                df_item["fps"] = 1.0 * num_consecutive_nexts / results[-1].median
+                df_item["fps_p75"] = 1.0 * num_consecutive_nexts / results[-1]._p75
+                df_item["fps_p25"] = 1.0 * num_consecutive_nexts / results[-1]._p25
+                df_data.append(df_item)
+
+        first_video_path = video_paths[0]
         if benchmark_video_creation:
             simple_decoder = VideoDecoder(first_video_path)
             metadata = simple_decoder.metadata
@@ -369,5 +494,6 @@ def run_benchmarks(
                     min_run_time=2.0,
                 )
             )
-        compare = benchmark.Compare(results)
-        compare.print()
+    compare = benchmark.Compare(results)
+    compare.print()
+    return df_data
@@ -68,10 +68,9 @@ class FrameBatch(Iterable):
     def __post_init__(self):
         # This is called after __init__() when a FrameBatch is created. We can
         # run input validation checks here.
-        if self.data.ndim < 4:
+        if self.data.ndim < 3:
             raise ValueError(
-                f"data must be at least 4-dimensional. Got {self.data.shape = } "
-                "For 3-dimensional data, create a Frame object instead."
+                f"data must be at least 3-dimensional, got {self.data.shape = }"
             )
 
         leading_dims = self.data.shape[:-3]
@@ -83,33 +82,22 @@ def __post_init__(self):
                 f"{self.pts_seconds.shape = } and {self.duration_seconds.shape = }."
             )
 
-    def __iter__(self) -> Union[Iterator["FrameBatch"], Iterator[Frame]]:
-        cls = Frame if self.data.ndim == 4 else FrameBatch
+    def __iter__(self) -> Iterator["FrameBatch"]:
         for data, pts_seconds, duration_seconds in zip(
             self.data, self.pts_seconds, self.duration_seconds
         ):
-            yield cls(
+            yield FrameBatch(
                 data=data,
                 pts_seconds=pts_seconds,
                 duration_seconds=duration_seconds,
             )
 
-    def __getitem__(self, key) -> Union["FrameBatch", Frame]:
-        data = self.data[key]
-        pts_seconds = self.pts_seconds[key]
-        duration_seconds = self.duration_seconds[key]
-        if self.data.ndim == 4:
-            return Frame(
-                data=data,
-                pts_seconds=float(pts_seconds.item()),
-                duration_seconds=float(duration_seconds.item()),
-            )
-        else:
-            return FrameBatch(
-                data=data,
-                pts_seconds=pts_seconds,
-                duration_seconds=duration_seconds,
-            )
+    def __getitem__(self, key) -> "FrameBatch":
+        return FrameBatch(
+            data=self.data[key],
+            pts_seconds=self.pts_seconds[key],
+            duration_seconds=self.duration_seconds[key],
+        )
 
     def __len__(self):
         return len(self.data)