Commit e78ae8f

Merge branch 'main' of github.com:pytorch/torchcodec into mac_wheels_ci

2 parents 882ef9f + bc89ce1

15 files changed: +429 -218 lines

.github/workflows/linux_cuda_wheel.yaml

Lines changed: 8 additions & 10 deletions

@@ -71,7 +71,6 @@ jobs:
     container:
       image: "pytorch/manylinux-builder:cuda${{ matrix.cuda-version }}"
       options: "--gpus all -e NVIDIA_DRIVER_CAPABILITIES=video,compute,utility"
-    if: ${{ always() }}
     needs: build
     steps:
       - name: Setup env vars
@@ -83,20 +82,25 @@ jobs:
           name: pytorch_torchcodec__3.9_cu${{ env.cuda_version_without_periods }}_x86_64
           path: pytorch/torchcodec/dist/
       - name: Setup miniconda using test-infra
-        uses: ahmadsharif1/test-infra/.github/actions/setup-miniconda@14bc3c29f88d13b0237ab4ddf00aa409e45ade40
+        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
         with:
           python-version: ${{ matrix.python-version }}
-          default-packages: "conda-forge::ffmpeg=${{ matrix.ffmpeg-version-for-tests }}"
+          #
+          # For some reason nvidia::libnpp=12.4 doesn't install but nvidia/label/cuda-12.4.0::libnpp does.
+          # So we use the latter convention for libnpp.
+          # We install conda packages at the start because otherwise conda may have conflicts with dependencies.
+          default-packages: "nvidia/label/cuda-${{ matrix.cuda-version }}.0::libnpp nvidia::cuda-nvrtc=${{ matrix.cuda-version }} nvidia::cuda-toolkit=${{ matrix.cuda-version }} nvidia::cuda-cudart=${{ matrix.cuda-version }} nvidia::cuda-driver-dev=${{ matrix.cuda-version }} conda-forge::ffmpeg=${{ matrix.ffmpeg-version-for-tests }}"
       - name: Check env
        run: |
          ${CONDA_RUN} env
          ${CONDA_RUN} conda info
          ${CONDA_RUN} nvidia-smi
+          ${CONDA_RUN} conda list
       - name: Update pip
         run: ${CONDA_RUN} python -m pip install --upgrade pip
       - name: Install PyTorch
         run: |
-          ${CONDA_RUN} python -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu${{ env.cuda_version_without_periods }}
+          ${CONDA_RUN} python -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu${{ env.cuda_version_without_periods }}
           ${CONDA_RUN} python -c 'import torch; print(f"{torch.__version__}"); print(f"{torch.__file__}"); print(f"{torch.cuda.is_available()=}")'
       - name: Install torchcodec from the wheel
         run: |
@@ -107,14 +111,8 @@ jobs:
       - name: Check out repo
         uses: actions/checkout@v3
 
-      - name: Install cuda runtime dependencies
-        run: |
-          # For some reason nvidia::libnpp=12.4 doesn't install but nvidia/label/cuda-12.4.0::libnpp does.
-          # So we use the latter convention for libnpp.
-          ${CONDA_RUN} conda install --yes nvidia/label/cuda-${{ matrix.cuda-version }}.0::libnpp nvidia::cuda-nvrtc=${{ matrix.cuda-version }} nvidia::cuda-toolkit=${{ matrix.cuda-version }} nvidia::cuda-cudart=${{ matrix.cuda-version }} nvidia::cuda-driver-dev=${{ matrix.cuda-version }}
       - name: Install test dependencies
         run: |
-          ${CONDA_RUN} python -m pip install --pre torchvision --index-url https://download.pytorch.org/whl/nightly/cpu
           # Ideally we would find a way to get those dependencies from pyproject.toml
           ${CONDA_RUN} python -m pip install numpy pytest pillow
 

README.md

Lines changed: 1 addition & 1 deletion

@@ -65,7 +65,7 @@ decoder.get_frame_at(len(decoder) - 1)
 # pts_seconds: 9.960000038146973
 # duration_seconds: 0.03999999910593033
 
-decoder.get_frames_at(start=10, stop=30, step=5)
+decoder.get_frames_in_range(start=10, stop=30, step=5)
 # FrameBatch:
 # data (shape): torch.Size([4, 3, 400, 640])
 # pts_seconds: tensor([0.4000, 0.6000, 0.8000, 1.0000])

benchmarks/samplers/benchmark_samplers.py

Lines changed: 20 additions & 14 deletions

@@ -16,16 +16,22 @@ def bench(f, *args, num_exp=100, warmup=0, **kwargs):
     for _ in range(warmup):
         f(*args, **kwargs)
 
+    num_frames = None
     times = []
     for _ in range(num_exp):
         start = perf_counter_ns()
-        f(*args, **kwargs)
+        clips = f(*args, **kwargs)
         end = perf_counter_ns()
         times.append(end - start)
-    return torch.tensor(times).float()
+        num_frames = (
+            clips.data.shape[0] * clips.data.shape[1]
+        )  # should be constant across calls
+    return torch.tensor(times).float(), num_frames
+
 
+def report_stats(times, num_frames, unit="ms"):
+    fps = num_frames * 1e9 / torch.median(times)
 
-def report_stats(times, unit="ms"):
     mul = {
         "ns": 1,
         "µs": 1e-3,
@@ -35,13 +41,13 @@ def report_stats(times, unit="ms"):
     times = times * mul
     std = times.std().item()
     med = times.median().item()
-    print(f"{med = :.2f}{unit} +- {std:.2f}")
-    return med
+    print(f"{med = :.2f}{unit} +- {std:.2f} med fps = {fps:.1f}")
+    return med, fps
 
 
 def sample(sampler, **kwargs):
     decoder = VideoDecoder(VIDEO_PATH)
-    sampler(
+    return sampler(
         decoder,
         num_frames_per_clip=10,
         **kwargs,
@@ -56,34 +62,34 @@ def sample(sampler, **kwargs):
 print(f"{num_clips = }")
 
 print("clips_at_random_indices ", end="")
-times = bench(
+times, num_frames = bench(
     sample, clips_at_random_indices, num_clips=num_clips, num_exp=NUM_EXP, warmup=2
 )
-report_stats(times, unit="ms")
+report_stats(times, num_frames, unit="ms")
 
 print("clips_at_regular_indices ", end="")
-times = bench(
+times, num_frames = bench(
     sample, clips_at_regular_indices, num_clips=num_clips, num_exp=NUM_EXP, warmup=2
 )
-report_stats(times, unit="ms")
+report_stats(times, num_frames, unit="ms")
 
 print("clips_at_random_timestamps ", end="")
-times = bench(
+times, num_frames = bench(
     sample,
     clips_at_random_timestamps,
     num_clips=num_clips,
     num_exp=NUM_EXP,
     warmup=2,
 )
-report_stats(times, unit="ms")
+report_stats(times, num_frames, unit="ms")
 
 print("clips_at_regular_timestamps ", end="")
 seconds_between_clip_starts = 13 / num_clips  # approximate. video is 13s long
-times = bench(
+times, num_frames = bench(
     sample,
     clips_at_regular_timestamps,
     seconds_between_clip_starts=seconds_between_clip_starts,
     num_exp=NUM_EXP,
     warmup=2,
 )
-report_stats(times, unit="ms")
+report_stats(times, num_frames, unit="ms")
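The throughput figure added here falls straight out of the existing timings: bench records nanoseconds via perf_counter_ns, so report_stats computes the median frames-per-second as num_frames * 1e9 / median(times). A standalone sketch of that arithmetic with made-up numbers (not taken from any benchmark run):

import torch

# Hypothetical run: 5 clips x 10 frames per clip, per-call timings in nanoseconds.
num_frames = 5 * 10
times_ns = torch.tensor([52e6, 48e6, 50e6, 61e6, 49e6]).float()

fps = num_frames * 1e9 / torch.median(times_ns)  # frames decoded per second at the median run
med_ms = times_ns.median().item() * 1e-6         # median time converted to milliseconds

print(f"median = {med_ms:.2f}ms, median fps = {fps:.1f}")  # -> median = 50.00ms, median fps = 1000.0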

examples/basic_example.py

Lines changed: 4 additions & 4 deletions

@@ -120,7 +120,7 @@ def plot(frames: torch.Tensor, title : Optional[str] = None):
 # their :term:`pts` (Presentation Time Stamp), and their duration.
 # This can be achieved using the
 # :meth:`~torchcodec.decoders.VideoDecoder.get_frame_at` and
-# :meth:`~torchcodec.decoders.VideoDecoder.get_frames_at` methods, which
+# :meth:`~torchcodec.decoders.VideoDecoder.get_frames_in_range` methods, which
 # will return a :class:`~torchcodec.Frame` and
 # :class:`~torchcodec.FrameBatch` objects respectively.
 
@@ -129,7 +129,7 @@ def plot(frames: torch.Tensor, title : Optional[str] = None):
 print(last_frame)
 
 # %%
-middle_frames = decoder.get_frames_at(start=10, stop=20, step=2)
+middle_frames = decoder.get_frames_in_range(start=10, stop=20, step=2)
 print(f"{type(middle_frames) = }")
 print(middle_frames)
 
@@ -152,7 +152,7 @@ def plot(frames: torch.Tensor, title : Optional[str] = None):
 # So far, we have retrieved frames based on their index. We can also retrieve
 # frames based on *when* they are displayed with
 # :meth:`~torchcodec.decoders.VideoDecoder.get_frame_displayed_at` and
-# :meth:`~torchcodec.decoders.VideoDecoder.get_frames_displayed_at`, which
+# :meth:`~torchcodec.decoders.VideoDecoder.get_frames_displayed_in_range`, which
 # also returns :class:`~torchcodec.Frame` and :class:`~torchcodec.FrameBatch`
 # respectively.
 
@@ -161,7 +161,7 @@ def plot(frames: torch.Tensor, title : Optional[str] = None):
 print(frame_at_2_seconds)
 
 # %%
-first_two_seconds = decoder.get_frames_displayed_at(
+first_two_seconds = decoder.get_frames_displayed_in_range(
     start_seconds=0,
     stop_seconds=2,
 )

src/torchcodec/_frame.py

Lines changed: 56 additions & 3 deletions

@@ -38,6 +38,14 @@ class Frame(Iterable):
     duration_seconds: float
     """The duration of the frame, in seconds (float)."""
 
+    def __post_init__(self):
+        # This is called after __init__() when a Frame is created. We can run
+        # input validation checks here.
+        if not self.data.ndim == 3:
+            raise ValueError(f"data must be 3-dimensional, got {self.data.shape = }")
+        self.pts_seconds = float(self.pts_seconds)
+        self.duration_seconds = float(self.duration_seconds)
+
     def __iter__(self) -> Iterator[Union[Tensor, float]]:
         for field in dataclasses.fields(self):
             yield getattr(self, field.name)
@@ -57,9 +65,54 @@ class FrameBatch(Iterable):
     duration_seconds: Tensor
     """The duration of the frame, in seconds (1-D ``torch.Tensor`` of floats)."""
 
-    def __iter__(self) -> Iterator[Union[Tensor, float]]:
-        for field in dataclasses.fields(self):
-            yield getattr(self, field.name)
+    def __post_init__(self):
+        # This is called after __init__() when a FrameBatch is created. We can
+        # run input validation checks here.
+        if self.data.ndim < 4:
+            raise ValueError(
+                f"data must be at least 4-dimensional. Got {self.data.shape = } "
+                "For 3-dimensional data, create a Frame object instead."
+            )
+
+        leading_dims = self.data.shape[:-3]
+        if not (leading_dims == self.pts_seconds.shape == self.duration_seconds.shape):
+            raise ValueError(
+                "Tried to create a FrameBatch but the leading dimensions of the inputs do not match. "
+                f"Got {self.data.shape = } so we expected the shape of pts_seconds and "
+                f"duration_seconds to be {leading_dims = }, but got "
+                f"{self.pts_seconds.shape = } and {self.duration_seconds.shape = }."
+            )
+
+    def __iter__(self) -> Union[Iterator["FrameBatch"], Iterator[Frame]]:
+        cls = Frame if self.data.ndim == 4 else FrameBatch
+        for data, pts_seconds, duration_seconds in zip(
+            self.data, self.pts_seconds, self.duration_seconds
+        ):
+            yield cls(
+                data=data,
+                pts_seconds=pts_seconds,
+                duration_seconds=duration_seconds,
+            )
+
+    def __getitem__(self, key) -> Union["FrameBatch", Frame]:
+        data = self.data[key]
+        pts_seconds = self.pts_seconds[key]
+        duration_seconds = self.duration_seconds[key]
+        if self.data.ndim == 4:
+            return Frame(
+                data=data,
+                pts_seconds=float(pts_seconds.item()),
+                duration_seconds=float(duration_seconds.item()),
+            )
+        else:
+            return FrameBatch(
+                data=data,
+                pts_seconds=pts_seconds,
+                duration_seconds=duration_seconds,
+            )
+
+    def __len__(self):
+        return len(self.data)
 
     def __repr__(self):
         return _frame_repr(self)
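With these additions a FrameBatch behaves like a nested container: len() and indexing operate on the leading dimension, and a 4-D batch yields Frame objects while higher-dimensional batches yield smaller FrameBatches. A usage sketch with arbitrary shapes, just to illustrate the indexing rules:

import torch
from torchcodec import Frame, FrameBatch

# A hypothetical 5-D batch: 2 clips x 3 frames per clip, each frame 3x4x6.
batch = FrameBatch(
    data=torch.rand(2, 3, 3, 4, 6),
    pts_seconds=torch.rand(2, 3),
    duration_seconds=torch.rand(2, 3),
)

print(len(batch))   # 2: the number of clips (leading dimension)
clip = batch[0]     # a 4-D FrameBatch holding the 3 frames of the first clip
frame = clip[1]     # indexing a 4-D batch returns a single Frame

for f in clip:      # iterating a 4-D batch also yields Frame objects
    assert isinstance(f, Frame)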

src/torchcodec/decoders/_core/VideoDecoder.cpp

Lines changed: 0 additions & 6 deletions

@@ -1108,12 +1108,6 @@ VideoDecoder::BatchDecodedOutput VideoDecoder::getFramesDisplayedByTimestamps(
           return ptsToSeconds(info.nextPts, stream.timeBase) <= framePts;
         });
     int64_t frameIndex = it - stream.allFrames.begin();
-    // If the frame index is larger than the size of allFrames, that means we
-    // couldn't match the pts value to the pts value of a NEXT FRAME. And
-    // that means that this timestamp falls during the time between when the
-    // last frame is displayed, and the video ends. Hence, it should map to the
-    // index of the last frame.
-    frameIndex = std::min(frameIndex, (int64_t)stream.allFrames.size() - 1);
     frameIndices[i] = frameIndex;
   }
 

src/torchcodec/decoders/_core/VideoDecoder.h

Lines changed: 6 additions & 1 deletion

@@ -299,7 +299,12 @@ class VideoDecoder {
  private:
   struct FrameInfo {
     int64_t pts = 0;
-    int64_t nextPts = 0;
+    // The value of this default is important: the last frame's nextPts will be
+    // INT64_MAX, which ensures that the allFrames vec contains FrameInfo
+    // structs with *increasing* nextPts values. That's a necessary condition
+    // for the binary searches on those values to work properly (as typically
+    // done during pts -> index conversions.)
+    int64_t nextPts = INT64_MAX;
   };
   struct FilterState {
     UniqueAVFilterGraph filterGraph;
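This sentinel is what lets getFramesDisplayedByTimestamps (previous file) drop its clamping step: the binary search over nextPts now always lands on a valid frame, including for timestamps past the last frame's pts. A rough Python analogue of that pts-to-index search, not the C++ implementation itself:

import bisect

# Toy frame table: (pts, next_pts) pairs, mirroring FrameInfo. The last
# frame's next_pts is a sentinel "infinity", like INT64_MAX in the struct.
INF = float("inf")
all_frames = [(0.0, 0.4), (0.4, 0.8), (0.8, 1.2), (1.2, INF)]

def frame_index_for_pts(pts: float) -> int:
    # Find the first frame whose next_pts is strictly greater than pts.
    # This only works because the next_pts values are increasing; with the
    # old default of 0 for the last frame the sequence was not sorted, and
    # the result had to be clamped to the last index afterwards.
    next_pts_values = [next_pts for _, next_pts in all_frames]
    return bisect.bisect_right(next_pts_values, pts)

print(frame_index_for_pts(0.5))  # 1 -> the second frame is displayed at t=0.5s
print(frame_index_for_pts(5.0))  # 3 -> timestamps past the last frame map to it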

src/torchcodec/decoders/_video_decoder.py

Lines changed: 2 additions & 2 deletions

@@ -181,7 +181,7 @@ def get_frame_at(self, index: int) -> Frame:
             duration_seconds=duration_seconds.item(),
         )
 
-    def get_frames_at(self, start: int, stop: int, step: int = 1) -> FrameBatch:
+    def get_frames_in_range(self, start: int, stop: int, step: int = 1) -> FrameBatch:
         """Return multiple frames at the given index range.
 
         Frames are in [start, stop).
@@ -238,7 +238,7 @@ def get_frame_displayed_at(self, seconds: float) -> Frame:
             duration_seconds=duration_seconds.item(),
         )
 
-    def get_frames_displayed_at(
+    def get_frames_displayed_in_range(
         self, start_seconds: float, stop_seconds: float
     ) -> FrameBatch:
         """Returns multiple frames in the given range.

src/torchcodec/samplers/_common.py

Lines changed: 18 additions & 18 deletions

@@ -1,7 +1,7 @@
 from typing import Callable, Union
 
-import torch
-from torchcodec import Frame, FrameBatch
+from torch import Tensor
+from torchcodec import FrameBatch
 
 _LIST_OF_INT_OR_FLOAT = Union[list[int], list[float]]
 
@@ -42,22 +42,6 @@ def _error_policy(
 }
 
 
-def _chunk_list(lst, chunk_size):
-    # return list of sublists of length chunk_size
-    return [lst[i : i + chunk_size] for i in range(0, len(lst), chunk_size)]
-
-
-def _to_framebatch(frames: list[Frame]) -> FrameBatch:
-    # IMPORTANT: see other IMPORTANT note in _decode_all_clips_indices and
-    # _decode_all_clips_timestamps
-    data = torch.stack([frame.data for frame in frames])
-    pts_seconds = torch.tensor([frame.pts_seconds for frame in frames])
-    duration_seconds = torch.tensor([frame.duration_seconds for frame in frames])
-    return FrameBatch(
-        data=data, pts_seconds=pts_seconds, duration_seconds=duration_seconds
-    )
-
-
 def _validate_common_params(*, decoder, num_frames_per_clip, policy):
     if len(decoder) < 1:
         raise ValueError(
@@ -72,3 +56,19 @@ def _validate_common_params(*, decoder, num_frames_per_clip, policy):
         raise ValueError(
             f"Invalid policy ({policy}). Supported values are {_POLICY_FUNCTIONS.keys()}."
         )
+
+
+def _make_5d_framebatch(
+    *,
+    data: Tensor,
+    pts_seconds: Tensor,
+    duration_seconds: Tensor,
+    num_clips: int,
+    num_frames_per_clip: int,
+) -> FrameBatch:
+    last_3_dims = data.shape[-3:]
+    return FrameBatch(
+        data=data.view(num_clips, num_frames_per_clip, *last_3_dims),
+        pts_seconds=pts_seconds.view(num_clips, num_frames_per_clip),
+        duration_seconds=duration_seconds.view(num_clips, num_frames_per_clip),
+    )
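_make_5d_framebatch replaces the removed _chunk_list/_to_framebatch pair: samplers now decode everything into one flat batch and reshape it clip-major with a single view(), instead of stacking per-clip lists of Frame objects. A plain-tensor sketch of that reshape, with illustrative shapes only:

import torch

# Pretend a sampler decoded 2 clips x 3 frames per clip into one flat batch
# of 6 frames (C=3, H=4, W=6), with matching flat pts/duration tensors.
num_clips, num_frames_per_clip = 2, 3
data = torch.rand(num_clips * num_frames_per_clip, 3, 4, 6)
pts_seconds = torch.rand(num_clips * num_frames_per_clip)
duration_seconds = torch.rand(num_clips * num_frames_per_clip)

# The reshape performed by _make_5d_framebatch:
last_3_dims = data.shape[-3:]
clips_data = data.view(num_clips, num_frames_per_clip, *last_3_dims)    # (2, 3, 3, 4, 6)
clips_pts = pts_seconds.view(num_clips, num_frames_per_clip)            # (2, 3)
clips_duration = duration_seconds.view(num_clips, num_frames_per_clip)  # (2, 3)

# The leading dims line up, so FrameBatch.__post_init__ accepts these tensors.
print(clips_data.shape, clips_pts.shape, clips_duration.shape)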
