meta-pytorch · mollyxu · Dec 30, 2025 · Dec 30, 2025 · Jan 5, 2026 · Jan 5, 2026
diff --git a/src/torchcodec/_core/SingleStreamDecoder.cpp b/src/torchcodec/_core/SingleStreamDecoder.cpp
@@ -844,7 +844,8 @@ FrameBatchOutput SingleStreamDecoder::getFramesPlayedAt(
 
 FrameBatchOutput SingleStreamDecoder::getFramesPlayedInRange(
     double startSeconds,
-    double stopSeconds) {
+    double stopSeconds,
+    std::optional<double> fps) {
   validateActiveStream(AVMEDIA_TYPE_VIDEO);
   const auto& streamMetadata =
       containerMetadata_.allStreamMetadata[activeStreamIndex_];
@@ -906,6 +907,60 @@ FrameBatchOutput SingleStreamDecoder::getFramesPlayedInRange(
             std::to_string(maxSeconds.value()) + ").");
   }
 
+  // Resample to the requested frame rate. Output slot i is assigned the
+  // timestamp startSeconds + i / fps and is filled with the source frame that
+  // secondsToIndexLowerBound() returns for that timestamp, so source frames
+  // repeat when upsampling and are skipped when downsampling.
+  if (fps.has_value()) {
+    const double fpsVal = fps.value();
+    // Infinity passes a plain "> 0" test, and converting the resulting
+    // non-finite frame count to int64_t below would be undefined behavior.
+    TORCH_CHECK(
+        std::isfinite(fpsVal) && fpsVal > 0,
+        "fps must be positive and finite, got " + std::to_string(fpsVal));
+
+    // TODO: add an early break if requested fps is the same as the current fps
+
+    const double frameDuration = 1.0 / fpsVal;
+    const auto numOutputFrames = static_cast<int64_t>(
+        std::round((stopSeconds - startSeconds) * fpsVal));
+
+    FrameBatchOutput frameBatchOutput(
+        numOutputFrames,
+        resizedOutputDims_.value_or(metadataDims_),
+        videoStreamOptions.device);
+
+    // Track the most recently decoded source frame so that consecutive output
+    // slots mapping to the same source frame copy it instead of re-decoding.
+    int64_t lastDecodedSourceIndex = -1;
+    torch::Tensor lastDecodedData;
+
+    for (int64_t i = 0; i < numOutputFrames; ++i) {
+      const double targetTimestamp = startSeconds + i * frameDuration;
+      const int64_t sourceIdx = secondsToIndexLowerBound(targetTimestamp);
+
+      if (sourceIdx == lastDecodedSourceIndex && lastDecodedSourceIndex >= 0) {
+        frameBatchOutput.data[i].copy_(lastDecodedData);
+      } else {
+        // Decode straight into this slot of the preallocated batch tensor.
+        FrameOutput frameOutput =
+            getFrameAtIndexInternal(sourceIdx, frameBatchOutput.data[i]);
+        lastDecodedData = frameBatchOutput.data[i];
+        lastDecodedSourceIndex = sourceIdx;
+      }
+
+      // Report the resampled timeline rather than the source frame's own pts.
+      frameBatchOutput.ptsSeconds[i] = targetTimestamp;
+      frameBatchOutput.durationSeconds[i] = frameDuration;
+    }
+
+    frameBatchOutput.data = maybePermuteHWC2CHW(frameBatchOutput.data);
+    return frameBatchOutput;
+  }
+
+  // Original behavior when fps is not specified:
+  // Return all frames in range at source fps
+
   // Note that we look at nextPts for a frame, and not its pts or duration.
   // Our abstract player displays frames starting at the pts for that frame
   // until the pts for the next frame. There are two consequences:

diff --git a/src/torchcodec/_core/SingleStreamDecoder.h b/src/torchcodec/_core/SingleStreamDecoder.h
@@ -147,9 +147,13 @@ class SingleStreamDecoder {
   // Valid values for startSeconds and stopSeconds are:
   //
   //   [beginStreamPtsSecondsFromContent, endStreamPtsSecondsFromContent)
+  //
+  // If fps is specified, frames are duplicated or dropped to match that rate,
+  // and the returned pts/durations are those of the resampled timeline.
   FrameBatchOutput getFramesPlayedInRange(
       double startSeconds,
-      double stopSeconds);
+      double stopSeconds,
+      std::optional<double> fps = std::nullopt);
 
   AudioFramesOutput getFramesPlayedInRangeAudio(
       double startSeconds,
@@ -273,11 +277,6 @@ class SingleStreamDecoder {
       UniqueAVFrame& avFrame,
       std::optional<torch::Tensor> preAllocatedOutputTensor = std::nullopt);
 
-  void convertAVFrameToFrameOutputOnCPU(
-      UniqueAVFrame& avFrame,
-      FrameOutput& frameOutput,
-      std::optional<torch::Tensor> preAllocatedOutputTensor = std::nullopt);
-
   // --------------------------------------------------------------------------
   // PTS <-> INDEX CONVERSIONS
   // --------------------------------------------------------------------------

diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp
@@ -63,7 +63,7 @@ TORCH_LIBRARY(torchcodec_ns, m) {
   m.def(
       "get_frames_in_range(Tensor(a!) decoder, *, int start, int stop, int? step=None) -> (Tensor, Tensor, Tensor)");
   m.def(
-      "get_frames_by_pts_in_range(Tensor(a!) decoder, *, float start_seconds, float stop_seconds) -> (Tensor, Tensor, Tensor)");
+      "get_frames_by_pts_in_range(Tensor(a!) decoder, *, float start_seconds, float stop_seconds, float? fps=None) -> (Tensor, Tensor, Tensor)");
   m.def(
       "get_frames_by_pts_in_range_audio(Tensor(a!) decoder, *, float start_seconds, float? stop_seconds) -> (Tensor, Tensor)");
   m.def(
@@ -575,13 +575,16 @@ OpsFrameBatchOutput get_frames_by_pts(
 // Return the frames inside the range as a single stacked Tensor. The range is
 // defined as [start_seconds, stop_seconds). The frames are stacked in pts
 // order.
+// If fps is specified, frames are duplicated or dropped to match that rate,
+// and the returned pts/durations are those of the resampled timeline.
 OpsFrameBatchOutput get_frames_by_pts_in_range(
     at::Tensor& decoder,
     double start_seconds,
-    double stop_seconds) {
+    double stop_seconds,
+    std::optional<double> fps = std::nullopt) {
   auto videoDecoder = unwrapTensorToGetDecoder(decoder);
   auto result =
-      videoDecoder->getFramesPlayedInRange(start_seconds, stop_seconds);
+      videoDecoder->getFramesPlayedInRange(start_seconds, stop_seconds, fps);
   return makeOpsFrameBatchOutput(result);
 }
 

diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py
@@ -535,6 +535,7 @@ def get_frames_by_pts_in_range_abstract(
     *,
     start_seconds: float,
     stop_seconds: float,
+    fps: float | None = None,
 ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
     image_size = [get_ctx().new_dynamic_size() for _ in range(4)]
     return (

diff --git a/src/torchcodec/decoders/_video_decoder.py b/src/torchcodec/decoders/_video_decoder.py
@@ -15,7 +15,6 @@
 
 import torch
 from torch import device as torch_device, nn, Tensor
-
 from torchcodec import _core as core, Frame, FrameBatch
 from torchcodec.decoders._decoder_utils import (
     _get_cuda_backend,
@@ -452,32 +451,31 @@ def get_frames_played_at(self, seconds: torch.Tensor | list[float]) -> FrameBatc
         )
 
     def get_frames_played_in_range(
-        self, start_seconds: float, stop_seconds: float
+        self, start_seconds: float, stop_seconds: float, fps: float | None = None
     ) -> FrameBatch:
         """Returns multiple frames in the given range.
 
-        Frames are in the half open range [start_seconds, stop_seconds). Each
-        returned frame's :term:`pts`, in seconds, is inside of the half open
-        range.
-
         Args:
-            start_seconds (float): Time, in seconds, of the start of the
-                range.
-            stop_seconds (float): Time, in seconds, of the end of the
-                range. As a half open range, the end is excluded.
+            start_seconds (float): Time, in seconds, of the start of the range.
+            stop_seconds (float): Time, in seconds, of the end of the range.
+                As a half open range, the end is excluded.
+            fps (float, optional): If specified, resample output to this frame
+                rate by duplicating or dropping frames as necessary. If None
+                (default), returns frames at the source video's frame rate.
 
         Returns:
             FrameBatch: The frames within the specified range.
         """
         if not start_seconds <= stop_seconds:
             raise ValueError(
-                f"Invalid start seconds: {start_seconds}. It must be less than or equal to stop seconds ({stop_seconds})."
+                f"Invalid start seconds: {start_seconds}. "
+                f"It must be less than or equal to stop seconds ({stop_seconds})."
             )
         if not self._begin_stream_seconds <= start_seconds < self._end_stream_seconds:
             raise ValueError(
                 f"Invalid start seconds: {start_seconds}. "
                 f"It must be greater than or equal to {self._begin_stream_seconds} "
-                f"and less than or equal to {self._end_stream_seconds}."
+                f"and less than {self._end_stream_seconds}."
             )
         if not stop_seconds <= self._end_stream_seconds:
             raise ValueError(
@@ -488,9 +486,27 @@ def get_frames_played_in_range(
             self._decoder,
             start_seconds=start_seconds,
             stop_seconds=stop_seconds,
+            fps=fps,
         )
         return FrameBatch(*frames)
 
+    def get_all_frames(self, fps: float | None = None) -> FrameBatch:
+        """Returns all the frames in the video stream, optionally resampled.
+
+        Args:
+            fps (float, optional): If specified, resample output to this frame
+                rate by duplicating or dropping frames as necessary. If None
+                (default), returns frames at the source video's frame rate.
+
+        Returns:
+            FrameBatch: The frames played over the stream's whole time range.
+        """
+        return self.get_frames_played_in_range(
+            start_seconds=self._begin_stream_seconds,
+            stop_seconds=self._end_stream_seconds,
+            fps=fps,
+        )
+
 
 def _get_and_validate_stream_metadata(
     *,