Added logic

NicolasHug · NicolasHug · commit 14e287604e05 · 2024-10-22T14:16:39.000+01:00
diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp
@@ -1090,10 +1090,43 @@ VideoDecoder::BatchDecodedOutput VideoDecoder::getFramesAtIndices(
 
 VideoDecoder::BatchDecodedOutput VideoDecoder::getFramesAtPtss(
     int streamIndex,
-    const std::vector<int64_t>& framePtss,
+    const std::vector<double>& framePtss,
     const bool sortPtss) {
-        return getFramesAtIndices(streamIndex, framePtss, sortPtss);
-    }
+  validateUserProvidedStreamIndex(streamIndex);
+  validateScannedAllStreams("getFramesAtPtss");
+
+  // The frame displayed at timestamp t and the one displayed at timestamp `t +
+  // eps` are probably the same frame, with the same index. The easiest way to
+  // avoid decoding that unique frame twice is to convert the input timestamps
+  // to indices, and leverage the de-duplication logic of getFramesAtIndices.
+
+  const auto& streamMetadata = containerMetadata_.streams[streamIndex];
+  const auto& stream = streams_[streamIndex];
+  double minSeconds = streamMetadata.minPtsSecondsFromScan.value();
+  double maxSeconds = streamMetadata.maxPtsSecondsFromScan.value();
+
+  std::vector<int64_t> frameIndices(framePtss.size());
+  for (auto i = 0; i < framePtss.size(); ++i) {
+    auto framePts = framePtss[i];
+    TORCH_CHECK(
+        framePts >= minSeconds && framePts < maxSeconds,
+        "frame pts is " + std::to_string(framePts) + "; must be in range [" +
+            std::to_string(minSeconds) + ", " + std::to_string(maxSeconds) +
+            ").");
+
+    auto it = std::lower_bound(
+        stream.allFrames.begin(),
+        stream.allFrames.end(),
+        framePts,
+        [&stream](const FrameInfo& info, double start) {
+          return ptsToSeconds(info.nextPts, stream.timeBase) <= start;
+        });
+    int64_t frameIndex = it - stream.allFrames.begin();
+    frameIndices[i] = frameIndex;
+  }
+
+  return getFramesAtIndices(streamIndex, frameIndices, sortPtss);
+}
 
 VideoDecoder::BatchDecodedOutput VideoDecoder::getFramesInRange(
     int streamIndex,
diff --git a/src/torchcodec/decoders/_core/VideoDecoder.h b/src/torchcodec/decoders/_core/VideoDecoder.h
@@ -247,7 +247,7 @@ class VideoDecoder {
 
   BatchDecodedOutput getFramesAtPtss(
       int streamIndex,
-      const std::vector<int64_t>& framePtss,
+      const std::vector<double>& framePtss,
       const bool sortPtss = false);
 
   // Returns frames within a given range for a given stream as a single stacked
diff --git a/src/torchcodec/decoders/_core/VideoDecoderOps.cpp b/src/torchcodec/decoders/_core/VideoDecoderOps.cpp
@@ -42,7 +42,7 @@ TORCH_LIBRARY(torchcodec_ns, m) {
   m.def(
       "get_frames_at_indices(Tensor(a!) decoder, *, int stream_index, int[] frame_indices, bool sort_indices=False) -> (Tensor, Tensor, Tensor)");
   m.def(
-      "get_frames_at_ptss(Tensor(a!) decoder, *, int stream_index, int[] frame_ptss, bool sort_ptss=False) -> (Tensor, Tensor, Tensor)");
+      "get_frames_at_ptss(Tensor(a!) decoder, *, int stream_index, float[] frame_ptss, bool sort_ptss=False) -> (Tensor, Tensor, Tensor)");
   m.def(
       "get_frames_in_range(Tensor(a!) decoder, *, int stream_index, int start, int stop, int? step=None) -> (Tensor, Tensor, Tensor)");
   m.def(
@@ -214,17 +214,15 @@ OpsDecodedOutput get_frame_at_pts(at::Tensor& decoder, double seconds) {
 OpsBatchDecodedOutput get_frames_at_ptss(
     at::Tensor& decoder,
     int64_t stream_index,
-    at::IntArrayRef frame_ptss,
+    at::ArrayRef<double> frame_ptss,
     bool sort_ptss) {
   auto videoDecoder = unwrapTensorToGetDecoder(decoder);
-  std::vector<int64_t> framePtssVec(
-      frame_ptss.begin(), frame_ptss.end());
-  auto result = videoDecoder->getFramesAtPtss(
-      stream_index, framePtssVec, sort_ptss);
+  std::vector<double> framePtssVec(frame_ptss.begin(), frame_ptss.end());
+  auto result =
+      videoDecoder->getFramesAtPtss(stream_index, framePtssVec, sort_ptss);
   return makeOpsBatchDecodedOutput(result);
 }
 
-
 OpsDecodedOutput get_frame_at_index(
     at::Tensor& decoder,
     int64_t stream_index,
diff --git a/src/torchcodec/decoders/_core/VideoDecoderOps.h b/src/torchcodec/decoders/_core/VideoDecoderOps.h
@@ -79,7 +79,7 @@ OpsDecodedOutput get_frame_at_pts(at::Tensor& decoder, double seconds);
 OpsBatchDecodedOutput get_frames_at_ptss(
     at::Tensor& decoder,
     int64_t stream_index,
-    at::IntArrayRef frame_ptss,
+    at::ArrayRef<double> frame_ptss,
     bool sort_ptss = false);
 
 // Return the frame that is visible at a given index in the video.
diff --git a/src/torchcodec/decoders/_core/video_decoder_ops.py b/src/torchcodec/decoders/_core/video_decoder_ops.py
@@ -172,12 +172,13 @@ def get_frame_at_pts_abstract(
         torch.empty([], dtype=torch.float),
     )
 
+
 @register_fake("torchcodec_ns::get_frames_at_ptss")
 def get_frames_at_pts_abstract(
     decoder: torch.Tensor,
     *,
     stream_index: int,
-    frame_ptss: List[int],
+    frame_ptss: List[float],
     sort_ptss: bool = False,
 ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
     image_size = [get_ctx().new_dynamic_size() for _ in range(4)]