Add proper error when backward seek is needed

NicolasHug · NicolasHug · commit f3b56f813d9a · 2025-03-08T18:08:47.000Z
diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp
@@ -854,14 +854,25 @@ torch::Tensor VideoDecoder::getFramesPlayedInRangeAudio(
 
   StreamInfo& streamInfo = streamInfos_[activeStreamIndex_];
 
-  auto lastDecodedFrameIsPlayedAtStopSeconds =
-      [this, &streamInfo, stopSeconds]() {
-        auto stopPts = secondsToClosestPts(stopSeconds, streamInfo.timeBase);
-        return (
-            streamInfo.lastDecodedAvFramePts <= stopPts and
-            stopPts <= streamInfo.lastDecodedAvFramePts +
-                    streamInfo.lastDecodedAvFrameDuration);
-      };
+  auto lastDecodedFrameIsPlayedAt = [this, &streamInfo](double seconds) {
+    auto pts = secondsToClosestPts(seconds, streamInfo.timeBase);
+    return (
+        streamInfo.lastDecodedAvFramePts <= pts and
+        pts <= streamInfo.lastDecodedAvFramePts +
+                streamInfo.lastDecodedAvFrameDuration);
+  };
+
+  // TODO-AUDIO This essentially enforces that we don't need to seek
+  // (backwards). We should remove it and seek back to the stream's beginning
+  // when needed. See test_multiple_calls
+  TORCH_CHECK(
+      (streamInfo.lastDecodedAvFramePts == 0 &&
+       streamInfo.lastDecodedAvFrameDuration == 0) ||
+          (streamInfo.lastDecodedAvFramePts +
+               streamInfo.lastDecodedAvFrameDuration <=
+           secondsToClosestPts(startSeconds, streamInfo.timeBase)) ||
+          !lastDecodedFrameIsPlayedAt(startSeconds),
+      "The previous call's stop_seconds is larger than the current calls's start_seconds (roughly)");
 
   setCursorPtsInSeconds(startSeconds);
 
@@ -871,7 +882,7 @@ torch::Tensor VideoDecoder::getFramesPlayedInRangeAudio(
   std::vector<torch::Tensor> tensors;
 
   bool reachedEOF = false;
-  while (!lastDecodedFrameIsPlayedAtStopSeconds() && !reachedEOF) {
+  while (!lastDecodedFrameIsPlayedAt(stopSeconds) && !reachedEOF) {
     try {
       AVFrameStream avFrameStream =
           decodeAVFrame([&streamInfo](AVFrame* avFrame) {
diff --git a/test/decoders/test_ops.py b/test/decoders/test_ops.py
@@ -725,8 +725,13 @@ def test_decode_just_one_frame_at_boundaries(self, asset, expected_shape):
 
     @pytest.mark.parametrize("asset", (NASA_AUDIO, NASA_AUDIO_MP3))
     def test_multiple_calls(self, asset):
+        # Ensure that multiple calls are OK as long as we're decoding
+        # "sequentially", i.e. we don't require a backwards seek.
+        # And ensure a proper error is raised when a backwards seek is needed.
+        # TODO-AUDIO We shouldn't error, we should just implement the seeking
+        # back to the beginning of the stream.
 
-        def decode_stateless(start_seconds, stop_seconds):
+        def get_reference_frames(start_seconds, stop_seconds):
             decoder = create_from_file(str(asset.path), seek_mode="approximate")
             add_audio_stream(decoder)
 
@@ -742,25 +747,56 @@ def decode_stateless(start_seconds, stop_seconds):
             decoder, start_seconds=start_seconds, stop_seconds=stop_seconds
         )
         torch.testing.assert_close(
-            frames, decode_stateless(start_seconds, stop_seconds)
+            frames, get_reference_frames(start_seconds, stop_seconds)
         )
 
         start_seconds, stop_seconds = 3, 4
         frames = get_frames_by_pts_in_range_audio(
             decoder, start_seconds=start_seconds, stop_seconds=stop_seconds
         )
         torch.testing.assert_close(
-            frames, decode_stateless(start_seconds, stop_seconds)
+            frames, get_reference_frames(start_seconds, stop_seconds)
         )
 
-        # TODO-AUDIO
+        # Starting at the frame immediately after the previous one is OK
+        index_of_frame_at_4 = asset.get_frame_index(pts_seconds=4)
+        start_seconds, stop_seconds = (
+            asset.frames[asset.default_stream_index][
+                index_of_frame_at_4 + 1
+            ].pts_seconds,
+            5,
+        )
+        frames = get_frames_by_pts_in_range_audio(
+            decoder, start_seconds=start_seconds, stop_seconds=stop_seconds
+        )
+        torch.testing.assert_close(
+            frames, get_reference_frames(start_seconds, stop_seconds)
+        )
+
+        # But starting immediately on the same frame isn't OK
+        with pytest.raises(
+            RuntimeError,
+            match="The previous call's stop_seconds is larger than the current calls's start_seconds",
+        ):
+            get_frames_by_pts_in_range_audio(
+                decoder, start_seconds=stop_seconds, stop_seconds=6
+            )
+
+        with pytest.raises(
+            RuntimeError,
+            match="The previous call's stop_seconds is larger than the current calls's start_seconds",
+        ):
+            get_frames_by_pts_in_range_audio(
+                decoder, start_seconds=stop_seconds + 1e-4, stop_seconds=6
+            )
+
         start_seconds, stop_seconds = 0, 2
         frames = get_frames_by_pts_in_range_audio(
             decoder, start_seconds=start_seconds, stop_seconds=stop_seconds
         )
         with pytest.raises(AssertionError):
             torch.testing.assert_close(
-                frames, decode_stateless(start_seconds, stop_seconds)
+                frames, get_reference_frames(start_seconds, stop_seconds)
             )
 
 
diff --git a/test/utils.py b/test/utils.py
@@ -73,7 +73,6 @@ def assert_tensor_close_on_at_least(actual_tensor, ref_tensor, *, percentage, at
         )
 
 
-
 def in_fbcode() -> bool:
     return os.environ.get("IN_FBCODE_TORCHCODEC") == "1"
 

Original file line number	Diff line number	Diff line change
`@@ -73,7 +73,6 @@ def assert_tensor_close_on_at_least(actual_tensor, ref_tensor, *, percentage, at`
`73`	`73`	`)`
`74`	`74`
`75`	`75`
`76`		`-`
`77`	`76`	`def in_fbcode() -> bool:`
`78`	`77`	`return os.environ.get("IN_FBCODE_TORCHCODEC") == "1"`
`79`	`78`