Audio: support backward seeks by always seeking to the stream start before decoding

NicolasHug · NicolasHug · commit fec8a702a437 · 2025-03-12T14:32:01.000Z
diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp
@@ -850,7 +850,7 @@ torch::Tensor VideoDecoder::getFramesPlayedInRangeAudio(
       startSeconds <= stopSeconds,
       "Start seconds (" + std::to_string(startSeconds) +
           ") must be less than or equal to stop seconds (" +
-          std::to_string(stopSeconds) + ".");
+          std::to_string(stopSeconds) + ").");
 
   if (startSeconds == stopSeconds) {
     // For consistency with video
@@ -859,29 +859,29 @@ torch::Tensor VideoDecoder::getFramesPlayedInRangeAudio(
 
   StreamInfo& streamInfo = streamInfos_[activeStreamIndex_];
 
-  // TODO-AUDIO This essentially enforce that we don't need to seek (backwards).
-  // We should remove it and seek back to the stream's beginning when needed.
-  // See test_multiple_calls
-  TORCH_CHECK(
-      streamInfo.lastDecodedAvFramePts +
-              streamInfo.lastDecodedAvFrameDuration <=
-          secondsToClosestPts(startSeconds, streamInfo.timeBase),
-      "Audio decoder cannot seek backwards, or start from the last decoded frame.");
+  //   TORCH_CHECK(
+  //       streamInfo.lastDecodedAvFramePts +
+  //               streamInfo.lastDecodedAvFrameDuration <=
+  //           secondsToClosestPts(startSeconds, streamInfo.timeBase),
+  //       "Audio decoder cannot seek backwards, or start from the last decoded
+  //       frame.");
 
-  setCursorPtsInSeconds(startSeconds);
+  setCursorPtsInSeconds(INT64_MIN);
 
   // TODO-AUDIO Pre-allocate a long-enough tensor instead of creating a vec +
   // cat(). This would save a copy. We know the duration of the output and the
   // sample rate, so in theory we know the number of output samples.
   std::vector<torch::Tensor> tensors;
 
+  auto startPts = secondsToClosestPts(startSeconds, streamInfo.timeBase);
   auto stopPts = secondsToClosestPts(stopSeconds, streamInfo.timeBase);
   auto finished = false;
   while (!finished) {
     try {
-      AVFrameStream avFrameStream = decodeAVFrame([this](AVFrame* avFrame) {
-        return cursor_ < avFrame->pts + getDuration(avFrame);
-      });
+      AVFrameStream avFrameStream =
+          decodeAVFrame([this, startPts](AVFrame* avFrame) {
+            return startPts < avFrame->pts + getDuration(avFrame);
+          });
       auto frameOutput = convertAVFrameToFrameOutput(avFrameStream);
       tensors.push_back(frameOutput.data);
     } catch (const EndOfFileException& e) {
@@ -938,7 +938,7 @@ I    P     P    P    I    P    P    P    I    P    P    I    P    P    I    P
 bool VideoDecoder::canWeAvoidSeeking() const {
   const StreamInfo& streamInfo = streamInfos_.at(activeStreamIndex_);
   if (streamInfo.avMediaType == AVMEDIA_TYPE_AUDIO) {
-    return true;
+    return false;
   }
   int64_t lastDecodedAvFramePts =
       streamInfos_.at(activeStreamIndex_).lastDecodedAvFramePts;
diff --git a/test/decoders/test_ops.py b/test/decoders/test_ops.py
@@ -741,11 +741,9 @@ def test_decode_start_equal_stop(self, asset):
 
     @pytest.mark.parametrize("asset", (NASA_AUDIO, NASA_AUDIO_MP3))
     def test_multiple_calls(self, asset):
-        # Ensure that multiple calls are OK as long as we're decoding
-        # "sequentially", i.e. we don't require a backwards seek.
-        # And ensure a proper error is raised in such case.
-        # TODO-AUDIO We shouldn't error, we should just implement the seeking
-        # back to the beginning of the stream.
+        # Ensure that multiple calls to get_frames_by_pts_in_range_audio on the
+        # same decoder are supported, whether it involves forward seeks or
+        # backwards seeks.
 
         def get_reference_frames(start_seconds, stop_seconds):
             # This stateless helper exists for convenience, to avoid
@@ -794,23 +792,22 @@ def get_reference_frames(start_seconds, stop_seconds):
             frames, get_reference_frames(start_seconds, stop_seconds)
         )
 
-        # but starting immediately on the same frame raises
-        expected_match = "Audio decoder cannot seek backwards"
-        with pytest.raises(RuntimeError, match=expected_match):
-            get_frames_by_pts_in_range_audio(
-                decoder, start_seconds=stop_seconds, stop_seconds=6
-            )
+        # starting immediately on the same frame is OK
+        frames = get_frames_by_pts_in_range_audio(
+            decoder, start_seconds=stop_seconds, stop_seconds=6
+        )
+        torch.testing.assert_close(frames, get_reference_frames(stop_seconds, 6))
 
-        with pytest.raises(RuntimeError, match=expected_match):
-            get_frames_by_pts_in_range_audio(
-                decoder, start_seconds=stop_seconds + 1e-4, stop_seconds=6
-            )
+        get_frames_by_pts_in_range_audio(
+            decoder, start_seconds=stop_seconds + 1e-4, stop_seconds=6
+        )
+        torch.testing.assert_close(frames, get_reference_frames(stop_seconds, 6))
 
-        # and seeking backwards doesn't work either
-        with pytest.raises(RuntimeError, match=expected_match):
-            frames = get_frames_by_pts_in_range_audio(
-                decoder, start_seconds=0, stop_seconds=2
-            )
+        # seeking backwards
+        frames = get_frames_by_pts_in_range_audio(
+            decoder, start_seconds=0, stop_seconds=2
+        )
+        torch.testing.assert_close(frames, get_reference_frames(0, 2))
 
 
 if __name__ == "__main__":