Add support for None stop_seconds

NicolasHug · NicolasHug · commit fe04cd2eacb2 · 2025-03-08T16:31:54.000Z
diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp
@@ -8,6 +8,7 @@
 #include <cstdint>
 #include <cstdio>
 #include <iostream>
+#include <limits>
 #include <sstream>
 #include <stdexcept>
 #include <string_view>
@@ -840,7 +841,9 @@ VideoDecoder::FrameBatchOutput VideoDecoder::getFramesPlayedInRange(
 
 torch::Tensor VideoDecoder::getFramesPlayedInRangeAudio(
     double startSeconds,
-    double stopSeconds) {
+    std::optional<double> _stopSeconds) {
+  auto stopSeconds = _stopSeconds.value_or(std::numeric_limits<double>::max());
+
   TORCH_CHECK(
       startSeconds <= stopSeconds,
       "Start seconds (" + std::to_string(startSeconds) +
diff --git a/src/torchcodec/decoders/_core/VideoDecoder.h b/src/torchcodec/decoders/_core/VideoDecoder.h
@@ -223,7 +223,7 @@ class VideoDecoder {
 
   torch::Tensor getFramesPlayedInRangeAudio(
       double startSeconds,
-      double stopSeconds);
+      std::optional<double> _stopSeconds = std::nullopt);
 
   class EndOfFileException : public std::runtime_error {
    public:
diff --git a/src/torchcodec/decoders/_core/VideoDecoderOps.cpp b/src/torchcodec/decoders/_core/VideoDecoderOps.cpp
@@ -25,8 +25,7 @@ namespace facebook::torchcodec {
 //   https://github.com/pytorch/pytorch/tree/main/aten/src/ATen/native#readme
 TORCH_LIBRARY(torchcodec_ns, m) {
   m.impl_abstract_pystub(
-      "torchcodec.decoders._core.video_decoder_ops",
-      "//pytorch/torchcodec:torchcodec");
+      "torchcodec.decoders._core.ops", "//pytorch/torchcodec:torchcodec");
   m.def("create_from_file(str filename, str? seek_mode=None) -> Tensor");
   m.def(
       "create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor");
@@ -49,7 +48,7 @@ TORCH_LIBRARY(torchcodec_ns, m) {
   m.def(
       "get_frames_by_pts_in_range(Tensor(a!) decoder, *, float start_seconds, float stop_seconds) -> (Tensor, Tensor, Tensor)");
   m.def(
-      "get_frames_by_pts_in_range_audio(Tensor(a!) decoder, *, float start_seconds, float stop_seconds) -> Tensor");
+      "get_frames_by_pts_in_range_audio(Tensor(a!) decoder, *, float start_seconds, float? stop_seconds) -> Tensor");
   m.def(
       "get_frames_by_pts(Tensor(a!) decoder, *, float[] timestamps) -> (Tensor, Tensor, Tensor)");
   m.def("_get_key_frame_indices(Tensor(a!) decoder) -> Tensor");
@@ -308,7 +307,7 @@ OpsFrameBatchOutput get_frames_by_pts_in_range(
 torch::Tensor get_frames_by_pts_in_range_audio(
     at::Tensor& decoder,
     double start_seconds,
-    double stop_seconds) {
+    std::optional<double> stop_seconds) {
   auto videoDecoder = unwrapTensorToGetDecoder(decoder);
   return videoDecoder->getFramesPlayedInRangeAudio(start_seconds, stop_seconds);
 }
diff --git a/src/torchcodec/decoders/_core/VideoDecoderOps.h b/src/torchcodec/decoders/_core/VideoDecoderOps.h
@@ -122,7 +122,7 @@ OpsFrameBatchOutput get_frames_by_pts_in_range(
 torch::Tensor get_frames_by_pts_in_range_audio(
     at::Tensor& decoder,
     double start_seconds,
-    double stop_seconds);
+    std::optional<double> stop_seconds = std::nullopt);
 
 // For testing only. We need to implement this operation as a core library
 // function because what we're testing is round-tripping pts values as
diff --git a/src/torchcodec/decoders/_core/__init__.py b/src/torchcodec/decoders/_core/__init__.py
@@ -12,7 +12,7 @@
     get_container_metadata_from_header,
     VideoStreamMetadata,
 )
-from .video_decoder_ops import (
+from .ops import (
     _add_video_stream,
     _get_key_frame_indices,
     _test_frame_pts_equality,
diff --git a/src/torchcodec/decoders/_core/_metadata.py b/src/torchcodec/decoders/_core/_metadata.py
@@ -12,7 +12,7 @@
 
 import torch
 
-from torchcodec.decoders._core.video_decoder_ops import (
+from torchcodec.decoders._core.ops import (
     _get_container_json_metadata,
     _get_stream_json_metadata,
     create_from_file,
diff --git a/src/torchcodec/decoders/_core/ops.py b/src/torchcodec/decoders/_core/ops.py
@@ -270,7 +270,7 @@ def get_frames_by_pts_in_range_audio_abstract(
     decoder: torch.Tensor,
     *,
     start_seconds: float,
-    stop_seconds: float,
+    stop_seconds: Optional[float] = None,
 ) -> torch.Tensor:
     image_size = [get_ctx().new_dynamic_size() for _ in range(4)]
     return torch.empty(image_size)
diff --git a/test/decoders/test_ops.py b/test/decoders/test_ops.py
@@ -646,6 +646,7 @@ def test_audio_bad_seek_mode(self):
         "range",
         (
             "begin_to_end",
+            "begin_to_None",
             "begin_to_beyond_end",
             "at_frame_boundaries",
             "not_at_frame_boundaries",
@@ -655,6 +656,8 @@ def test_audio_bad_seek_mode(self):
     def test_get_frames_by_pts_in_range_audio(self, range, asset):
         if range == "begin_to_end":
             start_seconds, stop_seconds = 0, asset.duration_seconds
+        elif range == "begin_to_None":
+            start_seconds, stop_seconds = 0, None
         elif range == "begin_to_beyond_end":
             start_seconds, stop_seconds = 0, asset.duration_seconds + 10
         elif range == "at_frame_boundaries":
@@ -671,18 +674,23 @@ def test_get_frames_by_pts_in_range_audio(self, range, asset):
                 stop_frame_info.duration_seconds / 2
             )
 
-        decoder = create_from_file(str(asset.path), seek_mode="approximate")
-        add_audio_stream(decoder)
-
-        # stop_offset logic: if stop_seconds is at a frame boundary i.e. when a
-        # frame starts, then that frame should *not* be included in the output.
-        # Otherwise, it should be part of it, hence why we add 1 to `stop=`.
-        stop_offset = 0 if range == "at_frame_boundaries" else 1
+        ref_start_index = asset.get_frame_index(pts_seconds=start_seconds)
+        if range == "begin_to_None":
+            ref_stop_index = (
+                asset.get_frame_index(pts_seconds=asset.duration_seconds) + 1
+            )
+        elif range == "at_frame_boundaries":
+            ref_stop_index = asset.get_frame_index(pts_seconds=stop_seconds)
+        else:
+            ref_stop_index = asset.get_frame_index(pts_seconds=stop_seconds) + 1
         reference_frames = asset.get_frame_data_by_range(
-            start=asset.get_frame_index(pts_seconds=start_seconds),
-            stop=asset.get_frame_index(pts_seconds=stop_seconds) + stop_offset,
+            start=ref_start_index,
+            stop=ref_stop_index,
         )
 
+        decoder = create_from_file(str(asset.path), seek_mode="approximate")
+        add_audio_stream(decoder)
+
         frames = get_frames_by_pts_in_range_audio(
             decoder, start_seconds=start_seconds, stop_seconds=stop_seconds
         )

Original file line number	Diff line number	Diff line change
`@@ -12,7 +12,7 @@`
`12`	`12`	`get_container_metadata_from_header,`
`13`	`13`	`VideoStreamMetadata,`
`14`	`14`	`)`
`15`		`-from .video_decoder_ops import (`
	`15`	`+from .ops import (`
`16`	`16`	`_add_video_stream,`
`17`	`17`	`_get_key_frame_indices,`
`18`	`18`	`_test_frame_pts_equality,`