Cleanups

NicolasHug · NicolasHug · commit 3881586bea49 · 2025-03-07T18:03:00.000Z
diff --git a/test/decoders/test_ops.py b/test/decoders/test_ops.py
@@ -51,7 +51,7 @@
 INDEX_OF_FRAME_AT_6_SECONDS = 180
 
 
-class TestOps:
+class TestVideoOps:
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_seek_and_next(self, device):
         decoder = create_from_file(str(NASA_VIDEO.path))
@@ -616,6 +616,8 @@ def test_cuda_decoder(self):
             duration, torch.tensor(0.0334).double(), atol=0, rtol=1e-3
         )
 
+
+class TestAudioOps:
     @pytest.mark.parametrize(
         "method",
         (
@@ -664,19 +666,15 @@ def test_audio_decode_all_samples_with_next(self, asset):
     @pytest.mark.parametrize(
         "range", ("begin_to_end", "at_frame_boundaries", "not_at_frame_boundaries")
     )
-    # @pytest.mark.parametrize("asset", (NASA_AUDIO, NASA_AUDIO_MP3))
-    @pytest.mark.parametrize("asset", (NASA_AUDIO,))
-    def test_audio_get_frames_by_pts_in_range_audio(self, range, asset):
+    @pytest.mark.parametrize("asset", (NASA_AUDIO, NASA_AUDIO_MP3))
+    def test_get_frames_by_pts_in_range_audio(self, range, asset):
         if range == "begin_to_end":
             start_seconds, stop_seconds = 0, asset.duration_seconds
         elif range == "at_frame_boundaries":
             start_seconds = asset.frames[asset.default_stream_index][10].pts_seconds
-            # need -1e-5 because the upper bound in open. If we don't do this
-            # then our test util returns one frame too much.
-            stop_seconds = (
-                asset.frames[asset.default_stream_index][40].pts_seconds - 1e-5
-            )
+            stop_seconds = asset.frames[asset.default_stream_index][40].pts_seconds
         else:
+            assert range == "not_at_frame_boundaries"
             start_frame_info = asset.frames[asset.default_stream_index][10]
             stop_frame_info = asset.frames[asset.default_stream_index][40]
             start_seconds = start_frame_info.pts_seconds + (
@@ -689,20 +687,32 @@ def test_audio_get_frames_by_pts_in_range_audio(self, range, asset):
         decoder = create_from_file(str(asset.path), seek_mode="approximate")
         add_audio_stream(decoder)
 
+        stop_offset = 0 if range == "at_frame_boundaries" else 1
         reference_frames = asset.get_frame_data_by_range(
             start=asset.get_frame_index(pts_seconds=start_seconds),
-            stop=asset.get_frame_index(pts_seconds=stop_seconds) + 1,
+            stop=asset.get_frame_index(pts_seconds=stop_seconds) + stop_offset,
         )
-        reference_frames = torch.cat(reference_frames.unbind(), dim=-1)
 
         frames = get_frames_by_pts_in_range_audio(
             decoder, start_seconds=start_seconds, stop_seconds=stop_seconds
         )
 
         assert_frames_equal(frames, reference_frames)
 
+    @pytest.mark.parametrize(
+        "asset, expected_shape", ((NASA_AUDIO, (2, 1024)), (NASA_AUDIO_MP3, (2, 576)))
+    )
+    def test_decode_epsilon_range(self, asset, expected_shape):
+        decoder = create_from_file(str(asset.path), seek_mode="approximate")
+        add_audio_stream(decoder)
+
+        frames = get_frames_by_pts_in_range_audio(
+            decoder, start_seconds=5, stop_seconds=5 + 1e-5
+        )
+        assert frames.shape == expected_shape
+
     @pytest.mark.parametrize("asset", (NASA_AUDIO, NASA_AUDIO_MP3))
-    def test_audio_seek_and_next(self, asset):
+    def test_seek_and_next_audio(self, asset):
         decoder = create_from_file(str(asset.path), seek_mode="approximate")
         add_audio_stream(decoder)
 
diff --git a/test/utils.py b/test/utils.py
@@ -190,11 +190,7 @@ def get_frame_data_by_range(
         *,
         stream_index: Optional[int] = None,
     ) -> torch.Tensor:
-        tensors = [
-            self.get_frame_data_by_index(i, stream_index=stream_index)
-            for i in range(start, stop, step)
-        ]
-        return torch.stack(tensors)
+        raise NotImplementedError("Override in child classes")
 
     def get_pts_seconds_by_range(
         self,
@@ -261,6 +257,20 @@ def get_frame_data_by_index(
         )
         return torch.load(file_path, weights_only=True).permute(2, 0, 1)
 
+    def get_frame_data_by_range(
+        self,
+        start: int,
+        stop: int,
+        step: int = 1,
+        *,
+        stream_index: Optional[int] = None,
+    ) -> torch.Tensor:
+        tensors = [
+            self.get_frame_data_by_index(i, stream_index=stream_index)
+            for i in range(start, stop, step)
+        ]
+        return torch.stack(tensors)
+
     @property
     def width(self) -> int:
         return self.stream_infos[self.default_stream_index].width
@@ -327,6 +337,7 @@ class TestAudio(TestContainerFile):
 
     stream_infos: Dict[int, TestAudioStreamInfo]
     # stream_index -> list of 2D frame tensors of shape (num_channels, num_samples_in_that_frame)
+    # num_samples_in_that_frame isn't necessarily constant for a given stream.
     _reference_frames: Dict[int, List[torch.Tensor]] = field(default_factory=dict)
 
     # Storing each individual frame is too expensive for audio, because there's
@@ -354,19 +365,40 @@ def get_frame_data_by_index(
 
         return self._reference_frames[stream_index][idx]
 
+    def get_frame_data_by_range(
+        self,
+        start: int,
+        stop: int,
+        step: int = 1,
+        *,
+        stream_index: Optional[int] = None,
+    ) -> torch.Tensor:
+        tensors = [
+            self.get_frame_data_by_index(i, stream_index=stream_index)
+            for i in range(start, stop, step)
+        ]
+        return torch.cat(tensors, dim=-1)
+
     def get_frame_index(
         self, *, pts_seconds: float, stream_index: Optional[int] = None
     ) -> int:
         if stream_index is None:
             stream_index = self.default_stream_index
-        out = next(
-            frame_index
-            for (frame_index, frame_info) in self.frames[stream_index].items()
-            if frame_info.pts_seconds
-            <= pts_seconds
-            < frame_info.pts_seconds + frame_info.duration_seconds
-        )
-        return out
+
+        if pts_seconds <= self.frames[stream_index][0].pts_seconds:
+            # Special case for assets such as NASA_AUDIO_MP3 whose first frame's
+            # pts is ~0.13 rather than 0: earlier timestamps map to frame 0.
+            return 0
+        try:
+            return next(
+                frame_index
+                for (frame_index, frame_info) in self.frames[stream_index].items()
+                if frame_info.pts_seconds
+                <= pts_seconds
+                < frame_info.pts_seconds + frame_info.duration_seconds
+            )
+        except StopIteration:
+            return len(self.frames[stream_index]) - 1
 
     @property
     def sample_rate(self) -> int: