2025-03-24 nightly release (2e6f0ed)

pytorchbot · pytorchbot · commit c3fe417cd3cf · 2025-03-24T11:35:27.000Z
diff --git a/examples/audio_decoding.py b/examples/audio_decoding.py
@@ -62,7 +62,7 @@ def play_audio(samples):
 # :meth:`~torchcodec.decoders.AudioDecoder.get_samples_played_in_range` method,
 # which returns an :class:`~torchcodec.AudioSamples` object:
 
-samples = decoder.get_samples_played_in_range(start_seconds=0)
+samples = decoder.get_samples_played_in_range()
 
 print(samples)
 play_audio(samples)
@@ -76,12 +76,6 @@ def play_audio(samples):
 # all streams start exactly at 0! This is not a bug in TorchCodec, this is a
 # property of the file that was defined when it was encoded.
 #
-# We only output the *start* of the samples, not the end or the duration. Those can
-# be easily derived from the number of samples and the sample rate:
-
-duration_seconds = samples.data.shape[1] / samples.sample_rate
-print(f"Duration = {int(duration_seconds // 60)}m{int(duration_seconds % 60)}s.")
-
 # %%
 # Specifying a range
 # ------------------
diff --git a/src/torchcodec/_frame.py b/src/torchcodec/_frame.py
@@ -124,6 +124,8 @@ class AudioSamples(Iterable):
     """The sample data (``torch.Tensor`` of float in [-1, 1], shape is ``(num_channels, num_samples)``)."""
     pts_seconds: float
     """The :term:`pts` of the first sample, in seconds."""
+    duration_seconds: float
+    """The duration of the samples, in seconds."""
     sample_rate: int
     """The sample rate of the samples, in Hz."""
 
diff --git a/src/torchcodec/decoders/_audio_decoder.py b/src/torchcodec/decoders/_audio_decoder.py
@@ -70,17 +70,16 @@ def __init__(
             sample_rate if sample_rate is not None else self.metadata.sample_rate
         )
 
-    # TODO-AUDIO: start_seconds should be 0 by default
     def get_samples_played_in_range(
-        self, start_seconds: float, stop_seconds: Optional[float] = None
+        self, start_seconds: float = 0.0, stop_seconds: Optional[float] = None
     ) -> AudioSamples:
         """Returns audio samples in the given range.
 
         Samples are in the half open range [start_seconds, stop_seconds).
 
         Args:
             start_seconds (float): Time, in seconds, of the start of the
-                range.
+                range. Default: 0.
             stop_seconds (float): Time, in seconds, of the end of the
                 range. As a half open range, the end is excluded.
 
@@ -139,8 +138,10 @@ def get_samples_played_in_range(
         else:
             offset_end = num_samples
 
+        data = frames[:, offset_beginning:offset_end]
         return AudioSamples(
-            data=frames[:, offset_beginning:offset_end],
+            data=data,
             pts_seconds=output_pts_seconds,
+            duration_seconds=data.shape[1] / sample_rate,
             sample_rate=sample_rate,
         )
diff --git a/test/decoders/test_decoders.py b/test/decoders/test_decoders.py
@@ -983,17 +983,14 @@ def test_get_all_samples(self, asset, stop_seconds):
         if stop_seconds == "duration":
             stop_seconds = asset.duration_seconds
 
-        samples = decoder.get_samples_played_in_range(
-            start_seconds=0, stop_seconds=stop_seconds
-        )
+        samples = decoder.get_samples_played_in_range(stop_seconds=stop_seconds)
 
         reference_frames = asset.get_frame_data_by_range(
             start=0, stop=asset.get_frame_index(pts_seconds=asset.duration_seconds) + 1
         )
 
         torch.testing.assert_close(samples.data, reference_frames)
         assert samples.sample_rate == asset.sample_rate
-
         assert samples.pts_seconds == asset.get_frame_info(idx=0).pts_seconds
 
     @pytest.mark.parametrize("asset", (NASA_AUDIO, NASA_AUDIO_MP3))
@@ -1079,15 +1076,15 @@ def test_single_channel(self):
         asset = SINE_MONO_S32
         decoder = AudioDecoder(asset.path)
 
-        samples = decoder.get_samples_played_in_range(start_seconds=0, stop_seconds=2)
+        samples = decoder.get_samples_played_in_range(stop_seconds=2)
         assert samples.data.shape[0] == asset.num_channels == 1
 
     def test_format_conversion(self):
         asset = SINE_MONO_S32
         decoder = AudioDecoder(asset.path)
         assert decoder.metadata.sample_format == asset.sample_format == "s32"
 
-        all_samples = decoder.get_samples_played_in_range(start_seconds=0)
+        all_samples = decoder.get_samples_played_in_range()
         assert all_samples.data.dtype == torch.float32
 
         reference_frames = asset.get_frame_data_by_range(start=0, stop=asset.num_frames)
@@ -1164,7 +1161,7 @@ def test_sample_rate_conversion_stereo(self):
         assert asset.sample_rate == 8000
         assert asset.num_channels == 2
         decoder = AudioDecoder(asset.path, sample_rate=44_100)
-        decoder.get_samples_played_in_range(start_seconds=0)
+        decoder.get_samples_played_in_range()
 
     def test_downsample_empty_frame(self):
         # Non-regression test for
@@ -1184,13 +1181,13 @@ def test_downsample_empty_frame(self):
         asset = NASA_AUDIO_MP3_44100
         assert asset.sample_rate == 44_100
         decoder = AudioDecoder(asset.path, sample_rate=8_000)
-        frames_44100_to_8000 = decoder.get_samples_played_in_range(start_seconds=0)
+        frames_44100_to_8000 = decoder.get_samples_played_in_range()
 
         # Just checking correctness now
         asset = NASA_AUDIO_MP3
         assert asset.sample_rate == 8_000
         decoder = AudioDecoder(asset.path)
-        frames_8000 = decoder.get_samples_played_in_range(start_seconds=0)
+        frames_8000 = decoder.get_samples_played_in_range()
         torch.testing.assert_close(
             frames_44100_to_8000.data, frames_8000.data, atol=0.03, rtol=0
         )
@@ -1214,4 +1211,11 @@ def test_s16_ffmpeg4_bug(self):
             else contextlib.nullcontext()
         )
         with cm:
-            decoder.get_samples_played_in_range(start_seconds=0)
+            decoder.get_samples_played_in_range()
+
+    @pytest.mark.parametrize("asset", (NASA_AUDIO, NASA_AUDIO_MP3))
+    @pytest.mark.parametrize("sample_rate", (None, 8000, 16_000, 44_1000))
+    def test_samples_duration(self, asset, sample_rate):
+        decoder = AudioDecoder(asset.path, sample_rate=sample_rate)
+        samples = decoder.get_samples_played_in_range(start_seconds=1, stop_seconds=2)
+        assert samples.duration_seconds == 1
diff --git a/test/test_frame_dataclasses.py b/test/test_frame_dataclasses.py
@@ -5,7 +5,9 @@
 
 def test_unpacking():
     data, pts_seconds, duration_seconds = Frame(torch.rand(3, 4, 5), 2, 3)  # noqa
-    data, pts_seconds, sample_rate = AudioSamples(torch.rand(2, 4), 2, 16_000)
+    data, pts_seconds, duration_seconds, sample_rate = AudioSamples(
+        torch.rand(2, 4), 2, 3, 16_000
+    )
 
 
 def test_frame_error():
@@ -147,11 +149,13 @@ def test_audio_samples_error():
         AudioSamples(
             data=torch.rand(1),
             pts_seconds=1,
+            duration_seconds=1,
             sample_rate=16_000,
         )
     with pytest.raises(ValueError, match="data must be 2-dimensional"):
         AudioSamples(
             data=torch.rand(1, 2, 3),
             pts_seconds=1,
+            duration_seconds=1,
             sample_rate=16_000,
         )