Skip to content

Commit c3fe417

Browse files
author
pytorchbot
committed
2025-03-24 nightly release (2e6f0ed)
1 parent 9b34c0b commit c3fe417

File tree

5 files changed

+27
-22
lines changed

5 files changed

+27
-22
lines changed

examples/audio_decoding.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ def play_audio(samples):
6262
# :meth:`~torchcodec.decoders.AudioDecoder.get_samples_played_in_range` method,
6363
# which returns an :class:`~torchcodec.AudioSamples` object:
6464

65-
samples = decoder.get_samples_played_in_range(start_seconds=0)
65+
samples = decoder.get_samples_played_in_range()
6666

6767
print(samples)
6868
play_audio(samples)
@@ -76,12 +76,6 @@ def play_audio(samples):
7676
# all streams start exactly at 0! This is not a bug in TorchCodec, this is a
7777
# property of the file that was defined when it was encoded.
7878
#
79-
# We only output the *start* of the samples, not the end or the duration. Those can
80-
# be easily derived from the number of samples and the sample rate:
81-
82-
duration_seconds = samples.data.shape[1] / samples.sample_rate
83-
print(f"Duration = {int(duration_seconds // 60)}m{int(duration_seconds % 60)}s.")
84-
8579
# %%
8680
# Specifying a range
8781
# ------------------

src/torchcodec/_frame.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ class AudioSamples(Iterable):
124124
"""The sample data (``torch.Tensor`` of float in [-1, 1], shape is ``(num_channels, num_samples)``)."""
125125
pts_seconds: float
126126
"""The :term:`pts` of the first sample, in seconds."""
127+
duration_seconds: float
128+
"""The duration of the samples, in seconds."""
127129
sample_rate: int
128130
"""The sample rate of the samples, in Hz."""
129131

src/torchcodec/decoders/_audio_decoder.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,17 +70,16 @@ def __init__(
7070
sample_rate if sample_rate is not None else self.metadata.sample_rate
7171
)
7272

73-
# TODO-AUDIO: start_seconds should be 0 by default
7473
def get_samples_played_in_range(
75-
self, start_seconds: float, stop_seconds: Optional[float] = None
74+
self, start_seconds: float = 0.0, stop_seconds: Optional[float] = None
7675
) -> AudioSamples:
7776
"""Returns audio samples in the given range.
7877
7978
Samples are in the half open range [start_seconds, stop_seconds).
8079
8180
Args:
8281
start_seconds (float): Time, in seconds, of the start of the
83-
range.
82+
range. Default: 0.
8483
stop_seconds (float): Time, in seconds, of the end of the
8584
range. As a half open range, the end is excluded.
8685
@@ -139,8 +138,10 @@ def get_samples_played_in_range(
139138
else:
140139
offset_end = num_samples
141140

141+
data = frames[:, offset_beginning:offset_end]
142142
return AudioSamples(
143-
data=frames[:, offset_beginning:offset_end],
143+
data=data,
144144
pts_seconds=output_pts_seconds,
145+
duration_seconds=data.shape[1] / sample_rate,
145146
sample_rate=sample_rate,
146147
)

test/decoders/test_decoders.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -983,17 +983,14 @@ def test_get_all_samples(self, asset, stop_seconds):
983983
if stop_seconds == "duration":
984984
stop_seconds = asset.duration_seconds
985985

986-
samples = decoder.get_samples_played_in_range(
987-
start_seconds=0, stop_seconds=stop_seconds
988-
)
986+
samples = decoder.get_samples_played_in_range(stop_seconds=stop_seconds)
989987

990988
reference_frames = asset.get_frame_data_by_range(
991989
start=0, stop=asset.get_frame_index(pts_seconds=asset.duration_seconds) + 1
992990
)
993991

994992
torch.testing.assert_close(samples.data, reference_frames)
995993
assert samples.sample_rate == asset.sample_rate
996-
997994
assert samples.pts_seconds == asset.get_frame_info(idx=0).pts_seconds
998995

999996
@pytest.mark.parametrize("asset", (NASA_AUDIO, NASA_AUDIO_MP3))
@@ -1079,15 +1076,15 @@ def test_single_channel(self):
10791076
asset = SINE_MONO_S32
10801077
decoder = AudioDecoder(asset.path)
10811078

1082-
samples = decoder.get_samples_played_in_range(start_seconds=0, stop_seconds=2)
1079+
samples = decoder.get_samples_played_in_range(stop_seconds=2)
10831080
assert samples.data.shape[0] == asset.num_channels == 1
10841081

10851082
def test_format_conversion(self):
10861083
asset = SINE_MONO_S32
10871084
decoder = AudioDecoder(asset.path)
10881085
assert decoder.metadata.sample_format == asset.sample_format == "s32"
10891086

1090-
all_samples = decoder.get_samples_played_in_range(start_seconds=0)
1087+
all_samples = decoder.get_samples_played_in_range()
10911088
assert all_samples.data.dtype == torch.float32
10921089

10931090
reference_frames = asset.get_frame_data_by_range(start=0, stop=asset.num_frames)
@@ -1164,7 +1161,7 @@ def test_sample_rate_conversion_stereo(self):
11641161
assert asset.sample_rate == 8000
11651162
assert asset.num_channels == 2
11661163
decoder = AudioDecoder(asset.path, sample_rate=44_100)
1167-
decoder.get_samples_played_in_range(start_seconds=0)
1164+
decoder.get_samples_played_in_range()
11681165

11691166
def test_downsample_empty_frame(self):
11701167
# Non-regression test for
@@ -1184,13 +1181,13 @@ def test_downsample_empty_frame(self):
11841181
asset = NASA_AUDIO_MP3_44100
11851182
assert asset.sample_rate == 44_100
11861183
decoder = AudioDecoder(asset.path, sample_rate=8_000)
1187-
frames_44100_to_8000 = decoder.get_samples_played_in_range(start_seconds=0)
1184+
frames_44100_to_8000 = decoder.get_samples_played_in_range()
11881185

11891186
# Just checking correctness now
11901187
asset = NASA_AUDIO_MP3
11911188
assert asset.sample_rate == 8_000
11921189
decoder = AudioDecoder(asset.path)
1193-
frames_8000 = decoder.get_samples_played_in_range(start_seconds=0)
1190+
frames_8000 = decoder.get_samples_played_in_range()
11941191
torch.testing.assert_close(
11951192
frames_44100_to_8000.data, frames_8000.data, atol=0.03, rtol=0
11961193
)
@@ -1214,4 +1211,11 @@ def test_s16_ffmpeg4_bug(self):
12141211
else contextlib.nullcontext()
12151212
)
12161213
with cm:
1217-
decoder.get_samples_played_in_range(start_seconds=0)
1214+
decoder.get_samples_played_in_range()
1215+
1216+
@pytest.mark.parametrize("asset", (NASA_AUDIO, NASA_AUDIO_MP3))
1217+
@pytest.mark.parametrize("sample_rate", (None, 8000, 16_000, 44_1000))
1218+
def test_samples_duration(self, asset, sample_rate):
1219+
decoder = AudioDecoder(asset.path, sample_rate=sample_rate)
1220+
samples = decoder.get_samples_played_in_range(start_seconds=1, stop_seconds=2)
1221+
assert samples.duration_seconds == 1

test/test_frame_dataclasses.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55

66
def test_unpacking():
77
data, pts_seconds, duration_seconds = Frame(torch.rand(3, 4, 5), 2, 3) # noqa
8-
data, pts_seconds, sample_rate = AudioSamples(torch.rand(2, 4), 2, 16_000)
8+
data, pts_seconds, duration_seconds, sample_rate = AudioSamples(
9+
torch.rand(2, 4), 2, 3, 16_000
10+
)
911

1012

1113
def test_frame_error():
@@ -147,11 +149,13 @@ def test_audio_samples_error():
147149
AudioSamples(
148150
data=torch.rand(1),
149151
pts_seconds=1,
152+
duration_seconds=1,
150153
sample_rate=16_000,
151154
)
152155
with pytest.raises(ValueError, match="data must be 2-dimensional"):
153156
AudioSamples(
154157
data=torch.rand(1, 2, 3),
155158
pts_seconds=1,
159+
duration_seconds=1,
156160
sample_rate=16_000,
157161
)

0 commit comments

Comments
 (0)