99
1010from torch import Tensor
1111
12+ from torchcodec import AudioSamples
1213from torchcodec .decoders import _core as core
1314from torchcodec .decoders ._decoder_utils import (
1415 create_decoder ,
@@ -39,7 +40,7 @@ def __init__(
3940 )
4041
4142 def get_samples_played_in_range (
42- self , start_seconds : float = 0 , stop_seconds : Optional [float ] = None
43+ self , start_seconds : float , stop_seconds : Optional [float ] = None
4344 ) -> Tensor :
4445 """TODO-AUDIO docs"""
4546 if stop_seconds is not None and not start_seconds <= stop_seconds :
@@ -63,26 +64,37 @@ def get_samples_played_in_range(
6364 #
6465 # first_pts last_pts
6566 # v v
66- # ....x..........x..........x...........x..........x..........x..........x.....
67+ # ....x..........x..........x...........x..........x..........x.....
6768 # ^ ^
6869 # start_seconds stop_seconds
6970 #
7071 # We want to return the samples in [start_seconds, stop_seconds). But
7172 # because the core API is based on frames, the `frames` tensor contains
7273 # the samples in [first_pts, last_pts)
73- #
7474 # So we do some basic math to figure out the position of the view that
75- # we'l; return.
75+ # we'll return.
7676
77- offset_beginning = round (
78- (max (0 , start_seconds - first_pts )) * self .metadata .sample_rate
79- )
77+ # TODO: sample_rate is either the original one from metadata, or the
78+ # user-specified one (NIY)
79+ sample_rate = self .metadata .sample_rate
80+
81+ if first_pts < start_seconds :
82+ offset_beginning = round ((start_seconds - first_pts ) * sample_rate )
83+ output_pts_seconds = start_seconds
84+ else :
85+ offset_beginning = 0
86+ output_pts_seconds = first_pts
8087
8188 num_samples = frames .shape [1 ]
82- offset_end = num_samples
8389 last_pts = first_pts + num_samples / self .metadata .sample_rate
8490 if stop_seconds is not None and stop_seconds < last_pts :
85- offset_end -= round ((last_pts - stop_seconds ) * self .metadata .sample_rate )
91+ offset_end = num_samples - round ((last_pts - stop_seconds ) * sample_rate )
92+ else :
93+ offset_end = num_samples
94+
95+ return AudioSamples (
96+ data = frames [:, offset_beginning :offset_end ],
97+ pts_seconds = output_pts_seconds ,
98+ sample_rate = sample_rate ,
99+ )
86100
87- return frames [:, offset_beginning :offset_end ]
88- # return frames[:, offset_beginning:offset_end]
0 commit comments