Skip to content

Commit a4c268b

Browse files
author
pytorchbot
committed
2025-02-12 nightly release (590fe1c)
1 parent f7d81eb commit a4c268b

File tree

9 files changed

+52
-119
lines changed

9 files changed

+52
-119
lines changed

src/torchcodec/_samplers/video_clip_sampler.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,6 @@ def _get_clips_for_index_based_sampling(
242242
]
243243
frames, *_ = get_frames_at_indices(
244244
video_decoder,
245-
stream_index=metadata_json["bestVideoStreamIndex"],
246245
frame_indices=batch_indexes,
247246
)
248247
clips.append(frames)

src/torchcodec/decoders/_core/VideoDecoder.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,7 @@ VideoDecoder::VideoStreamOptions::VideoStreamOptions(
418418
}
419419
}
420420

421-
void VideoDecoder::addVideoStreamDecoder(
421+
void VideoDecoder::addVideoStream(
422422
int streamIndex,
423423
const VideoStreamOptions& videoStreamOptions) {
424424
TORCH_CHECK(

src/torchcodec/decoders/_core/VideoDecoder.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ class VideoDecoder {
136136

137137
struct AudioStreamOptions {};
138138

139-
void addVideoStreamDecoder(
139+
void addVideoStream(
140140
int streamIndex,
141141
const VideoStreamOptions& videoStreamOptions = VideoStreamOptions());
142142
void addAudioStreamDecoder(

src/torchcodec/decoders/_core/VideoDecoderOps.cpp

Lines changed: 10 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -39,24 +39,23 @@ TORCH_LIBRARY(torchcodec_ns, m) {
3939
m.def(
4040
"get_frame_at_pts(Tensor(a!) decoder, float seconds) -> (Tensor, Tensor, Tensor)");
4141
m.def(
42-
"get_frame_at_index(Tensor(a!) decoder, *, int stream_index, int frame_index) -> (Tensor, Tensor, Tensor)");
42+
"get_frame_at_index(Tensor(a!) decoder, *, int frame_index) -> (Tensor, Tensor, Tensor)");
4343
m.def(
44-
"get_frames_at_indices(Tensor(a!) decoder, *, int stream_index, int[] frame_indices) -> (Tensor, Tensor, Tensor)");
44+
"get_frames_at_indices(Tensor(a!) decoder, *, int[] frame_indices) -> (Tensor, Tensor, Tensor)");
4545
m.def(
46-
"get_frames_in_range(Tensor(a!) decoder, *, int stream_index, int start, int stop, int? step=None) -> (Tensor, Tensor, Tensor)");
46+
"get_frames_in_range(Tensor(a!) decoder, *, int start, int stop, int? step=None) -> (Tensor, Tensor, Tensor)");
4747
m.def(
48-
"get_frames_by_pts_in_range(Tensor(a!) decoder, *, int stream_index, float start_seconds, float stop_seconds) -> (Tensor, Tensor, Tensor)");
48+
"get_frames_by_pts_in_range(Tensor(a!) decoder, *, float start_seconds, float stop_seconds) -> (Tensor, Tensor, Tensor)");
4949
m.def(
50-
"get_frames_by_pts(Tensor(a!) decoder, *, int stream_index, float[] timestamps) -> (Tensor, Tensor, Tensor)");
51-
m.def(
52-
"_get_key_frame_indices(Tensor(a!) decoder, int stream_index) -> Tensor");
50+
"get_frames_by_pts(Tensor(a!) decoder, *, float[] timestamps) -> (Tensor, Tensor, Tensor)");
51+
m.def("_get_key_frame_indices(Tensor(a!) decoder) -> Tensor");
5352
m.def("get_json_metadata(Tensor(a!) decoder) -> str");
5453
m.def("get_container_json_metadata(Tensor(a!) decoder) -> str");
5554
m.def(
5655
"get_stream_json_metadata(Tensor(a!) decoder, int stream_index) -> str");
5756
m.def("_get_json_ffmpeg_library_versions() -> str");
5857
m.def(
59-
"_test_frame_pts_equality(Tensor(a!) decoder, *, int stream_index, int frame_index, float pts_seconds_to_test) -> bool");
58+
"_test_frame_pts_equality(Tensor(a!) decoder, *, int frame_index, float pts_seconds_to_test) -> bool");
6059
m.def("scan_all_streams_to_update_metadata(Tensor(a!) decoder) -> ()");
6160
}
6261

@@ -220,8 +219,7 @@ void _add_video_stream(
220219
}
221220

222221
auto videoDecoder = unwrapTensorToGetDecoder(decoder);
223-
videoDecoder->addVideoStreamDecoder(
224-
stream_index.value_or(-1), videoStreamOptions);
222+
videoDecoder->addVideoStream(stream_index.value_or(-1), videoStreamOptions);
225223
}
226224

227225
void seek_to_pts(at::Tensor& decoder, double seconds) {
@@ -237,11 +235,6 @@ OpsFrameOutput get_next_frame(at::Tensor& decoder) {
237235
} catch (const VideoDecoder::EndOfFileException& e) {
238236
C10_THROW_ERROR(IndexError, e.what());
239237
}
240-
if (result.data.sizes().size() != 3) {
241-
throw std::runtime_error(
242-
"image_size is unexpected. Expected 3, got: " +
243-
std::to_string(result.data.sizes().size()));
244-
}
245238
return makeOpsFrameOutput(result);
246239
}
247240

@@ -251,18 +244,14 @@ OpsFrameOutput get_frame_at_pts(at::Tensor& decoder, double seconds) {
251244
return makeOpsFrameOutput(result);
252245
}
253246

254-
OpsFrameOutput get_frame_at_index(
255-
at::Tensor& decoder,
256-
[[maybe_unused]] int64_t stream_index,
257-
int64_t frame_index) {
247+
OpsFrameOutput get_frame_at_index(at::Tensor& decoder, int64_t frame_index) {
258248
auto videoDecoder = unwrapTensorToGetDecoder(decoder);
259249
auto result = videoDecoder->getFrameAtIndex(frame_index);
260250
return makeOpsFrameOutput(result);
261251
}
262252

263253
OpsFrameBatchOutput get_frames_at_indices(
264254
at::Tensor& decoder,
265-
[[maybe_unused]] int64_t stream_index,
266255
at::IntArrayRef frame_indices) {
267256
auto videoDecoder = unwrapTensorToGetDecoder(decoder);
268257
std::vector<int64_t> frameIndicesVec(
@@ -273,7 +262,6 @@ OpsFrameBatchOutput get_frames_at_indices(
273262

274263
OpsFrameBatchOutput get_frames_in_range(
275264
at::Tensor& decoder,
276-
[[maybe_unused]] int64_t stream_index,
277265
int64_t start,
278266
int64_t stop,
279267
std::optional<int64_t> step) {
@@ -284,7 +272,6 @@ OpsFrameBatchOutput get_frames_in_range(
284272

285273
OpsFrameBatchOutput get_frames_by_pts(
286274
at::Tensor& decoder,
287-
[[maybe_unused]] int64_t stream_index,
288275
at::ArrayRef<double> timestamps) {
289276
auto videoDecoder = unwrapTensorToGetDecoder(decoder);
290277
std::vector<double> timestampsVec(timestamps.begin(), timestamps.end());
@@ -294,7 +281,6 @@ OpsFrameBatchOutput get_frames_by_pts(
294281

295282
OpsFrameBatchOutput get_frames_by_pts_in_range(
296283
at::Tensor& decoder,
297-
[[maybe_unused]] int64_t stream_index,
298284
double start_seconds,
299285
double stop_seconds) {
300286
auto videoDecoder = unwrapTensorToGetDecoder(decoder);
@@ -327,17 +313,14 @@ std::string mapToJson(const std::map<std::string, std::string>& metadataMap) {
327313

328314
bool _test_frame_pts_equality(
329315
at::Tensor& decoder,
330-
[[maybe_unused]] int64_t stream_index,
331316
int64_t frame_index,
332317
double pts_seconds_to_test) {
333318
auto videoDecoder = unwrapTensorToGetDecoder(decoder);
334319
return pts_seconds_to_test ==
335320
videoDecoder->getPtsSecondsForFrame(frame_index);
336321
}
337322

338-
torch::Tensor _get_key_frame_indices(
339-
at::Tensor& decoder,
340-
[[maybe_unused]] int64_t stream_index) {
323+
torch::Tensor _get_key_frame_indices(at::Tensor& decoder) {
341324
auto videoDecoder = unwrapTensorToGetDecoder(decoder);
342325
return videoDecoder->getKeyFrameIndices();
343326
}

src/torchcodec/decoders/_core/VideoDecoderOps.h

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -85,14 +85,10 @@ OpsFrameOutput get_frame_at_pts(at::Tensor& decoder, double seconds);
8585
// Return the frames at given ptss for a given stream
8686
OpsFrameBatchOutput get_frames_by_pts(
8787
at::Tensor& decoder,
88-
int64_t stream_index,
8988
at::ArrayRef<double> timestamps);
9089

9190
// Return the frame that is visible at a given index in the video.
92-
OpsFrameOutput get_frame_at_index(
93-
at::Tensor& decoder,
94-
int64_t stream_index,
95-
int64_t frame_index);
91+
OpsFrameOutput get_frame_at_index(at::Tensor& decoder, int64_t frame_index);
9692

9793
// Get the next frame from the video as a tuple that has the frame data, pts and
9894
// duration as tensors.
@@ -101,14 +97,12 @@ OpsFrameOutput get_next_frame(at::Tensor& decoder);
10197
// Return the frames at given indices for a given stream
10298
OpsFrameBatchOutput get_frames_at_indices(
10399
at::Tensor& decoder,
104-
int64_t stream_index,
105100
at::IntArrayRef frame_indices);
106101

107102
// Return the frames inside a range as a single stacked Tensor. The range is
108103
// defined as [start, stop).
109104
OpsFrameBatchOutput get_frames_in_range(
110105
at::Tensor& decoder,
111-
int64_t stream_index,
112106
int64_t start,
113107
int64_t stop,
114108
std::optional<int64_t> step = std::nullopt);
@@ -118,7 +112,6 @@ OpsFrameBatchOutput get_frames_in_range(
118112
// order.
119113
OpsFrameBatchOutput get_frames_by_pts_in_range(
120114
at::Tensor& decoder,
121-
int64_t stream_index,
122115
double start_seconds,
123116
double stop_seconds);
124117

@@ -128,16 +121,15 @@ OpsFrameBatchOutput get_frames_by_pts_in_range(
128121
// We want to make sure that the value is preserved exactly, bit-for-bit, during
129122
// this process.
130123
//
131-
// Returns true if for the given decoder, in the stream stream_index, the pts
124+
// Returns true if for the given decoder, the pts
132125
// value when converted to seconds as a double is exactly pts_seconds_to_test.
133126
// Returns false otherwise.
134127
bool _test_frame_pts_equality(
135128
at::Tensor& decoder,
136-
int64_t stream_index,
137129
int64_t frame_index,
138130
double pts_seconds_to_test);
139131

140-
torch::Tensor _get_key_frame_indices(at::Tensor& decoder, int64_t stream_index);
132+
torch::Tensor _get_key_frame_indices(at::Tensor& decoder);
141133

142134
// Get the metadata from the video as a string.
143135
std::string get_json_metadata(at::Tensor& decoder);

src/torchcodec/decoders/_video_decoder.py

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -152,9 +152,7 @@ def _getitem_int(self, key: int) -> Tensor:
152152
f"Index {key} is out of bounds; length is {self._num_frames}"
153153
)
154154

155-
frame_data, *_ = core.get_frame_at_index(
156-
self._decoder, frame_index=key, stream_index=self.stream_index
157-
)
155+
frame_data, *_ = core.get_frame_at_index(self._decoder, frame_index=key)
158156
return frame_data
159157

160158
def _getitem_slice(self, key: slice) -> Tensor:
@@ -163,7 +161,6 @@ def _getitem_slice(self, key: slice) -> Tensor:
163161
start, stop, step = key.indices(len(self))
164162
frame_data, *_ = core.get_frames_in_range(
165163
self._decoder,
166-
stream_index=self.stream_index,
167164
start=start,
168165
stop=stop,
169166
step=step,
@@ -189,9 +186,7 @@ def __getitem__(self, key: Union[numbers.Integral, slice]) -> Tensor:
189186
)
190187

191188
def _get_key_frame_indices(self) -> list[int]:
192-
return core._get_key_frame_indices(
193-
self._decoder, stream_index=self.stream_index
194-
)
189+
return core._get_key_frame_indices(self._decoder)
195190

196191
def get_frame_at(self, index: int) -> Frame:
197192
"""Return a single frame at the given index.
@@ -208,7 +203,7 @@ def get_frame_at(self, index: int) -> Frame:
208203
f"Index {index} is out of bounds; must be in the range [0, {self._num_frames})."
209204
)
210205
data, pts_seconds, duration_seconds = core.get_frame_at_index(
211-
self._decoder, frame_index=index, stream_index=self.stream_index
206+
self._decoder, frame_index=index
212207
)
213208
return Frame(
214209
data=data,
@@ -234,7 +229,7 @@ def get_frames_at(self, indices: list[int]) -> FrameBatch:
234229
"""
235230

236231
data, pts_seconds, duration_seconds = core.get_frames_at_indices(
237-
self._decoder, stream_index=self.stream_index, frame_indices=indices
232+
self._decoder, frame_indices=indices
238233
)
239234
return FrameBatch(
240235
data=data,
@@ -268,7 +263,6 @@ def get_frames_in_range(self, start: int, stop: int, step: int = 1) -> FrameBatc
268263
raise IndexError(f"Step ({step}) must be greater than 0.")
269264
frames = core.get_frames_in_range(
270265
self._decoder,
271-
stream_index=self.stream_index,
272266
start=start,
273267
stop=stop,
274268
step=step,
@@ -316,7 +310,7 @@ def get_frames_played_at(self, seconds: list[float]) -> FrameBatch:
316310
FrameBatch: The frames that are played at ``seconds``.
317311
"""
318312
data, pts_seconds, duration_seconds = core.get_frames_by_pts(
319-
self._decoder, timestamps=seconds, stream_index=self.stream_index
313+
self._decoder, timestamps=seconds
320314
)
321315
return FrameBatch(
322316
data=data,
@@ -359,7 +353,6 @@ def get_frames_played_in_range(
359353
)
360354
frames = core.get_frames_by_pts_in_range(
361355
self._decoder,
362-
stream_index=self.stream_index,
363356
start_seconds=start_seconds,
364357
stop_seconds=stop_seconds,
365358
)

test/decoders/VideoDecoderTest.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ TEST(VideoDecoderTest, RespectsWidthAndHeightFromOptions) {
148148
VideoDecoder::VideoStreamOptions videoStreamOptions;
149149
videoStreamOptions.width = 100;
150150
videoStreamOptions.height = 120;
151-
decoder->addVideoStreamDecoder(-1, videoStreamOptions);
151+
decoder->addVideoStream(-1, videoStreamOptions);
152152
torch::Tensor tensor = decoder->getNextFrame().data;
153153
EXPECT_EQ(tensor.sizes(), std::vector<long>({3, 120, 100}));
154154
}
@@ -158,7 +158,7 @@ TEST(VideoDecoderTest, RespectsOutputTensorDimensionOrderFromOptions) {
158158
std::unique_ptr<VideoDecoder> decoder = std::make_unique<VideoDecoder>(path);
159159
VideoDecoder::VideoStreamOptions videoStreamOptions;
160160
videoStreamOptions.dimensionOrder = "NHWC";
161-
decoder->addVideoStreamDecoder(-1, videoStreamOptions);
161+
decoder->addVideoStream(-1, videoStreamOptions);
162162
torch::Tensor tensor = decoder->getNextFrame().data;
163163
EXPECT_EQ(tensor.sizes(), std::vector<long>({270, 480, 3}));
164164
}
@@ -167,7 +167,7 @@ TEST_P(VideoDecoderTest, ReturnsFirstTwoFramesOfVideo) {
167167
std::string path = getResourcePath("nasa_13013.mp4");
168168
std::unique_ptr<VideoDecoder> ourDecoder =
169169
createDecoderFromPath(path, GetParam());
170-
ourDecoder->addVideoStreamDecoder(-1);
170+
ourDecoder->addVideoStream(-1);
171171
auto output = ourDecoder->getNextFrame();
172172
torch::Tensor tensor0FromOurDecoder = output.data;
173173
EXPECT_EQ(tensor0FromOurDecoder.sizes(), std::vector<long>({3, 270, 480}));
@@ -206,7 +206,7 @@ TEST_P(VideoDecoderTest, DecodesFramesInABatchInNCHW) {
206206
ourDecoder->scanFileAndUpdateMetadataAndIndex();
207207
int bestVideoStreamIndex =
208208
*ourDecoder->getContainerMetadata().bestVideoStreamIndex;
209-
ourDecoder->addVideoStreamDecoder(bestVideoStreamIndex);
209+
ourDecoder->addVideoStream(bestVideoStreamIndex);
210210
// Frame with index 180 corresponds to timestamp 6.006.
211211
auto output = ourDecoder->getFramesAtIndices({0, 180});
212212
auto tensor = output.data;
@@ -228,7 +228,7 @@ TEST_P(VideoDecoderTest, DecodesFramesInABatchInNHWC) {
228228
ourDecoder->scanFileAndUpdateMetadataAndIndex();
229229
int bestVideoStreamIndex =
230230
*ourDecoder->getContainerMetadata().bestVideoStreamIndex;
231-
ourDecoder->addVideoStreamDecoder(
231+
ourDecoder->addVideoStream(
232232
bestVideoStreamIndex,
233233
VideoDecoder::VideoStreamOptions("dimension_order=NHWC"));
234234
// Frame with index 180 corresponds to timestamp 6.006.
@@ -250,7 +250,7 @@ TEST_P(VideoDecoderTest, SeeksCloseToEof) {
250250
std::string path = getResourcePath("nasa_13013.mp4");
251251
std::unique_ptr<VideoDecoder> ourDecoder =
252252
createDecoderFromPath(path, GetParam());
253-
ourDecoder->addVideoStreamDecoder(-1);
253+
ourDecoder->addVideoStream(-1);
254254
ourDecoder->setCursorPtsInSeconds(388388. / 30'000);
255255
auto output = ourDecoder->getNextFrame();
256256
EXPECT_EQ(output.ptsSeconds, 388'388. / 30'000);
@@ -263,7 +263,7 @@ TEST_P(VideoDecoderTest, GetsFramePlayedAtTimestamp) {
263263
std::string path = getResourcePath("nasa_13013.mp4");
264264
std::unique_ptr<VideoDecoder> ourDecoder =
265265
createDecoderFromPath(path, GetParam());
266-
ourDecoder->addVideoStreamDecoder(-1);
266+
ourDecoder->addVideoStream(-1);
267267
auto output = ourDecoder->getFramePlayedAt(6.006);
268268
EXPECT_EQ(output.ptsSeconds, 6.006);
269269
// The frame's duration is 0.033367 according to ffprobe,
@@ -293,7 +293,7 @@ TEST_P(VideoDecoderTest, SeeksToFrameWithSpecificPts) {
293293
std::string path = getResourcePath("nasa_13013.mp4");
294294
std::unique_ptr<VideoDecoder> ourDecoder =
295295
createDecoderFromPath(path, GetParam());
296-
ourDecoder->addVideoStreamDecoder(-1);
296+
ourDecoder->addVideoStream(-1);
297297
ourDecoder->setCursorPtsInSeconds(6.0);
298298
auto output = ourDecoder->getNextFrame();
299299
torch::Tensor tensor6FromOurDecoder = output.data;
@@ -393,7 +393,7 @@ TEST_P(VideoDecoderTest, PreAllocatedTensorFilterGraph) {
393393
ourDecoder->scanFileAndUpdateMetadataAndIndex();
394394
int bestVideoStreamIndex =
395395
*ourDecoder->getContainerMetadata().bestVideoStreamIndex;
396-
ourDecoder->addVideoStreamDecoder(
396+
ourDecoder->addVideoStream(
397397
bestVideoStreamIndex,
398398
VideoDecoder::VideoStreamOptions("color_conversion_library=filtergraph"));
399399
auto output =
@@ -410,7 +410,7 @@ TEST_P(VideoDecoderTest, PreAllocatedTensorSwscale) {
410410
ourDecoder->scanFileAndUpdateMetadataAndIndex();
411411
int bestVideoStreamIndex =
412412
*ourDecoder->getContainerMetadata().bestVideoStreamIndex;
413-
ourDecoder->addVideoStreamDecoder(
413+
ourDecoder->addVideoStream(
414414
bestVideoStreamIndex,
415415
VideoDecoder::VideoStreamOptions("color_conversion_library=swscale"));
416416
auto output =

test/decoders/manual_smoke_test.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,5 @@
1616
)
1717
torchcodec.decoders._core.scan_all_streams_to_update_metadata(decoder)
1818
torchcodec.decoders._core.add_video_stream(decoder, stream_index=3)
19-
frame, _, _ = torchcodec.decoders._core.get_frame_at_index(
20-
decoder, stream_index=3, frame_index=180
21-
)
19+
frame, _, _ = torchcodec.decoders._core.get_frame_at_index(decoder, frame_index=180)
2220
write_png(frame, "frame180.png")

0 commit comments

Comments
 (0)