Skip to content

Commit 893c358

Browse files
committed
Merge branch 'main' of github.com:pytorch/torchcodec into audioooooooo
2 parents 0c0f62b + 374d950 commit 893c358

File tree

6 files changed

+27
-118
lines changed

6 files changed

+27
-118
lines changed

src/torchcodec/decoders/_core/VideoDecoder.cpp

Lines changed: 18 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -569,10 +569,8 @@ VideoDecoder::FrameOutput VideoDecoder::getNextFrame() {
569569
VideoDecoder::FrameOutput VideoDecoder::getNextFrameInternal(
570570
std::optional<torch::Tensor> preAllocatedOutputTensor) {
571571
validateActiveStream(AVMEDIA_TYPE_VIDEO);
572-
AVFrameStream avFrameStream = decodeAVFrame([this](AVFrame* avFrame) {
573-
StreamInfo& activeStreamInfo = streamInfos_[activeStreamIndex_];
574-
return avFrame->pts >= activeStreamInfo.discardFramesBeforePts;
575-
});
572+
AVFrameStream avFrameStream = decodeAVFrame(
573+
[this](AVFrame* avFrame) { return avFrame->pts >= cursor_; });
576574
return convertAVFrameToFrameOutput(avFrameStream, preAllocatedOutputTensor);
577575
}
578576

@@ -909,7 +907,9 @@ torch::Tensor VideoDecoder::getFramesPlayedInRangeAudio(
909907
// --------------------------------------------------------------------------
910908

911909
void VideoDecoder::setCursorPtsInSeconds(double seconds) {
912-
desiredPtsSeconds_ = seconds;
910+
cursorWasJustSet_ = true;
911+
cursor_ =
912+
secondsToClosestPts(seconds, streamInfos_[activeStreamIndex_].timeBase);
913913
}
914914

915915
/*
@@ -937,29 +937,29 @@ I P P P I P P P I P P I P P I P
937937
938938
(2) is more efficient than (1) if there is an I frame between x and y.
939939
*/
940-
bool VideoDecoder::canWeAvoidSeeking(int64_t targetPts) const {
940+
bool VideoDecoder::canWeAvoidSeeking() const {
941941
const StreamInfo& streamInfo = streamInfos_.at(activeStreamIndex_);
942942
if (streamInfo.avMediaType == AVMEDIA_TYPE_AUDIO) {
943943
return true;
944944
}
945-
946-
int64_t lastDecodedAvFramePts = streamInfo.lastDecodedAvFramePts;
947-
if (targetPts < lastDecodedAvFramePts) {
945+
int64_t lastDecodedAvFramePts =
946+
streamInfos_.at(activeStreamIndex_).lastDecodedAvFramePts;
947+
if (cursor_ < lastDecodedAvFramePts) {
948948
// We can never skip a seek if we are seeking backwards.
949949
return false;
950950
}
951-
if (lastDecodedAvFramePts == targetPts) {
951+
if (lastDecodedAvFramePts == cursor_) {
952952
// We are seeking to the exact same frame as we are currently at. Without
953953
// caching we have to rewind back and decode the frame again.
954954
// TODO: https://github.com/pytorch-labs/torchcodec/issues/84 we could
955955
// implement caching.
956956
return false;
957957
}
958958
// We are seeking forwards.
959-
// We can only skip a seek if both lastDecodedAvFramePts and targetPts share
960-
// the same keyframe.
959+
// We can only skip a seek if both lastDecodedAvFramePts and
960+
// cursor_ share the same keyframe.
961961
int lastDecodedAvFrameIndex = getKeyFrameIndexForPts(lastDecodedAvFramePts);
962-
int targetKeyFrameIndex = getKeyFrameIndexForPts(targetPts);
962+
int targetKeyFrameIndex = getKeyFrameIndexForPts(cursor_);
963963
return lastDecodedAvFrameIndex >= 0 && targetKeyFrameIndex >= 0 &&
964964
lastDecodedAvFrameIndex == targetKeyFrameIndex;
965965
}
@@ -971,16 +971,14 @@ void VideoDecoder::maybeSeekToBeforeDesiredPts() {
971971
validateActiveStream();
972972
StreamInfo& streamInfo = streamInfos_[activeStreamIndex_];
973973

974-
int64_t desiredPts =
975-
secondsToClosestPts(*desiredPtsSeconds_, streamInfo.timeBase);
976-
streamInfo.discardFramesBeforePts = desiredPts;
977-
978974
decodeStats_.numSeeksAttempted++;
979-
if (canWeAvoidSeeking(desiredPts)) {
975+
if (canWeAvoidSeeking()) {
980976
decodeStats_.numSeeksSkipped++;
981977
return;
982978
}
983979

980+
int64_t desiredPts = cursor_;
981+
984982
// For some encodings like H265, FFMPEG sometimes seeks past the point we
985983
// set as the max_ts. So we use our own index to give it the exact pts of
986984
// the key frame that we want to seek to.
@@ -1019,10 +1017,9 @@ VideoDecoder::AVFrameStream VideoDecoder::decodeAVFrame(
10191017

10201018
resetDecodeStats();
10211019

1022-
// Seek if needed.
1023-
if (desiredPtsSeconds_.has_value()) {
1020+
if (cursorWasJustSet_) {
10241021
maybeSeekToBeforeDesiredPts();
1025-
desiredPtsSeconds_ = std::nullopt;
1022+
cursorWasJustSet_ = false;
10261023
}
10271024

10281025
StreamInfo& streamInfo = streamInfos_[activeStreamIndex_];

src/torchcodec/decoders/_core/VideoDecoder.h

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -337,15 +337,11 @@ class VideoDecoder {
337337
std::vector<FrameInfo> keyFrames;
338338
std::vector<FrameInfo> allFrames;
339339

340-
// The current position of the cursor in the stream, and associated frame
341-
// duration.
340+
// TODO since the decoder is single-stream, these should be decoder fields,
341+
// not streamInfo fields. And they should be defined right next to
342+
// `cursor_`, with joint documentation.
342343
int64_t lastDecodedAvFramePts = 0;
343344
int64_t lastDecodedAvFrameDuration = 0;
344-
// The desired position of the cursor in the stream. We send frames >=
345-
// this pts to the user when they request a frame.
346-
// We update this field if the user requested a seek. This typically
347-
// corresponds to the decoder's desiredPts_ attribute.
348-
int64_t discardFramesBeforePts = INT64_MIN;
349345
VideoStreamOptions videoStreamOptions;
350346

351347
// color-conversion fields. Only one of FilterGraphContext and
@@ -368,7 +364,7 @@ class VideoDecoder {
368364
// DECODING APIS AND RELATED UTILS
369365
// --------------------------------------------------------------------------
370366

371-
bool canWeAvoidSeeking(int64_t targetPts) const;
367+
bool canWeAvoidSeeking() const;
372368

373369
void maybeSeekToBeforeDesiredPts();
374370

@@ -477,9 +473,11 @@ class VideoDecoder {
477473
std::map<int, StreamInfo> streamInfos_;
478474
const int NO_ACTIVE_STREAM = -2;
479475
int activeStreamIndex_ = NO_ACTIVE_STREAM;
480-
// Set when the user wants to seek and stores the desired pts that the user
481-
// wants to seek to.
482-
std::optional<double> desiredPtsSeconds_;
476+
477+
bool cursorWasJustSet_ = false;
478+
// The desired position of the cursor in the stream. We send frames >= this
479+
// pts to the user when they request a frame.
480+
int64_t cursor_ = INT64_MIN;
483481
// Stores various internal decoding stats.
484482
DecodeStats decodeStats_;
485483
// Stores the AVIOContext for the input buffer.

src/torchcodec/decoders/_core/VideoDecoderOps.cpp

Lines changed: 0 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -143,20 +143,6 @@ at::Tensor create_from_tensor(
143143
return wrapDecoderPointerToTensor(std::move(uniqueDecoder));
144144
}
145145

146-
at::Tensor create_from_buffer(
147-
const void* buffer,
148-
size_t length,
149-
std::optional<std::string_view> seek_mode) {
150-
VideoDecoder::SeekMode realSeek = VideoDecoder::SeekMode::exact;
151-
if (seek_mode.has_value()) {
152-
realSeek = seekModeFromString(seek_mode.value());
153-
}
154-
155-
std::unique_ptr<VideoDecoder> uniqueDecoder =
156-
std::make_unique<VideoDecoder>(buffer, length, realSeek);
157-
return wrapDecoderPointerToTensor(std::move(uniqueDecoder));
158-
}
159-
160146
void add_video_stream(
161147
at::Tensor& decoder,
162148
std::optional<int64_t> width,

src/torchcodec/decoders/_core/VideoDecoderOps.h

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -28,13 +28,6 @@ at::Tensor create_from_tensor(
2828
at::Tensor video_tensor,
2929
std::optional<std::string_view> seek_mode = std::nullopt);
3030

31-
// This API is C++ only and will not be exposed via custom ops, use
32-
// videodecoder_create_from_bytes in Python
33-
at::Tensor create_from_buffer(
34-
const void* buffer,
35-
size_t length,
36-
std::optional<std::string_view> seek_mode = std::nullopt);
37-
3831
// Add a new video stream at `stream_index` using the provided options.
3932
void add_video_stream(
4033
at::Tensor& decoder,

test/decoders/CMakeLists.txt

Lines changed: 0 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -21,28 +21,14 @@ add_executable(
2121
VideoDecoderTest.cpp
2222
)
2323

24-
add_executable(
25-
VideoDecoderOpsTest
26-
VideoDecoderOpsTest.cpp
27-
)
28-
2924
target_include_directories(VideoDecoderTest SYSTEM PRIVATE ${TORCH_INCLUDE_DIRS})
3025
target_include_directories(VideoDecoderTest PRIVATE ../../)
31-
target_include_directories(VideoDecoderOpsTest SYSTEM PRIVATE ${TORCH_INCLUDE_DIRS})
32-
target_include_directories(VideoDecoderOpsTest PRIVATE ../../)
3326

3427
target_link_libraries(
3528
VideoDecoderTest
3629
${libtorchcodec_target_name}
3730
GTest::gtest_main
3831
)
3932

40-
target_link_libraries(
41-
VideoDecoderOpsTest
42-
${libtorchcodec_target_name}
43-
GTest::gtest_main
44-
)
45-
4633
include(GoogleTest)
4734
gtest_discover_tests(VideoDecoderTest)
48-
gtest_discover_tests(VideoDecoderOpsTest)

test/decoders/VideoDecoderOpsTest.cpp

Lines changed: 0 additions & 51 deletions
This file was deleted.

0 commit comments

Comments
 (0)