@@ -661,9 +661,10 @@ VideoDecoder::getFramesInRange(int64_t start, int64_t stop, int64_t step) {
661661VideoDecoder::FrameOutput VideoDecoder::getFramePlayedAt (double seconds) {
662662 StreamInfo& streamInfo = streamInfos_[activeStreamIndex_];
663663 double frameStartTime =
664- ptsToSeconds (streamInfo.currentPts , streamInfo.timeBase );
664+ ptsToSeconds (streamInfo.lastDecodedAvFramePts , streamInfo.timeBase );
665665 double frameEndTime = ptsToSeconds (
666- streamInfo.currentPts + streamInfo.currentDuration , streamInfo.timeBase );
666+ streamInfo.lastDecodedAvFramePts + streamInfo.lastDecodedAvFrameDuration ,
667+ streamInfo.timeBase );
667668 if (seconds >= frameStartTime && seconds < frameEndTime) {
668669 // We are in the same frame as the one we just returned. However, since we
669670 // don't cache it locally, we have to rewind back.
@@ -824,8 +825,8 @@ void VideoDecoder::setCursorPtsInSeconds(double seconds) {
824825Videos have I frames and non-I frames (P and B frames). Non-I frames need data
825826from the previous I frame to be decoded.
826827
827- Imagine the cursor is at a random frame with PTS=x and we wish to seek to a
828- user-specified PTS=y.
828+ Imagine the cursor is at a random frame with PTS=lastDecodedAvFramePts (x for
829+ brevity) and we wish to seek to a user-specified PTS=y.
829830
830831If y < x, we don't have a choice but to seek backwards to the highest I frame
831832before y.
@@ -845,23 +846,24 @@ I P P P I P P P I P P I P P I P
845846
846847(2) is more efficient than (1) if there is an I frame between x and y.
847848*/
848- bool VideoDecoder::canWeAvoidSeeking (int64_t currentPts, int64_t targetPts)
849- const {
850- if (targetPts < currentPts) {
849+ bool VideoDecoder::canWeAvoidSeeking (int64_t targetPts) const {
850+ int64_t lastDecodedAvFramePts =
851+ streamInfos_.at (activeStreamIndex_).lastDecodedAvFramePts ;
852+ if (targetPts < lastDecodedAvFramePts) {
851853 // We can never skip a seek if we are seeking backwards.
852854 return false ;
853855 }
854- if (currentPts == targetPts) {
856+ if (lastDecodedAvFramePts == targetPts) {
855857 // We are seeking to the exact same frame as we are currently at. Without
856858 // caching we have to rewind back and decode the frame again.
857859 // TODO: https://github.com/pytorch-labs/torchcodec/issues/84 we could
858860 // implement caching.
859861 return false ;
860862 }
861863 // We are seeking forwards.
862- // We can only skip a seek if both currentPts and targetPts share the same
863- // keyframe.
864- int currentKeyFrameIndex = getKeyFrameIndexForPts (currentPts );
864+ // We can only skip a seek if both lastDecodedAvFramePts and targetPts share
865+ // the same keyframe.
866+ int currentKeyFrameIndex = getKeyFrameIndexForPts (lastDecodedAvFramePts );
865867 int targetKeyFrameIndex = getKeyFrameIndexForPts (targetPts);
866868 return currentKeyFrameIndex >= 0 && targetKeyFrameIndex >= 0 &&
867869 currentKeyFrameIndex == targetKeyFrameIndex;
@@ -879,7 +881,7 @@ void VideoDecoder::maybeSeekToBeforeDesiredPts() {
879881 decodeStats_.numSeeksAttempted ++;
880882
881883 int64_t desiredPtsForStream = *desiredPtsSeconds_ * streamInfo.timeBase .den ;
882- if (canWeAvoidSeeking (streamInfo. currentPts , desiredPtsForStream)) {
884+ if (canWeAvoidSeeking (desiredPtsForStream)) {
883885 decodeStats_.numSeeksSkipped ++;
884886 return ;
885887 }
@@ -1032,8 +1034,8 @@ VideoDecoder::AVFrameStream VideoDecoder::decodeAVFrame(
10321034 // haven't received as frames. Eventually we will either hit AVERROR_EOF from
10331035 // av_receive_frame() or the user will have seeked to a different location in
10341036 // the file and that will flush the decoder.
1035- streamInfo.currentPts = avFrame->pts ;
1036- streamInfo.currentDuration = getDuration (avFrame);
1037+ streamInfo.lastDecodedAvFramePts = avFrame->pts ;
1038+ streamInfo.lastDecodedAvFrameDuration = getDuration (avFrame);
10371039
10381040 return AVFrameStream (std::move (avFrame), activeStreamIndex_);
10391041}
0 commit comments