@@ -1094,13 +1094,6 @@ bool SingleStreamDecoder::canWeAvoidSeeking() const {
10941094 // Returns true if we can avoid seeking in the AVFormatContext based on
10951095 // heuristics that rely on the target cursor_ and the last decoded frame.
10961096 // Seeking is expensive, so we try to avoid it when possible.
1097- // Note that this function itself isn't always that cheap to call: in
1098- // particular the calls to getKeyFrameIndexForPts below in approximate mode
1099- // are sometimes slow.
1100- // TODO we should understand why (is it because it reads the file?) and
1101- // potentially optimize it. E.g. we may not want to ever seek, or even *check*
1102- // if we need to seek in some cases, like if we're going to decode 80% of the
1103- // frames anyway.
11041097 const StreamInfo& streamInfo = streamInfos_.at (activeStreamIndex_);
11051098 if (streamInfo.avMediaType == AVMEDIA_TYPE_AUDIO) {
11061099 // For audio, we only need to seek if a backwards seek was requested
@@ -1147,10 +1140,10 @@ bool SingleStreamDecoder::canWeAvoidSeeking() const {
11471140 // I P P P I P P P I P P I P
11481141 // x j y
11491142 // (2) is only more efficient than (1) if there is an I frame between x and y.
1150- int lastKeyFrameIndex = getKeyFrameIndexForPts (lastDecodedAvFramePts_);
1151- int targetKeyFrameIndex = getKeyFrameIndexForPts (cursor_);
1152- return lastKeyFrameIndex >= 0 && targetKeyFrameIndex >= 0 &&
1153- lastKeyFrameIndex == targetKeyFrameIndex ;
1143+ int lastKeyFrame = getKeyFrameIdentifier (lastDecodedAvFramePts_);
1144+ int targetKeyFrame = getKeyFrameIdentifier (cursor_);
1145+ return lastKeyFrame >= 0 && targetKeyFrame >= 0 &&
1146+ lastKeyFrame == targetKeyFrame ;
11541147}
11551148
11561149// This method looks at currentPts and desiredPts and seeks in the
@@ -1367,7 +1360,19 @@ torch::Tensor SingleStreamDecoder::maybePermuteHWC2CHW(
13671360// PTS <-> INDEX CONVERSIONS
13681361// --------------------------------------------------------------------------
13691362
1370- int SingleStreamDecoder::getKeyFrameIndexForPts (int64_t pts) const {
1363+ int SingleStreamDecoder::getKeyFrameIdentifier (int64_t pts) const {
1364+ // This function "identifies" a key frame for a given pts value.
1365+ // We use the term "identifier" rather than "index" because the nature of the
1366+ // index that is returned depends on various factors:
1367+ // - If seek_mode is exact, we return the index of the key frame in the
1368+ // scanned key-frame vector (streamInfo.keyFrames). So the returned value is
1369+ // in [0, num_key_frames).
1370+ // - If seek_mode is approximate, we use av_index_search_timestamp() which
1371+ // may return a value in [0, num_key_frames) like for mkv, but also a value
1372+ // in [0, num_frames) like for mp4. It really depends on the container.
1373+ //
1374+ // The range of the "identifier" doesn't matter much: for now, we only use
1375+ // it to uniquely identify a key frame in canWeAvoidSeeking().
13711376 const StreamInfo& streamInfo = streamInfos_.at (activeStreamIndex_);
13721377 if (streamInfo.keyFrames .empty ()) {
13731378 return av_index_search_timestamp (
0 commit comments