@@ -1090,6 +1090,53 @@ VideoDecoder::BatchDecodedOutput VideoDecoder::getFramesAtIndices(
10901090 return output;
10911091}
10921092
1093+ VideoDecoder::BatchDecodedOutput VideoDecoder::getFramesDisplayedByTimestamps (
1094+ int streamIndex,
1095+ const std::vector<double >& timestamps) {
1096+ validateUserProvidedStreamIndex (streamIndex);
1097+ validateScannedAllStreams (" getFramesDisplayedByTimestamps" );
1098+
1099+ // The frame displayed at timestamp t and the one displayed at timestamp `t +
1100+ // eps` are probably the same frame, with the same index. The easiest way to
1101+ // avoid decoding that unique frame twice is to convert the input timestamps
1102+ // to indices, and leverage the de-duplication logic of getFramesAtIndices.
1103+ // This means this function requires a scan.
1104+ // TODO: longer term, we should implement this without requiring a scan
1105+
1106+ const auto & streamMetadata = containerMetadata_.streams [streamIndex];
1107+ const auto & stream = streams_[streamIndex];
1108+ double minSeconds = streamMetadata.minPtsSecondsFromScan .value ();
1109+ double maxSeconds = streamMetadata.maxPtsSecondsFromScan .value ();
1110+
1111+ std::vector<int64_t > frameIndices (timestamps.size ());
1112+ for (auto i = 0 ; i < timestamps.size (); ++i) {
1113+ auto framePts = timestamps[i];
1114+ TORCH_CHECK (
1115+ framePts >= minSeconds && framePts < maxSeconds,
1116+ " frame pts is " + std::to_string (framePts) + " ; must be in range [" +
1117+ std::to_string (minSeconds) + " , " + std::to_string (maxSeconds) +
1118+ " )." );
1119+
1120+ auto it = std::lower_bound (
1121+ stream.allFrames .begin (),
1122+ stream.allFrames .end (),
1123+ framePts,
1124+ [&stream](const FrameInfo& info, double framePts) {
1125+ return ptsToSeconds (info.nextPts , stream.timeBase ) <= framePts;
1126+ });
1127+ int64_t frameIndex = it - stream.allFrames .begin ();
1128+ // If the frame index is larger than the size of allFrames, that means we
1129+ // couldn't match the pts value to the pts value of a NEXT FRAME. And
1130+ // that means that this timestamp falls during the time between when the
1131+ // last frame is displayed, and the video ends. Hence, it should map to the
1132+ // index of the last frame.
1133+ frameIndex = std::min (frameIndex, (int64_t )stream.allFrames .size () - 1 );
1134+ frameIndices[i] = frameIndex;
1135+ }
1136+
1137+ return getFramesAtIndices (streamIndex, frameIndices);
1138+ }
1139+
10931140VideoDecoder::BatchDecodedOutput VideoDecoder::getFramesInRange (
10941141 int streamIndex,
10951142 int64_t start,
0 commit comments