@@ -804,16 +804,20 @@ void VideoDecoder::maybeSeekToBeforeDesiredPts() {
804804 }
805805}
806806
807- VideoDecoder::AVFrameStream VideoDecoder::getAVFrameUsingFilterFunction (
807+ VideoDecoder::AVFrameStream VideoDecoder::decodeAVFrame (
808808 std::function<bool (int , AVFrame*)> filterFunction) {
809809 if (activeStreamIndices_.size () == 0 ) {
810810 throw std::runtime_error (" No active streams configured." );
811811 }
812+
812813 resetDecodeStats ();
814+
815+ // Seek if needed.
813816 if (desiredPtsSeconds_.has_value ()) {
814817 maybeSeekToBeforeDesiredPts ();
815818 desiredPtsSeconds_ = std::nullopt ;
816819 }
820+
817821 // Need to get the next frame or error from PopFrame.
818822 UniqueAVFrame avFrame (av_frame_alloc ());
819823 AutoAVPacket autoAVPacket;
@@ -823,42 +827,58 @@ VideoDecoder::AVFrameStream VideoDecoder::getAVFrameUsingFilterFunction(
823827 while (true ) {
824828 frameStreamIndex = -1 ;
825829 bool gotPermanentErrorOnAnyActiveStream = false ;
830+
831+ // Get a frame on an active stream. Note that we don't know ahead of time
832+ // which streams have frames to receive, so we linearly try the active
833+ // streams.
826834 for (int streamIndex : activeStreamIndices_) {
827835 StreamInfo& streamInfo = streamInfos_[streamIndex];
828836 ffmpegStatus =
829837 avcodec_receive_frame (streamInfo.codecContext .get (), avFrame.get ());
830- bool gotNonRetriableError =
831- ffmpegStatus != AVSUCCESS && ffmpegStatus != AVERROR (EAGAIN);
832- if (gotNonRetriableError) {
838+
839+ if (ffmpegStatus != AVSUCCESS && ffmpegStatus != AVERROR (EAGAIN)) {
833840 gotPermanentErrorOnAnyActiveStream = true ;
834841 break ;
835842 }
843+
836844 if (ffmpegStatus == AVSUCCESS) {
837845 frameStreamIndex = streamIndex;
838846 break ;
839847 }
840848 }
849+
841850 if (gotPermanentErrorOnAnyActiveStream) {
842851 break ;
843852 }
853+
844854 decodeStats_.numFramesReceivedByDecoder ++;
845- bool gotNeededFrame = ffmpegStatus == AVSUCCESS &&
846- filterFunction (frameStreamIndex, avFrame.get ());
847- if (gotNeededFrame) {
855+
856+ // Is this the kind of frame we're looking for?
857+ if (ffmpegStatus == AVSUCCESS &&
858+ filterFunction (frameStreamIndex, avFrame.get ())) {
859+ // Yes, this is the frame we'll return; break out of the decoding loop.
848860 break ;
849861 } else if (ffmpegStatus == AVSUCCESS) {
850- // No need to send more packets here as the decoder may have frames in
851- // its buffer.
862+ // No, but we received a valid frame - just not the kind we're looking
863+ // for. The logic below will read packets and send them to the decoder.
864+ // But since we did just receive a frame, we should skip reading more
865+ // packets and sending them to the decoder and just try to receive more
866+ // frames from the decoder.
852867 continue ;
853868 }
869+
854870 if (reachedEOF) {
855871 // We don't have any more packets to send to the decoder. So keep on
856872 // pulling frames from its internal buffers.
857873 continue ;
858874 }
875+
876+ // We still haven't found the frame we're looking for. So let's read more
877+ // packets and send them to the decoder.
859878 ReferenceAVPacket packet (autoAVPacket);
860879 ffmpegStatus = av_read_frame (formatContext_.get (), packet.get ());
861880 decodeStats_.numPacketsRead ++;
881+
862882 if (ffmpegStatus == AVERROR_EOF) {
863883 // End of file reached. We must drain all codecs by sending a nullptr
864884 // packet.
@@ -873,27 +893,38 @@ VideoDecoder::AVFrameStream VideoDecoder::getAVFrameUsingFilterFunction(
873893 getFFMPEGErrorStringFromErrorCode (ffmpegStatus));
874894 }
875895 }
896+
897+ // We've reached the end of file so we can't read any more packets from
898+ // it, but the decoder may still have frames to read in its buffer.
899+ // Continue iterating to try reading frames.
876900 reachedEOF = true ;
877901 continue ;
878902 }
903+
879904 if (ffmpegStatus < AVSUCCESS) {
880905 throw std::runtime_error (
881906 " Could not read frame from input file: " +
882907 getFFMPEGErrorStringFromErrorCode (ffmpegStatus));
883908 }
909+
884910 if (activeStreamIndices_.count (packet->stream_index ) == 0 ) {
885911 // This packet is not for any of the active streams.
886912 continue ;
887913 }
914+
915+ // We got a valid packet. Send it to the decoder; we'll try to receive the
916+ // resulting decoded frame in the next iteration.
888917 ffmpegStatus = avcodec_send_packet (
889918 streamInfos_[packet->stream_index ].codecContext .get (), packet.get ());
890919 if (ffmpegStatus < AVSUCCESS) {
891920 throw std::runtime_error (
892921 " Could not push packet to decoder: " +
893922 getFFMPEGErrorStringFromErrorCode (ffmpegStatus));
894923 }
924+
895925 decodeStats_.numPacketsSentToDecoder ++;
896926 }
927+
897928 if (ffmpegStatus < AVSUCCESS) {
898929 if (reachedEOF || ffmpegStatus == AVERROR_EOF) {
899930 throw VideoDecoder::EndOfFileException (
@@ -904,6 +935,7 @@ VideoDecoder::AVFrameStream VideoDecoder::getAVFrameUsingFilterFunction(
904935 " Could not receive frame from decoder: " +
905936 getFFMPEGErrorStringFromErrorCode (ffmpegStatus));
906937 }
938+
907939 // Note that we don't flush the decoder when we reach EOF (even though that's
908940 // mentioned in https://ffmpeg.org/doxygen/trunk/group__lavc__encdec.html).
909941 // This is because we may have packets internally in the decoder that we
@@ -913,10 +945,8 @@ VideoDecoder::AVFrameStream VideoDecoder::getAVFrameUsingFilterFunction(
913945 StreamInfo& activeStreamInfo = streamInfos_[frameStreamIndex];
914946 activeStreamInfo.currentPts = avFrame->pts ;
915947 activeStreamInfo.currentDuration = getDuration (avFrame);
916- AVFrameStream avFrameStream;
917- avFrameStream.streamIndex = frameStreamIndex;
918- avFrameStream.avFrame = std::move (avFrame);
919- return avFrameStream;
948+
949+ return AVFrameStream (std::move (avFrame), frameStreamIndex);
920950}
921951
922952VideoDecoder::FrameOutput VideoDecoder::convertAVFrameToFrameOutput (
@@ -1080,8 +1110,8 @@ VideoDecoder::FrameOutput VideoDecoder::getFramePlayedAtNoDemux(
10801110 }
10811111
10821112 setCursorPtsInSeconds (seconds);
1083- AVFrameStream avFrameStream = getAVFrameUsingFilterFunction (
1084- [seconds, this ](int frameStreamIndex, AVFrame* avFrame) {
1113+ AVFrameStream avFrameStream =
1114+ decodeAVFrame ( [seconds, this ](int frameStreamIndex, AVFrame* avFrame) {
10851115 StreamInfo& streamInfo = streamInfos_[frameStreamIndex];
10861116 double frameStartTime = ptsToSeconds (avFrame->pts , streamInfo.timeBase );
10871117 double frameEndTime = ptsToSeconds (
@@ -1481,8 +1511,8 @@ VideoDecoder::FrameOutput VideoDecoder::getNextFrameNoDemux() {
14811511
14821512VideoDecoder::FrameOutput VideoDecoder::getNextFrameNoDemuxInternal (
14831513 std::optional<torch::Tensor> preAllocatedOutputTensor) {
1484- AVFrameStream avFrameStream = getAVFrameUsingFilterFunction (
1485- [this ](int frameStreamIndex, AVFrame* avFrame) {
1514+ AVFrameStream avFrameStream =
1515+ decodeAVFrame ( [this ](int frameStreamIndex, AVFrame* avFrame) {
14861516 StreamInfo& activeStreamInfo = streamInfos_[frameStreamIndex];
14871517 return avFrame->pts >= activeStreamInfo.discardFramesBeforePts ;
14881518 });
0 commit comments