@@ -803,16 +803,20 @@ void VideoDecoder::maybeSeekToBeforeDesiredPts() {
803803 }
804804}
805805
806- VideoDecoder::AVFrameStream VideoDecoder::getAVFrameUsingFilterFunction (
806+ VideoDecoder::AVFrameStream VideoDecoder::decodeAVFrame (
807807 std::function<bool (int , AVFrame*)> filterFunction) {
808808 if (activeStreamIndices_.size () == 0 ) {
809809 throw std::runtime_error (" No active streams configured." );
810810 }
811+
811812 resetDecodeStats ();
813+
814+ // Seek if needed.
812815 if (desiredPtsSeconds_.has_value ()) {
813816 maybeSeekToBeforeDesiredPts ();
814817 desiredPtsSeconds_ = std::nullopt ;
815818 }
819+
816820 // Need to get the next frame or error from PopFrame.
817821 UniqueAVFrame avFrame (av_frame_alloc ());
818822 AutoAVPacket autoAVPacket;
@@ -822,42 +826,58 @@ VideoDecoder::AVFrameStream VideoDecoder::getAVFrameUsingFilterFunction(
822826 while (true ) {
823827 frameStreamIndex = -1 ;
824828 bool gotPermanentErrorOnAnyActiveStream = false ;
829+
830+ // Get a frame on an active stream. Note that we don't know ahead of time
831+ // which streams have frames to receive, so we linearly try the active
832+ // streams.
825833 for (int streamIndex : activeStreamIndices_) {
826834 StreamInfo& streamInfo = streamInfos_[streamIndex];
827835 ffmpegStatus =
828836 avcodec_receive_frame (streamInfo.codecContext .get (), avFrame.get ());
829- bool gotNonRetriableError =
830- ffmpegStatus != AVSUCCESS && ffmpegStatus != AVERROR (EAGAIN);
831- if (gotNonRetriableError) {
837+
838+ if (ffmpegStatus != AVSUCCESS && ffmpegStatus != AVERROR (EAGAIN)) {
832839 gotPermanentErrorOnAnyActiveStream = true ;
833840 break ;
834841 }
842+
835843 if (ffmpegStatus == AVSUCCESS) {
836844 frameStreamIndex = streamIndex;
837845 break ;
838846 }
839847 }
848+
840849 if (gotPermanentErrorOnAnyActiveStream) {
841850 break ;
842851 }
852+
843853 decodeStats_.numFramesReceivedByDecoder ++;
844- bool gotNeededFrame = ffmpegStatus == AVSUCCESS &&
845- filterFunction (frameStreamIndex, avFrame.get ());
846- if (gotNeededFrame) {
854+
855+ // Is this the kind of frame we're looking for?
856+ if (ffmpegStatus == AVSUCCESS &&
857+ filterFunction (frameStreamIndex, avFrame.get ())) {
858+ // Yes, this is the frame we'll return; break out of the decoding loop.
847859 break ;
848860 } else if (ffmpegStatus == AVSUCCESS) {
849- // No need to send more packets here as the decoder may have frames in
850- // its buffer.
861+ // No, but we received a valid frame - just not the kind we're looking
862+ // for. The logic below will read packets and send them to the decoder.
863+ // But since we did just receive a frame, we should skip reading more
864+ // packets and sending them to the decoder and just try to receive more
865+ // frames from the decoder.
851866 continue ;
852867 }
868+
853869 if (reachedEOF) {
854870 // We don't have any more packets to send to the decoder. So keep on
855871 // pulling frames from its internal buffers.
856872 continue ;
857873 }
874+
875+ // We still haven't found the frame we're looking for. So let's read more
876+ // packets and send them to the decoder.
858877 ReferenceAVPacket packet (autoAVPacket);
859878 ffmpegStatus = av_read_frame (formatContext_.get (), packet.get ());
860879 decodeStats_.numPacketsRead ++;
880+
861881 if (ffmpegStatus == AVERROR_EOF) {
862882 // End of file reached. We must drain all codecs by sending a nullptr
863883 // packet.
@@ -872,27 +892,38 @@ VideoDecoder::AVFrameStream VideoDecoder::getAVFrameUsingFilterFunction(
872892 getFFMPEGErrorStringFromErrorCode (ffmpegStatus));
873893 }
874894 }
895+
896+ // We've reached the end of file so we can't read any more packets from
897+ // it, but the decoder may still have frames to read in its buffer.
898+ // Continue iterating to try reading frames.
875899 reachedEOF = true ;
876900 continue ;
877901 }
902+
878903 if (ffmpegStatus < AVSUCCESS) {
879904 throw std::runtime_error (
880905 " Could not read frame from input file: " +
881906 getFFMPEGErrorStringFromErrorCode (ffmpegStatus));
882907 }
908+
883909 if (activeStreamIndices_.count (packet->stream_index ) == 0 ) {
884910 // This packet is not for any of the active streams.
885911 continue ;
886912 }
913+
914+ // We got a valid packet. Send it to the decoder, and we'll receive it in
915+ // the next iteration.
887916 ffmpegStatus = avcodec_send_packet (
888917 streamInfos_[packet->stream_index ].codecContext .get (), packet.get ());
889918 if (ffmpegStatus < AVSUCCESS) {
890919 throw std::runtime_error (
891920 " Could not push packet to decoder: " +
892921 getFFMPEGErrorStringFromErrorCode (ffmpegStatus));
893922 }
923+
894924 decodeStats_.numPacketsSentToDecoder ++;
895925 }
926+
896927 if (ffmpegStatus < AVSUCCESS) {
897928 if (reachedEOF || ffmpegStatus == AVERROR_EOF) {
898929 throw VideoDecoder::EndOfFileException (
@@ -903,6 +934,7 @@ VideoDecoder::AVFrameStream VideoDecoder::getAVFrameUsingFilterFunction(
903934 " Could not receive frame from decoder: " +
904935 getFFMPEGErrorStringFromErrorCode (ffmpegStatus));
905936 }
937+
906938 // Note that we don't flush the decoder when we reach EOF (even though that's
907939 // mentioned in https://ffmpeg.org/doxygen/trunk/group__lavc__encdec.html).
908940 // This is because we may have packets internally in the decoder that we
@@ -912,10 +944,8 @@ VideoDecoder::AVFrameStream VideoDecoder::getAVFrameUsingFilterFunction(
912944 StreamInfo& activeStreamInfo = streamInfos_[frameStreamIndex];
913945 activeStreamInfo.currentPts = avFrame->pts ;
914946 activeStreamInfo.currentDuration = getDuration (avFrame);
915- AVFrameStream avFrameStream;
916- avFrameStream.streamIndex = frameStreamIndex;
917- avFrameStream.avFrame = std::move (avFrame);
918- return avFrameStream;
947+
948+ return AVFrameStream (std::move (avFrame), frameStreamIndex);
919949}
920950
921951VideoDecoder::FrameOutput VideoDecoder::convertAVFrameToFrameOutput (
@@ -1079,8 +1109,8 @@ VideoDecoder::FrameOutput VideoDecoder::getFramePlayedAtNoDemux(
10791109 }
10801110
10811111 setCursorPtsInSeconds (seconds);
1082- AVFrameStream avFrameStream = getAVFrameUsingFilterFunction (
1083- [seconds, this ](int frameStreamIndex, AVFrame* avFrame) {
1112+ AVFrameStream avFrameStream =
1113+ decodeAVFrame ( [seconds, this ](int frameStreamIndex, AVFrame* avFrame) {
10841114 StreamInfo& streamInfo = streamInfos_[frameStreamIndex];
10851115 double frameStartTime = ptsToSeconds (avFrame->pts , streamInfo.timeBase );
10861116 double frameEndTime = ptsToSeconds (
@@ -1480,8 +1510,8 @@ VideoDecoder::FrameOutput VideoDecoder::getNextFrameNoDemux() {
14801510
14811511VideoDecoder::FrameOutput VideoDecoder::getNextFrameNoDemuxInternal (
14821512 std::optional<torch::Tensor> preAllocatedOutputTensor) {
1483- AVFrameStream avFrameStream = getAVFrameUsingFilterFunction (
1484- [this ](int frameStreamIndex, AVFrame* avFrame) {
1513+ AVFrameStream avFrameStream =
1514+ decodeAVFrame ( [this ](int frameStreamIndex, AVFrame* avFrame) {
14851515 StreamInfo& activeStreamInfo = streamInfos_[frameStreamIndex];
14861516 return avFrame->pts >= activeStreamInfo.discardFramesBeforePts ;
14871517 });
0 commit comments