@@ -222,6 +222,13 @@ void VideoDecoder::scanFileAndUpdateMetadataAndIndex() {
222222 return ;
223223 }
224224
225+ for (unsigned int i = 0 ; i < formatContext_->nb_streams ; ++i) {
226+ // We want to scan and update the metadata of all streams.
227+ TORCH_CHECK (
228+ formatContext_->streams [i]->discard != AVDISCARD_ALL,
229+ " Did you add a stream before you called for a scan?" );
230+ }
231+
225232 AutoAVPacket autoAVPacket;
226233 while (true ) {
227234 ReferenceAVPacket packet (autoAVPacket);
@@ -481,6 +488,16 @@ void VideoDecoder::addVideoStreamDecoder(
481488 updateMetadataWithCodecContext (streamInfo.streamIndex , codecContext);
482489 streamInfo.videoStreamOptions = videoStreamOptions;
483490
491+ // We will only need packets from the active stream, so we tell FFmpeg to
492+ // discard packets from the other streams. Note that av_read_frame() may still
493+ // return some of those undesired packets under some conditions, so it's still
494+ // important to discard/demux correctly in the inner decoding loop.
495+ for (unsigned int i = 0 ; i < formatContext_->nb_streams ; ++i) {
496+ if (i != static_cast <unsigned int >(activeStreamIndex_)) {
497+ formatContext_->streams [i]->discard = AVDISCARD_ALL;
498+ }
499+ }
500+
484501 // By default, we want to use swscale for color conversion because it is
485502 // faster. However, it has width requirements, so we may need to fall back
486503 // to filtergraph. We also need to respect what was requested from the
0 commit comments