@@ -100,6 +100,26 @@ void SingleStreamDecoder::initializeDecoder() {
100100 " Failed to find stream info: " ,
101101 getFFMPEGErrorStringFromErrorCode (status));
102102
103+ if (formatContext_->duration > 0 ) {
104+ AVRational defaultTimeBase{1 , AV_TIME_BASE};
105+ containerMetadata_.durationSecondsFromHeader =
106+ ptsToSeconds (formatContext_->duration , defaultTimeBase);
107+ }
108+
109+ if (formatContext_->bit_rate > 0 ) {
110+ containerMetadata_.bitRate = formatContext_->bit_rate ;
111+ }
112+
113+ int bestVideoStream = getBestStreamIndex (AVMEDIA_TYPE_VIDEO);
114+ if (bestVideoStream >= 0 ) {
115+ containerMetadata_.bestVideoStreamIndex = bestVideoStream;
116+ }
117+
118+ int bestAudioStream = getBestStreamIndex (AVMEDIA_TYPE_AUDIO);
119+ if (bestAudioStream >= 0 ) {
120+ containerMetadata_.bestAudioStreamIndex = bestAudioStream;
121+ }
122+
103123 for (unsigned int i = 0 ; i < formatContext_->nb_streams ; i++) {
104124 AVStream* avStream = formatContext_->streams [i];
105125 StreamMetadata streamMetadata;
@@ -110,8 +130,8 @@ void SingleStreamDecoder::initializeDecoder() {
110130 " , does not match AVStream's index, " +
111131 std::to_string (avStream->index ) + " ." );
112132 streamMetadata.streamIndex = i;
113- streamMetadata.mediaType = avStream->codecpar ->codec_type ;
114133 streamMetadata.codecName = avcodec_get_name (avStream->codecpar ->codec_id );
134+ streamMetadata.mediaType = avStream->codecpar ->codec_type ;
115135 streamMetadata.bitRate = avStream->codecpar ->bit_rate ;
116136
117137 int64_t frameCount = avStream->nb_frames ;
@@ -133,10 +153,18 @@ void SingleStreamDecoder::initializeDecoder() {
133153 if (fps > 0 ) {
134154 streamMetadata.averageFpsFromHeader = fps;
135155 }
156+ streamMetadata.width = avStream->codecpar ->width ;
157+ streamMetadata.height = avStream->codecpar ->height ;
158+ streamMetadata.sampleAspectRatio =
159+ avStream->codecpar ->sample_aspect_ratio ;
136160 containerMetadata_.numVideoStreams ++;
137161 } else if (avStream->codecpar ->codec_type == AVMEDIA_TYPE_AUDIO) {
138162 AVSampleFormat format =
139163 static_cast <AVSampleFormat>(avStream->codecpar ->format );
164+ streamMetadata.sampleRate =
165+ static_cast <int64_t >(avStream->codecpar ->sample_rate );
166+ streamMetadata.numChannels =
167+ static_cast <int64_t >(getNumChannels (avStream->codecpar ));
140168
141169 // If the AVSampleFormat is not recognized, we get back nullptr. We have
142170 // to make sure we don't initialize a std::string with nullptr. There's
@@ -149,27 +177,10 @@ void SingleStreamDecoder::initializeDecoder() {
149177 containerMetadata_.numAudioStreams ++;
150178 }
151179
152- containerMetadata_. allStreamMetadata . push_back ( streamMetadata);
153- }
180+ streamMetadata. durationSecondsFromContainer =
181+ containerMetadata_. durationSecondsFromHeader ;
154182
155- if (formatContext_->duration > 0 ) {
156- AVRational defaultTimeBase{1 , AV_TIME_BASE};
157- containerMetadata_.durationSecondsFromHeader =
158- ptsToSeconds (formatContext_->duration , defaultTimeBase);
159- }
160-
161- if (formatContext_->bit_rate > 0 ) {
162- containerMetadata_.bitRate = formatContext_->bit_rate ;
163- }
164-
165- int bestVideoStream = getBestStreamIndex (AVMEDIA_TYPE_VIDEO);
166- if (bestVideoStream >= 0 ) {
167- containerMetadata_.bestVideoStreamIndex = bestVideoStream;
168- }
169-
170- int bestAudioStream = getBestStreamIndex (AVMEDIA_TYPE_AUDIO);
171- if (bestAudioStream >= 0 ) {
172- containerMetadata_.bestAudioStreamIndex = bestAudioStream;
183+ containerMetadata_.allStreamMetadata .push_back (streamMetadata);
173184 }
174185
175186 if (seekMode_ == SeekMode::exact) {
@@ -288,6 +299,14 @@ void SingleStreamDecoder::scanFileAndUpdateMetadataAndIndex() {
288299 streamMetadata.numFramesFromContent =
289300 streamInfos_[streamIndex].allFrames .size ();
290301
302+ // This ensures that we are robust in handling cases where
303+ // we are decoding in exact mode and numFrames is 0. The current metadata
304+ // validation logic assumes that these values should not be None
305+ if (streamMetadata.numFramesFromContent .value () == 0 ) {
306+ streamMetadata.beginStreamPtsFromContent = 0 ;
307+ streamMetadata.endStreamPtsFromContent = 0 ;
308+ }
309+
291310 if (streamMetadata.beginStreamPtsFromContent .has_value ()) {
292311 streamMetadata.beginStreamPtsSecondsFromContent = ptsToSeconds (
293312 *streamMetadata.beginStreamPtsFromContent , avStream->time_base );
@@ -516,11 +535,6 @@ void SingleStreamDecoder::addVideoStream(
516535 auto & streamInfo = streamInfos_[activeStreamIndex_];
517536 streamInfo.videoStreamOptions = videoStreamOptions;
518537
519- streamMetadata.width = streamInfo.codecContext ->width ;
520- streamMetadata.height = streamInfo.codecContext ->height ;
521- streamMetadata.sampleAspectRatio =
522- streamInfo.codecContext ->sample_aspect_ratio ;
523-
524538 if (seekMode_ == SeekMode::custom_frame_mappings) {
525539 TORCH_CHECK (
526540 customFrameMappings.has_value (),
@@ -566,13 +580,6 @@ void SingleStreamDecoder::addAudioStream(
566580 auto & streamInfo = streamInfos_[activeStreamIndex_];
567581 streamInfo.audioStreamOptions = audioStreamOptions;
568582
569- auto & streamMetadata =
570- containerMetadata_.allStreamMetadata [activeStreamIndex_];
571- streamMetadata.sampleRate =
572- static_cast <int64_t >(streamInfo.codecContext ->sample_rate );
573- streamMetadata.numChannels =
574- static_cast <int64_t >(getNumChannels (streamInfo.codecContext ));
575-
576583 // FFmpeg docs say that the decoder will try to decode natively in this
577584 // format, if it can. Docs don't say what the decoder does when it doesn't
578585 // support that format, but it looks like it does nothing, so this probably
0 commit comments