@@ -556,6 +556,12 @@ void VideoDecoder::addAudioStream(int streamIndex) {
556556 static_cast <int64_t >(streamInfo.codecContext ->sample_rate );
557557 streamMetadata.numChannels =
558558 static_cast <int64_t >(getNumChannels (streamInfo.codecContext ));
559+
560+ // FFmpeg docs say that the decoder will try to decode natively in this
561+ // format, if it can. Docs don't say what the decoder does when it doesn't
562+ // support that format, but it looks like it does nothing, so this probably
563+ // doesn't hurt.
564+ streamInfo.codecContext ->request_sample_fmt = AV_SAMPLE_FMT_FLTP;
559565}
560566
561567// --------------------------------------------------------------------------
@@ -1342,24 +1348,28 @@ void VideoDecoder::convertAudioAVFrameToFrameOutputOnCPU(
13421348
13431349 const AVFrame* avFrame = avFrameStream.avFrame .get ();
13441350
1345- AVFrame* output_frame = nullptr ;
1346- SwrContext* swr_ctx = NULL ; // TODO RAII
1351+ AVSampleFormat sourceSampleFormat =
1352+ static_cast <AVSampleFormat>(avFrame->format );
1353+ AVSampleFormat desiredSampleFormat = AV_SAMPLE_FMT_FLTP;
1354+
1355+ AVFrame* output_frame = nullptr ;
1356+ SwrContext* swr_ctx = NULL ; // TODO RAII
1357+ if (sourceSampleFormat != desiredSampleFormat) {
13471358
13481359 const auto & streamInfo = streamInfos_[activeStreamIndex_];
13491360 const auto & streamMetadata =
13501361 containerMetadata_.allStreamMetadata [activeStreamIndex_];
13511362 int sampleRate = static_cast <int >(streamMetadata.sampleRate .value ());
13521363
1353- AVSampleFormat sampleFormat = AV_SAMPLE_FMT_FLTP;
13541364 AVChannelLayout layout = streamInfo.codecContext ->ch_layout ;
13551365
13561366 int status = swr_alloc_set_opts2 (
13571367 &swr_ctx,
13581368 &layout,
1359- sampleFormat ,
1369+ desiredSampleFormat ,
13601370 sampleRate,
13611371 &layout,
1362- sampleFormat ,
1372+ sourceSampleFormat ,
13631373 sampleRate,
13641374 0 ,
13651375 NULL );
@@ -1379,7 +1389,7 @@ void VideoDecoder::convertAudioAVFrameToFrameOutputOnCPU(
13791389 }
13801390 output_frame->ch_layout = layout;
13811391 output_frame->sample_rate = sampleRate;
1382- output_frame->format = sampleFormat ;
1392+ output_frame->format = desiredSampleFormat ;
13831393
13841394 output_frame->nb_samples = av_rescale_rnd (
13851395 swr_get_delay (swr_ctx, sampleRate) + avFrame->nb_samples ,
@@ -1406,6 +1416,7 @@ void VideoDecoder::convertAudioAVFrameToFrameOutputOnCPU(
14061416 }
14071417
14081418 avFrame = output_frame; // lmao
1419+ }
14091420
14101421 auto numSamples = avFrame->nb_samples ; // per channel
14111422 auto numChannels = getNumChannels (avFrame);
@@ -1434,7 +1445,7 @@ void VideoDecoder::convertAudioAVFrameToFrameOutputOnCPU(
14341445 av_get_sample_fmt_name (format));
14351446 }
14361447 frameOutput.data = outputData;
1437- // TODO
1448+ // TODO
14381449 av_frame_free (&output_frame);
14391450 swr_free (&swr_ctx);
14401451}
0 commit comments