@@ -1345,20 +1345,29 @@ void SingleStreamDecoder::convertAudioAVFrameToFrameOutputOnCPU(
13451345 static_cast <AVSampleFormat>(srcAVFrame->format );
13461346 AVSampleFormat desiredSampleFormat = AV_SAMPLE_FMT_FLTP;
13471347
1348+ StreamInfo& streamInfo = streamInfos_[activeStreamIndex_];
13481349 int sourceSampleRate = srcAVFrame->sample_rate ;
13491350 int desiredSampleRate =
1350- streamInfos_[activeStreamIndex_].audioStreamOptions .sampleRate .value_or (
1351- sourceSampleRate);
1351+ streamInfo.audioStreamOptions .sampleRate .value_or (sourceSampleRate);
13521352
13531353 bool mustConvert =
13541354 (sourceSampleFormat != desiredSampleFormat ||
13551355 sourceSampleRate != desiredSampleRate);
13561356
13571357 UniqueAVFrame convertedAVFrame;
13581358 if (mustConvert) {
1359+ if (!streamInfo.swrContext ) {
1360+ streamInfo.swrContext .reset (createSwrContext (
1361+ streamInfo.codecContext ,
1362+ sourceSampleFormat,
1363+ desiredSampleFormat,
1364+ sourceSampleRate,
1365+ desiredSampleRate));
1366+ }
1367+
13591368 convertedAVFrame = convertAudioAVFrameSampleFormatAndSampleRate (
1369+ streamInfo.swrContext ,
13601370 srcAVFrame,
1361- sourceSampleFormat,
13621371 desiredSampleFormat,
13631372 sourceSampleRate,
13641373 desiredSampleRate);
@@ -1394,22 +1403,11 @@ void SingleStreamDecoder::convertAudioAVFrameToFrameOutputOnCPU(
13941403}
13951404
13961405UniqueAVFrame SingleStreamDecoder::convertAudioAVFrameSampleFormatAndSampleRate (
1406+ const UniqueSwrContext& swrContext,
13971407 const UniqueAVFrame& srcAVFrame,
1398- AVSampleFormat sourceSampleFormat,
13991408 AVSampleFormat desiredSampleFormat,
14001409 int sourceSampleRate,
14011410 int desiredSampleRate) {
1402- auto & streamInfo = streamInfos_[activeStreamIndex_];
1403-
1404- if (!streamInfo.swrContext ) {
1405- streamInfo.swrContext .reset (createSwrContext (
1406- streamInfo.codecContext ,
1407- sourceSampleFormat,
1408- desiredSampleFormat,
1409- sourceSampleRate,
1410- desiredSampleRate));
1411- }
1412-
14131411 UniqueAVFrame convertedAVFrame (av_frame_alloc ());
14141412 TORCH_CHECK (
14151413 convertedAVFrame,
@@ -1428,7 +1426,7 @@ UniqueAVFrame SingleStreamDecoder::convertAudioAVFrameSampleFormatAndSampleRate(
14281426 // output samples, but empirically `av_rescale_rnd()` seems to provide a
14291427 // tighter bound.
14301428 convertedAVFrame->nb_samples = av_rescale_rnd (
1431- swr_get_delay (streamInfo. swrContext .get (), sourceSampleRate) +
1429+ swr_get_delay (swrContext.get (), sourceSampleRate) +
14321430 srcAVFrame->nb_samples ,
14331431 desiredSampleRate,
14341432 sourceSampleRate,
@@ -1444,7 +1442,7 @@ UniqueAVFrame SingleStreamDecoder::convertAudioAVFrameSampleFormatAndSampleRate(
14441442 getFFMPEGErrorStringFromErrorCode (status));
14451443
14461444 auto numConvertedSamples = swr_convert (
1447- streamInfo. swrContext .get (),
1445+ swrContext.get (),
14481446 convertedAVFrame->data ,
14491447 convertedAVFrame->nb_samples ,
14501448 static_cast <const uint8_t **>(
0 commit comments