@@ -1370,19 +1370,21 @@ void VideoDecoder::convertAudioAVFrameToFrameOutputOnCPU(
13701370 streamInfos_[activeStreamIndex_].audioStreamOptions .sampleRate .value_or (
13711371 sourceSampleRate);
13721372
1373+ bool mustConvert =
1374+ (sourceSampleFormat != desiredSampleFormat ||
1375+ sourceSampleRate != desiredSampleRate);
1376+
13731377 UniqueAVFrame convertedAVFrame;
1374- if (sourceSampleFormat != desiredSampleFormat ||
1375- sourceSampleRate != desiredSampleRate) {
1378+ if (mustConvert) {
13761379 convertedAVFrame = convertAudioAVFrameSampleFormatAndSampleRate (
13771380 avFrameStream.avFrame ,
13781381 sourceSampleFormat,
13791382 desiredSampleFormat,
13801383 sourceSampleRate,
13811384 desiredSampleRate);
13821385 }
1383- const UniqueAVFrame& avFrame = (sourceSampleFormat != desiredSampleFormat)
1384- ? convertedAVFrame
1385- : avFrameStream.avFrame ;
1386+ const UniqueAVFrame& avFrame =
1387+ mustConvert ? convertedAVFrame : avFrameStream.avFrame ;
13861388
13871389 AVSampleFormat format = static_cast <AVSampleFormat>(avFrame->format );
13881390 TORCH_CHECK (
@@ -1415,13 +1417,14 @@ UniqueAVFrame VideoDecoder::convertAudioAVFrameSampleFormatAndSampleRate(
14151417 int sourceSampleRate,
14161418 int desiredSampleRate) {
14171419 auto & streamInfo = streamInfos_[activeStreamIndex_];
1418- const auto & streamMetadata =
1419- containerMetadata_.allStreamMetadata [activeStreamIndex_];
1420- int sampleRate = static_cast <int >(streamMetadata.sampleRate .value ());
14211420
14221421 if (!streamInfo.swrContext ) {
14231422 createSwrContext (
1424- streamInfo, sampleRate, sourceSampleFormat, desiredSampleFormat);
1423+ streamInfo,
1424+ sourceSampleFormat,
1425+ desiredSampleFormat,
1426+ sourceSampleRate,
1427+ desiredSampleRate);
14251428 }
14261429
14271430 UniqueAVFrame convertedAVFrame (av_frame_alloc ());
@@ -1431,8 +1434,17 @@ UniqueAVFrame VideoDecoder::convertAudioAVFrameSampleFormatAndSampleRate(
14311434
14321435 setChannelLayout (convertedAVFrame, avFrame);
14331436 convertedAVFrame->format = static_cast <int >(desiredSampleFormat);
1434- convertedAVFrame->sample_rate = avFrame->sample_rate ;
1435- convertedAVFrame->nb_samples = avFrame->nb_samples ;
1437+ convertedAVFrame->sample_rate = desiredSampleRate;
1438+ if (sourceSampleRate != desiredSampleRate) {
1439+ convertedAVFrame->nb_samples = av_rescale_rnd (
1440+ swr_get_delay (streamInfo.swrContext .get (), sourceSampleRate) +
1441+ avFrame->nb_samples ,
1442+ desiredSampleRate,
1443+ sourceSampleRate,
1444+ AV_ROUND_UP);
1445+ } else {
1446+ convertedAVFrame->nb_samples = avFrame->nb_samples ;
1447+ }
14361448
14371449 auto status = av_frame_get_buffer (convertedAVFrame.get (), 0 );
14381450 TORCH_CHECK (
@@ -1689,14 +1701,16 @@ void VideoDecoder::createSwsContext(
16891701
16901702void VideoDecoder::createSwrContext (
16911703 StreamInfo& streamInfo,
1692- int sampleRate,
16931704 AVSampleFormat sourceSampleFormat,
1694- AVSampleFormat desiredSampleFormat) {
1705+ AVSampleFormat desiredSampleFormat,
1706+ int sourceSampleRate,
1707+ int desiredSampleRate) {
16951708 auto swrContext = allocateSwrContext (
16961709 streamInfo.codecContext ,
1697- sampleRate,
16981710 sourceSampleFormat,
1699- desiredSampleFormat);
1711+ desiredSampleFormat,
1712+ sourceSampleRate,
1713+ desiredSampleRate);
17001714
17011715 auto status = swr_init (swrContext);
17021716 TORCH_CHECK (
0 commit comments