@@ -1436,6 +1436,11 @@ UniqueAVFrame VideoDecoder::convertAudioAVFrameSampleFormatAndSampleRate(
14361436 convertedAVFrame->format = static_cast <int >(desiredSampleFormat);
14371437 convertedAVFrame->sample_rate = desiredSampleRate;
14381438 if (sourceSampleRate != desiredSampleRate) {
1439+ // Note that this is an upper bound on the number of output samples.
1440+ // `swr_convert()` will likely not fill convertedAVFrame with that many
1441+ // samples, it will buffer the last few ones because those require future
1442+ // samples. That's also why we reset nb_samples after the call to
1443+ // `swr_convert()`.
14391444 convertedAVFrame->nb_samples = av_rescale_rnd (
14401445 swr_get_delay (streamInfo.swrContext .get (), sourceSampleRate) +
14411446 avFrame->nb_samples ,
@@ -1452,16 +1457,20 @@ UniqueAVFrame VideoDecoder::convertAudioAVFrameSampleFormatAndSampleRate(
14521457 " Could not allocate frame buffers for sample format conversion: " ,
14531458 getFFMPEGErrorStringFromErrorCode (status));
14541459
1455- auto numSampleConverted = swr_convert (
1460+ auto numConvertedSamples = swr_convert (
14561461 streamInfo.swrContext .get (),
14571462 convertedAVFrame->data ,
14581463 convertedAVFrame->nb_samples ,
14591464 static_cast <const uint8_t **>(const_cast <const uint8_t **>(avFrame->data )),
14601465 avFrame->nb_samples );
14611466 TORCH_CHECK (
1462- numSampleConverted > 0 ,
1467+ numConvertedSamples > 0 ,
14631468 " Error in swr_convert: " ,
1464- getFFMPEGErrorStringFromErrorCode (numSampleConverted));
1469+ getFFMPEGErrorStringFromErrorCode (numConvertedSamples));
1470+
1471+ // See comment above about nb_samples
1472+ convertedAVFrame->nb_samples = numConvertedSamples;
1473+ // TODO need to flush properly to retrieve the last few samples.
14651474
14661475 return convertedAVFrame;
14671476}
0 commit comments