@@ -282,10 +282,13 @@ void AudioEncoder::encode() {
282282 // encoded frame would contain more samples than necessary and our results
283283 // wouldn't match the ffmpeg CLI.
284284 avFrame->nb_samples = numSamplesToEncode;
285- encodeInnerLoop (autoAVPacket, avFrame);
286285
287- avFrame->pts += static_cast <int64_t >(numSamplesToEncode);
286+ UniqueAVFrame convertedAVFrame = maybeConvertAVFrame (avFrame);
287+ encodeInnerLoop (autoAVPacket, convertedAVFrame);
288+
288289 numEncodedSamples += numSamplesToEncode;
290+ // TODO-ENCODING set frame pts correctly, and test against it.
291+ // avFrame->pts += static_cast<int64_t>(numSamplesToEncode);
289292 }
290293 TORCH_CHECK (numEncodedSamples == numSamples, " Hmmmmmm something went wrong." );
291294
@@ -298,42 +301,43 @@ void AudioEncoder::encode() {
298301 getFFMPEGErrorStringFromErrorCode (status));
299302}
300303
301- void AudioEncoder::encodeInnerLoop (
302- AutoAVPacket& autoAVPacket,
303- const UniqueAVFrame& srcAVFrame) {
304- bool mustConvert =
305- (srcAVFrame != nullptr &&
306- (avCodecContext_->sample_fmt != AV_SAMPLE_FMT_FLTP ||
307- getNumChannels (srcAVFrame) != outNumChannels_));
308-
309- UniqueAVFrame convertedAVFrame;
310- if (mustConvert) {
311- if (!swrContext_) {
312- swrContext_.reset (createSwrContext (
313- AV_SAMPLE_FMT_FLTP,
314- avCodecContext_->sample_fmt ,
315- srcAVFrame->sample_rate , // No sample rate conversion
316- srcAVFrame->sample_rate ,
317- srcAVFrame,
318- outNumChannels_));
319- }
320- convertedAVFrame = convertAudioAVFrameSamples (
321- swrContext_,
322- srcAVFrame,
// Returns a frame that matches what the encoder is configured for.
//
// If `avFrame` already has the codec context's sample format and
// `outNumChannels_` channels, a clone of `avFrame` is returned. Otherwise the
// frame is converted with `convertAudioAVFrameSamples` (the input sample rate
// is passed as the output rate, so no resampling is requested), the converted
// frame is checked to hold the same number of samples as the input, and it is
// returned.
//
// NOTE(review): this span is a diff rendering. Lines marked '-' appear to be
// deletions from the previous encodeInnerLoop implementation and are not part
// of this function; only '+' and unmarked context lines belong to it.
304+ UniqueAVFrame AudioEncoder::maybeConvertAVFrame (const UniqueAVFrame& avFrame) {
305+ if (static_cast <AVSampleFormat>(avFrame->format ) ==
306+ avCodecContext_->sample_fmt &&
307+ getNumChannels (avFrame) == outNumChannels_) {
308+ // Note: the clone references the same underlying data, it's a cheap copy.
// NOTE(review): the result of av_frame_clone is not checked. FFmpeg documents
// that it returns NULL on failure, and the caller forwards the frame pointer
// to avcodec_send_frame, where a NULL frame is treated as a flush request.
// Consider a TORCH_CHECK on the clone before returning it.
309+ return UniqueAVFrame (av_frame_clone (avFrame.get ()));
310+ }
311+
// The resampling context is created only when it does not exist yet, using
// the format, sample rate and layout of the frame seen at that point.
// NOTE(review): presumably every later frame shares those properties - confirm
// against the caller.
312+ if (!swrContext_) {
313+ swrContext_.reset (createSwrContext (
314+ static_cast <AVSampleFormat>(avFrame->format ),
323315 avCodecContext_->sample_fmt ,
324- srcAVFrame->sample_rate , // No sample rate conversion
325- outNumChannels_);
326- TORCH_CHECK (
327- convertedAVFrame->nb_samples == srcAVFrame->nb_samples ,
328- " convertedAVFrame->nb_samples=" ,
329- convertedAVFrame->nb_samples ,
330- " differs from " ,
331- " srcAVFrame->nb_samples=" ,
332- srcAVFrame->nb_samples ,
333- " This is unexpected, please report on the TorchCodec bug tracker." );
316+ avFrame->sample_rate , // No sample rate conversion
317+ avFrame->sample_rate ,
318+ avFrame,
319+ outNumChannels_));
334320 }
335- const UniqueAVFrame& avFrame = mustConvert ? convertedAVFrame : srcAVFrame;
321+ UniqueAVFrame convertedAVFrame = convertAudioAVFrameSamples (
322+ swrContext_,
323+ avFrame,
324+ avCodecContext_->sample_fmt ,
325+ avFrame->sample_rate , // No sample rate conversion
326+ outNumChannels_);
// The sample rate is unchanged, so the converted frame must carry exactly as
// many samples as the input; anything else is reported as an internal error.
327+ TORCH_CHECK (
328+ convertedAVFrame->nb_samples == avFrame->nb_samples ,
329+ " convertedAVFrame->nb_samples=" ,
330+ convertedAVFrame->nb_samples ,
331+ " differs from " ,
332+ " avFrame->nb_samples=" ,
333+ avFrame->nb_samples ,
334+ " This is unexpected, please report on the TorchCodec bug tracker." );
335+ return convertedAVFrame;
336+ }
336337
338+ void AudioEncoder::encodeInnerLoop (
339+ AutoAVPacket& autoAVPacket,
340+ const UniqueAVFrame& avFrame) {
337341 auto status = avcodec_send_frame (avCodecContext_.get (), avFrame.get ());
338342 TORCH_CHECK (
339343 status == AVSUCCESS,
0 commit comments