@@ -215,6 +215,9 @@ void AudioEncoder::initializeEncoder(
215215 status == AVSUCCESS,
216216 " avcodec_open2 failed: " ,
217217 getFFMPEGErrorStringFromErrorCode (status));
218+
219+ bool supportsVariableFrameSize = avCodec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE;
220+ printf (" supportsVariableFrameSize = %d\n " , supportsVariableFrameSize);
218221
219222 // We're allocating the stream here. Streams are meant to be freed by
220223 // avformat_free_context(avFormatContext), which we call in the
@@ -228,6 +231,12 @@ void AudioEncoder::initializeEncoder(
228231 " avcodec_parameters_from_context failed: " ,
229232 getFFMPEGErrorStringFromErrorCode (status));
230233 streamIndex_ = avStream->index ;
234+
235+ // frame_size * 2 is a decent default size. FFmpeg automatically re-allocates
236+ // the fifo if more space is needed.
237+ auto avAudioFifo = av_audio_fifo_alloc (avCodecContext_->sample_fmt , outNumChannels_, avCodecContext_->frame_size * 2 );
238+ TORCH_CHECK (avAudioFifo!= nullptr , " Couldn't create AVAudioFifo." );
239+ avAudioFifo_.reset (avAudioFifo);
231240}
232241
233242torch::Tensor AudioEncoder::encodeToTensor () {
@@ -309,7 +318,7 @@ void AudioEncoder::encode() {
309318
310319void AudioEncoder::encodeInnerLoop (
311320 AutoAVPacket& autoAVPacket,
312- const UniqueAVFrame& srcAVFrame,
321+ UniqueAVFrame& srcAVFrame,
313322 bool allowConvert) {
314323 // TODO: Probably makes more sense to move the conversion away? It shouldn't
315324 // be in inner loop in any case. We should also remove allowConvert.
@@ -348,8 +357,26 @@ void AudioEncoder::encodeInnerLoop(
348357 " This is unexpected, please report on the TorchCodec bug tracker." );
349358 }
350359 }
351- const UniqueAVFrame& avFrame = mustConvert ? convertedAVFrame : srcAVFrame;
360+ UniqueAVFrame& avFrame = mustConvert ? convertedAVFrame : srcAVFrame;
361+
362+ if (avFrame != nullptr ) {
363+ // TODO static cast
364+ int numSamplesWritten = av_audio_fifo_write (avAudioFifo_.get (), (void **)avFrame->data , avFrame->nb_samples );
365+ TORCH_CHECK (numSamplesWritten == avFrame->nb_samples , " Tried to write TODO" );
366+ printf (" Writing %d samples to fifo (size = %d)\n " , avFrame->nb_samples , av_audio_fifo_size (avAudioFifo_.get ()));
367+
368+ avFrame = allocateAVFrame (avCodecContext_->frame_size , outSampleRate_, outNumChannels_);
369+ // TODO cast
370+ int numSamplesRead = av_audio_fifo_read (avAudioFifo_.get (), (void **)avFrame->data , avFrame->nb_samples );
371+ printf (" Read %d from fifo\n " , numSamplesRead);
372+ TORCH_CHECK (numSamplesRead > 0 , " Tried to read TODO" );
373+ }
352374
375+ if (avFrame != nullptr ) {
376+ printf (" Sending frame with %d samples\n " , avFrame->nb_samples );
377+ } else {
378+ printf (" AVFrame is empty\n " );
379+ }
353380 auto status = avcodec_send_frame (avCodecContext_.get (), avFrame.get ());
354381 TORCH_CHECK (
355382 status == AVSUCCESS,
@@ -413,6 +440,7 @@ void AudioEncoder::maybeFlushSwrBuffers(AutoAVPacket& autoAVPacket) {
413440void AudioEncoder::flushBuffers () {
414441 AutoAVPacket autoAVPacket;
415442 maybeFlushSwrBuffers (autoAVPacket);
416- encodeInnerLoop (autoAVPacket, UniqueAVFrame (nullptr ));
443+ auto zob = UniqueAVFrame (nullptr );
444+ encodeInnerLoop (autoAVPacket, zob);
417445}
418446} // namespace facebook::torchcodec
0 commit comments