@@ -109,11 +109,17 @@ UniqueAVFrame allocateAVFrame(
109109 av_channel_layout_default (&avFrame->ch_layout , numChannels);
110110 avFrame->format = sampleFormat;
111111 auto status = av_frame_get_buffer (avFrame.get (), 0 );
112+
112113 TORCH_CHECK (
113114 status == AVSUCCESS,
114115 " Couldn't allocate avFrame's buffers: " ,
115116 getFFMPEGErrorStringFromErrorCode (status));
116117
118+ status = av_frame_make_writable (avFrame.get ());
119+ TORCH_CHECK (
120+ status == AVSUCCESS,
121+ " Couldn't make AVFrame writable: " ,
122+ getFFMPEGErrorStringFromErrorCode (status));
117123 return avFrame;
118124}
119125
@@ -236,19 +242,17 @@ void AudioEncoder::initializeEncoder(
236242 getFFMPEGErrorStringFromErrorCode (status));
237243 streamIndex_ = avStream->index ;
238244
239- // bool supportsVariableFrameSize =
240- // avCodec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE;
241- // printf("supportsVariableFrameSize = %d\n", supportsVariableFrameSize);
242-
243- // // frame_size * 2 is a decent default size. FFmpeg automatically
244- // re-allocates
245- // // the fifo if more space is needed.
246- auto avAudioFifo = av_audio_fifo_alloc (
247- avCodecContext_->sample_fmt ,
248- outNumChannels_,
249- avCodecContext_->frame_size * 2 );
250- TORCH_CHECK (avAudioFifo != nullptr , " Couldn't create AVAudioFifo." );
251- avAudioFifo_.reset (avAudioFifo);
245+ if (((avCodec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE) == 0 ) &&
246+ (sampleRateInput_ != outSampleRate_)) {
247+ // frame_size * 2 is a decent default size. FFmpeg automatically
248+ // re-allocates the fifo if more space is needed.
249+ auto avAudioFifo = av_audio_fifo_alloc (
250+ avCodecContext_->sample_fmt ,
251+ outNumChannels_,
252+ avCodecContext_->frame_size * 2 );
253+ TORCH_CHECK (avAudioFifo != nullptr , " Couldn't create AVAudioFifo." );
254+ avAudioFifo_.reset (avAudioFifo);
255+ }
252256}
253257
254258torch::Tensor AudioEncoder::encodeToTensor () {
@@ -291,12 +295,6 @@ void AudioEncoder::encode() {
291295 getFFMPEGErrorStringFromErrorCode (status));
292296
293297 while (numEncodedSamples < numSamples) {
294- status = av_frame_make_writable (avFrame.get ());
295- TORCH_CHECK (
296- status == AVSUCCESS,
297- " Couldn't make AVFrame writable: " ,
298- getFFMPEGErrorStringFromErrorCode (status));
299-
300298 int numSamplesToEncode =
301299 std::min (numSamplesAllocatedPerFrame, numSamples - numEncodedSamples);
302300 int numBytesToEncode = numSamplesToEncode * numBytesPerSample;
@@ -317,34 +315,7 @@ void AudioEncoder::encode() {
317315 avFrame->nb_samples = numSamplesToEncode;
318316
319317 UniqueAVFrame convertedAVFrame = maybeConvertAVFrame (avFrame);
320- // TODO static cast
321- int numSamplesWritten = av_audio_fifo_write (
322- avAudioFifo_.get (),
323- (void **)convertedAVFrame->data ,
324- convertedAVFrame->nb_samples );
325- TORCH_CHECK (
326- numSamplesWritten == convertedAVFrame->nb_samples ,
327- " Tried to write TODO" );
328-
329- UniqueAVFrame newavFrame = allocateAVFrame (
330- avCodecContext_->frame_size ,
331- outSampleRate_,
332- outNumChannels_,
333- avCodecContext_->sample_fmt );
334- while (av_audio_fifo_size (avAudioFifo_.get ()) >=
335- avCodecContext_->frame_size ) {
336-
337- // TODO cast
338- int numSamplesRead = av_audio_fifo_read (
339- avAudioFifo_.get (), (void **)newavFrame->data , newavFrame->nb_samples );
340- TORCH_CHECK (numSamplesRead > 0 , " Tried to read TODO" );
341-
342- // UniqueAVFrame clonedFrame(av_frame_clone(newavFrame.get()));
343- // UniqueAVFrame refFrame(av_frame_alloc());
344- // av_frame_ref(refFrame.get(), newavFrame.get());
345-
346- encodeInnerLoop (autoAVPacket, newavFrame);
347- }
318+ sendFrameThroughFifo (autoAVPacket, convertedAVFrame);
348319
349320 numEncodedSamples += numSamplesToEncode;
350321 // TODO-ENCODING set frame pts correctly, and test against it.
@@ -367,7 +338,6 @@ UniqueAVFrame AudioEncoder::maybeConvertAVFrame(const UniqueAVFrame& avFrame) {
367338 getNumChannels (avFrame) == outNumChannels_ &&
368339 avFrame->sample_rate == outSampleRate_) {
369340 // Note: the clone references the same underlying data, it's a cheap copy.
370- TORCH_CHECK (false , " unexpected" );
371341 return UniqueAVFrame (av_frame_clone (avFrame.get ()));
372342 }
373343
@@ -400,7 +370,37 @@ UniqueAVFrame AudioEncoder::maybeConvertAVFrame(const UniqueAVFrame& avFrame) {
400370 return convertedAVFrame;
401371}
402372
403- void AudioEncoder::encodeInnerLoop (
373+ void AudioEncoder::sendFrameThroughFifo (
374+ AutoAVPacket& autoAVPacket,
375+ const UniqueAVFrame& avFrame,
376+ bool andFlushFifo) {
377+ if (avAudioFifo_ == nullptr ) {
378+ encodeFrame (autoAVPacket, avFrame);
379+ return ;
380+ }
381+ // TODO static cast
382+ int numSamplesWritten = av_audio_fifo_write (
383+ avAudioFifo_.get (), (void **)avFrame->data , avFrame->nb_samples );
384+ TORCH_CHECK (numSamplesWritten == avFrame->nb_samples , " Tried to write TODO" );
385+
386+ UniqueAVFrame newavFrame = allocateAVFrame (
387+ avCodecContext_->frame_size ,
388+ outSampleRate_,
389+ outNumChannels_,
390+ avCodecContext_->sample_fmt );
391+
392+ while (av_audio_fifo_size (avAudioFifo_.get ()) >=
393+ (andFlushFifo ? 1 : avCodecContext_->frame_size )) {
394+ // TODO cast
395+ int numSamplesRead = av_audio_fifo_read (
396+ avAudioFifo_.get (), (void **)newavFrame->data , newavFrame->nb_samples );
397+ TORCH_CHECK (numSamplesRead > 0 , " Tried to read TODO" );
398+
399+ encodeFrame (autoAVPacket, newavFrame);
400+ }
401+ }
402+
403+ void AudioEncoder::encodeFrame (
404404 AutoAVPacket& autoAVPacket,
405405 const UniqueAVFrame& avFrame) {
406406 auto status = avcodec_send_frame (avCodecContext_.get (), avFrame.get ());
@@ -463,14 +463,12 @@ void AudioEncoder::maybeFlushSwrBuffers(AutoAVPacket& autoAVPacket) {
463463 swrContext_.get (), avFrame->data , avFrame->nb_samples , NULL , 0 );
464464 avFrame->nb_samples = actualNumRemainingSamples;
465465
466- encodeInnerLoop (autoAVPacket, avFrame);
466+ sendFrameThroughFifo (autoAVPacket, avFrame, /* andFlushFifo= */ true );
467467}
468468
469469void AudioEncoder::flushBuffers () {
470- printf (" Flushing, there are %d samples in fifo\n " , av_audio_fifo_size (avAudioFifo_.get ()));
471470 AutoAVPacket autoAVPacket;
472471 maybeFlushSwrBuffers (autoAVPacket);
473- encodeInnerLoop (autoAVPacket, UniqueAVFrame (nullptr ));
474- printf (" Done flushing, there are %d samples in fifo\n " , av_audio_fifo_size (avAudioFifo_.get ()));
472+ encodeFrame (autoAVPacket, UniqueAVFrame (nullptr ));
475473}
476474} // namespace facebook::torchcodec
0 commit comments