@@ -96,14 +96,18 @@ AVSampleFormat findBestOutputSampleFormat(const AVCodec& avCodec) {
9696 return avCodec.sample_fmts [0 ];
9797}
9898
99- UniqueAVFrame allocateAVFrame (int numSamples, int sampleRate, int numChannels) {
99+ UniqueAVFrame allocateAVFrame (
100+ int numSamples,
101+ int sampleRate,
102+ int numChannels,
103+ AVSampleFormat sampleFormat) {
100104 auto avFrame = UniqueAVFrame (av_frame_alloc ());
101105 TORCH_CHECK (avFrame != nullptr , " Couldn't allocate AVFrame." );
102106
103107 avFrame->nb_samples = numSamples;
104- avFrame->format = AV_SAMPLE_FMT_FLTP;
105108 avFrame->sample_rate = sampleRate;
106109 av_channel_layout_default (&avFrame->ch_layout , numChannels);
110+ avFrame->format = sampleFormat;
107111 auto status = av_frame_get_buffer (avFrame.get (), 0 );
108112 TORCH_CHECK (
109113 status == AVSUCCESS,
@@ -239,12 +243,12 @@ void AudioEncoder::initializeEncoder(
239243 // // frame_size * 2 is a decent default size. FFmpeg automatically
240244 // re-allocates
241245 // // the fifo if more space is needed.
242- // auto avAudioFifo = av_audio_fifo_alloc(
243- // avCodecContext_->sample_fmt,
244- // outNumChannels_,
245- // avCodecContext_->frame_size * 2);
246- // TORCH_CHECK(avAudioFifo != nullptr, "Couldn't create AVAudioFifo.");
247- // avAudioFifo_.reset(avAudioFifo);
246+ auto avAudioFifo = av_audio_fifo_alloc (
247+ avCodecContext_->sample_fmt ,
248+ outNumChannels_,
249+ avCodecContext_->frame_size * 2 );
250+ TORCH_CHECK (avAudioFifo != nullptr , " Couldn't create AVAudioFifo." );
251+ avAudioFifo_.reset (avAudioFifo);
248252}
249253
250254torch::Tensor AudioEncoder::encodeToTensor () {
@@ -268,7 +272,8 @@ void AudioEncoder::encode() {
268272 UniqueAVFrame avFrame = allocateAVFrame (
269273 numSamplesAllocatedPerFrame,
270274 sampleRateInput_,
271- static_cast <int >(samples_.sizes ()[0 ]));
275+ static_cast <int >(samples_.sizes ()[0 ]),
276+ AV_SAMPLE_FMT_FLTP);
272277 avFrame->pts = 0 ;
273278
274279 AutoAVPacket autoAVPacket;
@@ -312,7 +317,34 @@ void AudioEncoder::encode() {
312317 avFrame->nb_samples = numSamplesToEncode;
313318
314319 UniqueAVFrame convertedAVFrame = maybeConvertAVFrame (avFrame);
315- encodeInnerLoop (autoAVPacket, convertedAVFrame);
320+ // TODO static cast
321+ int numSamplesWritten = av_audio_fifo_write (
322+ avAudioFifo_.get (),
323+ (void **)convertedAVFrame->data ,
324+ convertedAVFrame->nb_samples );
325+ TORCH_CHECK (
326+ numSamplesWritten == convertedAVFrame->nb_samples ,
327+ " Tried to write TODO" );
328+
329+ UniqueAVFrame newavFrame = allocateAVFrame (
330+ avCodecContext_->frame_size ,
331+ outSampleRate_,
332+ outNumChannels_,
333+ avCodecContext_->sample_fmt );
334+ while (av_audio_fifo_size (avAudioFifo_.get ()) >=
335+ avCodecContext_->frame_size ) {
336+
337+ // TODO cast
338+ int numSamplesRead = av_audio_fifo_read (
339+ avAudioFifo_.get (), (void **)newavFrame->data , newavFrame->nb_samples );
340+ TORCH_CHECK (numSamplesRead > 0 , " Tried to read TODO" );
341+
342+ // UniqueAVFrame clonedFrame(av_frame_clone(newavFrame.get()));
343+ // UniqueAVFrame refFrame(av_frame_alloc());
344+ // av_frame_ref(refFrame.get(), newavFrame.get());
345+
346+ encodeInnerLoop (autoAVPacket, newavFrame);
347+ }
316348
317349 numEncodedSamples += numSamplesToEncode;
318350 // TODO-ENCODING set frame pts correctly, and test against it.
@@ -335,6 +367,7 @@ UniqueAVFrame AudioEncoder::maybeConvertAVFrame(const UniqueAVFrame& avFrame) {
335367 getNumChannels (avFrame) == outNumChannels_ &&
336368 avFrame->sample_rate == outSampleRate_) {
337369 // Note: the clone references the same underlying data, it's a cheap copy.
370+ TORCH_CHECK (false , " unexpected" );
338371 return UniqueAVFrame (av_frame_clone (avFrame.get ()));
339372 }
340373
@@ -370,28 +403,6 @@ UniqueAVFrame AudioEncoder::maybeConvertAVFrame(const UniqueAVFrame& avFrame) {
370403void AudioEncoder::encodeInnerLoop (
371404 AutoAVPacket& autoAVPacket,
372405 const UniqueAVFrame& avFrame) {
373- // if (avFrame != nullptr) {
374- // // TODO static cast
375- // int numSamplesWritten = av_audio_fifo_write(avAudioFifo_.get(),
376- // (void**)avFrame->data, avFrame->nb_samples);
377- // TORCH_CHECK(numSamplesWritten == avFrame->nb_samples, "Tried to write
378- // TODO"); printf("Writing %d samples to fifo (size = %d)\n",
379- // avFrame->nb_samples, av_audio_fifo_size(avAudioFifo_.get()));
380-
381- // avFrame = allocateAVFrame(avCodecContext_->frame_size, outSampleRate_,
382- // outNumChannels_);
383- // // TODO cast
384- // int numSamplesRead = av_audio_fifo_read(avAudioFifo_.get(),
385- // (void**)avFrame->data, avFrame->nb_samples); printf("Read %d from
386- // fifo\n", numSamplesRead); TORCH_CHECK(numSamplesRead > 0, "Tried to
387- // read TODO");
388- // }
389-
390- // if (avFrame != nullptr) {
391- // printf("Sending frame with %d samples\n", avFrame->nb_samples);
392- // } else{
393- // printf("AVFrame is empty\n");
394- // }
395406 auto status = avcodec_send_frame (avCodecContext_.get (), avFrame.get ());
396407 TORCH_CHECK (
397408 status == AVSUCCESS,
@@ -443,8 +454,11 @@ void AudioEncoder::maybeFlushSwrBuffers(AutoAVPacket& autoAVPacket) {
443454 return ;
444455 }
445456
446- UniqueAVFrame avFrame =
447- allocateAVFrame (numRemainingSamples, outSampleRate_, outNumChannels_);
457+ UniqueAVFrame avFrame = allocateAVFrame (
458+ numRemainingSamples,
459+ outSampleRate_,
460+ outNumChannels_,
461+ avCodecContext_->sample_fmt );
448462 int actualNumRemainingSamples = swr_convert (
449463 swrContext_.get (), avFrame->data , avFrame->nb_samples , NULL , 0 );
450464 avFrame->nb_samples = actualNumRemainingSamples;
@@ -453,8 +467,10 @@ void AudioEncoder::maybeFlushSwrBuffers(AutoAVPacket& autoAVPacket) {
453467}
454468
455469void AudioEncoder::flushBuffers () {
470+ printf (" Flushing, there are %d samples in fifo\n " , av_audio_fifo_size (avAudioFifo_.get ()));
456471 AutoAVPacket autoAVPacket;
457472 maybeFlushSwrBuffers (autoAVPacket);
458473 encodeInnerLoop (autoAVPacket, UniqueAVFrame (nullptr ));
474+ printf (" Done flushing, there are %d samples in fifo\n " , av_audio_fifo_size (avAudioFifo_.get ()));
459475}
460476} // namespace facebook::torchcodec
0 commit comments