Skip to content

Commit 3ce4612

Browse files
committed
WIP
1 parent 639d5ab commit 3ce4612

File tree

3 files changed

+39
-4
lines changed

3 files changed

+39
-4
lines changed

src/torchcodec/_core/Encoder.cpp

Lines changed: 31 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -215,6 +215,9 @@ void AudioEncoder::initializeEncoder(
215215
status == AVSUCCESS,
216216
"avcodec_open2 failed: ",
217217
getFFMPEGErrorStringFromErrorCode(status));
218+
219+
bool supportsVariableFrameSize = avCodec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE;
220+
printf("supportsVariableFrameSize = %d\n", supportsVariableFrameSize);
218221

219222
// We're allocating the stream here. Streams are meant to be freed by
220223
// avformat_free_context(avFormatContext), which we call in the
@@ -228,6 +231,12 @@ void AudioEncoder::initializeEncoder(
228231
"avcodec_parameters_from_context failed: ",
229232
getFFMPEGErrorStringFromErrorCode(status));
230233
streamIndex_ = avStream->index;
234+
235+
// frame_size * 2 is a decent default size. FFmpeg automatically re-allocates
236+
// the fifo if more space is needed.
237+
auto avAudioFifo = av_audio_fifo_alloc(avCodecContext_->sample_fmt, outNumChannels_, avCodecContext_->frame_size * 2);
238+
TORCH_CHECK(avAudioFifo!= nullptr, "Couldn't create AVAudioFifo.");
239+
avAudioFifo_.reset(avAudioFifo);
231240
}
232241

233242
torch::Tensor AudioEncoder::encodeToTensor() {
@@ -309,7 +318,7 @@ void AudioEncoder::encode() {
309318

310319
void AudioEncoder::encodeInnerLoop(
311320
AutoAVPacket& autoAVPacket,
312-
const UniqueAVFrame& srcAVFrame,
321+
UniqueAVFrame& srcAVFrame,
313322
bool allowConvert) {
314323
// TODO: Probably makes more sense to move the conversion away? It shouldn't
315324
// be in inner loop in any case. We should also remove allowConvert.
@@ -348,8 +357,26 @@ void AudioEncoder::encodeInnerLoop(
348357
"This is unexpected, please report on the TorchCodec bug tracker.");
349358
}
350359
}
351-
const UniqueAVFrame& avFrame = mustConvert ? convertedAVFrame : srcAVFrame;
360+
UniqueAVFrame& avFrame = mustConvert ? convertedAVFrame : srcAVFrame;
361+
362+
if (avFrame != nullptr) {
363+
// TODO static cast
364+
int numSamplesWritten = av_audio_fifo_write(avAudioFifo_.get(), (void**)avFrame->data, avFrame->nb_samples);
365+
TORCH_CHECK(numSamplesWritten == avFrame->nb_samples, "Tried to write TODO");
366+
printf("Writing %d samples to fifo (size = %d)\n", avFrame->nb_samples, av_audio_fifo_size(avAudioFifo_.get()));
367+
368+
avFrame = allocateAVFrame(avCodecContext_->frame_size, outSampleRate_, outNumChannels_);
369+
// TODO cast
370+
int numSamplesRead = av_audio_fifo_read(avAudioFifo_.get(), (void**)avFrame->data, avFrame->nb_samples);
371+
printf("Read %d from fifo\n", numSamplesRead);
372+
TORCH_CHECK(numSamplesRead > 0, "Tried to read TODO");
373+
}
352374

375+
if (avFrame != nullptr) {
376+
printf("Sending frame with %d samples\n", avFrame->nb_samples);
377+
} else{
378+
printf("AVFrame is empty\n");
379+
}
353380
auto status = avcodec_send_frame(avCodecContext_.get(), avFrame.get());
354381
TORCH_CHECK(
355382
status == AVSUCCESS,
@@ -413,6 +440,7 @@ void AudioEncoder::maybeFlushSwrBuffers(AutoAVPacket& autoAVPacket) {
413440
void AudioEncoder::flushBuffers() {
414441
AutoAVPacket autoAVPacket;
415442
maybeFlushSwrBuffers(autoAVPacket);
416-
encodeInnerLoop(autoAVPacket, UniqueAVFrame(nullptr));
443+
auto zob = UniqueAVFrame(nullptr);
444+
encodeInnerLoop(autoAVPacket, zob);
417445
}
418446
} // namespace facebook::torchcodec

src/torchcodec/_core/Encoder.h

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ class AudioEncoder {
3838
void initializeEncoder(const AudioStreamOptions& audioStreamOptions);
3939
void encodeInnerLoop(
4040
AutoAVPacket& autoAVPacket,
41-
const UniqueAVFrame& srcAVFrame,
41+
UniqueAVFrame& srcAVFrame,
4242
bool allowConvert = true);
4343
void maybeFlushSwrBuffers(AutoAVPacket& autoAVPacket);
4444
void flushBuffers();
@@ -55,6 +55,9 @@ class AudioEncoder {
5555
const torch::Tensor wf_;
5656
int sampleRateInput_ = -1;
5757

58+
59+
UniqueAVAudioFifo avAudioFifo_;
60+
5861
// Stores the AVIOContext for the output tensor buffer.
5962
std::unique_ptr<AVIOToTensorContext> avioContextHolder_;
6063

src/torchcodec/_core/FFMPEGCommon.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ extern "C" {
2222
#include <libavutil/opt.h>
2323
#include <libavutil/pixfmt.h>
2424
#include <libavutil/version.h>
25+
#include <libavutil/audio_fifo.h>
2526
#include <libswresample/swresample.h>
2627
#include <libswscale/swscale.h>
2728
}
@@ -73,6 +74,9 @@ using UniqueSwsContext =
7374
std::unique_ptr<SwsContext, Deleter<SwsContext, void, sws_freeContext>>;
7475
using UniqueSwrContext =
7576
std::unique_ptr<SwrContext, Deleterp<SwrContext, void, swr_free>>;
77+
using UniqueAVAudioFifo = std::unique_ptr<
78+
AVAudioFifo,
79+
Deleter<AVAudioFifo, void, av_audio_fifo_free>>;
7680

7781
// These 2 classes share the same underlying AVPacket object. They are meant to
7882
// be used in tandem, like so:

0 commit comments

Comments
 (0)