Skip to content

Commit 6c91450

Browse files
committed
Refactor audio sample conversion in encoder
1 parent ba44fdb commit 6c91450

File tree

2 files changed

+40
-35
lines changed

2 files changed

+40
-35
lines changed

src/torchcodec/_core/Encoder.cpp

Lines changed: 39 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -282,10 +282,13 @@ void AudioEncoder::encode() {
282282
// encoded frame would contain more samples than necessary and our results
283283
// wouldn't match the ffmpeg CLI.
284284
avFrame->nb_samples = numSamplesToEncode;
285-
encodeInnerLoop(autoAVPacket, avFrame);
286285

287-
avFrame->pts += static_cast<int64_t>(numSamplesToEncode);
286+
UniqueAVFrame convertedAVFrame = maybeConvertAVFrame(avFrame);
287+
encodeInnerLoop(autoAVPacket, convertedAVFrame);
288+
288289
numEncodedSamples += numSamplesToEncode;
290+
// TODO-ENCODING set frame pts correctly, and test against it.
291+
// avFrame->pts += static_cast<int64_t>(numSamplesToEncode);
289292
}
290293
TORCH_CHECK(numEncodedSamples == numSamples, "Hmmmmmm something went wrong.");
291294

@@ -298,42 +301,43 @@ void AudioEncoder::encode() {
298301
getFFMPEGErrorStringFromErrorCode(status));
299302
}
300303

301-
void AudioEncoder::encodeInnerLoop(
302-
AutoAVPacket& autoAVPacket,
303-
const UniqueAVFrame& srcAVFrame) {
304-
bool mustConvert =
305-
(srcAVFrame != nullptr &&
306-
(avCodecContext_->sample_fmt != AV_SAMPLE_FMT_FLTP ||
307-
getNumChannels(srcAVFrame) != outNumChannels_));
308-
309-
UniqueAVFrame convertedAVFrame;
310-
if (mustConvert) {
311-
if (!swrContext_) {
312-
swrContext_.reset(createSwrContext(
313-
AV_SAMPLE_FMT_FLTP,
314-
avCodecContext_->sample_fmt,
315-
srcAVFrame->sample_rate, // No sample rate conversion
316-
srcAVFrame->sample_rate,
317-
srcAVFrame,
318-
outNumChannels_));
319-
}
320-
convertedAVFrame = convertAudioAVFrameSamples(
321-
swrContext_,
322-
srcAVFrame,
304+
UniqueAVFrame AudioEncoder::maybeConvertAVFrame(const UniqueAVFrame& avFrame) {
305+
if (static_cast<AVSampleFormat>(avFrame->format) ==
306+
avCodecContext_->sample_fmt &&
307+
getNumChannels(avFrame) == outNumChannels_) {
308+
// Note: the clone references the same underlying data, it's a cheap copy.
309+
return UniqueAVFrame(av_frame_clone(avFrame.get()));
310+
}
311+
312+
if (!swrContext_) {
313+
swrContext_.reset(createSwrContext(
314+
static_cast<AVSampleFormat>(avFrame->format),
323315
avCodecContext_->sample_fmt,
324-
srcAVFrame->sample_rate, // No sample rate conversion
325-
outNumChannels_);
326-
TORCH_CHECK(
327-
convertedAVFrame->nb_samples == srcAVFrame->nb_samples,
328-
"convertedAVFrame->nb_samples=",
329-
convertedAVFrame->nb_samples,
330-
" differs from ",
331-
"srcAVFrame->nb_samples=",
332-
srcAVFrame->nb_samples,
333-
"This is unexpected, please report on the TorchCodec bug tracker.");
316+
avFrame->sample_rate, // No sample rate conversion
317+
avFrame->sample_rate,
318+
avFrame,
319+
outNumChannels_));
334320
}
335-
const UniqueAVFrame& avFrame = mustConvert ? convertedAVFrame : srcAVFrame;
321+
UniqueAVFrame convertedAVFrame = convertAudioAVFrameSamples(
322+
swrContext_,
323+
avFrame,
324+
avCodecContext_->sample_fmt,
325+
avFrame->sample_rate, // No sample rate conversion
326+
outNumChannels_);
327+
TORCH_CHECK(
328+
convertedAVFrame->nb_samples == avFrame->nb_samples,
329+
"convertedAVFrame->nb_samples=",
330+
convertedAVFrame->nb_samples,
331+
" differs from ",
332+
"avFrame->nb_samples=",
333+
avFrame->nb_samples,
334+
"This is unexpected, please report on the TorchCodec bug tracker.");
335+
return convertedAVFrame;
336+
}
336337

338+
void AudioEncoder::encodeInnerLoop(
339+
AutoAVPacket& autoAVPacket,
340+
const UniqueAVFrame& avFrame) {
337341
auto status = avcodec_send_frame(avCodecContext_.get(), avFrame.get());
338342
TORCH_CHECK(
339343
status == AVSUCCESS,

src/torchcodec/_core/Encoder.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@ class AudioEncoder {
3838
void initializeEncoder(
3939
int sampleRate,
4040
const AudioStreamOptions& audioStreamOptions);
41+
UniqueAVFrame maybeConvertAVFrame(const UniqueAVFrame& avFrame);
4142
void encodeInnerLoop(
4243
AutoAVPacket& autoAVPacket,
4344
const UniqueAVFrame& srcAVFrame);

0 commit comments

Comments
 (0)