Skip to content

Commit 963cbd1

Browse files
authored
Unify use of 'output' and 'desired' in audio code-base (#697)
1 parent c45c9c6 commit 963cbd1

File tree

4 files changed

+66
-68
lines changed

4 files changed

+66
-68
lines changed

src/torchcodec/_core/Encoder.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -177,11 +177,11 @@ void AudioEncoder::initializeEncoder(
177177
// well when "-b:a" isn't specified.
178178
avCodecContext_->bit_rate = bitRate.value_or(0);
179179

180-
desiredNumChannels_ = static_cast<int>(numChannels.value_or(wf_.sizes()[0]));
181-
validateNumChannels(*avCodec, desiredNumChannels_);
180+
outNumChannels_ = static_cast<int>(numChannels.value_or(wf_.sizes()[0]));
181+
validateNumChannels(*avCodec, outNumChannels_);
182182
// The avCodecContext layout defines the layout of the encoded output, it's
183183
// not related to the input samples.
184-
setDefaultChannelLayout(avCodecContext_, desiredNumChannels_);
184+
setDefaultChannelLayout(avCodecContext_, outNumChannels_);
185185

186186
validateSampleRate(*avCodec, sampleRate);
187187
avCodecContext_->sample_rate = sampleRate;
@@ -304,7 +304,7 @@ void AudioEncoder::encodeInnerLoop(
304304
bool mustConvert =
305305
(srcAVFrame != nullptr &&
306306
(avCodecContext_->sample_fmt != AV_SAMPLE_FMT_FLTP ||
307-
getNumChannels(srcAVFrame) != desiredNumChannels_));
307+
getNumChannels(srcAVFrame) != outNumChannels_));
308308

309309
UniqueAVFrame convertedAVFrame;
310310
if (mustConvert) {
@@ -315,14 +315,14 @@ void AudioEncoder::encodeInnerLoop(
315315
srcAVFrame->sample_rate, // No sample rate conversion
316316
srcAVFrame->sample_rate,
317317
srcAVFrame,
318-
desiredNumChannels_));
318+
outNumChannels_));
319319
}
320320
convertedAVFrame = convertAudioAVFrameSamples(
321321
swrContext_,
322322
srcAVFrame,
323323
avCodecContext_->sample_fmt,
324324
srcAVFrame->sample_rate, // No sample rate conversion
325-
desiredNumChannels_);
325+
outNumChannels_);
326326
TORCH_CHECK(
327327
convertedAVFrame->nb_samples == srcAVFrame->nb_samples,
328328
"convertedAVFrame->nb_samples=",

src/torchcodec/_core/Encoder.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,9 @@ class AudioEncoder {
5050
UniqueAVCodecContext avCodecContext_;
5151
int streamIndex_;
5252
UniqueSwrContext swrContext_;
53-
// TODO-ENCODING: desiredNumChannels should just be part of an options struct,
53+
// TODO-ENCODING: outNumChannels should just be part of an options struct,
5454
// see other TODO above.
55-
int desiredNumChannels_ = -1;
55+
int outNumChannels_ = -1;
5656

5757
const torch::Tensor wf_;
5858

src/torchcodec/_core/FFMPEGCommon.cpp

Lines changed: 43 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -159,74 +159,74 @@ namespace {
159159
#if LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4
160160

161161
// Returns:
162-
// - the srcAVFrame's channel layout if srcAVFrame has desiredNumChannels
163-
// - the default channel layout with desiredNumChannels otherwise.
164-
AVChannelLayout getDesiredChannelLayout(
165-
int desiredNumChannels,
162+
// - the srcAVFrame's channel layout if srcAVFrame has outNumChannels
163+
// - the default channel layout with outNumChannels otherwise.
164+
AVChannelLayout getOutputChannelLayout(
165+
int outNumChannels,
166166
const UniqueAVFrame& srcAVFrame) {
167-
AVChannelLayout desiredLayout;
168-
if (desiredNumChannels == getNumChannels(srcAVFrame)) {
169-
desiredLayout = srcAVFrame->ch_layout;
167+
AVChannelLayout outLayout;
168+
if (outNumChannels == getNumChannels(srcAVFrame)) {
169+
outLayout = srcAVFrame->ch_layout;
170170
} else {
171-
av_channel_layout_default(&desiredLayout, desiredNumChannels);
171+
av_channel_layout_default(&outLayout, outNumChannels);
172172
}
173-
return desiredLayout;
173+
return outLayout;
174174
}
175175

176176
#else
177177

178178
// Same as above
179-
int64_t getDesiredChannelLayout(
180-
int desiredNumChannels,
179+
int64_t getOutputChannelLayout(
180+
int outNumChannels,
181181
const UniqueAVFrame& srcAVFrame) {
182-
int64_t desiredLayout;
183-
if (desiredNumChannels == getNumChannels(srcAVFrame)) {
184-
desiredLayout = srcAVFrame->channel_layout;
182+
int64_t outLayout;
183+
if (outNumChannels == getNumChannels(srcAVFrame)) {
184+
outLayout = srcAVFrame->channel_layout;
185185
} else {
186-
desiredLayout = av_get_default_channel_layout(desiredNumChannels);
186+
outLayout = av_get_default_channel_layout(outNumChannels);
187187
}
188-
return desiredLayout;
188+
return outLayout;
189189
}
190190
#endif
191191
} // namespace
192192

193-
// Sets dstAVFrame' channel layout to getDesiredChannelLayout(): see doc above
193+
// Sets dstAVFrame's channel layout to getOutputChannelLayout(): see doc above
194194
void setChannelLayout(
195195
UniqueAVFrame& dstAVFrame,
196196
const UniqueAVFrame& srcAVFrame,
197-
int desiredNumChannels) {
197+
int outNumChannels) {
198198
#if LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4
199-
AVChannelLayout desiredLayout =
200-
getDesiredChannelLayout(desiredNumChannels, srcAVFrame);
201-
auto status = av_channel_layout_copy(&dstAVFrame->ch_layout, &desiredLayout);
199+
AVChannelLayout outLayout =
200+
getOutputChannelLayout(outNumChannels, srcAVFrame);
201+
auto status = av_channel_layout_copy(&dstAVFrame->ch_layout, &outLayout);
202202
TORCH_CHECK(
203203
status == AVSUCCESS,
204204
"Couldn't copy channel layout to avFrame: ",
205205
getFFMPEGErrorStringFromErrorCode(status));
206206
#else
207207
dstAVFrame->channel_layout =
208-
getDesiredChannelLayout(desiredNumChannels, srcAVFrame);
209-
dstAVFrame->channels = desiredNumChannels;
208+
getOutputChannelLayout(outNumChannels, srcAVFrame);
209+
dstAVFrame->channels = outNumChannels;
210210
#endif
211211
}
212212

213213
SwrContext* createSwrContext(
214214
AVSampleFormat srcSampleFormat,
215-
AVSampleFormat desiredSampleFormat,
215+
AVSampleFormat outSampleFormat,
216216
int srcSampleRate,
217-
int desiredSampleRate,
217+
int outSampleRate,
218218
const UniqueAVFrame& srcAVFrame,
219-
int desiredNumChannels) {
219+
int outNumChannels) {
220220
SwrContext* swrContext = nullptr;
221221
int status = AVSUCCESS;
222222
#if LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4
223-
AVChannelLayout desiredLayout =
224-
getDesiredChannelLayout(desiredNumChannels, srcAVFrame);
223+
AVChannelLayout outLayout =
224+
getOutputChannelLayout(outNumChannels, srcAVFrame);
225225
status = swr_alloc_set_opts2(
226226
&swrContext,
227-
&desiredLayout,
228-
desiredSampleFormat,
229-
desiredSampleRate,
227+
&outLayout,
228+
outSampleFormat,
229+
outSampleRate,
230230
&srcAVFrame->ch_layout,
231231
srcSampleFormat,
232232
srcSampleRate,
@@ -238,13 +238,12 @@ SwrContext* createSwrContext(
238238
"Couldn't create SwrContext: ",
239239
getFFMPEGErrorStringFromErrorCode(status));
240240
#else
241-
int64_t desiredLayout =
242-
getDesiredChannelLayout(desiredNumChannels, srcAVFrame);
241+
int64_t outLayout = getOutputChannelLayout(outNumChannels, srcAVFrame);
243242
swrContext = swr_alloc_set_opts(
244243
nullptr,
245-
desiredLayout,
246-
desiredSampleFormat,
247-
desiredSampleRate,
244+
outLayout,
245+
outSampleFormat,
246+
outSampleRate,
248247
srcAVFrame->channel_layout,
249248
srcSampleFormat,
250249
srcSampleRate,
@@ -267,19 +266,19 @@ SwrContext* createSwrContext(
267266
UniqueAVFrame convertAudioAVFrameSamples(
268267
const UniqueSwrContext& swrContext,
269268
const UniqueAVFrame& srcAVFrame,
270-
AVSampleFormat desiredSampleFormat,
271-
int desiredSampleRate,
272-
int desiredNumChannels) {
269+
AVSampleFormat outSampleFormat,
270+
int outSampleRate,
271+
int outNumChannels) {
273272
UniqueAVFrame convertedAVFrame(av_frame_alloc());
274273
TORCH_CHECK(
275274
convertedAVFrame,
276275
"Could not allocate frame for sample format conversion.");
277276

278-
convertedAVFrame->format = static_cast<int>(desiredSampleFormat);
277+
convertedAVFrame->format = static_cast<int>(outSampleFormat);
279278

280-
convertedAVFrame->sample_rate = desiredSampleRate;
279+
convertedAVFrame->sample_rate = outSampleRate;
281280
int srcSampleRate = srcAVFrame->sample_rate;
282-
if (srcSampleRate != desiredSampleRate) {
281+
if (srcSampleRate != outSampleRate) {
283282
// Note that this is an upper bound on the number of output samples.
284283
// `swr_convert()` will likely not fill convertedAVFrame with that many
285284
// samples if sample rate conversion is needed. It will buffer the last few
@@ -290,14 +289,14 @@ UniqueAVFrame convertAudioAVFrameSamples(
290289
// tighter bound.
291290
convertedAVFrame->nb_samples = av_rescale_rnd(
292291
swr_get_delay(swrContext.get(), srcSampleRate) + srcAVFrame->nb_samples,
293-
desiredSampleRate,
292+
outSampleRate,
294293
srcSampleRate,
295294
AV_ROUND_UP);
296295
} else {
297296
convertedAVFrame->nb_samples = srcAVFrame->nb_samples;
298297
}
299298

300-
setChannelLayout(convertedAVFrame, srcAVFrame, desiredNumChannels);
299+
setChannelLayout(convertedAVFrame, srcAVFrame, outNumChannels);
301300

302301
auto status = av_frame_get_buffer(convertedAVFrame.get(), 0);
303302
TORCH_CHECK(

src/torchcodec/_core/SingleStreamDecoder.cpp

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1186,11 +1186,11 @@ void SingleStreamDecoder::convertAudioAVFrameToFrameOutputOnCPU(
11861186
FrameOutput& frameOutput) {
11871187
AVSampleFormat srcSampleFormat =
11881188
static_cast<AVSampleFormat>(srcAVFrame->format);
1189-
AVSampleFormat desiredSampleFormat = AV_SAMPLE_FMT_FLTP;
1189+
AVSampleFormat outSampleFormat = AV_SAMPLE_FMT_FLTP;
11901190

11911191
StreamInfo& streamInfo = streamInfos_[activeStreamIndex_];
11921192
int srcSampleRate = srcAVFrame->sample_rate;
1193-
int desiredSampleRate =
1193+
int outSampleRate =
11941194
streamInfo.audioStreamOptions.sampleRate.value_or(srcSampleRate);
11951195

11961196
int srcNumChannels = getNumChannels(streamInfo.codecContext);
@@ -1203,50 +1203,49 @@ void SingleStreamDecoder::convertAudioAVFrameToFrameOutputOnCPU(
12031203
". If you are hitting this, it may be because you are using "
12041204
"a buggy FFmpeg version. FFmpeg4 is known to fail here in some "
12051205
"valid scenarios. Try to upgrade FFmpeg?");
1206-
int desiredNumChannels =
1206+
int outNumChannels =
12071207
streamInfo.audioStreamOptions.numChannels.value_or(srcNumChannels);
12081208

12091209
bool mustConvert =
1210-
(srcSampleFormat != desiredSampleFormat ||
1211-
srcSampleRate != desiredSampleRate ||
1212-
srcNumChannels != desiredNumChannels);
1210+
(srcSampleFormat != outSampleFormat || srcSampleRate != outSampleRate ||
1211+
srcNumChannels != outNumChannels);
12131212

12141213
UniqueAVFrame convertedAVFrame;
12151214
if (mustConvert) {
12161215
if (!streamInfo.swrContext) {
12171216
streamInfo.swrContext.reset(createSwrContext(
12181217
srcSampleFormat,
1219-
desiredSampleFormat,
1218+
outSampleFormat,
12201219
srcSampleRate,
1221-
desiredSampleRate,
1220+
outSampleRate,
12221221
srcAVFrame,
1223-
desiredNumChannels));
1222+
outNumChannels));
12241223
}
12251224

12261225
convertedAVFrame = convertAudioAVFrameSamples(
12271226
streamInfo.swrContext,
12281227
srcAVFrame,
1229-
desiredSampleFormat,
1230-
desiredSampleRate,
1231-
desiredNumChannels);
1228+
outSampleFormat,
1229+
outSampleRate,
1230+
outNumChannels);
12321231
}
12331232
const UniqueAVFrame& avFrame = mustConvert ? convertedAVFrame : srcAVFrame;
12341233

12351234
AVSampleFormat format = static_cast<AVSampleFormat>(avFrame->format);
12361235
TORCH_CHECK(
1237-
format == desiredSampleFormat,
1236+
format == outSampleFormat,
12381237
"Something went wrong, the frame didn't get converted to the desired format. ",
12391238
"Desired format = ",
1240-
av_get_sample_fmt_name(desiredSampleFormat),
1239+
av_get_sample_fmt_name(outSampleFormat),
12411240
"source format = ",
12421241
av_get_sample_fmt_name(format));
12431242

12441243
int numChannels = getNumChannels(avFrame);
12451244
TORCH_CHECK(
1246-
numChannels == desiredNumChannels,
1245+
numChannels == outNumChannels,
12471246
"Something went wrong, the frame didn't get converted to the desired ",
12481247
"number of channels = ",
1249-
desiredNumChannels,
1248+
outNumChannels,
12501249
". Got ",
12511250
numChannels,
12521251
" instead.");

0 commit comments

Comments
 (0)