Skip to content

Commit 9150137

Browse files
committed
Move convertAudioAVFrameSampleFormatAndSampleRate in ffmpeg file
1 parent f525848 commit 9150137

File tree

4 files changed

+67
-67
lines changed

4 files changed

+67
-67
lines changed

src/torchcodec/_core/FFMPEGCommon.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,66 @@ SwrContext* createSwrContext(
167167
return swrContext;
168168
}
169169

170+
// Runs the samples of srcAVFrame through swr_convert() with the given
// swrContext and returns the result in a newly allocated frame.
//
// The returned frame gets its channel layout from srcAVFrame (through
// setChannelLayout()), its format from desiredSampleFormat and its
// sample_rate from desiredSampleRate. Its nb_samples is the value returned by
// swr_convert(), which can be smaller than the allocated capacity and can
// even be 0; callers are expected to handle that.
//
// sourceSampleRate and desiredSampleRate are compared to decide how many
// output samples to allocate: when they differ, an upper bound that accounts
// for the delay reported by swr_get_delay() is used, otherwise the source
// frame's nb_samples is reused as is.
//
// A TORCH_CHECK fails if the frame cannot be allocated, if
// av_frame_get_buffer() does not return AVSUCCESS, or if swr_convert()
// returns a negative value.
UniqueAVFrame convertAudioAVFrameSampleFormatAndSampleRate(
171+
const UniqueSwrContext& swrContext,
172+
const UniqueAVFrame& srcAVFrame,
173+
AVSampleFormat desiredSampleFormat,
174+
int sourceSampleRate,
175+
int desiredSampleRate) {
176+
UniqueAVFrame convertedAVFrame(av_frame_alloc());
177+
TORCH_CHECK(
178+
convertedAVFrame,
179+
"Could not allocate frame for sample format conversion.");
180+
181+
setChannelLayout(convertedAVFrame, srcAVFrame);
182+
convertedAVFrame->format = static_cast<int>(desiredSampleFormat);
183+
convertedAVFrame->sample_rate = desiredSampleRate;
184+
if (sourceSampleRate != desiredSampleRate) {
185+
// Note that this is an upper bound on the number of output samples.
186+
// `swr_convert()` will likely not fill convertedAVFrame with that many
187+
// samples if sample rate conversion is needed. It will buffer the last few
188+
// ones because those require future samples. That's also why we reset
189+
// nb_samples after the call to `swr_convert()`.
190+
// We could also use `swr_get_out_samples()` to determine the number of
191+
// output samples, but empirically `av_rescale_rnd()` seems to provide a
192+
// tighter bound.
193+
convertedAVFrame->nb_samples = av_rescale_rnd(
194+
swr_get_delay(swrContext.get(), sourceSampleRate) +
195+
srcAVFrame->nb_samples,
196+
desiredSampleRate,
197+
sourceSampleRate,
198+
AV_ROUND_UP);
199+
} else {
200+
convertedAVFrame->nb_samples = srcAVFrame->nb_samples;
201+
}
202+
203+
// format, channel layout and nb_samples are all set at this point; they are
// the fields describing the buffers requested from av_frame_get_buffer().
auto status = av_frame_get_buffer(convertedAVFrame.get(), 0);
204+
TORCH_CHECK(
205+
status == AVSUCCESS,
206+
"Could not allocate frame buffers for sample format conversion: ",
207+
getFFMPEGErrorStringFromErrorCode(status));
208+
209+
// The casts below only present srcAVFrame->data as `const uint8_t**` for the
// input argument of swr_convert(); the source frame is not modified here.
auto numConvertedSamples = swr_convert(
210+
swrContext.get(),
211+
convertedAVFrame->data,
212+
convertedAVFrame->nb_samples,
213+
static_cast<const uint8_t**>(
214+
const_cast<const uint8_t**>(srcAVFrame->data)),
215+
srcAVFrame->nb_samples);
216+
// numConvertedSamples can be 0 if we're downsampling by a great factor and
217+
// the first frame doesn't contain a lot of samples. It should be handled
218+
// properly by the caller.
219+
TORCH_CHECK(
220+
numConvertedSamples >= 0,
221+
"Error in swr_convert: ",
222+
getFFMPEGErrorStringFromErrorCode(numConvertedSamples));
223+
224+
// See comment above about nb_samples
225+
convertedAVFrame->nb_samples = numConvertedSamples;
226+
227+
return convertedAVFrame;
228+
}
229+
170230
void setFFmpegLogLevel() {
171231
auto logLevel = AV_LOG_QUIET;
172232
const char* logLevelEnvPtr = std::getenv("TORCHCODEC_FFMPEG_LOG_LEVEL");

src/torchcodec/_core/FFMPEGCommon.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,13 @@ SwrContext* createSwrContext(
165165
int sourceSampleRate,
166166
int desiredSampleRate);
167167

168+
// Converts the samples of srcAVFrame with swr_convert(), using swrContext, and
// returns them in a newly allocated frame whose format is desiredSampleFormat
// and whose sample_rate is desiredSampleRate. The returned frame's nb_samples
// is the number of samples swr_convert() actually produced, which can be 0.
// sourceSampleRate is only compared with desiredSampleRate to size the output.
UniqueAVFrame convertAudioAVFrameSampleFormatAndSampleRate(
169+
const UniqueSwrContext& swrContext,
170+
const UniqueAVFrame& srcAVFrame,
171+
AVSampleFormat desiredSampleFormat,
172+
int sourceSampleRate,
173+
int desiredSampleRate);
174+
168175
// Returns true if sws_scale can handle unaligned data.
169176
bool canSwsScaleHandleUnalignedData();
170177

src/torchcodec/_core/SingleStreamDecoder.cpp

Lines changed: 0 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1402,66 +1402,6 @@ void SingleStreamDecoder::convertAudioAVFrameToFrameOutputOnCPU(
14021402
}
14031403
}
14041404

1405-
// Runs the samples of srcAVFrame through swr_convert() with the given
// swrContext and returns the result in a newly allocated frame.
//
// The returned frame gets its channel layout from srcAVFrame, its format from
// desiredSampleFormat and its sample_rate from desiredSampleRate. Its
// nb_samples is the value returned by swr_convert(), which can be 0.
//
// A TORCH_CHECK fails if the frame or its buffers cannot be allocated, or if
// swr_convert() returns a negative value.
UniqueAVFrame SingleStreamDecoder::convertAudioAVFrameSampleFormatAndSampleRate(
1406-
const UniqueSwrContext& swrContext,
1407-
const UniqueAVFrame& srcAVFrame,
1408-
AVSampleFormat desiredSampleFormat,
1409-
int sourceSampleRate,
1410-
int desiredSampleRate) {
1411-
UniqueAVFrame convertedAVFrame(av_frame_alloc());
1412-
TORCH_CHECK(
1413-
convertedAVFrame,
1414-
"Could not allocate frame for sample format conversion.");
1415-
1416-
setChannelLayout(convertedAVFrame, srcAVFrame);
1417-
convertedAVFrame->format = static_cast<int>(desiredSampleFormat);
1418-
convertedAVFrame->sample_rate = desiredSampleRate;
1419-
if (sourceSampleRate != desiredSampleRate) {
1420-
// Note that this is an upper bound on the number of output samples.
1421-
// `swr_convert()` will likely not fill convertedAVFrame with that many
1422-
// samples if sample rate conversion is needed. It will buffer the last few
1423-
// ones because those require future samples. That's also why we reset
1424-
// nb_samples after the call to `swr_convert()`.
1425-
// We could also use `swr_get_out_samples()` to determine the number of
1426-
// output samples, but empirically `av_rescale_rnd()` seems to provide a
1427-
// tighter bound.
1428-
convertedAVFrame->nb_samples = av_rescale_rnd(
1429-
swr_get_delay(swrContext.get(), sourceSampleRate) +
1430-
srcAVFrame->nb_samples,
1431-
desiredSampleRate,
1432-
sourceSampleRate,
1433-
AV_ROUND_UP);
1434-
} else {
1435-
convertedAVFrame->nb_samples = srcAVFrame->nb_samples;
1436-
}
1437-
1438-
auto status = av_frame_get_buffer(convertedAVFrame.get(), 0);
1439-
TORCH_CHECK(
1440-
status == AVSUCCESS,
1441-
"Could not allocate frame buffers for sample format conversion: ",
1442-
getFFMPEGErrorStringFromErrorCode(status));
1443-
1444-
auto numConvertedSamples = swr_convert(
1445-
swrContext.get(),
1446-
convertedAVFrame->data,
1447-
convertedAVFrame->nb_samples,
1448-
static_cast<const uint8_t**>(
1449-
const_cast<const uint8_t**>(srcAVFrame->data)),
1450-
srcAVFrame->nb_samples);
1451-
// numConvertedSamples can be 0 if we're downsampling by a great factor and
1452-
// the first frame doesn't contain a lot of samples. It should be handled
1453-
// properly by the caller.
1454-
TORCH_CHECK(
1455-
numConvertedSamples >= 0,
1456-
"Error in swr_convert: ",
1457-
getFFMPEGErrorStringFromErrorCode(numConvertedSamples));
1458-
1459-
// See comment above about nb_samples
1460-
convertedAVFrame->nb_samples = numConvertedSamples;
1461-
1462-
return convertedAVFrame;
1463-
}
1464-
14651405
std::optional<torch::Tensor> SingleStreamDecoder::maybeFlushSwrBuffers() {
14661406
// When sample rate conversion is involved, swresample buffers some of the
14671407
// samples in-between calls to swr_convert (see the libswresample docs).

src/torchcodec/_core/SingleStreamDecoder.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -287,13 +287,6 @@ class SingleStreamDecoder {
287287
const UniqueAVFrame& avFrame,
288288
torch::Tensor& outputTensor);
289289

290-
// Returns a newly allocated frame holding the samples of srcAVFrame converted
// with swrContext to desiredSampleFormat and desiredSampleRate.
UniqueAVFrame convertAudioAVFrameSampleFormatAndSampleRate(
291-
const UniqueSwrContext& swrContext,
292-
const UniqueAVFrame& srcAVFrame,
293-
AVSampleFormat desiredSampleFormat,
294-
int sourceSampleRate,
295-
int desiredSampleRate);
296-
297290
std::optional<torch::Tensor> maybeFlushSwrBuffers();
298291

299292
// --------------------------------------------------------------------------

0 commit comments

Comments
 (0)