Skip to content

Commit f525848

Browse files
committed
WIP
1 parent 1f9f904 commit f525848

File tree

2 files changed

+16
-18
lines changed

2 files changed

+16
-18
lines changed

src/torchcodec/_core/SingleStreamDecoder.cpp

Lines changed: 15 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1345,20 +1345,29 @@ void SingleStreamDecoder::convertAudioAVFrameToFrameOutputOnCPU(
13451345
static_cast<AVSampleFormat>(srcAVFrame->format);
13461346
AVSampleFormat desiredSampleFormat = AV_SAMPLE_FMT_FLTP;
13471347

1348+
StreamInfo& streamInfo = streamInfos_[activeStreamIndex_];
13481349
int sourceSampleRate = srcAVFrame->sample_rate;
13491350
int desiredSampleRate =
1350-
streamInfos_[activeStreamIndex_].audioStreamOptions.sampleRate.value_or(
1351-
sourceSampleRate);
1351+
streamInfo.audioStreamOptions.sampleRate.value_or(sourceSampleRate);
13521352

13531353
bool mustConvert =
13541354
(sourceSampleFormat != desiredSampleFormat ||
13551355
sourceSampleRate != desiredSampleRate);
13561356

13571357
UniqueAVFrame convertedAVFrame;
13581358
if (mustConvert) {
1359+
if (!streamInfo.swrContext) {
1360+
streamInfo.swrContext.reset(createSwrContext(
1361+
streamInfo.codecContext,
1362+
sourceSampleFormat,
1363+
desiredSampleFormat,
1364+
sourceSampleRate,
1365+
desiredSampleRate));
1366+
}
1367+
13591368
convertedAVFrame = convertAudioAVFrameSampleFormatAndSampleRate(
1369+
streamInfo.swrContext,
13601370
srcAVFrame,
1361-
sourceSampleFormat,
13621371
desiredSampleFormat,
13631372
sourceSampleRate,
13641373
desiredSampleRate);
@@ -1394,22 +1403,11 @@ void SingleStreamDecoder::convertAudioAVFrameToFrameOutputOnCPU(
13941403
}
13951404

13961405
UniqueAVFrame SingleStreamDecoder::convertAudioAVFrameSampleFormatAndSampleRate(
1406+
const UniqueSwrContext& swrContext,
13971407
const UniqueAVFrame& srcAVFrame,
1398-
AVSampleFormat sourceSampleFormat,
13991408
AVSampleFormat desiredSampleFormat,
14001409
int sourceSampleRate,
14011410
int desiredSampleRate) {
1402-
auto& streamInfo = streamInfos_[activeStreamIndex_];
1403-
1404-
if (!streamInfo.swrContext) {
1405-
streamInfo.swrContext.reset(createSwrContext(
1406-
streamInfo.codecContext,
1407-
sourceSampleFormat,
1408-
desiredSampleFormat,
1409-
sourceSampleRate,
1410-
desiredSampleRate));
1411-
}
1412-
14131411
UniqueAVFrame convertedAVFrame(av_frame_alloc());
14141412
TORCH_CHECK(
14151413
convertedAVFrame,
@@ -1428,7 +1426,7 @@ UniqueAVFrame SingleStreamDecoder::convertAudioAVFrameSampleFormatAndSampleRate(
14281426
// output samples, but empirically `av_rescale_rnd()` seems to provide a
14291427
// tighter bound.
14301428
convertedAVFrame->nb_samples = av_rescale_rnd(
1431-
swr_get_delay(streamInfo.swrContext.get(), sourceSampleRate) +
1429+
swr_get_delay(swrContext.get(), sourceSampleRate) +
14321430
srcAVFrame->nb_samples,
14331431
desiredSampleRate,
14341432
sourceSampleRate,
@@ -1444,7 +1442,7 @@ UniqueAVFrame SingleStreamDecoder::convertAudioAVFrameSampleFormatAndSampleRate(
14441442
getFFMPEGErrorStringFromErrorCode(status));
14451443

14461444
auto numConvertedSamples = swr_convert(
1447-
streamInfo.swrContext.get(),
1445+
swrContext.get(),
14481446
convertedAVFrame->data,
14491447
convertedAVFrame->nb_samples,
14501448
static_cast<const uint8_t**>(

src/torchcodec/_core/SingleStreamDecoder.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -288,8 +288,8 @@ class SingleStreamDecoder {
288288
torch::Tensor& outputTensor);
289289

290290
UniqueAVFrame convertAudioAVFrameSampleFormatAndSampleRate(
291+
const UniqueSwrContext& swrContext,
291292
const UniqueAVFrame& srcAVFrame,
292-
AVSampleFormat sourceSampleFormat,
293293
AVSampleFormat desiredSampleFormat,
294294
int sourceSampleRate,
295295
int desiredSampleRate);

0 commit comments

Comments
 (0)