Skip to content

Commit 2adf496

Browse files
committed
WIP
1 parent 9af4bc8 commit 2adf496

File tree

4 files changed

+42
-25
lines changed

4 files changed

+42
-25
lines changed

src/torchcodec/decoders/_core/FFMPEGCommon.cpp

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -90,20 +90,21 @@ void setChannelLayout(
9090

9191
SwrContext* allocateSwrContext(
9292
UniqueAVCodecContext& avCodecContext,
93-
int sampleRate,
9493
AVSampleFormat sourceSampleFormat,
95-
AVSampleFormat desiredSampleFormat) {
94+
AVSampleFormat desiredSampleFormat,
95+
int sourceSampleRate,
96+
int desiredSampleRate) {
9697
SwrContext* swrContext = nullptr;
9798
#if LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4
9899
AVChannelLayout layout = avCodecContext->ch_layout;
99100
auto status = swr_alloc_set_opts2(
100101
&swrContext,
101102
&layout,
102103
desiredSampleFormat,
103-
sampleRate,
104+
desiredSampleRate,
104105
&layout,
105106
sourceSampleFormat,
106-
sampleRate,
107+
sourceSampleRate,
107108
0,
108109
nullptr);
109110

@@ -117,10 +118,10 @@ SwrContext* allocateSwrContext(
117118
nullptr,
118119
layout,
119120
desiredSampleFormat,
120-
sampleRate,
121+
desiredSampleRate,
121122
layout,
122123
sourceSampleFormat,
123-
sampleRate,
124+
sourceSampleRate,
124125
0,
125126
nullptr);
126127
#endif

src/torchcodec/decoders/_core/FFMPEGCommon.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -150,9 +150,10 @@ void setChannelLayout(
150150
const UniqueAVFrame& srcAVFrame);
151151
SwrContext* allocateSwrContext(
152152
UniqueAVCodecContext& avCodecContext,
153-
int sampleRate,
154153
AVSampleFormat sourceSampleFormat,
155-
AVSampleFormat desiredSampleFormat);
154+
AVSampleFormat desiredSampleFormat,
155+
int sourceSampleRate,
156+
int desiredSampleRate);
156157

157158
// Returns true if sws_scale can handle unaligned data.
158159
bool canSwsScaleHandleUnalignedData();

src/torchcodec/decoders/_core/VideoDecoder.cpp

Lines changed: 29 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1370,19 +1370,21 @@ void VideoDecoder::convertAudioAVFrameToFrameOutputOnCPU(
13701370
streamInfos_[activeStreamIndex_].audioStreamOptions.sampleRate.value_or(
13711371
sourceSampleRate);
13721372

1373+
bool mustConvert =
1374+
(sourceSampleFormat != desiredSampleFormat ||
1375+
sourceSampleRate != desiredSampleRate);
1376+
13731377
UniqueAVFrame convertedAVFrame;
1374-
if (sourceSampleFormat != desiredSampleFormat ||
1375-
sourceSampleRate != desiredSampleRate) {
1378+
if (mustConvert) {
13761379
convertedAVFrame = convertAudioAVFrameSampleFormatAndSampleRate(
13771380
avFrameStream.avFrame,
13781381
sourceSampleFormat,
13791382
desiredSampleFormat,
13801383
sourceSampleRate,
13811384
desiredSampleRate);
13821385
}
1383-
const UniqueAVFrame& avFrame = (sourceSampleFormat != desiredSampleFormat)
1384-
? convertedAVFrame
1385-
: avFrameStream.avFrame;
1386+
const UniqueAVFrame& avFrame =
1387+
mustConvert ? convertedAVFrame : avFrameStream.avFrame;
13861388

13871389
AVSampleFormat format = static_cast<AVSampleFormat>(avFrame->format);
13881390
TORCH_CHECK(
@@ -1415,13 +1417,14 @@ UniqueAVFrame VideoDecoder::convertAudioAVFrameSampleFormatAndSampleRate(
14151417
int sourceSampleRate,
14161418
int desiredSampleRate) {
14171419
auto& streamInfo = streamInfos_[activeStreamIndex_];
1418-
const auto& streamMetadata =
1419-
containerMetadata_.allStreamMetadata[activeStreamIndex_];
1420-
int sampleRate = static_cast<int>(streamMetadata.sampleRate.value());
14211420

14221421
if (!streamInfo.swrContext) {
14231422
createSwrContext(
1424-
streamInfo, sampleRate, sourceSampleFormat, desiredSampleFormat);
1423+
streamInfo,
1424+
sourceSampleFormat,
1425+
desiredSampleFormat,
1426+
sourceSampleRate,
1427+
desiredSampleRate);
14251428
}
14261429

14271430
UniqueAVFrame convertedAVFrame(av_frame_alloc());
@@ -1431,8 +1434,17 @@ UniqueAVFrame VideoDecoder::convertAudioAVFrameSampleFormatAndSampleRate(
14311434

14321435
setChannelLayout(convertedAVFrame, avFrame);
14331436
convertedAVFrame->format = static_cast<int>(desiredSampleFormat);
1434-
convertedAVFrame->sample_rate = avFrame->sample_rate;
1435-
convertedAVFrame->nb_samples = avFrame->nb_samples;
1437+
convertedAVFrame->sample_rate = desiredSampleRate;
1438+
if (sourceSampleRate != desiredSampleRate) {
1439+
convertedAVFrame->nb_samples = av_rescale_rnd(
1440+
swr_get_delay(streamInfo.swrContext.get(), sourceSampleRate) +
1441+
avFrame->nb_samples,
1442+
desiredSampleRate,
1443+
sourceSampleRate,
1444+
AV_ROUND_UP);
1445+
} else {
1446+
convertedAVFrame->nb_samples = avFrame->nb_samples;
1447+
}
14361448

14371449
auto status = av_frame_get_buffer(convertedAVFrame.get(), 0);
14381450
TORCH_CHECK(
@@ -1689,14 +1701,16 @@ void VideoDecoder::createSwsContext(
16891701

16901702
void VideoDecoder::createSwrContext(
16911703
StreamInfo& streamInfo,
1692-
int sampleRate,
16931704
AVSampleFormat sourceSampleFormat,
1694-
AVSampleFormat desiredSampleFormat) {
1705+
AVSampleFormat desiredSampleFormat,
1706+
int sourceSampleRate,
1707+
int desiredSampleRate) {
16951708
auto swrContext = allocateSwrContext(
16961709
streamInfo.codecContext,
1697-
sampleRate,
16981710
sourceSampleFormat,
1699-
desiredSampleFormat);
1711+
desiredSampleFormat,
1712+
sourceSampleRate,
1713+
desiredSampleRate);
17001714

17011715
auto status = swr_init(swrContext);
17021716
TORCH_CHECK(

src/torchcodec/decoders/_core/VideoDecoder.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -435,9 +435,10 @@ class VideoDecoder {
435435

436436
void createSwrContext(
437437
StreamInfo& streamInfo,
438-
int sampleRate,
439438
AVSampleFormat sourceSampleFormat,
440-
AVSampleFormat desiredSampleFormat);
439+
AVSampleFormat desiredSampleFormat,
440+
int sourceSampleRate,
441+
int desiredSampleRate);
441442

442443
// --------------------------------------------------------------------------
443444
// PTS <-> INDEX CONVERSIONS

0 commit comments

Comments
 (0)