Skip to content

Commit 823e9bb

Browse files
committed
WIP
1 parent 0a20541 commit 823e9bb

File tree

2 files changed

+72
-1
lines changed

2 files changed

+72
-1
lines changed

src/torchcodec/decoders/_core/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@ set(CMAKE_CXX_STANDARD 17)
44
set(CMAKE_CXX_STANDARD_REQUIRED ON)
55

66
find_package(Torch REQUIRED)
7-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -Werror ${TORCH_CXX_FLAGS}")
7+
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -Werror ${TORCH_CXX_FLAGS}")
8+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra ${TORCH_CXX_FLAGS}")
89
find_package(Python3 ${PYTHON_VERSION} EXACT COMPONENTS Development)
910

1011
function(make_torchcodec_library library_name ffmpeg_target)
@@ -97,6 +98,7 @@ else()
9798
libavformat
9899
libavcodec
99100
libavutil
101+
libswresample
100102
libswscale
101103
)
102104

src/torchcodec/decoders/_core/VideoDecoder.cpp

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ extern "C" {
2323
#include <libavutil/imgutils.h>
2424
#include <libavutil/log.h>
2525
#include <libavutil/pixdesc.h>
26+
#include <libswresample/swresample.h>
2627
#include <libswscale/swscale.h>
2728
}
2829

@@ -1341,6 +1342,71 @@ void VideoDecoder::convertAudioAVFrameToFrameOutputOnCPU(
13411342

13421343
const AVFrame* avFrame = avFrameStream.avFrame.get();
13431344

1345+
AVFrame* output_frame = nullptr;
1346+
SwrContext* swr_ctx = NULL; // TODO RAII
1347+
1348+
const auto& streamInfo = streamInfos_[activeStreamIndex_];
1349+
const auto& streamMetadata =
1350+
containerMetadata_.allStreamMetadata[activeStreamIndex_];
1351+
int sampleRate = static_cast<int>(streamMetadata.sampleRate.value());
1352+
1353+
AVSampleFormat sampleFormat = AV_SAMPLE_FMT_FLTP;
1354+
AVChannelLayout layout = streamInfo.codecContext->ch_layout;
1355+
1356+
int status = swr_alloc_set_opts2(
1357+
&swr_ctx,
1358+
&layout,
1359+
sampleFormat,
1360+
sampleRate,
1361+
&layout,
1362+
sampleFormat,
1363+
sampleRate,
1364+
0,
1365+
NULL);
1366+
1367+
TORCH_CHECK(status == 0, "IS NULL");
1368+
1369+
if (swr_init(swr_ctx) < 0) {
1370+
swr_free(&swr_ctx);
1371+
TORCH_CHECK(false, "Failed to initialize the resampling context\n");
1372+
}
1373+
1374+
// Allocate output frame
1375+
output_frame = av_frame_alloc();
1376+
if (!output_frame) {
1377+
swr_free(&swr_ctx);
1378+
TORCH_CHECK(false, "Could not allocate output frame\n");
1379+
}
1380+
output_frame->ch_layout = layout;
1381+
output_frame->sample_rate = sampleRate;
1382+
output_frame->format = sampleFormat;
1383+
1384+
output_frame->nb_samples = av_rescale_rnd(
1385+
swr_get_delay(swr_ctx, sampleRate) + avFrame->nb_samples,
1386+
sampleRate,
1387+
sampleRate,
1388+
AV_ROUND_UP);
1389+
1390+
if (av_frame_get_buffer(output_frame, 0) < 0) {
1391+
av_frame_free(&output_frame);
1392+
swr_free(&swr_ctx);
1393+
TORCH_CHECK(false, "Could not allocate output frame samples");
1394+
}
1395+
1396+
int ret = swr_convert(
1397+
swr_ctx,
1398+
output_frame->data,
1399+
output_frame->nb_samples,
1400+
(const uint8_t**)avFrame->data,
1401+
avFrame->nb_samples);
1402+
if (ret < 0) {
1403+
av_frame_free(&output_frame);
1404+
swr_free(&swr_ctx);
1405+
TORCH_CHECK(false, "Error while converting\n");
1406+
}
1407+
1408+
avFrame = output_frame; // from here on, read samples from the converted frame instead of the decoded one
1409+
13441410
auto numSamples = avFrame->nb_samples; // per channel
13451411
auto numChannels = getNumChannels(avFrame);
13461412
torch::Tensor outputData =
@@ -1368,6 +1434,9 @@ void VideoDecoder::convertAudioAVFrameToFrameOutputOnCPU(
13681434
av_get_sample_fmt_name(format));
13691435
}
13701436
frameOutput.data = outputData;
1437+
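// TODO: own output_frame and swr_ctx through RAII wrappers so they are
// released on every exit path instead of being freed manually here.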
1438+
av_frame_free(&output_frame);
1439+
swr_free(&swr_ctx);
13711440
}
13721441

13731442
// --------------------------------------------------------------------------

0 commit comments

Comments
 (0)