Skip to content

Commit 5e325a2

Browse files
authored
Refactor C++ decoder initialization (#533)
1. We had a struct, AVInput, which was not necessary. Removing it allows us to simplify setting up a VideoDecoder that uses an AVIOContext that reads directly from a provided buffer of bytes. 2. Normalize style of names and clarify purpose. 3. Use TORCH_CHECK instead of throwing exceptions.
1 parent 9703c37 commit 5e325a2

File tree

5 files changed

+52
-61
lines changed

5 files changed

+52
-61
lines changed

src/torchcodec/decoders/_core/FFMPEGCommon.cpp

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -62,28 +62,26 @@ int64_t getDuration(const AVFrame* frame) {
6262

6363
AVIOBytesContext::AVIOBytesContext(
6464
const void* data,
65-
size_t data_size,
66-
size_t tempBufferSize) {
67-
auto buffer = static_cast<uint8_t*>(av_malloc(tempBufferSize));
68-
if (!buffer) {
69-
throw std::runtime_error(
70-
"Failed to allocate buffer of size " + std::to_string(tempBufferSize));
71-
}
72-
bufferData_.data = static_cast<const uint8_t*>(data);
73-
bufferData_.size = data_size;
74-
bufferData_.current = 0;
65+
size_t dataSize,
66+
size_t bufferSize)
67+
: bufferData_{static_cast<const uint8_t*>(data), dataSize, 0} {
68+
auto buffer = static_cast<uint8_t*>(av_malloc(bufferSize));
69+
TORCH_CHECK(
70+
buffer != nullptr,
71+
"Failed to allocate buffer of size " + std::to_string(bufferSize));
7572

7673
avioContext_.reset(avio_alloc_context(
7774
buffer,
78-
tempBufferSize,
75+
bufferSize,
7976
0,
8077
&bufferData_,
8178
&AVIOBytesContext::read,
8279
nullptr,
8380
&AVIOBytesContext::seek));
81+
8482
if (!avioContext_) {
8583
av_freep(&buffer);
86-
throw std::runtime_error("Failed to allocate AVIOContext");
84+
TORCH_CHECK(false, "Failed to allocate AVIOContext");
8785
}
8886
}
8987

@@ -99,14 +97,14 @@ AVIOContext* AVIOBytesContext::getAVIO() {
9997

10098
// The signature of this function is defined by FFMPEG.
10199
int AVIOBytesContext::read(void* opaque, uint8_t* buf, int buf_size) {
102-
struct AVIOBufferData* bufferData =
103-
static_cast<struct AVIOBufferData*>(opaque);
100+
auto bufferData = static_cast<AVIOBufferData*>(opaque);
104101
TORCH_CHECK(
105102
bufferData->current <= bufferData->size,
106103
"Tried to read outside of the buffer: current=",
107104
bufferData->current,
108105
", size=",
109106
bufferData->size);
107+
110108
buf_size =
111109
FFMIN(buf_size, static_cast<int>(bufferData->size - bufferData->current));
112110
TORCH_CHECK(
@@ -117,6 +115,7 @@ int AVIOBytesContext::read(void* opaque, uint8_t* buf, int buf_size) {
117115
bufferData->size,
118116
", current=",
119117
bufferData->current);
118+
120119
if (!buf_size) {
121120
return AVERROR_EOF;
122121
}
@@ -127,7 +126,7 @@ int AVIOBytesContext::read(void* opaque, uint8_t* buf, int buf_size) {
127126

128127
// The signature of this function is defined by FFMPEG.
129128
int64_t AVIOBytesContext::seek(void* opaque, int64_t offset, int whence) {
130-
AVIOBufferData* bufferData = (AVIOBufferData*)opaque;
129+
auto bufferData = static_cast<AVIOBufferData*>(opaque);
131130
int64_t ret = -1;
132131

133132
switch (whence) {
@@ -141,6 +140,7 @@ int64_t AVIOBytesContext::seek(void* opaque, int64_t offset, int whence) {
141140
default:
142141
break;
143142
}
143+
144144
return ret;
145145
}
146146

src/torchcodec/decoders/_core/FFMPEGCommon.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ struct AVIOBufferData {
155155
// memory buffer that is passed in.
156156
class AVIOBytesContext {
157157
public:
158-
AVIOBytesContext(const void* data, size_t data_size, size_t tempBufferSize);
158+
AVIOBytesContext(const void* data, size_t dataSize, size_t bufferSize);
159159
~AVIOBytesContext();
160160

161161
// Returns the AVIOContext that can be passed to FFMPEG.

src/torchcodec/decoders/_core/VideoDecoder.cpp

Lines changed: 32 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,6 @@ int64_t secondsToClosestPts(double seconds, const AVRational& timeBase) {
4040
return static_cast<int64_t>(std::round(seconds * timeBase.den));
4141
}
4242

43-
struct AVInput {
44-
UniqueAVFormatContext formatContext;
45-
std::unique_ptr<AVIOBytesContext> ioBytesContext;
46-
};
47-
4843
std::vector<std::string> splitStringWithDelimiters(
4944
const std::string& str,
5045
const std::string& delims) {
@@ -72,50 +67,47 @@ VideoDecoder::VideoDecoder(const std::string& videoFilePath, SeekMode seekMode)
7267
: seekMode_(seekMode) {
7368
av_log_set_level(AV_LOG_QUIET);
7469

75-
AVFormatContext* formatContext = nullptr;
76-
int open_ret = avformat_open_input(
77-
&formatContext, videoFilePath.c_str(), nullptr, nullptr);
78-
if (open_ret != 0) {
79-
throw std::invalid_argument(
80-
"Could not open input file: " + videoFilePath + " " +
81-
getFFMPEGErrorStringFromErrorCode(open_ret));
82-
}
83-
TORCH_CHECK(formatContext != nullptr);
84-
AVInput input;
85-
input.formatContext.reset(formatContext);
86-
formatContext_ = std::move(input.formatContext);
70+
AVFormatContext* rawContext = nullptr;
71+
int ffmpegStatus =
72+
avformat_open_input(&rawContext, videoFilePath.c_str(), nullptr, nullptr);
73+
TORCH_CHECK(
74+
ffmpegStatus == 0,
75+
"Could not open input file: " + videoFilePath + " " +
76+
getFFMPEGErrorStringFromErrorCode(ffmpegStatus));
77+
TORCH_CHECK(rawContext != nullptr);
78+
formatContext_.reset(rawContext);
8779

8880
initializeDecoder();
8981
}
9082

91-
VideoDecoder::VideoDecoder(const void* buffer, size_t length, SeekMode seekMode)
83+
VideoDecoder::VideoDecoder(const void* data, size_t length, SeekMode seekMode)
9284
: seekMode_(seekMode) {
93-
TORCH_CHECK(buffer != nullptr, "Video buffer cannot be nullptr!");
85+
TORCH_CHECK(data != nullptr, "Video data buffer cannot be nullptr!");
9486

9587
av_log_set_level(AV_LOG_QUIET);
9688

97-
AVInput input;
98-
input.formatContext.reset(avformat_alloc_context());
99-
TORCH_CHECK(
100-
input.formatContext.get() != nullptr, "Unable to alloc avformat context");
101-
constexpr int kAVIOInternalTemporaryBufferSize = 64 * 1024;
102-
input.ioBytesContext.reset(
103-
new AVIOBytesContext(buffer, length, kAVIOInternalTemporaryBufferSize));
104-
if (!input.ioBytesContext) {
105-
throw std::runtime_error("Failed to create AVIOBytesContext");
106-
}
107-
input.formatContext->pb = input.ioBytesContext->getAVIO();
108-
AVFormatContext* tempFormatContext = input.formatContext.release();
109-
int open_ret =
110-
avformat_open_input(&tempFormatContext, nullptr, nullptr, nullptr);
111-
input.formatContext.reset(tempFormatContext);
112-
if (open_ret != 0) {
113-
throw std::runtime_error(
114-
std::string("Failed to open input buffer: ") +
115-
getFFMPEGErrorStringFromErrorCode(open_ret));
89+
constexpr int bufferSize = 64 * 1024;
90+
ioBytesContext_.reset(new AVIOBytesContext(data, length, bufferSize));
91+
TORCH_CHECK(ioBytesContext_, "Failed to create AVIOBytesContext");
92+
93+
// Because FFmpeg requires a reference to a pointer in the call to open, we
94+
// can't use a unique pointer here. Note that means we must call free if open
95+
// fails.
96+
AVFormatContext* rawContext = avformat_alloc_context();
97+
TORCH_CHECK(rawContext != nullptr, "Unable to alloc avformat context");
98+
99+
rawContext->pb = ioBytesContext_->getAVIO();
100+
int ffmpegStatus =
101+
avformat_open_input(&rawContext, nullptr, nullptr, nullptr);
102+
if (ffmpegStatus != 0) {
103+
avformat_free_context(rawContext);
104+
TORCH_CHECK(
105+
false,
106+
"Failed to open input buffer: " +
107+
getFFMPEGErrorStringFromErrorCode(ffmpegStatus));
116108
}
117-
formatContext_ = std::move(input.formatContext);
118-
ioBytesContext_ = std::move(input.ioBytesContext);
109+
110+
formatContext_.reset(rawContext);
119111

120112
initializeDecoder();
121113
}

src/torchcodec/decoders/_core/VideoDecoder.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,10 @@ class VideoDecoder {
3434
const std::string& videoFilePath,
3535
SeekMode seekMode = SeekMode::exact);
3636

37-
// Creates a VideoDecoder from a given buffer. Note that the buffer is not
38-
// owned by the VideoDecoder.
37+
// Creates a VideoDecoder from a given buffer of data. Note that the data is
38+
// not owned by the VideoDecoder.
3939
explicit VideoDecoder(
40-
const void* buffer,
40+
const void* data,
4141
size_t length,
4242
SeekMode seekMode = SeekMode::exact);
4343

test/decoders/VideoDecoderTest.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,7 @@ TEST_P(VideoDecoderTest, ReturnsFpsAndDurationForVideoInMetadata) {
9393
}
9494

9595
TEST(VideoDecoderTest, MissingVideoFileThrowsException) {
96-
EXPECT_THROW(
97-
VideoDecoder("/this/file/does/not/exist"), std::invalid_argument);
96+
EXPECT_THROW(VideoDecoder("/this/file/does/not/exist"), c10::Error);
9897
}
9998

10099
void dumpTensorToDisk(

0 commit comments

Comments
 (0)