Skip to content

Commit a093003

Browse files
committed
Add support for decoding from Python file-like objects
1 parent 30fe734 commit a093003

File tree

11 files changed

+222
-107
lines changed

11 files changed

+222
-107
lines changed

src/torchcodec/decoders/_core/CMakeLists.txt

Lines changed: 92 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -7,58 +7,107 @@ find_package(Torch REQUIRED)
77
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -Werror ${TORCH_CXX_FLAGS}")
88
find_package(Python3 ${PYTHON_VERSION} EXACT COMPONENTS Development)
99

10-
function(make_torchcodec_library library_name ffmpeg_target)
11-
set(
12-
sources
13-
FFMPEGCommon.h
14-
FFMPEGCommon.cpp
15-
VideoDecoder.h
16-
VideoDecoder.cpp
17-
VideoDecoderOps.h
18-
VideoDecoderOps.cpp
19-
DeviceInterface.h
20-
)
21-
if(ENABLE_CUDA)
22-
list(APPEND sources CudaDevice.cpp)
23-
else()
24-
list(APPEND sources CPUOnlyDevice.cpp)
25-
endif()
26-
add_library(${library_name} SHARED ${sources})
27-
set_property(TARGET ${library_name} PROPERTY CXX_STANDARD 17)
10+
function(make_torchcodec_sublibrary
11+
library_name
12+
sources
13+
dependent_libraries
14+
ffmpeg_include_dirs)
2815

29-
target_include_directories(
30-
${library_name}
16+
add_library(${library_name} SHARED ${sources})
17+
set_target_properties(${library_name} PROPERTIES CXX_STANDARD 17)
18+
target_include_directories(${library_name}
3119
PRIVATE
3220
./../../../../
3321
"${TORCH_INSTALL_PREFIX}/include"
3422
${Python3_INCLUDE_DIRS}
23+
${ffmpeg_include_dirs}
3524
)
3625

37-
set(NEEDED_LIBRARIES ${ffmpeg_target} ${TORCH_LIBRARIES}
38-
${Python3_LIBRARIES})
39-
if(ENABLE_CUDA)
40-
list(APPEND NEEDED_LIBRARIES
41-
${CUDA_nppi_LIBRARY} ${CUDA_nppicc_LIBRARY} )
42-
endif()
26+
# Avoid adding the "lib" prefix which we already add explicitly.
27+
set_target_properties(${library_name} PROPERTIES PREFIX "")
28+
4329
target_link_libraries(
4430
${library_name}
4531
PUBLIC
46-
${NEEDED_LIBRARIES}
32+
${dependent_libraries}
4733
)
34+
endfunction()
4835

49-
# We already set the library_name to be libtorchcodecN, so we don't want
50-
# cmake to add another "lib" prefix. We do it this way because it makes it
51-
# easier to find references to libtorchcodec in the code (e.g. via `git
52-
# grep`)
53-
set_target_properties(${library_name} PROPERTIES PREFIX "")
36+
function(make_torchcodec_libraries
37+
ffmpeg_major_version
38+
ffmpeg_target
39+
ffmpeg_include_dirs)
40+
41+
# Create libtorchcodec_decoderN.so
42+
set(decoder_library_name "libtorchcodec_decoder${ffmpeg_major_version}")
43+
set(decoder_sources FFMPEGCommon.cpp VideoDecoder.cpp)
44+
45+
if(ENABLE_CUDA)
46+
list(APPEND decoder_sources CudaDevice.cpp)
47+
else()
48+
list(APPEND decoder_sources CPUOnlyDevice.cpp)
49+
endif()
50+
51+
set(decoder_dependent_libraries
52+
${ffmpeg_target}
53+
${TORCH_LIBRARIES}
54+
${Python3_LIBRARIES}
55+
)
56+
57+
if(ENABLE_CUDA)
58+
list(APPEND decoder_dependent_libraries
59+
${CUDA_nppi_LIBRARY}
60+
${CUDA_nppicc_LIBRARY}
61+
)
62+
endif()
63+
64+
make_torchcodec_sublibrary(
65+
"${decoder_library_name}"
66+
"${decoder_sources}"
67+
"${decoder_dependent_libraries}"
68+
"${ffmpeg_include_dirs}"
69+
)
70+
71+
# Create libtorchcodec_custom_opsN.so
72+
set(custom_ops_library_name "libtorchcodec_custom_ops${ffmpeg_major_version}")
73+
set(custom_ops_sources VideoDecoderOps.cpp)
74+
make_torchcodec_sublibrary(
75+
"${custom_ops_library_name}"
76+
"${custom_ops_sources}"
77+
"${decoder_library_name}"
78+
"${ffmpeg_include_dirs}"
79+
)
80+
81+
# Create libtorchcodec_pybind_opsN.so
82+
set(pybind_ops_library_name "libtorchcodec_pybind_ops${ffmpeg_major_version}")
83+
set(pybind_ops_sources PyBindOps.cpp)
84+
make_torchcodec_sublibrary(
85+
"${pybind_ops_library_name}"
86+
"${pybind_ops_sources}"
87+
"${decoder_library_name}"
88+
"${ffmpeg_include_dirs}"
89+
)
90+
target_compile_definitions(
91+
${pybind_ops_library_name}
92+
PUBLIC
93+
TORCHCODEC_PYBIND=_torchcodec_pybind_ops${ffmpeg_major_version}
94+
)
95+
96+
# Install all libraries.
97+
set(
98+
all_libraries
99+
${decoder_library_name}
100+
${custom_ops_library_name}
101+
${pybind_ops_library_name}
102+
)
54103

55104
# The install step is invoked within CMakeBuild.build_library() in
56105
# setup.py and just copies the built .so files from the temp
57106
# cmake/setuptools build folder into the CMAKE_INSTALL_PREFIX folder. We
58107
# still need to manually pass "DESTINATION ..." for cmake to copy those
59108
# files in CMAKE_INSTALL_PREFIX instead of CMAKE_INSTALL_PREFIX/lib.
60109
install(
61-
TARGETS ${library_name}
110+
TARGETS ${all_libraries}
62111
LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}
63112
)
64113
endfunction()
@@ -76,11 +125,10 @@ if(DEFINED ENV{BUILD_AGAINST_ALL_FFMPEG_FROM_S3})
76125
${CMAKE_CURRENT_SOURCE_DIR}/fetch_and_expose_non_gpl_ffmpeg_libs.cmake
77126
)
78127

79-
80-
make_torchcodec_library(libtorchcodec4 ffmpeg4)
81-
make_torchcodec_library(libtorchcodec7 ffmpeg7)
82-
make_torchcodec_library(libtorchcodec6 ffmpeg6)
83-
make_torchcodec_library(libtorchcodec5 ffmpeg5)
128+
# Build the libraries once per bundled FFmpeg major version. The include dirs
# are passed as one quoted argument so that an empty or multi-entry list still
# binds to the single `ffmpeg_include_dirs` parameter.
# NOTE(review): assumes fetch_and_expose_non_gpl_ffmpeg_libs.cmake defines
# ffmpegN_INCLUDE_DIRS -- confirm; if it does not, these expand to "".
make_torchcodec_libraries(4 ffmpeg4 "${ffmpeg4_INCLUDE_DIRS}")
129+
make_torchcodec_libraries(7 ffmpeg7 "${ffmpeg7_INCLUDE_DIRS}")
130+
make_torchcodec_libraries(6 ffmpeg6 "${ffmpeg6_INCLUDE_DIRS}")
131+
make_torchcodec_libraries(5 ffmpeg5 "${ffmpeg5_INCLUDE_DIRS}")
84132

85133
else()
86134
message(
@@ -120,10 +168,11 @@ else()
120168
)
121169
endif()
122170

123-
set(libtorchcodec_target_name libtorchcodec${ffmpeg_major_version})
124-
# Make libtorchcodec_target_name available in the parent's scope, for the
125-
# test's CMakeLists.txt
126-
set(libtorchcodec_target_name ${libtorchcodec_target_name} PARENT_SCOPE)
171+
# Quote the include dirs: pkg-config may report none (expanding to zero
# arguments, which makes the call fail) or several (only the first would bind
# to `ffmpeg_include_dirs`). Quoting passes the whole list as one argument.
make_torchcodec_libraries(${ffmpeg_major_version} PkgConfig::LIBAV "${LIBAV_INCLUDE_DIRS}")
127172

128-
make_torchcodec_library(${libtorchcodec_target_name} PkgConfig::LIBAV)
173+
# Expose these values upwards so that the test compilation does not need
174+
# to re-figure them out. FIXME: it's not great that we just copy-paste the
175+
# library name.
176+
set(libtorchcodec_library_name "libtorchcodec_decoder${ffmpeg_major_version}" PARENT_SCOPE)
177+
set(libav_include_dirs ${LIBAV_INCLUDE_DIRS} PARENT_SCOPE)
129178
endif()

src/torchcodec/decoders/_core/FFMPEGCommon.cpp

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,12 @@ int64_t getNumChannels(const UniqueAVCodecContext& avCodecContext) {
7373

7474
AVIOBytesContext::AVIOBytesContext(
7575
const void* data,
76-
size_t dataSize,
77-
size_t bufferSize)
78-
: bufferData_{static_cast<const uint8_t*>(data), dataSize, 0} {
76+
int64_t dataSize,
77+
int bufferSize)
78+
: dataContext_{static_cast<const uint8_t*>(data), dataSize, 0} {
79+
TORCH_CHECK(data != nullptr, "Video data buffer cannot be nullptr!");
80+
TORCH_CHECK(dataSize > 0, "Video data size must be positive");
81+
7982
auto buffer = static_cast<uint8_t*>(av_malloc(bufferSize));
8083
TORCH_CHECK(
8184
buffer != nullptr,
@@ -85,7 +88,7 @@ AVIOBytesContext::AVIOBytesContext(
8588
buffer,
8689
bufferSize,
8790
0,
88-
&bufferData_,
91+
&dataContext_,
8992
&AVIOBytesContext::read,
9093
nullptr,
9194
&AVIOBytesContext::seek));
@@ -102,50 +105,50 @@ AVIOBytesContext::~AVIOBytesContext() {
102105
}
103106
}
104107

105-
AVIOContext* AVIOBytesContext::getAVIO() {
108+
AVIOContext* AVIOBytesContext::getAVIOContext() const {
106109
return avioContext_.get();
107110
}
108111

109-
// The signature of this function is defined by FFMPEG.
112+
// The signature of this function is defined by FFmpeg.
110113
int AVIOBytesContext::read(void* opaque, uint8_t* buf, int buf_size) {
111-
auto bufferData = static_cast<AVIOBufferData*>(opaque);
114+
auto dataContext = static_cast<DataContext*>(opaque);
112115
TORCH_CHECK(
113-
bufferData->current <= bufferData->size,
116+
dataContext->current <= dataContext->size,
114117
"Tried to read outside of the buffer: current=",
115-
bufferData->current,
118+
dataContext->current,
116119
", size=",
117-
bufferData->size);
120+
dataContext->size);
118121

119-
buf_size =
120-
FFMIN(buf_size, static_cast<int>(bufferData->size - bufferData->current));
122+
buf_size = FFMIN(
123+
buf_size, static_cast<int>(dataContext->size - dataContext->current));
121124
TORCH_CHECK(
122125
buf_size >= 0,
123126
"Tried to read negative bytes: buf_size=",
124127
buf_size,
125128
", size=",
126-
bufferData->size,
129+
dataContext->size,
127130
", current=",
128-
bufferData->current);
131+
dataContext->current);
129132

130133
if (!buf_size) {
131134
return AVERROR_EOF;
132135
}
133-
memcpy(buf, bufferData->data + bufferData->current, buf_size);
134-
bufferData->current += buf_size;
136+
memcpy(buf, dataContext->data + dataContext->current, buf_size);
137+
dataContext->current += buf_size;
135138
return buf_size;
136139
}
137140

138-
// The signature of this function is defined by FFMPEG.
141+
// The signature of this function is defined by FFmpeg.
139142
int64_t AVIOBytesContext::seek(void* opaque, int64_t offset, int whence) {
140-
auto bufferData = static_cast<AVIOBufferData*>(opaque);
143+
auto dataContext = static_cast<DataContext*>(opaque);
141144
int64_t ret = -1;
142145

143146
switch (whence) {
144147
case AVSEEK_SIZE:
145-
ret = bufferData->size;
148+
ret = dataContext->size;
146149
break;
147150
case SEEK_SET:
148-
bufferData->current = offset;
151+
dataContext->current = offset;
149152
ret = offset;
150153
break;
151154
default:

src/torchcodec/decoders/_core/FFMPEGCommon.h

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -144,24 +144,27 @@ int64_t getNumChannels(const UniqueAVCodecContext& avCodecContext);
144144
// Returns true if sws_scale can handle unaligned data.
145145
bool canSwsScaleHandleUnalignedData();
146146

147+
// Abstract interface for objects that own and expose an AVIOContext.
// VideoDecoder takes ownership of a holder (via std::unique_ptr) and assigns
// the pointer returned by getAVIOContext() to AVFormatContext::pb before
// opening the input. AVIOBytesContext implements this interface for in-memory
// buffers.
148+
class AVIOContextHolder {
149+
public:
150+
virtual ~AVIOContextHolder(){};
151+
virtual AVIOContext* getAVIOContext() const = 0;
152+
};
153+
154+
// TODO: make comment below better
147155
// A struct that holds state for reading bytes from an IO context.
148156
// We give this to FFMPEG and it will pass it back to us when it needs to read
149157
// or seek in the memory buffer.
150-
struct AVIOBufferData {
151-
const uint8_t* data;
152-
size_t size;
153-
size_t current;
154-
};
155-
158+
//
156159
// A class that can be used as AVFormatContext's IO context. It reads from a
157160
// memory buffer that is passed in.
158-
class AVIOBytesContext {
161+
class AVIOBytesContext : public AVIOContextHolder {
159162
public:
160-
AVIOBytesContext(const void* data, size_t dataSize, size_t bufferSize);
161-
~AVIOBytesContext();
163+
AVIOBytesContext(const void* data, int64_t dataSize, int bufferSize);
164+
virtual ~AVIOBytesContext();
162165

163166
// Returns the AVIOContext that can be passed to FFMPEG.
164-
AVIOContext* getAVIO();
167+
virtual AVIOContext* getAVIOContext() const override;
165168

166169
// The signature of this function is defined by FFMPEG.
167170
static int read(void* opaque, uint8_t* buf, int buf_size);
@@ -170,8 +173,14 @@ class AVIOBytesContext {
170173
static int64_t seek(void* opaque, int64_t offset, int whence);
171174

172175
private:
176+
struct DataContext {
177+
const uint8_t* data;
178+
int64_t size;
179+
int64_t current;
180+
};
181+
173182
UniqueAVIOContext avioContext_;
174-
struct AVIOBufferData bufferData_;
183+
DataContext dataContext_;
175184
};
176185

177186
} // namespace facebook::torchcodec

src/torchcodec/decoders/_core/VideoDecoder.cpp

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -80,23 +80,21 @@ VideoDecoder::VideoDecoder(const std::string& videoFilePath, SeekMode seekMode)
8080
initializeDecoder();
8181
}
8282

83-
VideoDecoder::VideoDecoder(const void* data, size_t length, SeekMode seekMode)
84-
: seekMode_(seekMode) {
85-
TORCH_CHECK(data != nullptr, "Video data buffer cannot be nullptr!");
86-
83+
VideoDecoder::VideoDecoder(
84+
std::unique_ptr<AVIOContextHolder> context,
85+
SeekMode seekMode)
86+
: seekMode_(seekMode), avioContextHolder_(std::move(context)) {
8787
av_log_set_level(AV_LOG_QUIET);
8888

89-
constexpr int bufferSize = 64 * 1024;
90-
ioBytesContext_.reset(new AVIOBytesContext(data, length, bufferSize));
91-
TORCH_CHECK(ioBytesContext_, "Failed to create AVIOBytesContext");
89+
TORCH_CHECK(avioContextHolder_, "Context holder cannot be null");
9290

9391
// Because FFmpeg requires a reference to a pointer in the call to open, we
9492
// can't use a unique pointer here. Note that means we must call free if open
9593
// fails.
9694
AVFormatContext* rawContext = avformat_alloc_context();
9795
TORCH_CHECK(rawContext != nullptr, "Unable to alloc avformat context");
9896

99-
rawContext->pb = ioBytesContext_->getAVIO();
97+
rawContext->pb = avioContextHolder_->getAVIOContext();
10098
int status = avformat_open_input(&rawContext, nullptr, nullptr, nullptr);
10199
if (status != 0) {
102100
avformat_free_context(rawContext);
@@ -1747,4 +1745,14 @@ FrameDims getHeightAndWidthFromOptionsOrAVFrame(
17471745
videoStreamOptions.width.value_or(avFrame.width));
17481746
}
17491747

1748+
// Converts a textual seek mode into the corresponding VideoDecoder::SeekMode:
// "exact" maps to SeekMode::exact and "approximate" to SeekMode::approximate.
// Any other string fails the TORCH_CHECK with an "Invalid seek mode: ..."
// message.
VideoDecoder::SeekMode seekModeFromString(std::string_view seekMode) {
1749+
if (seekMode == "exact") {
1750+
return VideoDecoder::SeekMode::exact;
1751+
} else if (seekMode == "approximate") {
1752+
return VideoDecoder::SeekMode::approximate;
1753+
} else {
1754+
TORCH_CHECK(false, "Invalid seek mode: " + std::string(seekMode));
1755+
}
1756+
}
1757+
17501758
} // namespace facebook::torchcodec

src/torchcodec/decoders/_core/VideoDecoder.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,9 @@ class VideoDecoder {
3434
const std::string& videoFilePath,
3535
SeekMode seekMode = SeekMode::exact);
3636

37-
// Creates a VideoDecoder from a given buffer of data. Note that the data is
38-
// not owned by the VideoDecoder.
37+
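// Creates a VideoDecoder that reads its input through the AVIOContext exposed
// by the given holder. The VideoDecoder takes ownership of the holder.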
3938
explicit VideoDecoder(
40-
const void* data,
41-
size_t length,
39+
std::unique_ptr<AVIOContextHolder> context,
4240
SeekMode seekMode = SeekMode::exact);
4341

4442
// --------------------------------------------------------------------------
@@ -472,7 +470,7 @@ class VideoDecoder {
472470
// Stores various internal decoding stats.
473471
DecodeStats decodeStats_;
474472
// Stores the AVIOContext for the input buffer.
475-
std::unique_ptr<AVIOBytesContext> ioBytesContext_;
473+
std::unique_ptr<AVIOContextHolder> avioContextHolder_;
476474
// Whether or not we have already scanned all streams to update the metadata.
477475
bool scannedAllStreams_ = false;
478476
// Tracks that we've already been initialized.
@@ -554,4 +552,6 @@ std::ostream& operator<<(
554552
std::ostream& os,
555553
const VideoDecoder::DecodeStats& stats);
556554

555+
VideoDecoder::SeekMode seekModeFromString(std::string_view seekMode);
556+
557557
} // namespace facebook::torchcodec

0 commit comments

Comments
 (0)