meta-pytorch
diff --git a/‎src/torchcodec/decoders/_core/CMakeLists.txt‎
Lines changed: 92 additions & 43 deletions b/‎src/torchcodec/decoders/_core/CMakeLists.txt‎
Lines changed: 92 additions & 43 deletions
diff --git a/‎src/torchcodec/decoders/_core/FFMPEGCommon.cpp‎
Lines changed: 23 additions & 20 deletions b/‎src/torchcodec/decoders/_core/FFMPEGCommon.cpp‎
Lines changed: 23 additions & 20 deletions
diff --git a/‎src/torchcodec/decoders/_core/FFMPEGCommon.h‎
Lines changed: 20 additions & 11 deletions b/‎src/torchcodec/decoders/_core/FFMPEGCommon.h‎
Lines changed: 20 additions & 11 deletions
diff --git a/‎src/torchcodec/decoders/_core/VideoDecoder.cpp‎
Lines changed: 16 additions & 8 deletions b/‎src/torchcodec/decoders/_core/VideoDecoder.cpp‎
Lines changed: 16 additions & 8 deletions
diff --git a/‎src/torchcodec/decoders/_core/VideoDecoder.h‎
Lines changed: 5 additions & 5 deletions b/‎src/torchcodec/decoders/_core/VideoDecoder.h‎
Lines changed: 5 additions & 5 deletions
@@ -7,58 +7,107 @@ find_package(Torch REQUIRED)
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -Werror ${TORCH_CXX_FLAGS}")
 find_package(Python3 ${PYTHON_VERSION} EXACT COMPONENTS Development)
 
-function(make_torchcodec_library library_name ffmpeg_target)
-    set(
-        sources
-        FFMPEGCommon.h
-        FFMPEGCommon.cpp
-        VideoDecoder.h
-        VideoDecoder.cpp
-        VideoDecoderOps.h
-        VideoDecoderOps.cpp
-        DeviceInterface.h
-    )
-    if(ENABLE_CUDA)
-        list(APPEND sources CudaDevice.cpp)
-    else()
-        list(APPEND sources CPUOnlyDevice.cpp)
-    endif()
-    add_library(${library_name} SHARED ${sources})
-    set_property(TARGET ${library_name} PROPERTY CXX_STANDARD 17)
+function(make_torchcodec_sublibrary
+    library_name
+    sources
+    dependent_libraries
+    ffmpeg_include_dirs)
 
-    target_include_directories(
-        ${library_name}
+    add_library(${library_name} SHARED ${sources})
+    set_target_properties(${library_name} PROPERTIES CXX_STANDARD 17)
+    target_include_directories(${library_name}
         PRIVATE
         ./../../../../
         "${TORCH_INSTALL_PREFIX}/include"
         ${Python3_INCLUDE_DIRS}
+        ${ffmpeg_include_dirs}
     )
 
-    set(NEEDED_LIBRARIES ${ffmpeg_target} ${TORCH_LIBRARIES}
-        ${Python3_LIBRARIES})
-    if(ENABLE_CUDA)
-        list(APPEND NEEDED_LIBRARIES
-            ${CUDA_nppi_LIBRARY} ${CUDA_nppicc_LIBRARY} )
-    endif()
+    # Avoid adding the "lib" prefix which we already add explicitly.
+    set_target_properties(${library_name} PROPERTIES PREFIX "")
+
     target_link_libraries(
         ${library_name}
         PUBLIC
-        ${NEEDED_LIBRARIES}
+        ${dependent_libraries}
     )
+endfunction()
 
-    # We already set the library_name to be libtorchcodecN, so we don't want
-    # cmake to add another "lib" prefix. We do it this way because it makes it
-    # easier to find references to libtorchcodec in the code (e.g. via `git
-    # grep`)
-    set_target_properties(${library_name} PROPERTIES PREFIX "")
+function(make_torchcodec_libraries
+    ffmpeg_major_version
+    ffmpeg_target
+    ffmpeg_include_dirs)
+
+    # Create libtorchcodec_decoderN.so
+    set(decoder_library_name "libtorchcodec_decoder${ffmpeg_major_version}")
+    set(decoder_sources FFMPEGCommon.cpp VideoDecoder.cpp)
+
+    if(ENABLE_CUDA)
+        list(APPEND decoder_sources CudaDevice.cpp)
+    else()
+        list(APPEND decoder_sources CPUOnlyDevice.cpp)
+    endif()
+
+    set(decoder_dependent_libraries
+        ${ffmpeg_target}
+        ${TORCH_LIBRARIES}
+        ${Python3_LIBRARIES}
+    )
+
+    if(ENABLE_CUDA)
+        list(APPEND decoder_dependent_libraries
+            ${CUDA_nppi_LIBRARY}
+            ${CUDA_nppicc_LIBRARY}
+        )
+    endif()
+
+    make_torchcodec_sublibrary(
+        "${decoder_library_name}"
+        "${decoder_sources}"
+        "${decoder_dependent_libraries}"
+        "${ffmpeg_include_dirs}"
+    )
+
+    # Create libtorchcodec_custom_opsN.so
+    set(custom_ops_library_name "libtorchcodec_custom_ops${ffmpeg_major_version}")
+    set(custom_ops_sources VideoDecoderOps.cpp)
+    make_torchcodec_sublibrary(
+        "${custom_ops_library_name}"
+        "${custom_ops_sources}"
+        "${decoder_library_name}"
+        "${ffmpeg_include_dirs}"
+    )
+
+    # Create libtorchcodec_pybind_opsN.so
+    set(pybind_ops_library_name "libtorchcodec_pybind_ops${ffmpeg_major_version}")
+    set(pybind_ops_sources PyBindOps.cpp)
+    make_torchcodec_sublibrary(
+        "${pybind_ops_library_name}"
+        "${pybind_ops_sources}"
+        "${decoder_library_name}"
+        "${ffmpeg_include_dirs}"
+    )
+    target_compile_definitions(
+        ${pybind_ops_library_name}
+        PUBLIC
+        TORCHCODEC_PYBIND=_torchcodec_pybind_ops${ffmpeg_major_version}
+    )
+
+    # Install all libraries.
+    set(
+        all_libraries
+        ${decoder_library_name}
+        ${custom_ops_library_name}
+        ${pybind_ops_library_name}
+    )
 
     # The install step is invoked within CMakeBuild.build_library() in
     # setup.py and just copies the built .so files from the temp
     # cmake/setuptools build folder into the CMAKE_INSTALL_PREFIX folder. We
     # still need to manually pass "DESTINATION ..." for cmake to copy those
     # files in CMAKE_INSTALL_PREFIX instead of CMAKE_INSTALL_PREFIX/lib.
     install(
-        TARGETS ${library_name}
+        TARGETS ${all_libraries}
         LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}
     )
 endfunction()
@@ -76,11 +125,10 @@ if(DEFINED ENV{BUILD_AGAINST_ALL_FFMPEG_FROM_S3})
         ${CMAKE_CURRENT_SOURCE_DIR}/fetch_and_expose_non_gpl_ffmpeg_libs.cmake
     )
 
-
-	make_torchcodec_library(libtorchcodec4 ffmpeg4)
-	make_torchcodec_library(libtorchcodec7 ffmpeg7)
-	make_torchcodec_library(libtorchcodec6 ffmpeg6)
-	make_torchcodec_library(libtorchcodec5 ffmpeg5)
+    make_torchcodec_libraries(4 ffmpeg4 "${ffmpeg4_INCLUDE_DIRS}")
+    make_torchcodec_libraries(7 ffmpeg7 "${ffmpeg7_INCLUDE_DIRS}")
+    make_torchcodec_libraries(6 ffmpeg6 "${ffmpeg6_INCLUDE_DIRS}")
+    make_torchcodec_libraries(5 ffmpeg5 "${ffmpeg5_INCLUDE_DIRS}")
 
 else()
     message(
@@ -120,10 +168,11 @@ else()
         )
     endif()
 
-    set(libtorchcodec_target_name libtorchcodec${ffmpeg_major_version})
-    # Make libtorchcodec_target_name available in the parent's scope, for the
-    # test's CMakeLists.txt
-    set(libtorchcodec_target_name ${libtorchcodec_target_name} PARENT_SCOPE)
+    make_torchcodec_libraries(${ffmpeg_major_version} PkgConfig::LIBAV "${LIBAV_INCLUDE_DIRS}")
 
-    make_torchcodec_library(${libtorchcodec_target_name} PkgConfig::LIBAV)
+    # Expose these values upwards so that the test compilation does not need
+    # to re-figure them out. FIXME: it's not great that we just copy-paste the
+    # library name.
+    set(libtorchcodec_library_name "libtorchcodec_decoder${ffmpeg_major_version}" PARENT_SCOPE)
+    set(libav_include_dirs ${LIBAV_INCLUDE_DIRS} PARENT_SCOPE)
 endif()
@@ -73,9 +73,12 @@ int64_t getNumChannels(const UniqueAVCodecContext& avCodecContext) {
 
 AVIOBytesContext::AVIOBytesContext(
     const void* data,
-    size_t dataSize,
-    size_t bufferSize)
-    : bufferData_{static_cast<const uint8_t*>(data), dataSize, 0} {
+    int64_t dataSize,
+    int bufferSize)
+    : dataContext_{static_cast<const uint8_t*>(data), dataSize, 0} {
+  TORCH_CHECK(data != nullptr, "Video data buffer cannot be nullptr!");
+  TORCH_CHECK(dataSize > 0, "Video data size must be positive");
+
   auto buffer = static_cast<uint8_t*>(av_malloc(bufferSize));
   TORCH_CHECK(
       buffer != nullptr,
@@ -85,7 +88,7 @@ AVIOBytesContext::AVIOBytesContext(
       buffer,
       bufferSize,
       0,
-      &bufferData_,
+      &dataContext_,
       &AVIOBytesContext::read,
       nullptr,
       &AVIOBytesContext::seek));
@@ -102,50 +105,50 @@ AVIOBytesContext::~AVIOBytesContext() {
   }
 }
 
-AVIOContext* AVIOBytesContext::getAVIO() {
+AVIOContext* AVIOBytesContext::getAVIOContext() const {
   return avioContext_.get();
 }
 
-// The signature of this function is defined by FFMPEG.
+// The signature of this function is defined by FFmpeg.
 int AVIOBytesContext::read(void* opaque, uint8_t* buf, int buf_size) {
-  auto bufferData = static_cast<AVIOBufferData*>(opaque);
+  auto dataContext = static_cast<DataContext*>(opaque);
   TORCH_CHECK(
-      bufferData->current <= bufferData->size,
+      0 <= dataContext->current && dataContext->current <= dataContext->size,
       "Tried to read outside of the buffer: current=",
-      bufferData->current,
+      dataContext->current,
       ", size=",
-      bufferData->size);
+      dataContext->size);
 
-  buf_size =
-      FFMIN(buf_size, static_cast<int>(bufferData->size - bufferData->current));
+  const int64_t remaining = dataContext->size - dataContext->current;
+  buf_size = static_cast<int>(FFMIN(static_cast<int64_t>(buf_size), remaining));
   TORCH_CHECK(
       buf_size >= 0,
       "Tried to read negative bytes: buf_size=",
       buf_size,
       ", size=",
-      bufferData->size,
+      dataContext->size,
       ", current=",
-      bufferData->current);
+      dataContext->current);
 
   if (!buf_size) {
     return AVERROR_EOF;
   }
-  memcpy(buf, bufferData->data + bufferData->current, buf_size);
-  bufferData->current += buf_size;
+  memcpy(buf, dataContext->data + dataContext->current, buf_size);
+  dataContext->current += buf_size;
   return buf_size;
 }
 
-// The signature of this function is defined by FFMPEG.
+// The signature of this function is defined by FFmpeg.
 int64_t AVIOBytesContext::seek(void* opaque, int64_t offset, int whence) {
-  auto bufferData = static_cast<AVIOBufferData*>(opaque);
+  auto dataContext = static_cast<DataContext*>(opaque);
   int64_t ret = -1;
 
   switch (whence) {
     case AVSEEK_SIZE:
-      ret = bufferData->size;
+      ret = dataContext->size;
       break;
     case SEEK_SET:
-      bufferData->current = offset;
+      dataContext->current = offset;
       ret = offset;
       break;
     default:
 
@@ -144,24 +144,27 @@ int64_t getNumChannels(const UniqueAVCodecContext& avCodecContext);
 // Returns true if sws_scale can handle unaligned data.
 bool canSwsScaleHandleUnalignedData();
 
+// Abstract interface for objects that hand an AVIOContext to VideoDecoder.
+class AVIOContextHolder {
+ public:
+  virtual ~AVIOContextHolder() = default;
+  virtual AVIOContext* getAVIOContext() const = 0;
+};
+
+// TODO: make comment below better
 // A struct that holds state for reading bytes from an IO context.
 // We give this to FFMPEG and it will pass it back to us when it needs to read
 // or seek in the memory buffer.
-struct AVIOBufferData {
-  const uint8_t* data;
-  size_t size;
-  size_t current;
-};
-
+//
 // A class that can be used as AVFormatContext's IO context. It reads from a
 // memory buffer that is passed in.
-class AVIOBytesContext {
+class AVIOBytesContext : public AVIOContextHolder {
  public:
-  AVIOBytesContext(const void* data, size_t dataSize, size_t bufferSize);
-  ~AVIOBytesContext();
+  AVIOBytesContext(const void* data, int64_t dataSize, int bufferSize);
+  virtual ~AVIOBytesContext();
 
   // Returns the AVIOContext that can be passed to FFMPEG.
-  AVIOContext* getAVIO();
+  virtual AVIOContext* getAVIOContext() const override;
 
   // The signature of this function is defined by FFMPEG.
   static int read(void* opaque, uint8_t* buf, int buf_size);
@@ -170,8 +173,14 @@ class AVIOBytesContext {
   static int64_t seek(void* opaque, int64_t offset, int whence);
 
  private:
+  struct DataContext {
+    const uint8_t* data;
+    int64_t size;
+    int64_t current;
+  };
+
   UniqueAVIOContext avioContext_;
-  struct AVIOBufferData bufferData_;
+  DataContext dataContext_;
 };
 
 } // namespace facebook::torchcodec
@@ -80,23 +80,21 @@ VideoDecoder::VideoDecoder(const std::string& videoFilePath, SeekMode seekMode)
   initializeDecoder();
 }
 
-VideoDecoder::VideoDecoder(const void* data, size_t length, SeekMode seekMode)
-    : seekMode_(seekMode) {
-  TORCH_CHECK(data != nullptr, "Video data buffer cannot be nullptr!");
-
+VideoDecoder::VideoDecoder(
+    std::unique_ptr<AVIOContextHolder> context,
+    SeekMode seekMode)
+    : seekMode_(seekMode), avioContextHolder_(std::move(context)) {
   av_log_set_level(AV_LOG_QUIET);
 
-  constexpr int bufferSize = 64 * 1024;
-  ioBytesContext_.reset(new AVIOBytesContext(data, length, bufferSize));
-  TORCH_CHECK(ioBytesContext_, "Failed to create AVIOBytesContext");
+  TORCH_CHECK(avioContextHolder_, "Context holder cannot be null");
 
   // Because FFmpeg requires a reference to a pointer in the call to open, we
   // can't use a unique pointer here. Note that means we must call free if open
   // fails.
   AVFormatContext* rawContext = avformat_alloc_context();
   TORCH_CHECK(rawContext != nullptr, "Unable to alloc avformat context");
 
-  rawContext->pb = ioBytesContext_->getAVIO();
+  rawContext->pb = avioContextHolder_->getAVIOContext();
   int status = avformat_open_input(&rawContext, nullptr, nullptr, nullptr);
   if (status != 0) {
     avformat_free_context(rawContext);
@@ -1747,4 +1745,14 @@ FrameDims getHeightAndWidthFromOptionsOrAVFrame(
       videoStreamOptions.width.value_or(avFrame.width));
 }
 
+// Converts "exact"/"approximate" to a SeekMode; other values fail TORCH_CHECK.
+VideoDecoder::SeekMode seekModeFromString(std::string_view seekMode) {
+  if (seekMode == "exact") {
+    return VideoDecoder::SeekMode::exact;
+  }
+  TORCH_CHECK(
+      seekMode == "approximate", "Invalid seek mode: " + std::string(seekMode));
+  return VideoDecoder::SeekMode::approximate;
+}
+
 } // namespace facebook::torchcodec
@@ -34,11 +34,9 @@ class VideoDecoder {
       const std::string& videoFilePath,
       SeekMode seekMode = SeekMode::exact);
 
-  // Creates a VideoDecoder from a given buffer of data. Note that the data is
-  // not owned by the VideoDecoder.
+  // Creates a VideoDecoder reading via `context`; the decoder takes ownership.
   explicit VideoDecoder(
-      const void* data,
-      size_t length,
+      std::unique_ptr<AVIOContextHolder> context,
       SeekMode seekMode = SeekMode::exact);
 
   // --------------------------------------------------------------------------
@@ -472,7 +470,7 @@ class VideoDecoder {
   // Stores various internal decoding stats.
   DecodeStats decodeStats_;
   // Stores the AVIOContext for the input buffer.
-  std::unique_ptr<AVIOBytesContext> ioBytesContext_;
+  std::unique_ptr<AVIOContextHolder> avioContextHolder_;
   // Whether or not we have already scanned all streams to update the metadata.
   bool scannedAllStreams_ = false;
   // Tracks that we've already been initialized.
@@ -554,4 +552,6 @@ std::ostream& operator<<(
     std::ostream& os,
     const VideoDecoder::DecodeStats& stats);
 
+VideoDecoder::SeekMode seekModeFromString(std::string_view seekMode);
+
 } // namespace facebook::torchcodec