Better comment for AVIOContextHolder.

scotts · scotts · commit d301f5317dc7 · 2025-03-17T07:48:22.000-07:00
diff --git a/src/torchcodec/decoders/_core/FFMPEGCommon.h b/src/torchcodec/decoders/_core/FFMPEGCommon.h
@@ -148,7 +148,28 @@ bool canSwsScaleHandleUnalignedData();
 using AVIOReadFunction = int (*)(void*, uint8_t*, int);
 using AVIOSeekFunction = int64_t (*)(void*, int64_t, int);
 
-// TODO: explain purpose of context holder
+// The AVIOContextHolder serves several purposes:
+//
+//   1. It is a smart pointer for the AVIOContext. It has the logic to create
+//      a new AVIOContext and will appropriately free the AVIOContext when it
+//      goes out of scope. Note that this requires more than just having a
+//      UniqueAVIOContext, as the AVIOContext points to a buffer which must be
+//      freed.
+//   2. It is a base class for AVIOContext specializations. When specializing an
+//      AVIOContext, we need to provide four things:
+//        1. A read callback function.
+//        2. A seek callback function.
+//        3. A write callback function. (Not supported yet; it's for encoding.)
+//        4. A pointer to some context object that has the same lifetime as the
+//           AVIOContext itself. This context object holds the custom state that
+//           tracks the custom behavior of reading, seeking and writing. It is
+//           provided upon AVIOContext creation and to the read, seek and
+//           write callback functions.
+//      While it's not required, it is natural for the derived classes to make
+//      all of the above members. Derived classes need to call
+//      createAVIOContext(), ideally in their constructor.
+//   3. It is a generic handle for those that just need to manage having access
+//      to an AVIOContext, but don't necessarily care how it was customized.
 class AVIOContextHolder {
  public:
   virtual ~AVIOContextHolder();
@@ -165,7 +186,7 @@ class AVIOContextHolder {
   UniqueAVIOContext avioContext_;
 
   // Defaults to 64 KB
-  static const int defaultBufferSize = 64 * 1014;
+  static const int defaultBufferSize = 64 * 1024;
 };
 
 } // namespace facebook::torchcodec
diff --git a/src/torchcodec/decoders/_core/PyBindOps.cpp b/src/torchcodec/decoders/_core/PyBindOps.cpp
@@ -53,6 +53,8 @@ class AVIOFileLikeContext : public AVIOContextHolder {
     int num_read = 0;
     while (num_read < buf_size) {
       int request = buf_size - num_read;
+      // TODO: It may be more efficient to acquire the GIL once in the
+      // surrounding scope, rather than on every loop iteration.
       py::gil_scoped_acquire gil;
       auto chunk = static_cast<std::string>(
           static_cast<py::bytes>((*fileLike)->attr("read")(request)));
@@ -85,11 +87,11 @@ class AVIOFileLikeContext : public AVIOContextHolder {
   }
 
  private:
-  // Note that we keep a pointer to the Python object because we need to
+  // Note that we dynamically allocate the Python object because we need to
   // strictly control when its destructor is called. We must hold the GIL
   // when its destructor gets called, as it needs to update the reference
-  // count. It's easiest to control that when it's a pointer. Otherwise, we'd
-  // have to ensure whatever enclosing scope holds the object has the GIL,
+  // count. It's easiest to control that when it's dynamic memory. Otherwise,
+  // we'd have to ensure whatever enclosing scope holds the object has the GIL,
   // and that's, at least, hard. For all of the common pitfalls, see:
   //
   //   https://pybind11.readthedocs.io/en/stable/advanced/misc.html#common-sources-of-global-interpreter-lock-errors
diff --git a/test/decoders/VideoDecoderTest.cpp b/test/decoders/VideoDecoderTest.cpp
@@ -51,10 +51,7 @@ class VideoDecoderTest : public testing::TestWithParam<bool> {
 
       void* buffer = content_.data();
       size_t length = content_.length();
-      constexpr int bufferSize = 64 * 1024;
-      auto contextHolder =
-          std::make_unique<AVIOBytesContext>(buffer, length, bufferSize);
-
+      auto contextHolder = std::make_unique<AVIOBytesContext>(buffer, length);
       return std::make_unique<VideoDecoder>(
           std::move(contextHolder), VideoDecoder::SeekMode::approximate);
     } else {