Merge branch 'main' of github.com:pytorch/torchcodec into renamingzzzz

NicolasHug · NicolasHug · commit 11779a7a91c9 · 2025-01-23T09:33:22.000Z
diff --git a/src/torchcodec/decoders/_core/FFMPEGCommon.cpp b/src/torchcodec/decoders/_core/FFMPEGCommon.cpp
@@ -10,6 +10,25 @@
 
 namespace facebook::torchcodec {
 
+AutoAVPacket::AutoAVPacket() : avPacket_(av_packet_alloc()) {
+  TORCH_CHECK(avPacket_ != nullptr, "Couldn't allocate avPacket.");
+}
+AutoAVPacket::~AutoAVPacket() {
+  av_packet_free(&avPacket_);
+}
+
+ReferenceAVPacket::ReferenceAVPacket(AutoAVPacket& shared)
+    : avPacket_(shared.avPacket_) {}
+ReferenceAVPacket::~ReferenceAVPacket() {
+  av_packet_unref(avPacket_);
+}
+AVPacket* ReferenceAVPacket::get() {
+  return avPacket_;
+}
+AVPacket* ReferenceAVPacket::operator->() {
+  return avPacket_;
+}
+
 AVCodecOnlyUseForCallingAVFindBestStream
 makeAVCodecOnlyUseForCallingAVFindBestStream(const AVCodec* codec) {
 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(59, 18, 100)
diff --git a/src/torchcodec/decoders/_core/FFMPEGCommon.h b/src/torchcodec/decoders/_core/FFMPEGCommon.h
@@ -57,8 +57,6 @@ using UniqueAVCodecContext = std::unique_ptr<
     Deleterp<AVCodecContext, void, avcodec_free_context>>;
 using UniqueAVFrame =
     std::unique_ptr<AVFrame, Deleterp<AVFrame, void, av_frame_free>>;
-using UniqueAVPacket =
-    std::unique_ptr<AVPacket, Deleterp<AVPacket, void, av_packet_free>>;
 using UniqueAVFilterGraph = std::unique_ptr<
     AVFilterGraph,
     Deleterp<AVFilterGraph, void, avfilter_graph_free>>;
@@ -70,6 +68,44 @@ using UniqueAVIOContext = std::
 using UniqueSwsContext =
     std::unique_ptr<SwsContext, Deleter<SwsContext, void, sws_freeContext>>;
 
+// These 2 classes share the same underlying AVPacket object. They are meant to
+// be used in tandem, like so:
+//
+// AutoAVPacket autoAVPacket; // <-- malloc for AVPacket happens here
+// while(...){
+//   ReferenceAVPacket packet(autoAVPacket);
+//   av_read_frame(..., packet.get());  <-- av_packet_ref() called by FFmpeg
+// } <-- av_packet_unref() called here
+//
+// This achieves a few desirable things:
+// - Memory allocation of the underlying AVPacket happens only once, when
+//   autoAVPacket is created.
+// - av_packet_free() is called when autoAVPacket gets out of scope
+// - av_packet_unref() is automatically called when needed, i.e. at the end of
+//   each loop iteration (or when hitting break / continue). This prevents the
+//   risk of us forgetting to call it.
+class AutoAVPacket {
+  friend class ReferenceAVPacket;
+
+ private:
+  AVPacket* avPacket_;
+
+ public:
+  AutoAVPacket();
+  ~AutoAVPacket();
+};
+
+class ReferenceAVPacket {
+ private:
+  AVPacket* avPacket_;
+
+ public:
+  ReferenceAVPacket(AutoAVPacket& shared);
+  ~ReferenceAVPacket();
+  AVPacket* get();
+  AVPacket* operator->();
+};
+
 // av_find_best_stream is not const-correct before commit:
 // https://github.com/FFmpeg/FFmpeg/commit/46dac8cf3d250184ab4247809bc03f60e14f4c0c
 // which was released in FFMPEG version=5.0.3
diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp
@@ -579,9 +579,11 @@ void VideoDecoder::scanFileAndUpdateMetadataAndIndex() {
     return;
   }
 
+  AutoAVPacket autoAVPacket;
   while (true) {
-    // Get the next packet.
-    UniqueAVPacket packet(av_packet_alloc());
+    ReferenceAVPacket packet(autoAVPacket);
+
+    // av_read_frame is a misleading name: it gets the next **packet**.
     int ffmpegStatus = av_read_frame(formatContext_.get(), packet.get());
 
     if (ffmpegStatus == AVERROR_EOF) {
@@ -812,6 +814,7 @@ VideoDecoder::RawDecodedOutput VideoDecoder::getDecodedOutputWithFilter(
   }
   // Need to get the next frame or error from PopFrame.
   UniqueAVFrame avFrame(av_frame_alloc());
+  AutoAVPacket autoAVPacket;
   int ffmpegStatus = AVSUCCESS;
   bool reachedEOF = false;
   int frameStreamIndex = -1;
@@ -851,7 +854,7 @@ VideoDecoder::RawDecodedOutput VideoDecoder::getDecodedOutputWithFilter(
       // pulling frames from its internal buffers.
       continue;
     }
-    UniqueAVPacket packet(av_packet_alloc());
+    ReferenceAVPacket packet(autoAVPacket);
     ffmpegStatus = av_read_frame(formatContext_.get(), packet.get());
     decodeStats_.numPacketsRead++;
     if (ffmpegStatus == AVERROR_EOF) {
@@ -882,12 +885,12 @@ VideoDecoder::RawDecodedOutput VideoDecoder::getDecodedOutputWithFilter(
     }
     ffmpegStatus = avcodec_send_packet(
         streamInfos_[packet->stream_index].codecContext.get(), packet.get());
-    decodeStats_.numPacketsSentToDecoder++;
     if (ffmpegStatus < AVSUCCESS) {
       throw std::runtime_error(
           "Could not push packet to decoder: " +
           getFFMPEGErrorStringFromErrorCode(ffmpegStatus));
     }
+    decodeStats_.numPacketsSentToDecoder++;
   }
   if (ffmpegStatus < AVSUCCESS) {
     if (reachedEOF || ffmpegStatus == AVERROR_EOF) {