Merge branch 'main' of github.com:pytorch/torchcodec into framezzzzz

NicolasHug · NicolasHug · commit 63049c5f1d85 · 2025-01-23T15:50:36.000Z
diff --git a/.clang-format b/.clang-format
@@ -83,6 +83,7 @@ PenaltyExcessCharacter: 1000000
 PenaltyReturnTypeOnItsOwnLine: 200
 PointerAlignment: Left
 ReflowComments: true
+SeparateDefinitionBlocks: Always
 SortIncludes: true
 SortUsingDeclarations: true
 SpaceAfterCStyleCast: false
diff --git a/benchmarks/decoders/BenchmarkDecodersMain.cpp b/benchmarks/decoders/BenchmarkDecodersMain.cpp
diff --git a/src/torchcodec/decoders/_core/FFMPEGCommon.cpp b/src/torchcodec/decoders/_core/FFMPEGCommon.cpp
@@ -13,18 +13,22 @@ namespace facebook::torchcodec {
 AutoAVPacket::AutoAVPacket() : avPacket_(av_packet_alloc()) {
   TORCH_CHECK(avPacket_ != nullptr, "Couldn't allocate avPacket.");
 }
+
 AutoAVPacket::~AutoAVPacket() {
   av_packet_free(&avPacket_);
 }
 
 ReferenceAVPacket::ReferenceAVPacket(AutoAVPacket& shared)
     : avPacket_(shared.avPacket_) {}
+
 ReferenceAVPacket::~ReferenceAVPacket() {
   av_packet_unref(avPacket_);
 }
+
 AVPacket* ReferenceAVPacket::get() {
   return avPacket_;
 }
+
 AVPacket* ReferenceAVPacket::operator->() {
   return avPacket_;
 }
diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp
@@ -386,6 +386,7 @@ void VideoDecoder::createFilterGraph(
   }
 
   enum AVPixelFormat pix_fmts[] = {AV_PIX_FMT_RGB24, AV_PIX_FMT_NONE};
+
   ffmpegStatus = av_opt_set_int_list(
       filterState.sinkContext,
       "pix_fmts",
@@ -685,6 +686,7 @@ int VideoDecoder::getKeyFrameIndexForPts(
   }
   return getKeyFrameIndexForPtsUsingScannedIndex(streamInfo.keyFrames, pts);
 }
+
 /*
 Videos have I frames and non-I frames (P and B frames). Non-I frames need data
 from the previous I frame to be decoded.
diff --git a/src/torchcodec/decoders/_core/VideoDecoder.h b/src/torchcodec/decoders/_core/VideoDecoder.h
@@ -74,6 +74,7 @@ class VideoDecoder {
   // Updates the metadata of the video to accurate values obtained by scanning
   // the contents of the video file.
   void scanFileAndUpdateMetadataAndIndex();
+
   struct StreamMetadata {
     // Common (video and audio) fields derived from the AVStream.
     int streamIndex;
@@ -103,6 +104,7 @@ class VideoDecoder {
     std::optional<int64_t> width;
     std::optional<int64_t> height;
   };
+
   struct ContainerMetadata {
     std::vector<StreamMetadata> allStreamMetadata;
     int numAudioStreams = 0;
@@ -117,6 +119,7 @@ class VideoDecoder {
     // If set, this is the index to the default video stream.
     std::optional<int> bestVideoStreamIndex;
   };
+
   // Returns the metadata for the container.
   ContainerMetadata getContainerMetadata() const;
 
@@ -130,8 +133,10 @@ class VideoDecoder {
     // Use the libswscale library for color conversion.
     SWSCALE
   };
+
   struct VideoStreamOptions {
     VideoStreamOptions() {}
+
     explicit VideoStreamOptions(const std::string& optionsString);
     // Number of threads we pass to FFMPEG for decoding.
     // 0 means FFMPEG will choose the number of threads automatically to fully
@@ -149,7 +154,9 @@ class VideoDecoder {
     // By default we use CPU for decoding for both C++ and python users.
     torch::Device device = torch::kCPU;
   };
+
   struct AudioStreamOptions {};
+
   void addVideoStreamDecoder(
       int streamIndex,
       const VideoStreamOptions& videoStreamOptions = VideoStreamOptions());
@@ -202,6 +209,7 @@ class VideoDecoder {
     explicit EndOfFileException(const std::string& msg)
         : std::runtime_error(msg) {}
   };
+
   // Decodes the frame where the current cursor position is. It also advances
   // the cursor to the next frame.
   FrameOutput getNextFrameNoDemux();
@@ -261,6 +269,7 @@ class VideoDecoder {
       int streamIndex,
       double startSeconds,
       double stopSeconds);
+
   // --------------------------------------------------------------------------
   // DECODER PERFORMANCE STATISTICS API
   // --------------------------------------------------------------------------
@@ -275,6 +284,7 @@ class VideoDecoder {
     int64_t numFramesReceivedByDecoder = 0;
     int64_t numFlushes = 0;
   };
+
   DecodeStats getDecodeStats() const;
   void resetDecodeStats();
 
@@ -290,11 +300,13 @@ class VideoDecoder {
     // done during pts -> index conversions.)
     int64_t nextPts = INT64_MAX;
   };
+
   struct FilterState {
     UniqueAVFilterGraph filterGraph;
     AVFilterContext* sourceContext = nullptr;
     AVFilterContext* sinkContext = nullptr;
   };
+
   struct DecodedFrameContext {
     int decodedWidth;
     int decodedHeight;
@@ -304,6 +316,7 @@ class VideoDecoder {
     bool operator==(const DecodedFrameContext&);
     bool operator!=(const DecodedFrameContext&);
   };
+
   // Stores information for each stream.
   struct StreamInfo {
     int streamIndex = -1;
@@ -327,6 +340,7 @@ class VideoDecoder {
     DecodedFrameContext prevFrameContext;
     UniqueSwsContext swsContext;
   };
+
   // Returns the key frame index of the presentation timestamp using FFMPEG's
   // index. Note that this index may be truncated for some files.
   int getKeyFrameIndexForPtsUsingEncoderIndex(AVStream* stream, int64_t pts)
@@ -484,6 +498,7 @@ class VideoDecoder {
 struct FrameDims {
   int height;
   int width;
+
   FrameDims(int h, int w) : height(h), width(w) {}
 };
 
diff --git a/src/torchcodec/decoders/_core/VideoDecoderOps.cpp b/src/torchcodec/decoders/_core/VideoDecoderOps.cpp
@@ -280,6 +280,7 @@ OpsFrameBatchOutput get_frames_in_range(
       stream_index, start, stop, step.value_or(1));
   return makeOpsFrameBatchOutput(result);
 }
+
 OpsFrameBatchOutput get_frames_by_pts(
     at::Tensor& decoder,
     int64_t stream_index,
diff --git a/test/decoders/VideoDecoderTest.cpp b/test/decoders/VideoDecoderTest.cpp
@@ -57,6 +57,7 @@ class VideoDecoderTest : public testing::TestWithParam<bool> {
           filepath, VideoDecoder::SeekMode::approximate);
     }
   }
+
   std::string content_;
 };
 

Original file line number	Diff line number	Diff line change
`@@ -13,18 +13,22 @@ namespace facebook::torchcodec {`
`13`	`13`	`AutoAVPacket::AutoAVPacket() : avPacket_(av_packet_alloc()) {`
`14`	`14`	`TORCH_CHECK(avPacket_ != nullptr, "Couldn't allocate avPacket.");`
`15`	`15`	`}`
	`16`	`+`
`16`	`17`	`AutoAVPacket::~AutoAVPacket() {`
`17`	`18`	`av_packet_free(&avPacket_);`
`18`	`19`	`}`
`19`	`20`
`20`	`21`	`ReferenceAVPacket::ReferenceAVPacket(AutoAVPacket& shared)`
`21`	`22`	`: avPacket_(shared.avPacket_) {}`
	`23`	`+`
`22`	`24`	`ReferenceAVPacket::~ReferenceAVPacket() {`
`23`	`25`	`av_packet_unref(avPacket_);`
`24`	`26`	`}`
	`27`	`+`
`25`	`28`	`AVPacket* ReferenceAVPacket::get() {`
`26`	`29`	`return avPacket_;`
`27`	`30`	`}`
	`31`	`+`
`28`	`32`	`AVPacket* ReferenceAVPacket::operator->() {`
`29`	`33`	`return avPacket_;`
`30`	`34`	`}`
Original file line number	Diff line number	Diff line change
`@@ -386,6 +386,7 @@ void VideoDecoder::createFilterGraph(`
`386`	`386`	`}`
`387`	`387`
`388`	`388`	`enum AVPixelFormat pix_fmts[] = {AV_PIX_FMT_RGB24, AV_PIX_FMT_NONE};`
	`389`	`+`
`389`	`390`	`ffmpegStatus = av_opt_set_int_list(`
`390`	`391`	`filterState.sinkContext,`
`391`	`392`	`"pix_fmts",`
`@@ -685,6 +686,7 @@ int VideoDecoder::getKeyFrameIndexForPts(`
`685`	`686`	`}`
`686`	`687`	`return getKeyFrameIndexForPtsUsingScannedIndex(streamInfo.keyFrames, pts);`
`687`	`688`	`}`
	`689`	`+`
`688`	`690`	`/*`
`689`	`691`	`Videos have I frames and non-I frames (P and B frames). Non-I frames need data`
`690`	`692`	`from the previous I frame to be decoded.`
Original file line number	Diff line number	Diff line change
`@@ -280,6 +280,7 @@ OpsFrameBatchOutput get_frames_in_range(`
`280`	`280`	`stream_index, start, stop, step.value_or(1));`
`281`	`281`	`return makeOpsFrameBatchOutput(result);`
`282`	`282`	`}`
	`283`	`+`
`283`	`284`	`OpsFrameBatchOutput get_frames_by_pts(`
`284`	`285`	`at::Tensor& decoder,`
`285`	`286`	`int64_t stream_index,`
Original file line number	Diff line number	Diff line change
`@@ -57,6 +57,7 @@ class VideoDecoderTest : public testing::TestWithParam<bool> {`
`57`	`57`	`filepath, VideoDecoder::SeekMode::approximate);`
`58`	`58`	`}`
`59`	`59`	`}`
	`60`	`+`
`60`	`61`	`std::string content_;`
`61`	`62`	`};`
`62`	`63`