Merge branch 'main' of https://github.com/pytorch/torchcodec into doc1

ahmadsharif1 · ahmadsharif1 · commit dfa9fcc8cb93 · 2024-11-10T16:05:19.000-08:00
diff --git a/benchmarks/decoders/benchmark_decoders_library.py b/benchmarks/decoders/benchmark_decoders_library.py
@@ -393,6 +393,8 @@ def retrieve_videos(urls_and_dest_paths):
 
 
 def plot_data(df_data, plot_path):
+    plt.rcParams["font.size"] = 18
+
     # Creating the DataFrame
     df = pd.DataFrame(df_data)
 
@@ -440,9 +442,7 @@ def plot_data(df_data, plot_path):
 
             # Set the title for the subplot
             base_video = Path(video).name.removesuffix(".mp4")
-            ax.set_title(
-                f"video={base_video}\ndecode_pattern={vcount} x {vtype}", fontsize=10
-            )
+            ax.set_title(f"{base_video}\n{vcount} x {vtype}", fontsize=11)
 
             # Plot bars with error bars
             ax.barh(
diff --git a/benchmarks/decoders/benchmark_readme_chart.png b/benchmarks/decoders/benchmark_readme_chart.png
diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp
@@ -203,6 +203,18 @@ VideoDecoder::BatchDecodedOutput::BatchDecodedOutput(
   frames = allocateEmptyHWCTensor(height, width, options.device, numFrames);
 }
 
+// Returns true only when every field of this key matches the corresponding
+// field of `other`. convertFrameToBufferUsingSwsScale() uses this comparison
+// (through operator!=) to decide whether the cached SwsContext must be rebuilt
+// for the current frame.
+bool VideoDecoder::SwsContextKey::operator==(
+    const VideoDecoder::SwsContextKey& other) {
+  return decodedWidth == other.decodedWidth &&
+      decodedHeight == other.decodedHeight &&
+      decodedFormat == other.decodedFormat &&
+      outputWidth == other.outputWidth && outputHeight == other.outputHeight;
+}
+
+// Logical negation of operator== for the same pair of keys.
+bool VideoDecoder::SwsContextKey::operator!=(
+    const VideoDecoder::SwsContextKey& other) {
+  const bool sameKey = operator==(other);
+  return !sameKey;
+}
+
 VideoDecoder::VideoDecoder() {}
 
 void VideoDecoder::initializeDecoder() {
@@ -1339,7 +1351,14 @@ int VideoDecoder::convertFrameToBufferUsingSwsScale(
 
   int expectedOutputHeight = outputTensor.sizes()[0];
   int expectedOutputWidth = outputTensor.sizes()[1];
-  if (activeStream.swsContext.get() == nullptr) {
+  auto curFrameSwsContextKey = SwsContextKey{
+      frame->width,
+      frame->height,
+      frameFormat,
+      expectedOutputWidth,
+      expectedOutputHeight};
+  if (activeStream.swsContext.get() == nullptr ||
+      activeStream.swsContextKey != curFrameSwsContextKey) {
     SwsContext* swsContext = sws_getContext(
         frame->width,
         frame->height,
@@ -1373,6 +1392,7 @@ int VideoDecoder::convertFrameToBufferUsingSwsScale(
         brightness,
         contrast,
         saturation);
+    activeStream.swsContextKey = curFrameSwsContextKey;
     activeStream.swsContext.reset(swsContext);
   }
   SwsContext* swsContext = activeStream.swsContext.get();
diff --git a/src/torchcodec/decoders/_core/VideoDecoder.h b/src/torchcodec/decoders/_core/VideoDecoder.h
@@ -317,6 +317,15 @@ class VideoDecoder {
     AVFilterContext* sourceContext = nullptr;
     AVFilterContext* sinkContext = nullptr;
   };
+  // Describes the parameters a cached SwsContext was created for: the decoded
+  // frame's width, height and pixel format, plus the output width and height.
+  // Members carry defaults so a default-constructed key (e.g. the one embedded
+  // in StreamInfo) never holds indeterminate values.
+  struct SwsContextKey {
+    int decodedWidth = 0;
+    int decodedHeight = 0;
+    AVPixelFormat decodedFormat = AV_PIX_FMT_NONE;
+    int outputWidth = 0;
+    int outputHeight = 0;
+    bool operator==(const SwsContextKey&);
+    bool operator!=(const SwsContextKey&);
+  };
   // Stores information for each stream.
   struct StreamInfo {
     int streamIndex = -1;
@@ -337,6 +346,7 @@ class VideoDecoder {
     ColorConversionLibrary colorConversionLibrary = FILTERGRAPH;
     std::vector<FrameInfo> keyFrames;
     std::vector<FrameInfo> allFrames;
+    SwsContextKey swsContextKey;
     UniqueSwsContext swsContext;
   };
   VideoDecoder();