Merge branch 'main' of https://github.com/meta-pytorch/torchcodec into python314_on_ci

Dan-Flores · Dan-Flores · commit 7ecd09dc6fc2 · 2025-12-01T14:25:29.000-05:00
diff --git a/src/torchcodec/_core/SingleStreamDecoder.cpp b/src/torchcodec/_core/SingleStreamDecoder.cpp
@@ -1088,32 +1088,17 @@ void SingleStreamDecoder::setCursor(int64_t pts) {
   cursor_ = pts;
 }
 
-/*
-Videos have I frames and non-I frames (P and B frames). Non-I frames need data
-from the previous I frame to be decoded.
-
-Imagine the cursor is at a random frame with PTS=lastDecodedAvFramePts (x for
-brevity) and we wish to seek to a user-specified PTS=y.
-
-If y < x, we don't have a choice but to seek backwards to the highest I frame
-before y.
-
-If y > x, we have two choices:
-
-1. We could keep decoding forward until we hit y. Illustrated below:
-
-I    P     P    P    I    P    P    P    I    P    P    I    P    P    I    P
-                          x         y
-
-2. We could try to jump to an I frame between x and y (indicated by j below).
-And then start decoding until we encounter y. Illustrated below:
-
-I    P     P    P    I    P    P    P    I    P    P    I    P    P    I    P
-                          x              j         y
-
-(2) is more efficient than (1) if there is an I frame between x and y.
-*/
 bool SingleStreamDecoder::canWeAvoidSeeking() const {
+  // Returns true if we can avoid seeking in the AVFormatContext based on
+  // heuristics that rely on the target cursor_ and the last decoded frame.
+  // Seeking is expensive, so we try to avoid it when possible.
+  // Note that this function itself isn't always that cheap to call: in
+  // particular the calls to getKeyFrameIndexForPts below in approximate mode
+  // are sometimes slow.
+  // TODO we should understand why (is it because it reads the file?) and
+  // potentially optimize it. E.g. we may not want to ever seek, or even *check*
+  // if we need to seek in some cases, like if we're going to decode 80% of the
+  // frames anyway.
   const StreamInfo& streamInfo = streamInfos_.at(activeStreamIndex_);
   if (streamInfo.avMediaType == AVMEDIA_TYPE_AUDIO) {
     // For audio, we only need to seek if a backwards seek was requested
@@ -1136,13 +1121,34 @@ bool SingleStreamDecoder::canWeAvoidSeeking() const {
     // implement caching.
     return false;
   }
-  // We are seeking forwards.
-  // We can only skip a seek if both lastDecodedAvFramePts and
-  // cursor_ share the same keyframe.
-  int lastDecodedAvFrameIndex = getKeyFrameIndexForPts(lastDecodedAvFramePts_);
+  // We are seeking forwards. We can skip a seek if both the last decoded frame
+  // and cursor_ share the same keyframe:
+  // Videos have I frames and non-I frames (P and B frames). Non-I frames need
+  // data from the previous I frame to be decoded.
+  //
+  // Imagine the cursor is at a random frame with PTS=lastDecodedAvFramePts (x
+  // for brevity) and we wish to seek to a user-specified PTS=y.
+  //
+  // If y < x, we don't have a choice but to seek backwards to the highest I
+  // frame before y.
+  //
+  // If y > x, we have two choices:
+  //
+  // 1. We could keep decoding forward until we hit y. Illustrated below:
+  //
+  // I    P     P    P    I    P    P    P    I    P    P    I    P
+  //                           x         y
+  //
+  // 2. We could try to jump to an I frame between x and y (indicated by j
+  // below). And then start decoding until we encounter y. Illustrated below:
+  //
+  // I    P     P    P    I    P    P    P    I    P    P    I    P
+  //                           x              j         y
+  // (2) is only more efficient than (1) if there is an I frame between x and y.
+  int lastKeyFrameIndex = getKeyFrameIndexForPts(lastDecodedAvFramePts_);
   int targetKeyFrameIndex = getKeyFrameIndexForPts(cursor_);
-  return lastDecodedAvFrameIndex >= 0 && targetKeyFrameIndex >= 0 &&
-      lastDecodedAvFrameIndex == targetKeyFrameIndex;
+  return lastKeyFrameIndex >= 0 && targetKeyFrameIndex >= 0 &&
+      lastKeyFrameIndex == targetKeyFrameIndex;
 }
 
 // This method looks at currentPts and desiredPts and seeks in the
diff --git a/test/conftest.py b/test/conftest.py
@@ -4,12 +4,17 @@
 import pytest
 import torch
 
+from .utils import in_fbcode
+
 
 def pytest_configure(config):
     # register an additional marker (see pytest_collection_modifyitems)
     config.addinivalue_line(
         "markers", "needs_cuda: mark for tests that rely on a CUDA device"
     )
+    config.addinivalue_line(
+        "markers", "needs_ffmpeg_cli: mark for tests that rely on ffmpeg"
+    )
 
 
 def pytest_collection_modifyitems(items):
@@ -28,6 +33,15 @@ def pytest_collection_modifyitems(items):
         # 'needs_cuda' mark, and the ones with device == 'cpu' won't have the
         # mark.
         needs_cuda = item.get_closest_marker("needs_cuda") is not None
+        needs_ffmpeg_cli = item.get_closest_marker("needs_ffmpeg_cli") is not None
+        has_skip_marker = item.get_closest_marker("skip") is not None
+        has_skipif_marker = item.get_closest_marker("skipif") is not None
+
+        if in_fbcode():
+            # fbcode doesn't like skipping tests, so instead we just don't collect the tests
+            # so that they don't even "exist", hence the continue statements.
+            if needs_ffmpeg_cli or has_skip_marker or has_skipif_marker:
+                continue
 
         if (
             needs_cuda
diff --git a/test/test_decoders.py b/test/test_decoders.py
@@ -29,6 +29,7 @@
     BT709_FULL_RANGE,
     cuda_version_used_for_building_torch,
     get_ffmpeg_major_version,
+    get_python_version,
     H264_10BITS,
     H265_10BITS,
     H265_VIDEO,
@@ -39,6 +40,7 @@
     NASA_AUDIO_MP3_44100,
     NASA_VIDEO,
     needs_cuda,
+    needs_ffmpeg_cli,
     psnr,
     SINE_MONO_S16,
     SINE_MONO_S32,
@@ -1146,6 +1148,10 @@ def test_get_key_frame_indices(self, device):
 
     # TODO investigate why this fails internally.
     @pytest.mark.skipif(in_fbcode(), reason="Compile test fails internally.")
+    @pytest.mark.skipif(
+        get_python_version() >= (3, 14),
+        reason="torch.compile is not supported on Python 3.14+",
+    )
     @pytest.mark.parametrize("device", all_supported_devices())
     def test_compile(self, device):
         decoder, device = make_video_decoder(NASA_VIDEO.path, device=device)
@@ -1311,10 +1317,7 @@ def setup_frame_mappings(tmp_path, file, stream_index):
             # Return the custom frame mappings as a JSON string
             return custom_frame_mappings
 
-    @pytest.mark.skipif(
-        in_fbcode(),
-        reason="ffprobe not available internally",
-    )
+    @needs_ffmpeg_cli
     @pytest.mark.parametrize("device", all_supported_devices())
     @pytest.mark.parametrize("stream_index", [0, 3])
     @pytest.mark.parametrize(
@@ -1361,10 +1364,7 @@ def test_custom_frame_mappings_json_and_bytes(
             ),
         )
 
-    @pytest.mark.skipif(
-        in_fbcode(),
-        reason="ffprobe not available internally",
-    )
+    @needs_ffmpeg_cli
     @pytest.mark.parametrize("device", all_supported_devices())
     @pytest.mark.parametrize(
         "custom_frame_mappings,expected_match",
diff --git a/test/test_encoders.py b/test/test_encoders.py
@@ -17,9 +17,9 @@
     assert_tensor_close_on_at_least,
     get_ffmpeg_major_version,
     get_ffmpeg_minor_version,
-    in_fbcode,
     IS_WINDOWS,
     NASA_AUDIO_MP3,
+    needs_ffmpeg_cli,
     psnr,
     SINE_MONO_S32,
     TEST_SRC_2_720P,
@@ -217,13 +217,22 @@ def test_bad_input_parametrized(self, method, tmp_path):
                 getattr(decoder, method)(**valid_params, num_channels=num_channels)
 
     @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like"))
-    @pytest.mark.parametrize("format", ("wav", "flac"))
+    @pytest.mark.parametrize(
+        "format",
+        [
+            pytest.param(
+                "wav",
+                marks=pytest.mark.skipif(
+                    get_ffmpeg_major_version() == 4,
+                    reason="Swresample with FFmpeg 4 doesn't work on wav files",
+                ),
+            ),
+            "flac",
+        ],
+    )
     def test_round_trip(self, method, format, tmp_path):
         # Check that decode(encode(samples)) == samples on lossless formats
 
-        if get_ffmpeg_major_version() == 4 and format == "wav":
-            pytest.skip("Swresample with FFmpeg 4 doesn't work on wav files")
-
         asset = NASA_AUDIO_MP3
         source_samples = self.decode(asset).data
 
@@ -249,7 +258,7 @@ def test_round_trip(self, method, format, tmp_path):
             self.decode(encoded_source).data, source_samples, rtol=rtol, atol=atol
         )
 
-    @pytest.mark.skipif(in_fbcode(), reason="TODO: enable ffmpeg CLI")
+    @needs_ffmpeg_cli
     @pytest.mark.parametrize("asset", (NASA_AUDIO_MP3, SINE_MONO_S32))
     @pytest.mark.parametrize("bit_rate", (None, 0, 44_100, 999_999_999))
     @pytest.mark.parametrize("num_channels", (None, 1, 2))
@@ -356,17 +365,31 @@ def test_against_cli(
     @pytest.mark.parametrize("asset", (NASA_AUDIO_MP3, SINE_MONO_S32))
     @pytest.mark.parametrize("bit_rate", (None, 0, 44_100, 999_999_999))
     @pytest.mark.parametrize("num_channels", (None, 1, 2))
-    @pytest.mark.parametrize("format", ("mp3", "wav", "flac"))
+    @pytest.mark.parametrize(
+        "format",
+        [
+            # TODO: https://github.com/pytorch/torchcodec/issues/837
+            pytest.param(
+                "mp3",
+                marks=pytest.mark.skipif(
+                    IS_WINDOWS and get_ffmpeg_major_version() <= 5,
+                    reason="Encoding mp3 on Windows is weirdly buggy",
+                ),
+            ),
+            pytest.param(
+                "wav",
+                marks=pytest.mark.skipif(
+                    get_ffmpeg_major_version() == 4,
+                    reason="Swresample with FFmpeg 4 doesn't work on wav files",
+                ),
+            ),
+            "flac",
+        ],
+    )
     @pytest.mark.parametrize("method", ("to_tensor", "to_file_like"))
     def test_against_to_file(
         self, asset, bit_rate, num_channels, format, tmp_path, method
     ):
-        if get_ffmpeg_major_version() == 4 and format == "wav":
-            pytest.skip("Swresample with FFmpeg 4 doesn't work on wav files")
-        if IS_WINDOWS and get_ffmpeg_major_version() <= 5 and format == "mp3":
-            # TODO: https://github.com/pytorch/torchcodec/issues/837
-            pytest.skip("Encoding mp3 on Windows is weirdly buggy")
-
         encoder = AudioEncoder(self.decode(asset).data, sample_rate=asset.sample_rate)
 
         params = dict(bit_rate=bit_rate, num_channels=num_channels)
@@ -847,16 +870,27 @@ def encode_to_tensor(frames):
         )
 
     @pytest.mark.parametrize(
-        "format", ("mov", "mp4", "mkv", pytest.param("webm", marks=pytest.mark.slow))
+        "format",
+        [
+            "mov",
+            "mp4",
+            "mkv",
+            pytest.param(
+                "webm",
+                marks=[
+                    pytest.mark.slow,
+                    pytest.mark.skipif(
+                        get_ffmpeg_major_version() == 4
+                        or (IS_WINDOWS and get_ffmpeg_major_version() in (6, 7)),
+                        reason="Codec for webm is not available in this FFmpeg installation.",
+                    ),
+                ],
+            ),
+        ],
     )
     @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like"))
     def test_round_trip(self, tmp_path, format, method):
         # Test that decode(encode(decode(frames))) == decode(frames)
-        ffmpeg_version = get_ffmpeg_major_version()
-        if format == "webm" and (
-            ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7))
-        ):
-            pytest.skip("Codec for webm is not available in this FFmpeg installation.")
         source_frames, frame_rate = self.decode_and_get_frame_rate(TEST_SRC_2_720P.path)
 
         encoder = VideoEncoder(frames=source_frames, frame_rate=frame_rate)
@@ -889,25 +923,29 @@ def test_round_trip(self, tmp_path, format, method):
 
     @pytest.mark.parametrize(
         "format",
-        (
+        [
             "mov",
             "mp4",
             "avi",
             "mkv",
             "flv",
             "gif",
-            pytest.param("webm", marks=pytest.mark.slow),
-        ),
+            pytest.param(
+                "webm",
+                marks=[
+                    pytest.mark.slow,
+                    pytest.mark.skipif(
+                        get_ffmpeg_major_version() == 4
+                        or (IS_WINDOWS and get_ffmpeg_major_version() in (6, 7)),
+                        reason="Codec for webm is not available in this FFmpeg installation.",
+                    ),
+                ],
+            ),
+        ],
     )
     @pytest.mark.parametrize("method", ("to_tensor", "to_file_like"))
     def test_against_to_file(self, tmp_path, format, method):
         # Test that to_file, to_tensor, and to_file_like produce the same results
-        ffmpeg_version = get_ffmpeg_major_version()
-        if format == "webm" and (
-            ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7))
-        ):
-            pytest.skip("Codec for webm is not available in this FFmpeg installation.")
-
         source_frames, frame_rate = self.decode_and_get_frame_rate(TEST_SRC_2_720P.path)
         encoder = VideoEncoder(frames=source_frames, frame_rate=frame_rate)
 
@@ -928,7 +966,7 @@ def test_against_to_file(self, tmp_path, format, method):
             rtol=0,
         )
 
-    @pytest.mark.skipif(in_fbcode(), reason="ffmpeg CLI not available")
+    @needs_ffmpeg_cli
     @pytest.mark.parametrize(
         "format",
         (
@@ -1150,10 +1188,7 @@ def write(self, data):
         ):
             encoder.to_file_like(NoSeekMethod(), format="mp4")
 
-    @pytest.mark.skipif(
-        in_fbcode(),
-        reason="ffprobe not available internally",
-    )
+    @needs_ffmpeg_cli
     @pytest.mark.parametrize(
         "format,codec_spec",
         [
@@ -1181,10 +1216,7 @@ def test_codec_parameter_utilized(self, tmp_path, format, codec_spec):
         ]
         assert actual_codec_spec == codec_spec
 
-    @pytest.mark.skipif(
-        in_fbcode(),
-        reason="ffprobe not available internally",
-    )
+    @needs_ffmpeg_cli
     @pytest.mark.parametrize(
         "codec_spec,codec_impl",
         [
@@ -1227,7 +1259,7 @@ def test_codec_spec_vs_impl_equivalence(self, tmp_path, codec_spec, codec_impl):
         frames_impl = self.decode(impl_output)
         torch.testing.assert_close(frames_spec, frames_impl, rtol=0, atol=0)
 
-    @pytest.mark.skipif(in_fbcode(), reason="ffprobe not available")
+    @needs_ffmpeg_cli
     @pytest.mark.parametrize(
         "profile,colorspace,color_range",
         [
diff --git a/test/test_ops.py b/test/test_ops.py
diff --git a/test/utils.py b/test/utils.py