Encoder: validate pts of encoded frames

NicolasHug · NicolasHug · commit 30a4754e799c · 2025-06-13T15:14:43.000+01:00
diff --git a/src/torchcodec/_frame.py b/src/torchcodec/_frame.py
@@ -125,7 +125,7 @@ class AudioSamples(Iterable):
     pts_seconds: float
     """The :term:`pts` of the first sample, in seconds."""
     duration_seconds: float
-    """The duration of the sampleas, in seconds."""
+    """The duration of the samples, in seconds."""
     sample_rate: int
     """The sample rate of the samples, in Hz."""
 
diff --git a/test/test_encoders.py b/test/test_encoders.py
@@ -1,5 +1,7 @@
+import json
 import re
 import subprocess
+from pathlib import Path
 
 import pytest
 import torch
@@ -16,6 +18,49 @@
 )
 
 
+def validate_frames_properties(*, actual: Path, expected: Path):
+    """Assert that two audio files have the same per-frame ffprobe properties.
+
+    Runs ``ffprobe -show_frames`` on the first audio stream (``a:0``) of each
+    file and compares the reported frames pairwise. Every property present in
+    a frame of ``actual`` (pts, nb_samples, etc.) must have the same value in
+    the matching frame of ``expected``, except ``pkt_pos`` which is skipped.
+
+    Raises ``AssertionError`` on the first mismatch, and
+    ``subprocess.CalledProcessError`` if ffprobe exits with an error.
+    """
+
+    def probe_frames(path: Path) -> list:
+        cmd = ["ffprobe", "-v", "error", "-hide_banner", "-select_streams", "a:0"]
+        cmd += ["-show_frames", "-of", "json", str(path)]
+        out = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout
+        frames = json.loads(out)["frames"]
+        # A list of dicts: each dict corresponds to a frame and each key-value
+        # pair to a frame property, similar to the AVFrame fields.
+        assert isinstance(frames, list)
+        assert all(isinstance(d, dict) for d in frames)
+        return frames
+
+    frames_actual, frames_expected = probe_frames(actual), probe_frames(expected)
+    num_actual, num_expected = len(frames_actual), len(frames_expected)
+    assert num_actual == num_expected, f"{num_actual} != {num_expected} frames"
+
+    for frame_index, (d_actual, d_expected) in enumerate(
+        zip(frames_actual, frames_expected)
+    ):
+        for prop, value in d_actual.items():
+            if prop == "pkt_pos":
+                continue  # TODO this probably matters
+            # Fail with a readable message rather than a bare KeyError.
+            assert (
+                prop in d_expected
+            ), f"{prop} is missing from expected frame {frame_index}"
+            assert value == d_expected[prop], (
+                f"{prop} value is different for frame {frame_index}: "
+                f"{value!r} != {d_expected[prop]!r}"
+            )
+
+
 class TestAudioEncoder:
 
     def decode(self, source) -> torch.Tensor:
@@ -162,13 +207,19 @@ def test_against_cli(self, asset, bit_rate, num_channels, format, method, tmp_pa
             rtol, atol = 0, 1e-3
         else:
             rtol, atol = None, None
+        # TODO should validate `.pts_seconds` and `.duration_seconds` as well
         torch.testing.assert_close(
-            self.decode(encoded_by_ffmpeg),
             self.decode(encoded_by_us),
+            self.decode(encoded_by_ffmpeg),
             rtol=rtol,
             atol=atol,
         )
 
+        if method == "to_file":
+            validate_frames_properties(actual=encoded_by_us, expected=encoded_by_ffmpeg)
+        else:
+            assert method == "to_tensor", "wrong test parametrization!"
+
     @pytest.mark.parametrize("asset", (NASA_AUDIO_MP3, SINE_MONO_S32))
     @pytest.mark.parametrize("bit_rate", (None, 0, 44_100, 999_999_999))
     @pytest.mark.parametrize("num_channels", (None, 1, 2))