From 1e06ea57a439a061700b2649f952d8635983907a Mon Sep 17 00:00:00 2001
From: Daniel Flores <danielflores3@fb.com>
Date: Mon, 20 Oct 2025 15:38:35 -0700
Subject: [PATCH 1/7] video encoder python file

---
 src/torchcodec/encoders/__init__.py       |  1 +
 src/torchcodec/encoders/_video_encoder.py | 97 +++++++++++++++++++++++
 2 files changed, 98 insertions(+)
 create mode 100644 src/torchcodec/encoders/_video_encoder.py

diff --git a/src/torchcodec/encoders/__init__.py b/src/torchcodec/encoders/__init__.py
index 51f5942b3..cf78fe427 100644
--- a/src/torchcodec/encoders/__init__.py
+++ b/src/torchcodec/encoders/__init__.py
@@ -1 +1,2 @@
 from ._audio_encoder import AudioEncoder  # noqa
+from ._video_encoder import VideoEncoder  # noqa
diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py
new file mode 100644
index 000000000..fd354abeb
--- /dev/null
+++ b/src/torchcodec/encoders/_video_encoder.py
@@ -0,0 +1,97 @@
+from pathlib import Path
+from typing import Union
+
+import torch
+from torch import Tensor
+
+from torchcodec import _core
+
+
+class VideoEncoder:
+    """A video encoder.
+
+    Args:
+        frames (``torch.Tensor``): The frames to encode. This must be a 4D
+            tensor of shape ``(N, C, H, W)`` where N is the number of frames,
+            C is 3 channels (RGB), H is height, and W is width.
+            A 3D tensor of shape ``(C, H, W)`` is also accepted as a single RGB frame.
+            Values must be uint8 in the range ``[0, 255]``.
+        frame_rate (int): The frame rate to use when encoding the
+            **input** ``frames``.
+    """
+
+    def __init__(self, frames: Tensor, *, frame_rate: int):
+        torch._C._log_api_usage_once("torchcodec.encoders.VideoEncoder")
+        if not isinstance(frames, Tensor):
+            raise ValueError(f"Expected frames to be a Tensor, got {type(frames) = }.")
+        if frames.ndim == 3:
+            # make it 4D and assume single RGB frame, CHW -> NCHW
+            frames = torch.unsqueeze(frames, 0)
+        if frames.ndim != 4:
+            raise ValueError(f"Expected 3D or 4D frames, got {frames.shape = }.")
+        if frames.dtype != torch.uint8:
+            raise ValueError(f"Expected uint8 frames, got {frames.dtype = }.")
+        if frame_rate <= 0:
+            raise ValueError(f"{frame_rate = } must be > 0.")
+
+        self._frames = frames
+        self._frame_rate = frame_rate
+
+    def to_file(
+        self,
+        dest: Union[str, Path],
+    ) -> None:
+        """Encode frames into a file.
+
+        Args:
+            dest (str or ``pathlib.Path``): The path to the output file, e.g.
+                ``video.mp4``. The extension of the file determines the video
+                format and container.
+        """
+        _core.encode_video_to_file(
+            frames=self._frames,
+            frame_rate=self._frame_rate,
+            filename=str(dest),
+        )
+
+    def to_tensor(
+        self,
+        format: str,
+    ) -> Tensor:
+        """Encode frames into raw bytes, as a 1D uint8 Tensor.
+
+        Args:
+            format (str): The format of the encoded frames, e.g. "mp4", "mov",
+            "mkv", "avi", "webm", "flv", or "gif"
+
+        Returns:
+            Tensor: The raw encoded bytes as a 1D uint8 Tensor.
+        """
+        return _core.encode_video_to_tensor(
+            frames=self._frames,
+            frame_rate=self._frame_rate,
+            format=format,
+        )
+
+    def to_file_like(
+        self,
+        file_like,
+        format: str,
+    ) -> None:
+        """Encode frames into a file-like object.
+
+        Args:
+            file_like: A file-like object that supports ``write()`` and
+                ``seek()`` methods, such as io.BytesIO(), an open file in binary
+                write mode, etc. Methods must have the following signature:
+                ``write(data: bytes) -> int`` and ``seek(offset: int, whence:
+                int = 0) -> int``.
+            format (str): The format of the encoded frames, e.g. "mp4", "mov",
+                "mkv", "avi", "webm", "flv", or "gif".
+        """
+        _core.encode_video_to_file_like(
+            frames=self._frames,
+            frame_rate=self._frame_rate,
+            format=format,
+            file_like=file_like,
+        )

From 1e7dc3468b06998207f695afefedd3a6cde9465c Mon Sep 17 00:00:00 2001
From: Daniel Flores <danielflores3@fb.com>
Date: Mon, 20 Oct 2025 15:42:03 -0700
Subject: [PATCH 2/7] testing

---
 test/test_encoders.py | 116 +++++++++++++++++++++++++++++++++++++++++-
 test/test_ops.py      |  68 ++-----------------------
 2 files changed, 120 insertions(+), 64 deletions(-)

diff --git a/test/test_encoders.py b/test/test_encoders.py
index c5946654d..4f3c0cf76 100644
--- a/test/test_encoders.py
+++ b/test/test_encoders.py
@@ -11,7 +11,7 @@
 import torch
 from torchcodec.decoders import AudioDecoder
 
-from torchcodec.encoders import AudioEncoder
+from torchcodec.encoders import AudioEncoder, VideoEncoder
 
 from .utils import (
     assert_tensor_close_on_at_least,
@@ -564,3 +564,117 @@ def write(self, data):
             RuntimeError, match="File like object must implement a seek method"
         ):
             encoder.to_file_like(NoSeekMethod(), format="wav")
+
+
+class TestVideoEncoder:
+    @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like"))
+    def test_bad_input_parameterized(self, tmp_path, method):
+        if method == "to_file":
+            valid_params = dict(dest=str(tmp_path / "output.mp4"))
+        elif method == "to_tensor":
+            valid_params = dict(format="mp4")
+        elif method == "to_file_like":
+            valid_params = dict(file_like=io.BytesIO(), format="mp4")
+        else:
+            raise ValueError(f"Unknown method: {method}")
+
+        with pytest.raises(
+            ValueError, match="Expected uint8 frames, got frames.dtype = torch.float32"
+        ):
+            encoder = VideoEncoder(
+                frames=torch.rand(5, 3, 64, 64),
+                frame_rate=30,
+            )
+            getattr(encoder, method)(**valid_params)
+
+        with pytest.raises(
+            ValueError, match=r"Expected 3D or 4D frames, got frames.shape = torch.Size"
+        ):
+            encoder = VideoEncoder(
+                frames=torch.zeros(10),
+                frame_rate=30,
+            )
+            getattr(encoder, method)(**valid_params)
+
+        with pytest.raises(
+            RuntimeError, match=r"frame must have 3 channels \(R, G, B\), got 2"
+        ):
+            encoder = VideoEncoder(
+                frames=torch.zeros((5, 2, 64, 64), dtype=torch.uint8),
+                frame_rate=30,
+            )
+            getattr(encoder, method)(**valid_params)
+
+    def test_bad_input(self, tmp_path):
+        encoder = VideoEncoder(
+            frames=torch.zeros((5, 3, 64, 64), dtype=torch.uint8),
+            frame_rate=30,
+        )
+
+        with pytest.raises(
+            RuntimeError,
+            match=r"Couldn't allocate AVFormatContext. The destination file is ./file.bad_extension, check the desired extension\?",
+        ):
+            encoder.to_file("./file.bad_extension")
+
+        with pytest.raises(
+            RuntimeError,
+            match=r"avio_open failed. The destination file is ./bad/path.mp3, make sure it's a valid path\?",
+        ):
+            encoder.to_file("./bad/path.mp3")
+
+        with pytest.raises(
+            RuntimeError,
+            match=r"Couldn't allocate AVFormatContext. Check the desired format\? Got format=bad_format",
+        ):
+            encoder.to_tensor(format="bad_format")
+
+    @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like"))
+    def test_contiguity(self, method, tmp_path):
+        # Ensure that 2 sets of video frames with the same pixel values are encoded
+        # in the same way, regardless of their memory layout. Here we encode 2 equal
+        # frame tensors, one is contiguous while the other is non-contiguous.
+
+        num_frames, channels, height, width = 5, 3, 64, 64
+        contiguous_frames = (
+            (torch.rand(num_frames, channels, height, width) * 255)
+            .to(torch.uint8)
+            .contiguous()
+        )
+        assert contiguous_frames.is_contiguous()
+
+        # Create non-contiguous frames by permuting, calling contiguous to update memory layout,
+        # then permuting back to the initial order
+        non_contiguous_frames = (
+            contiguous_frames.permute(0, 3, 2, 1).contiguous().permute(0, 3, 2, 1)
+        )
+        assert non_contiguous_frames.stride() != contiguous_frames.stride()
+        assert not non_contiguous_frames.is_contiguous()
+
+        torch.testing.assert_close(
+            contiguous_frames, non_contiguous_frames, rtol=0, atol=0
+        )
+
+        def encode_to_tensor(frames):
+            if method == "to_file":
+                dest = str(tmp_path / "output.mp4")
+                VideoEncoder(frames, frame_rate=30).to_file(dest=dest)
+                with open(dest, "rb") as f:
+                    return torch.frombuffer(f.read(), dtype=torch.uint8)
+            elif method == "to_tensor":
+                return VideoEncoder(frames, frame_rate=30).to_tensor(format="mp4")
+            elif method == "to_file_like":
+                file_like = io.BytesIO()
+                VideoEncoder(frames, frame_rate=30).to_file_like(
+                    file_like, format="mp4"
+                )
+                return torch.frombuffer(file_like.getvalue(), dtype=torch.uint8)
+            else:
+                raise ValueError(f"Unknown method: {method}")
+
+        encoded_from_contiguous = encode_to_tensor(contiguous_frames)
+        encoded_from_non_contiguous = encode_to_tensor(non_contiguous_frames)
+
+        torch.testing.assert_close(
+            encoded_from_contiguous, encoded_from_non_contiguous, rtol=0, atol=0
+        )
diff --git a/test/test_ops.py b/test/test_ops.py
index 627829689..075929335 100644
--- a/test/test_ops.py
+++ b/test/test_ops.py
@@ -1152,68 +1152,6 @@ def test_bad_input(self, tmp_path):
 
 
 class TestVideoEncoderOps:
-    # TODO-VideoEncoder: Test encoding against different memory layouts (ex. test_contiguity)
-    # TODO-VideoEncoder: Parametrize test after moving to test_encoders
-    def test_bad_input(self, tmp_path):
-        output_file = str(tmp_path / ".mp4")
-
-        with pytest.raises(
-            RuntimeError, match="frames must have uint8 dtype, got float"
-        ):
-            encode_video_to_file(
-                frames=torch.rand((10, 3, 60, 60), dtype=torch.float),
-                frame_rate=10,
-                filename=output_file,
-            )
-
-        with pytest.raises(
-            RuntimeError, match=r"frames must have 4 dimensions \(N, C, H, W\), got 3"
-        ):
-            encode_video_to_file(
-                frames=torch.randint(high=1, size=(3, 60, 60), dtype=torch.uint8),
-                frame_rate=10,
-                filename=output_file,
-            )
-
-        with pytest.raises(
-            RuntimeError, match=r"frame must have 3 channels \(R, G, B\), got 2"
-        ):
-            encode_video_to_file(
-                frames=torch.randint(high=1, size=(10, 2, 60, 60), dtype=torch.uint8),
-                frame_rate=10,
-                filename=output_file,
-            )
-
-        with pytest.raises(
-            RuntimeError,
-            match=r"Couldn't allocate AVFormatContext. The destination file is ./file.bad_extension, check the desired extension\?",
-        ):
-            encode_video_to_file(
-                frames=torch.randint(high=255, size=(10, 3, 60, 60), dtype=torch.uint8),
-                frame_rate=10,
-                filename="./file.bad_extension",
-            )
-
-        with pytest.raises(
-            RuntimeError,
-            match=r"avio_open failed. The destination file is ./bad/path.mp3, make sure it's a valid path\?",
-        ):
-            encode_video_to_file(
-                frames=torch.randint(high=255, size=(10, 3, 60, 60), dtype=torch.uint8),
-                frame_rate=10,
-                filename="./bad/path.mp3",
-            )
-
-        with pytest.raises(
-            RuntimeError,
-            match=r"Couldn't allocate AVFormatContext. Check the desired format\? Got format=bad_format",
-        ):
-            encode_video_to_tensor(
-                frames=torch.randint(high=255, size=(10, 3, 60, 60), dtype=torch.uint8),
-                frame_rate=10,
-                format="bad_format",
-            )
-
     def decode(self, source=None) -> torch.Tensor:
         return VideoDecoder(source).get_frames_in_range(start=0, stop=60)
 
@@ -1406,7 +1344,9 @@ def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format):
             )
 
     def test_to_file_like_custom_file_object(self):
-        """Test with a custom file-like object that implements write and seek."""
+        """Test to_file_like with a custom file-like object that implements write and seek."""
+        if get_ffmpeg_major_version() == 6:
+            pytest.skip("Skipping round trip test for FFmpeg 6")
 
         class CustomFileObject:
             def __init__(self):
@@ -1437,6 +1377,8 @@ def get_encoded_data(self):
 
     def test_to_file_like_real_file(self, tmp_path):
         """Test to_file_like with a real file opened in binary write mode."""
+        if get_ffmpeg_major_version() == 6:
+            pytest.skip("Skipping round trip test for FFmpeg 6")
         source_frames = self.decode(TEST_SRC_2_720P.path).data
         file_path = tmp_path / "test_file_like.mp4"
 

From cf7b75cc2a8d15def0fa41b0c991ffff47ddbfa0 Mon Sep 17 00:00:00 2001
From: Dan-Flores <danielflores3@fb.com>
Date: Mon, 20 Oct 2025 16:39:32 -0700
Subject: [PATCH 3/7] delete contiguous todo

---
 src/torchcodec/_core/Encoder.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp
index 1d9c2c089..4e5d6a604 100644
--- a/src/torchcodec/_core/Encoder.cpp
+++ b/src/torchcodec/_core/Encoder.cpp
@@ -531,7 +531,6 @@ torch::Tensor validateFrames(const torch::Tensor& frames) {
       frames.sizes()[1] == 3,
       "frame must have 3 channels (R, G, B), got ",
       frames.sizes()[1]);
-  // TODO-VideoEncoder: Investigate if non-contiguous frames can be accepted
   return frames.contiguous();
 }
 

From ee2285eb28294870d7e4f4a5186e1638d15e77b7 Mon Sep 17 00:00:00 2001
From: Dan-Flores <danielflores3@fb.com>
Date: Mon, 27 Oct 2025 11:00:15 -0400
Subject: [PATCH 4/7] use randint suggestion, remove test skips

---
 test/test_encoders.py | 8 +++-----
 test/test_ops.py      | 4 ----
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/test/test_encoders.py b/test/test_encoders.py
index 4f3c0cf76..a73a26967 100644
--- a/test/test_encoders.py
+++ b/test/test_encoders.py
@@ -636,11 +636,9 @@ def test_contiguity(self, method, tmp_path):
         # frame tensors, one is contiguous while the other is non-contiguous.
 
         num_frames, channels, height, width = 5, 3, 64, 64
-        contiguous_frames = (
-            (torch.rand(num_frames, channels, height, width) * 255)
-            .to(torch.uint8)
-            .contiguous()
-        )
+        contiguous_frames = torch.randint(
+            0, 256, size=(num_frames, channels, height, width), dtype=torch.uint8
+        ).contiguous()
         assert contiguous_frames.is_contiguous()
 
         # Create non-contiguous frames by permuting, calling contiguous to update memory layout,
diff --git a/test/test_ops.py b/test/test_ops.py
index 075929335..e798a7a2b 100644
--- a/test/test_ops.py
+++ b/test/test_ops.py
@@ -1345,8 +1345,6 @@ def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format):
 
     def test_to_file_like_custom_file_object(self):
         """Test to_file_like with a custom file-like object that implements write and seek."""
-        if get_ffmpeg_major_version() == 6:
-            pytest.skip("Skipping round trip test for FFmpeg 6")
 
         class CustomFileObject:
             def __init__(self):
@@ -1377,8 +1375,6 @@ def get_encoded_data(self):
 
     def test_to_file_like_real_file(self, tmp_path):
         """Test to_file_like with a real file opened in binary write mode."""
-        if get_ffmpeg_major_version() == 6:
-            pytest.skip("Skipping round trip test for FFmpeg 6")
         source_frames = self.decode(TEST_SRC_2_720P.path).data
         file_path = tmp_path / "test_file_like.mp4"
 

From d14deb8b122bdaf38a0ec3ec53d1f1757daa0cd2 Mon Sep 17 00:00:00 2001
From: Dan-Flores <danielflores3@fb.com>
Date: Mon, 27 Oct 2025 15:41:26 -0400
Subject: [PATCH 5/7] assert contiguity with channels_last

---
 test/test_encoders.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/test_encoders.py b/test/test_encoders.py
index a73a26967..f901fbd22 100644
--- a/test/test_encoders.py
+++ b/test/test_encoders.py
@@ -641,13 +641,13 @@ def test_contiguity(self, method, tmp_path):
         ).contiguous()
         assert contiguous_frames.is_contiguous()
 
-        # Create non-contiguous frames by permuting, calling contiguous to update memory layout,
-        # then permuting back to the initial order
+        # Permute NCHW to NHWC, then update the memory layout, then permute back
         non_contiguous_frames = (
-            contiguous_frames.permute(0, 3, 2, 1).contiguous().permute(0, 3, 2, 1)
+            contiguous_frames.permute(0, 2, 3, 1).contiguous().permute(0, 3, 1, 2)
         )
         assert non_contiguous_frames.stride() != contiguous_frames.stride()
         assert not non_contiguous_frames.is_contiguous()
+        assert non_contiguous_frames.is_contiguous(memory_format=torch.channels_last)
 
         torch.testing.assert_close(
             contiguous_frames, non_contiguous_frames, rtol=0, atol=0

From 7ed7f21ac6dcd91324c33df4963ecca257a79a0b Mon Sep 17 00:00:00 2001
From: Dan-Flores <danielflores3@meta.com>
Date: Wed, 29 Oct 2025 13:54:25 -0400
Subject: [PATCH 6/7] incorporate feedback

---
 src/torchcodec/encoders/_video_encoder.py | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py
index fd354abeb..f6a725278 100644
--- a/src/torchcodec/encoders/_video_encoder.py
+++ b/src/torchcodec/encoders/_video_encoder.py
@@ -14,21 +14,16 @@ class VideoEncoder:
         frames (``torch.Tensor``): The frames to encode. This must be a 4D
             tensor of shape ``(N, C, H, W)`` where N is the number of frames,
             C is 3 channels (RGB), H is height, and W is width.
-            A 3D tensor of shape ``(C, H, W)`` is also accepted as a single RGB frame.
             Values must be uint8 in the range ``[0, 255]``.
-        frame_rate (int): The frame rate to use when encoding the
-            **input** ``frames``.
+        frame_rate (int): The frame rate of the **input** ``frames``. Also defines the encoded **output** frame rate.
     """
 
     def __init__(self, frames: Tensor, *, frame_rate: int):
         torch._C._log_api_usage_once("torchcodec.encoders.VideoEncoder")
         if not isinstance(frames, Tensor):
             raise ValueError(f"Expected frames to be a Tensor, got {type(frames) = }.")
-        if frames.ndim == 3:
-            # make it 4D and assume single RGB frame, CHW -> NCHW
-            frames = torch.unsqueeze(frames, 0)
         if frames.ndim != 4:
-            raise ValueError(f"Expected 3D or 4D frames, got {frames.shape = }.")
+            raise ValueError(f"Expected 4D frames, got {frames.shape = }.")
         if frames.dtype != torch.uint8:
             raise ValueError(f"Expected uint8 frames, got {frames.dtype = }.")
         if frame_rate <= 0:
@@ -46,7 +41,7 @@ def to_file(
         Args:
             dest (str or ``pathlib.Path``): The path to the output file, e.g.
                 ``video.mp4``. The extension of the file determines the video
-                format and container.
+                container format.
         """
         _core.encode_video_to_file(
             frames=self._frames,
@@ -61,7 +56,7 @@ def to_tensor(
         """Encode frames into raw bytes, as a 1D uint8 Tensor.
 
         Args:
-            format (str): The format of the encoded frames, e.g. "mp4", "mov",
+            format (str): The container format of the encoded frames, e.g. "mp4", "mov",
             "mkv", "avi", "webm", "flv", or "gif"
 
         Returns:
@@ -86,7 +81,7 @@ def to_file_like(
                 write mode, etc. Methods must have the following signature:
                 ``write(data: bytes) -> int`` and ``seek(offset: int, whence:
                 int = 0) -> int``.
-            format (str): The format of the encoded frames, e.g. "mp4", "mov",
+            format (str): The container format of the encoded frames, e.g. "mp4", "mov",
                 "mkv", "avi", "webm", "flv", or "gif".
         """
         _core.encode_video_to_file_like(

From b10d80bf1be27d2842a3a8e0e19270cbbe5b7d13 Mon Sep 17 00:00:00 2001
From: Dan-Flores <danielflores3@meta.com>
Date: Wed, 29 Oct 2025 14:25:50 -0400
Subject: [PATCH 7/7] update test_bad_input match text

---
 test/test_encoders.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_encoders.py b/test/test_encoders.py
index f901fbd22..b7223c88a 100644
--- a/test/test_encoders.py
+++ b/test/test_encoders.py
@@ -588,7 +588,7 @@ def test_bad_input_parameterized(self, tmp_path, method):
             getattr(encoder, method)(**valid_params)
 
         with pytest.raises(
-            ValueError, match=r"Expected 3D or 4D frames, got frames.shape = torch.Size"
+            ValueError, match=r"Expected 4D frames, got frames.shape = torch.Size"
         ):
             encoder = VideoEncoder(
                 frames=torch.zeros(10),