diff --git a/docs/source/api_ref_transforms.rst b/docs/source/api_ref_transforms.rst
deleted file mode 100644
index 04ef28ab9..000000000
--- a/docs/source/api_ref_transforms.rst
+++ /dev/null
@@ -1,17 +0,0 @@
-.. _transforms:
-
-=====================
-torchcodec.transforms
-=====================
-
-.. currentmodule:: torchcodec.transforms
-
-For a tutorial, see: TODO_DECODER_TRANSFORMS_TUTORIAL.
-
-.. autosummary::
-    :toctree: generated/
-    :nosignatures:
-    :template: dataclass.rst
-
-    DecoderTransform
-    Resize
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 74e8d1298..2318adfd9 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -133,4 +133,3 @@ Encoding
   api_ref_decoders
   api_ref_encoders
   api_ref_samplers
-  api_ref_transforms
diff --git a/src/torchcodec/__init__.py b/src/torchcodec/__init__.py
index 144d3a67f..079db9824 100644
--- a/src/torchcodec/__init__.py
+++ b/src/torchcodec/__init__.py
@@ -9,7 +9,7 @@
 # Note: usort wants to put Frame and FrameBatch after decoders and samplers,
 # but that results in circular import.
 from ._frame import AudioSamples, Frame, FrameBatch  # usort:skip  # noqa
-from . import decoders, encoders, samplers, transforms  # noqa
+from . import decoders, encoders, samplers  # noqa
 
 try:
     # Note that version.py is generated during install.
diff --git a/src/torchcodec/decoders/_video_decoder.py b/src/torchcodec/decoders/_video_decoder.py
index 1b4d4706d..87d79b217 100644
--- a/src/torchcodec/decoders/_video_decoder.py
+++ b/src/torchcodec/decoders/_video_decoder.py
@@ -8,10 +8,10 @@
 import json
 import numbers
 from pathlib import Path
-from typing import List, Literal, Optional, Sequence, Tuple, Union
+from typing import Literal, Optional, Tuple, Union
 
 import torch
-from torch import device as torch_device, nn, Tensor
+from torch import device as torch_device, Tensor
 
 from torchcodec import _core as core, Frame, FrameBatch
 from torchcodec.decoders._decoder_utils import (
@@ -19,7 +19,6 @@
     create_decoder,
     ERROR_REPORTING_INSTRUCTIONS,
 )
-from torchcodec.transforms import DecoderTransform, Resize
 
 
 class VideoDecoder:
@@ -68,11 +67,6 @@ class VideoDecoder:
             probably is. Default: "exact". Read more about this parameter in:
             :ref:`sphx_glr_generated_examples_decoding_approximate_mode.py`
 
-        transforms (sequence of transform objects, optional): Sequence of transforms to be
-            applied to the decoded frames by the decoder itself, in order. Accepts both
-            :class:`~torchcodec.transforms.DecoderTransform` and
-            :class:`~torchvision.transforms.v2.Transform`
-            objects. Read more about this parameter in: TODO_DECODER_TRANSFORMS_TUTORIAL.
         custom_frame_mappings (str, bytes, or file-like object, optional): Mapping of
             frames to their metadata, typically generated via ffprobe. This enables
             accurate frame seeking without requiring a full video scan.
@@ -111,7 +105,6 @@ def __init__(
         num_ffmpeg_threads: int = 1,
         device: Optional[Union[str, torch_device]] = None,
         seek_mode: Literal["exact", "approximate"] = "exact",
-        transforms: Optional[Sequence[Union[DecoderTransform, nn.Module]]] = None,
         custom_frame_mappings: Optional[
             Union[str, bytes, io.RawIOBase, io.BufferedReader]
         ] = None,
@@ -167,7 +160,6 @@ def __init__(
         device = str(device)
 
         device_variant = _get_cuda_backend()
-        transform_specs = _make_transform_specs(transforms)
 
         core.add_video_stream(
             self._decoder,
@@ -176,7 +168,6 @@ def __init__(
             num_threads=num_ffmpeg_threads,
             device=device,
             device_variant=device_variant,
-            transform_specs=transform_specs,
             custom_frame_mappings=custom_frame_mappings_data,
         )
 
@@ -448,78 +439,6 @@ def _get_and_validate_stream_metadata(
     )
 
 
-def _convert_to_decoder_transforms(
-    transforms: Sequence[Union[DecoderTransform, nn.Module]],
-) -> List[DecoderTransform]:
-    """Convert a sequence of transforms that may contain TorchVision transform
-    objects into a list of only TorchCodec transform objects.
-
-    Args:
-        transforms: Squence of transform objects. The objects can be one of two
-            types:
-            1. torchcodec.transforms.DecoderTransform
-            2. torchvision.transforms.v2.Transform, but our type annotation
-               only mentions its base, nn.Module. We don't want to take a
-               hard dependency on TorchVision.
-
-    Returns:
-        List of DecoderTransform objects.
-    """
-    try:
-        from torchvision.transforms import v2
-
-        tv_available = True
-    except ImportError:
-        tv_available = False
-
-    converted_transforms: list[DecoderTransform] = []
-    for transform in transforms:
-        if not isinstance(transform, DecoderTransform):
-            if not tv_available:
-                raise ValueError(
-                    f"The supplied transform, {transform}, is not a TorchCodec "
-                    " DecoderTransform. TorchCodec also accept TorchVision "
-                    "v2 transforms, but TorchVision is not installed."
-                )
-            elif isinstance(transform, v2.Resize):
-                converted_transforms.append(Resize._from_torchvision(transform))
-            else:
-                raise ValueError(
-                    f"Unsupported transform: {transform}. Transforms must be "
-                    "either a TorchCodec DecoderTransform or a TorchVision "
-                    "v2 transform."
-                )
-        else:
-            converted_transforms.append(transform)
-
-    return converted_transforms
-
-
-def _make_transform_specs(
-    transforms: Optional[Sequence[Union[DecoderTransform, nn.Module]]],
-) -> str:
-    """Given a sequence of transforms, turn those into the specification string
-    the core API expects.
-
-    Args:
-        transforms: Optional sequence of transform objects. The objects can be
-            one of two types:
-            1. torchcodec.transforms.DecoderTransform
-            2. torchvision.transforms.v2.Transform, but our type annotation
-               only mentions its base, nn.Module. We don't want to take a
-               hard dependency on TorchVision.
-
-    Returns:
-        String of transforms in the format the core API expects: transform
-        specifications separate by semicolons.
-    """
-    if transforms is None:
-        return ""
-
-    transforms = _convert_to_decoder_transforms(transforms)
-    return ";".join([t._make_transform_spec() for t in transforms])
-
-
 def _read_custom_frame_mappings(
     custom_frame_mappings: Union[str, bytes, io.RawIOBase, io.BufferedReader]
 ) -> tuple[Tensor, Tensor, Tensor]:
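Note for reviewers: the `transforms` parameter and spec-string plumbing removed above are exercised end to end by the deleted tests further down. A minimal sketch of the removed usage, reconstructed from that test code ("video.mp4" is a placeholder path; this no longer runs once this diff lands):

    # Removed public API: decoder-side transforms (sketch, not runnable post-diff;
    # "video.mp4" is a placeholder).
    from torchcodec.decoders import VideoDecoder
    from torchcodec.transforms import Resize
    from torchvision.transforms import v2  # TorchVision v2 transforms were also accepted

    # Both spellings specified the same decode-time resize and produced
    # identical (135, 240) frames:
    decoder = VideoDecoder("video.mp4", transforms=[Resize(size=(135, 240))])
    decoder_tv = VideoDecoder("video.mp4", transforms=[v2.Resize(size=(135, 240))])

    # Internally, _make_transform_specs() serialized the sequence into the
    # core API's spec string, e.g. "resize, 135, 240", with multiple
    # transforms joined by semicolons.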
diff --git a/src/torchcodec/transforms/__init__.py b/src/torchcodec/transforms/__init__.py
deleted file mode 100644
index 9f4a92f81..000000000
--- a/src/torchcodec/transforms/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-from ._decoder_transforms import DecoderTransform, Resize  # noqa
diff --git a/test/test_transform_ops.py b/test/test_transform_ops.py
deleted file mode 100644
index bc42732ef..000000000
--- a/test/test_transform_ops.py
+++ /dev/null
@@ -1,420 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-import contextlib
-
-import json
-import os
-import subprocess
-
-import pytest
-
-import torch
-import torchcodec
-
-from torchcodec._core import (
-    _add_video_stream,
-    add_video_stream,
-    create_from_file,
-    get_frame_at_index,
-    get_json_metadata,
-)
-from torchcodec.decoders import VideoDecoder
-
-from torchvision.transforms import v2
-
-from .utils import (
-    assert_frames_equal,
-    assert_tensor_close_on_at_least,
-    AV1_VIDEO,
-    get_ffmpeg_major_version,
-    get_ffmpeg_minor_version,
-    H265_VIDEO,
-    NASA_VIDEO,
-    needs_cuda,
-    TEST_SRC_2_720P,
-)
-
-
-class TestPublicVideoDecoderTransformOps:
-    @pytest.mark.parametrize(
-        "height_scaling_factor, width_scaling_factor",
-        ((1.5, 1.31), (0.5, 0.71), (0.7, 1.31), (1.5, 0.71), (1.0, 1.0), (2.0, 2.0)),
-    )
-    @pytest.mark.parametrize("video", [NASA_VIDEO, TEST_SRC_2_720P])
-    def test_resize_torchvision(
-        self, video, height_scaling_factor, width_scaling_factor
-    ):
-        height = int(video.get_height() * height_scaling_factor)
-        width = int(video.get_width() * width_scaling_factor)
-
-        # We're using both the TorchCodec object and the TorchVision object to
-        # ensure that they specify exactly the same thing.
-        decoder_resize = VideoDecoder(
-            video.path, transforms=[torchcodec.transforms.Resize(size=(height, width))]
-        )
-        decoder_resize_tv = VideoDecoder(
-            video.path, transforms=[v2.Resize(size=(height, width))]
-        )
-
-        decoder_full = VideoDecoder(video.path)
-
-        num_frames = len(decoder_resize)
-        assert num_frames == len(decoder_full)
-
-        for frame_index in [
-            0,
-            int(num_frames * 0.1),
-            int(num_frames * 0.2),
-            int(num_frames * 0.3),
-            int(num_frames * 0.4),
-            int(num_frames * 0.5),
-            int(num_frames * 0.75),
-            int(num_frames * 0.90),
-            num_frames - 1,
-        ]:
-            frame_resize_tv = decoder_resize_tv[frame_index]
-            frame_resize = decoder_resize[frame_index]
-            assert_frames_equal(frame_resize_tv, frame_resize)
-
-            frame_full = decoder_full[frame_index]
-
-            frame_tv = v2.functional.resize(frame_full, size=(height, width))
-            frame_tv_no_antialias = v2.functional.resize(
-                frame_full, size=(height, width), antialias=False
-            )
-
-            expected_shape = (video.get_num_color_channels(), height, width)
-            assert frame_resize.shape == expected_shape
-            assert frame_tv.shape == expected_shape
-            assert frame_tv_no_antialias.shape == expected_shape
-
-            assert_tensor_close_on_at_least(
-                frame_resize, frame_tv, percentage=99.8, atol=1
-            )
-            torch.testing.assert_close(frame_resize, frame_tv, rtol=0, atol=6)
-
-            if height_scaling_factor < 1 or width_scaling_factor < 1:
-                # Antialias only relevant when down-scaling!
-                with pytest.raises(AssertionError, match="Expected at least"):
-                    assert_tensor_close_on_at_least(
-                        frame_resize, frame_tv_no_antialias, percentage=99, atol=1
-                    )
-                with pytest.raises(AssertionError, match="Tensor-likes are not close"):
-                    torch.testing.assert_close(
-                        frame_resize, frame_tv_no_antialias, rtol=0, atol=6
-                    )
-
-    def test_resize_fails(self):
-        with pytest.raises(
-            ValueError,
-            match=r"must use bilinear interpolation",
-        ):
-            VideoDecoder(
-                NASA_VIDEO.path,
-                transforms=[
-                    v2.Resize(
-                        size=(100, 100), interpolation=v2.InterpolationMode.BICUBIC
-                    )
-                ],
-            )
-
-        with pytest.raises(
-            ValueError,
-            match=r"must have antialias enabled",
-        ):
-            VideoDecoder(
-                NASA_VIDEO.path,
-                transforms=[v2.Resize(size=(100, 100), antialias=False)],
-            )
-
-        with pytest.raises(
-            ValueError,
-            match=r"must have a size specified",
-        ):
-            VideoDecoder(
-                NASA_VIDEO.path, transforms=[v2.Resize(size=None, max_size=100)]
-            )
-
-        with pytest.raises(
-            ValueError,
-            match=r"must have a \(height, width\) pair for the size",
-        ):
-            VideoDecoder(NASA_VIDEO.path, transforms=[v2.Resize(size=(100))])
-
-    def test_transform_fails(self):
-        with pytest.raises(
-            ValueError,
-            match="Unsupported transform",
-        ):
-            VideoDecoder(NASA_VIDEO.path, transforms=[v2.RandomHorizontalFlip(p=1.0)])
-
-
-class TestCoreVideoDecoderTransformOps:
-    def get_num_frames_core_ops(self, video):
-        decoder = create_from_file(str(video.path))
-        add_video_stream(decoder)
-        metadata = get_json_metadata(decoder)
-        metadata_dict = json.loads(metadata)
-        num_frames = metadata_dict["numFramesFromHeader"]
-        assert num_frames is not None
-        return num_frames
-
-    @pytest.mark.parametrize("video", [NASA_VIDEO, H265_VIDEO, AV1_VIDEO])
-    def test_color_conversion_library(self, video):
-        num_frames = self.get_num_frames_core_ops(video)
-
-        filtergraph_decoder = create_from_file(str(video.path))
-        _add_video_stream(
-            filtergraph_decoder,
-            color_conversion_library="filtergraph",
-        )
-
-        swscale_decoder = create_from_file(str(video.path))
-        _add_video_stream(
-            swscale_decoder,
-            color_conversion_library="swscale",
-        )
-
-        for frame_index in [
-            0,
-            int(num_frames * 0.25),
-            int(num_frames * 0.5),
-            int(num_frames * 0.75),
-            num_frames - 1,
-        ]:
-            filtergraph_frame, *_ = get_frame_at_index(
-                filtergraph_decoder, frame_index=frame_index
-            )
-            swscale_frame, *_ = get_frame_at_index(
-                swscale_decoder, frame_index=frame_index
-            )
-
-            assert_frames_equal(filtergraph_frame, swscale_frame)
-
-    @pytest.mark.parametrize("width", [30, 32, 300])
-    @pytest.mark.parametrize("height", [128])
-    def test_color_conversion_library_with_generated_videos(
-        self, tmp_path, width, height
-    ):
-        # We consider filtergraph to be the reference color conversion library.
-        # However the video decoder sometimes uses swscale as that is faster.
-        # The exact color conversion library used is an implementation detail
-        # of the video decoder and depends on the video's width.
-        #
-        # In this test we compare the output of filtergraph (which is the
-        # reference) with the output of the video decoder (which may use
-        # swscale if it chooses for certain video widths) to make sure they are
-        # always the same.
-        video_path = f"{tmp_path}/frame_numbers_{width}x{height}.mp4"
-        # We don't specify a particular encoder because the ffmpeg binary could
-        # be configured with different encoders. For the purposes of this test,
-        # the actual encoder is irrelevant.
-        with contextlib.ExitStack() as stack:
-            ffmpeg_cli = "ffmpeg"
-
-            if os.environ.get("IN_FBCODE_TORCHCODEC") == "1":
-                import importlib.resources
-
-                ffmpeg_cli = stack.enter_context(
-                    importlib.resources.path(__package__, "ffmpeg")
-                )
-
-            command = [
-                ffmpeg_cli,
-                "-y",
-                "-f",
-                "lavfi",
-                "-i",
-                "color=blue",
-                "-pix_fmt",
-                "yuv420p",
-                "-s",
-                f"{width}x{height}",
-                "-frames:v",
-                "1",
-                video_path,
-            ]
-            subprocess.check_call(command)
-
-        decoder = create_from_file(str(video_path))
-        add_video_stream(decoder)
-        metadata = get_json_metadata(decoder)
-        metadata_dict = json.loads(metadata)
-        assert metadata_dict["width"] == width
-        assert metadata_dict["height"] == height
-
-        num_frames = metadata_dict["numFramesFromHeader"]
-        assert num_frames is not None and num_frames == 1
-
-        filtergraph_decoder = create_from_file(str(video_path))
-        _add_video_stream(
-            filtergraph_decoder,
-            color_conversion_library="filtergraph",
-        )
-
-        auto_decoder = create_from_file(str(video_path))
-        add_video_stream(
-            auto_decoder,
-        )
-
-        filtergraph_frame0, *_ = get_frame_at_index(filtergraph_decoder, frame_index=0)
-        auto_frame0, *_ = get_frame_at_index(auto_decoder, frame_index=0)
-        assert_frames_equal(filtergraph_frame0, auto_frame0)
-
-    @needs_cuda
-    def test_scaling_on_cuda_fails(self):
-        decoder = create_from_file(str(NASA_VIDEO.path))
-        with pytest.raises(
-            RuntimeError,
-            match="Transforms are only supported for CPU devices.",
-        ):
-            add_video_stream(decoder, device="cuda", transform_specs="resize, 100, 100")
-
-    def test_transform_fails(self):
-        decoder = create_from_file(str(NASA_VIDEO.path))
-        with pytest.raises(
-            RuntimeError,
-            match="Invalid transform spec",
-        ):
-            add_video_stream(decoder, transform_specs=";")
-
-        with pytest.raises(
-            RuntimeError,
-            match="Invalid transform name",
-        ):
-            add_video_stream(decoder, transform_specs="invalid, 1, 2")
-
-    def test_resize_ffmpeg(self):
-        height = 135
-        width = 240
-        expected_shape = (NASA_VIDEO.get_num_color_channels(), height, width)
-        resize_spec = f"resize, {height}, {width}"
-        resize_filtergraph = f"scale={width}:{height}:flags=bilinear"
-
-        decoder_resize = create_from_file(str(NASA_VIDEO.path))
-        add_video_stream(decoder_resize, transform_specs=resize_spec)
-
-        for frame_index in [17, 230, 389]:
-            frame_resize, *_ = get_frame_at_index(
-                decoder_resize, frame_index=frame_index
-            )
-            frame_ref = NASA_VIDEO.get_frame_data_by_index(
-                frame_index, filters=resize_filtergraph
-            )
-
-            assert frame_resize.shape == expected_shape
-            assert frame_ref.shape == expected_shape
-
-            if get_ffmpeg_major_version() <= 4 and get_ffmpeg_minor_version() <= 1:
-                # FFmpeg version 4.1 and before appear to have a different
-                # resize implementation.
-                torch.testing.assert_close(frame_resize, frame_ref, rtol=0, atol=2)
-            else:
-                assert_frames_equal(frame_resize, frame_ref)
-
-    def test_resize_transform_fails(self):
-        decoder = create_from_file(str(NASA_VIDEO.path))
-        with pytest.raises(
-            RuntimeError,
-            match="must have 3 elements",
-        ):
-            add_video_stream(decoder, transform_specs="resize, 100, 100, 100")
-
-        with pytest.raises(
-            RuntimeError,
-            match="must be a positive integer",
-        ):
-            add_video_stream(decoder, transform_specs="resize, -10, 100")
-
-        with pytest.raises(
-            RuntimeError,
-            match="must be a positive integer",
-        ):
-            add_video_stream(decoder, transform_specs="resize, 100, 0")
-
-        with pytest.raises(
-            RuntimeError,
-            match="cannot be converted to an int",
-        ):
-            add_video_stream(decoder, transform_specs="resize, blah, 100")
-
-        with pytest.raises(
-            RuntimeError,
-            match="out of range",
-        ):
-            add_video_stream(decoder, transform_specs="resize, 100, 1000000000000")
-
-    def test_crop_transform(self):
-        # Note that filtergraph accepts dimensions as (w, h) and we accept them as (h, w).
-        width = 300
-        height = 200
-        x = 50
-        y = 35
-        crop_spec = f"crop, {height}, {width}, {x}, {y}"
-        crop_filtergraph = f"crop={width}:{height}:{x}:{y}:exact=1"
-        expected_shape = (NASA_VIDEO.get_num_color_channels(), height, width)
-
-        decoder_crop = create_from_file(str(NASA_VIDEO.path))
-        add_video_stream(decoder_crop, transform_specs=crop_spec)
-
-        decoder_full = create_from_file(str(NASA_VIDEO.path))
-        add_video_stream(decoder_full)
-
-        for frame_index in [0, 15, 200, 389]:
-            frame_crop, *_ = get_frame_at_index(decoder_crop, frame_index=frame_index)
-            frame_ref = NASA_VIDEO.get_frame_data_by_index(
-                frame_index, filters=crop_filtergraph
-            )
-
-            frame_full, *_ = get_frame_at_index(decoder_full, frame_index=frame_index)
-            frame_tv = v2.functional.crop(
-                frame_full, top=y, left=x, height=height, width=width
-            )
-
-            assert frame_crop.shape == expected_shape
-            assert frame_ref.shape == expected_shape
-            assert frame_tv.shape == expected_shape
-
-            assert_frames_equal(frame_crop, frame_ref)
-            assert_frames_equal(frame_crop, frame_tv)
-
-    def test_crop_transform_fails(self):
-
-        with pytest.raises(
-            RuntimeError,
-            match="must have 5 elements",
-        ):
-            decoder = create_from_file(str(NASA_VIDEO.path))
-            add_video_stream(decoder, transform_specs="crop, 100, 100")
-
-        with pytest.raises(
-            RuntimeError,
-            match="must be a positive integer",
-        ):
-            decoder = create_from_file(str(NASA_VIDEO.path))
-            add_video_stream(decoder, transform_specs="crop, -10, 100, 100, 100")
-
-        with pytest.raises(
-            RuntimeError,
-            match="cannot be converted to an int",
-        ):
-            decoder = create_from_file(str(NASA_VIDEO.path))
-            add_video_stream(decoder, transform_specs="crop, 100, 100, blah, 100")
-
-        with pytest.raises(
-            RuntimeError,
-            match="x position out of bounds",
-        ):
-            decoder = create_from_file(str(NASA_VIDEO.path))
-            add_video_stream(decoder, transform_specs="crop, 100, 100, 9999, 100")
-
-        with pytest.raises(
-            RuntimeError,
-            match="y position out of bounds",
-        ):
-            decoder = create_from_file(str(NASA_VIDEO.path))
-            add_video_stream(decoder, transform_specs="crop, 999, 100, 100, 100")
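Note for reviewers: the deleted core-op tests above also pin down the `transform_specs` grammar and its FFmpeg filtergraph equivalents. A minimal sketch reconstructed from those tests ("video.mp4" is a placeholder path; this no longer runs once this diff lands):

    # Removed core API: transform_specs (sketch, not runnable post-diff;
    # "video.mp4" is a placeholder).
    from torchcodec._core import add_video_stream, create_from_file

    decoder = create_from_file("video.mp4")

    # Specs take dimensions as (height, width); the equivalent filtergraph
    # strings take (width, height):
    #   "resize, 135, 240"        ->  scale=240:135:flags=bilinear
    #   "crop, 200, 300, 50, 35"  ->  crop=300:200:50:35:exact=1
    add_video_stream(decoder, transform_specs="resize, 135, 240")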