diff --git a/benchmarks/benchmark_transforms.py b/benchmarks/benchmark_transforms.py
new file mode 100644
index 00000000000..1511f917d8a
--- /dev/null
+++ b/benchmarks/benchmark_transforms.py
@@ -0,0 +1,259 @@
+#!/usr/bin/env python3
+"""
+Benchmark script for torchvision transforms performance.
+
+This script benchmarks torchvision.transforms.v2 transforms in various
+configurations and compares them against other libraries such as OpenCV,
+PIL, Albumentations, and Kornia.
+
+The pipeline tested: uint8 image -> resize -> convert to float in [0, 1] -> normalize
+"""
+
+import argparse
+import torch
+import random
+import warnings
+from typing import Dict, Any
+import torchvision.transforms.v2.functional as F
+import numpy as np
+from utils import bench, report_stats, print_comparison_table, print_benchmark_info
+
+# Filter out the specific TF32 warning
+warnings.filterwarnings(
+    "ignore",
+    message="Please use the new API settings to control TF32 behavior.*",
+    category=UserWarning,
+    module="torch.backends.cuda",
+)
+
+try:
+    import cv2
+
+    HAS_OPENCV = True
+except ImportError:
+    HAS_OPENCV = False
+
+try:
+    import albumentations as A
+
+    HAS_ALBUMENTATIONS = True
+except ImportError:
+    HAS_ALBUMENTATIONS = False
+
+try:
+    import kornia as K
+    import kornia.augmentation as KA
+
+    HAS_KORNIA = True
+except ImportError:
+    HAS_KORNIA = False
+
+from PIL import Image
+
+# ImageNet normalization constants
+NORM_MEAN = [0.485, 0.456, 0.406]
+NORM_STD = [0.229, 0.224, 0.225]
+
+
+def torchvision_pipeline(images: torch.Tensor, target_size: int) -> torch.Tensor:
+    images = F.resize(
+        images, size=(target_size, target_size), interpolation=F.InterpolationMode.BILINEAR, antialias=True
+    )
+    images = F.to_dtype(images, dtype=torch.float32, scale=True)
+    images = F.normalize(images, mean=NORM_MEAN, std=NORM_STD)
+    return images
+
+
+def opencv_pipeline(image: np.ndarray, target_size: int) -> torch.Tensor:
+    img = cv2.resize(image, (target_size, target_size), interpolation=cv2.INTER_LINEAR)  # no antialias in OpenCV
+    img = img.astype(np.float32) / 255.0
+    img = (img - np.array(NORM_MEAN)) / np.array(NORM_STD)
+    img = img.transpose(2, 0, 1)  # HWC -> CHW
+    return torch.from_numpy(img)
+
+
+def pil_pipeline(image: Image.Image, target_size: int) -> torch.Tensor:
+    img = image.resize((target_size, target_size), Image.BILINEAR)  # PIL forces antialias
+    img = F.pil_to_tensor(img)
+    img = F.to_dtype(img, dtype=torch.float32, scale=True)
+    img = F.normalize(img, mean=NORM_MEAN, std=NORM_STD)
+    return img
+
+
+def albumentations_pipeline(image: np.ndarray, target_size: int) -> torch.Tensor:
+    transform = A.Compose(
+        [
+            A.Resize(target_size, target_size, interpolation=cv2.INTER_LINEAR),
+            A.Normalize(mean=NORM_MEAN, std=NORM_STD, max_pixel_value=255.0),
+        ]
+    )
+    img = transform(image=image)["image"]
+    img = torch.from_numpy(img).permute(2, 0, 1)
+    return img
+
+
+def kornia_pipeline(image: torch.Tensor, target_size: int) -> torch.Tensor:
+    # Kornia expects float tensors in [0, 1] range
+    # TODO check that this is needed?
+    img = image.float() / 255.0
+    img = img.unsqueeze(0)  # Add batch dimension for kornia
+
+    img = K.geometry.transform.resize(img, (target_size, target_size), interpolation="bilinear")
+
+    img = K.enhance.normalize(img, mean=torch.tensor(NORM_MEAN), std=torch.tensor(NORM_STD))
+
+    return img.squeeze(0)  # Remove batch dimension
+
+
+# TODO double check that this works as expected: no graph break, and no issues with dynamic shapes
+compiled_torchvision_pipeline = torch.compile(torchvision_pipeline, mode="default", fullgraph=True, dynamic=True)
+
+
+def run_benchmark(args) -> Dict[str, Any]:
+    backend = args.backend.lower()
+
+    device = args.device.lower()
+    # Check device compatibility
+    if device == "cuda" and backend not in ["tv", "tv-compiled"]:
+        raise RuntimeError(
+            f"CUDA device not supported for {backend} backend. Only 'tv' and 'tv-compiled' support CUDA."
+        )
+
+    if device == "cuda" and not torch.cuda.is_available():
+        raise RuntimeError("CUDA not available. Install cuda-enabled torch and torchvision, or use 'cpu' device.")
+
+    if backend == "opencv" and not HAS_OPENCV:
+        raise RuntimeError("OpenCV not available. Install with: pip install opencv-python")
+    if backend == "albumentations" and not HAS_ALBUMENTATIONS:
+        raise RuntimeError("Albumentations not available. Install with: pip install albumentations")
+    if backend == "kornia" and not HAS_KORNIA:
+        raise RuntimeError("Kornia not available. Install with: pip install kornia")
+
+    if args.verbose:
+        backend_display = args.backend.upper()
+        print(f"\n=== {backend_display} ===")
+        print(f"Device: {device}, Threads: {args.num_threads}, Batch size: {args.batch_size}")
+
+        memory_format = torch.channels_last if args.contiguity == "CL" else torch.contiguous_format
+        print(f"Memory format: {'channels_last' if memory_format == torch.channels_last else 'channels_first'}")
+
+    if backend == "tv":
+        torch.set_num_threads(args.num_threads)
+        pipeline = torchvision_pipeline
+    elif backend == "tv-compiled":
+        torch.set_num_threads(args.num_threads)
+        pipeline = compiled_torchvision_pipeline
+    elif backend == "opencv":
+        cv2.setNumThreads(args.num_threads)
+        pipeline = opencv_pipeline
+    elif backend == "pil":
+        torch.set_num_threads(args.num_threads)
+        pipeline = pil_pipeline
+    elif backend == "albumentations":
+        cv2.setNumThreads(args.num_threads)
+        pipeline = albumentations_pipeline
+    elif backend == "kornia":
+        torch.set_num_threads(args.num_threads)
+        pipeline = kornia_pipeline
+
+    def generate_test_images():
+        height = random.randint(args.min_size, args.max_size)
+        width = random.randint(args.min_size, args.max_size)
+        images = torch.randint(0, 256, (args.batch_size, 3, height, width), dtype=torch.uint8)
+
+        memory_format = torch.channels_last if args.contiguity == "CL" else torch.contiguous_format
+        if memory_format == torch.channels_last:
+            images = images.to(memory_format=torch.channels_last)
+
+        # Move to device for torchvision backends
+        if backend in ["tv", "tv-compiled"]:
+            images = images.to(device)
+
+        if args.batch_size == 1:
+            images = images[0]
+
+        if backend == "opencv":
+            if args.batch_size > 1:
+                raise ValueError("Batches not supported in OpenCV pipeline")
+            # TODO double check that contiguity requirement is respected for numpy array
+            images = images.numpy().transpose(1, 2, 0)
+        elif backend == "pil":
+            if args.batch_size > 1:
+                raise ValueError("Batches not supported in PIL pipeline")
+            # Convert to PIL Image (CHW -> HWC)
+            images = images.numpy().transpose(1, 2, 0)
+            images = Image.fromarray(images)
+        elif backend == "albumentations":
"albumentations": + if args.batch_size > 1: + # TODO is that true???? + raise ValueError("Batches not supported in Albumentations pipeline") + images = images.numpy().transpose(1, 2, 0) + elif backend == "kornia": + if args.batch_size > 1: + # TODO is that true???? + raise ValueError("Batches not supported in Kornia pipeline") + + return images + + times = bench( + lambda images: pipeline(images, args.target_size), + data_generator=generate_test_images, + num_exp=args.num_exp, + warmup=args.warmup, + ) + + stats = report_stats(times, "ms", args.verbose) + return {"backend": args.backend, "stats": stats} + + +def main(): + parser = argparse.ArgumentParser(description="Benchmark torchvision transforms") + parser.add_argument("--num-exp", type=int, default=100, help="Number of experiments we average over") + parser.add_argument( + "--warmup", type=int, default=10, help="Number of warmup runs before running the num-exp experiments" + ) + parser.add_argument( + "--target-size", type=int, default=224, help="size parameter of the Resize step, for both H and W." + ) + parser.add_argument("--min-size", type=int, default=128, help="Minimum input image size for random generation") + parser.add_argument("--max-size", type=int, default=512, help="Maximum input image size for random generation") + parser.add_argument( + "--num-threads", type=int, default=1, help="Number of intra-op threads as set with torch.set_num_threads() & Co" + ) + parser.add_argument( + "--batch-size", type=int, default=1, help="Batch size. 1 means single 3D image without a batch dimension" + ) + parser.add_argument( + "--contiguity", + choices=["CL", "CF"], + default="CF", + help="Memory format: CL (channels_last) or CF (channels_first, i.e. contiguous)", + ) + all_backends = ["tv", "tv-compiled", "opencv", "pil", "albumentations", "kornia"] + parser.add_argument( + "--backend", type=str.lower, choices=all_backends + ["all"], default="all", help="Backend to benchmark" + ) + parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output") + parser.add_argument("--device", type=str, default="cpu", help="Device to use: cpu or cuda (default: cpu)") + + args = parser.parse_args() + + print_benchmark_info(args) + + backends_to_run = all_backends if args.backend.lower() == "all" else [args.backend] + results = [] + + for backend in backends_to_run: + args.backend = backend + try: + result = run_benchmark(args) + results.append(result) + except Exception as e: + print(f"ERROR with {backend}: {e}") + + if len(results) > 1: + print_comparison_table(results) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/utils.py b/benchmarks/utils.py new file mode 100644 index 00000000000..cc157065840 --- /dev/null +++ b/benchmarks/utils.py @@ -0,0 +1,140 @@ +""" +Utility functions for benchmarking transforms. +""" + +import torch +import torchvision +from time import perf_counter_ns +from typing import Callable, List, Dict, Any +from tabulate import tabulate + +try: + import cv2 + + HAS_OPENCV = True +except ImportError: + HAS_OPENCV = False + +try: + import albumentations as A + + HAS_ALBUMENTATIONS = True +except ImportError: + HAS_ALBUMENTATIONS = False + +try: + import kornia as K + + HAS_KORNIA = True +except ImportError: + HAS_KORNIA = False + +from PIL import Image + + +def bench(f: Callable, data_generator: Callable, num_exp: int, warmup: int) -> torch.Tensor: + """ + Benchmark function execution time with fresh data for each experiment. 
+
+    Args:
+        f: Function to benchmark
+        data_generator: Callable that returns fresh data for each experiment
+        num_exp: Number of experiments to run
+        warmup: Number of warmup runs
+
+    Returns:
+        Tensor of execution times in nanoseconds
+    """
+    for _ in range(warmup):
+        data = data_generator()
+        f(data)
+
+    times = []
+    for _ in range(num_exp):
+        data = data_generator()
+        start = perf_counter_ns()
+        result = f(data)
+        end = perf_counter_ns()
+        times.append(end - start)
+        del result
+
+    return torch.tensor(times, dtype=torch.float32)
+
+
+def report_stats(times: torch.Tensor, unit: str, verbose: bool = True) -> Dict[str, float]:
+    mul = {
+        "ns": 1,
+        "µs": 1e-3,
+        "ms": 1e-6,
+        "s": 1e-9,
+    }[unit]
+
+    times = times * mul
+    stats = {
+        "std": times.std().item(),
+        "median": times.median().item(),
+        "mean": times.mean().item(),
+        "min": times.min().item(),
+        "max": times.max().item(),
+    }
+
+    if verbose:
+        print(f" Median: {stats['median']:.2f}{unit} ± {stats['std']:.2f}{unit}")
+        print(f" Mean: {stats['mean']:.2f}{unit}, Min: {stats['min']:.2f}{unit}, Max: {stats['max']:.2f}{unit}")
+
+    return stats
+
+
+def print_comparison_table(results: List[Dict[str, Any]]) -> None:
+    torchvision_median = next((r["stats"]["median"] for r in results if r["backend"].lower() == "tv"), None)
+
+    table_data = []
+    for result in results:
+        stats = result["stats"]
+        relative = f"{stats['median'] / torchvision_median:.2f}x" if torchvision_median else "N/A"
+
+        table_data.append(
+            {
+                "Backend": result["backend"],
+                "Median (ms)": f"{stats['median']:.2f}",
+                "Std (ms)": f"{stats['std']:.2f}",
+                "Mean (ms)": f"{stats['mean']:.2f}",
+                "Min (ms)": f"{stats['min']:.2f}",
+                "Max (ms)": f"{stats['max']:.2f}",
+                "Relative": relative,
+            }
+        )
+
+    print(tabulate(table_data, headers="keys", tablefmt="grid"))
+
+
+def print_benchmark_info(args):
+    """Print benchmark configuration and library versions."""
+    device = args.device.lower()
+
+    memory_format = "channels_last" if args.contiguity == "CL" else "channels_first"
+
+    # Collect configuration info
+    config = [
+        ["Device", device],
+        ["Threads", args.num_threads],
+        ["Batch size", args.batch_size],
+        ["Memory format", memory_format],
+        ["Experiments", f"{args.num_exp} (+ {args.warmup} warmup)"],
+        ["Input → output size", f"{args.min_size}-{args.max_size} → {args.target_size}×{args.target_size}"],
+    ]
+
+    print(tabulate(config, headers=["Parameter", "Value"], tablefmt="simple"))
+    print()
+
+    # Collect library versions
+    versions = [
+        ["PyTorch", torch.__version__],
+        ["TorchVision", torchvision.__version__],
+        ["OpenCV", cv2.__version__ if HAS_OPENCV else "Not available"],
+        ["PIL/Pillow", getattr(Image, '__version__', "Version unavailable")],
+        ["Albumentations", A.__version__ if HAS_ALBUMENTATIONS else "Not available"],
+        ["Kornia", K.__version__ if HAS_KORNIA else "Not available"],
+    ]
+
+    print(tabulate(versions, headers=["Library", "Version"], tablefmt="simple"))
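
For reference, the timing helpers in benchmarks/utils.py can also be used on their own. The following is a minimal sketch, assuming it is run from the benchmarks/ directory so that `utils` is importable; the toy pipeline and the 224x224 image size are illustrative only and not part of the diff.

    # Illustrative sketch: timing a toy transform with bench()/report_stats() from benchmarks/utils.py.
    # Assumes the working directory is benchmarks/ so that `utils` resolves to the module above.
    import torch
    from utils import bench, report_stats

    def toy_pipeline(img: torch.Tensor) -> torch.Tensor:
        # Stand-in for a real transform pipeline: uint8 -> float32 scaled to [0, 1].
        return img.to(torch.float32) / 255.0

    def make_image() -> torch.Tensor:
        # Fresh random uint8 CHW image for each experiment, mirroring generate_test_images().
        return torch.randint(0, 256, (3, 224, 224), dtype=torch.uint8)

    times = bench(toy_pipeline, data_generator=make_image, num_exp=50, warmup=5)
    stats = report_stats(times, "ms")  # prints median/std/mean/min/max and returns them as a dict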