From d678de1da907e4288e2132dcc3071f63ce0672ea Mon Sep 17 00:00:00 2001 From: Zhitao Yu Date: Fri, 31 Oct 2025 10:07:59 -0700 Subject: [PATCH] Docstring Fix for PILToTensor in Torchvision (#9254) Summary: #9221 identifies a confusion around image shape conventions for ToTensor and PILToTensor classes. The docstring has the following statement: Converts a PIL Image (H x W x C) to a Tensor of shape (C x H x W). This is confusing because a PIL Image does not have an (H x W x C) shape; PIL Images expose their size as (W, H) via the size attribute, not as a shape tuple. Proposed Docstring Update: Convert a PIL Image with H height, W width, and C channels to a Tensor of shape (C x H x W). Differential Revision: D85779518 --- torchvision/transforms/transforms.py | 10 +++++++++- torchvision/transforms/v2/_type_conversion.py | 10 +++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py index 6de15075033..c6595a3402e 100644 --- a/torchvision/transforms/transforms.py +++ b/torchvision/transforms/transforms.py @@ -145,7 +145,15 @@ class PILToTensor: This transform does not support torchscript. - Converts a PIL Image (H x W x C) to a Tensor of shape (C x H x W). + Convert a PIL Image with H height, W width, and C channels to a Tensor of shape (C x H x W). + + Example: + >>> from PIL import Image + >>> import torchvision.transforms as T + >>> img = Image.new("RGB", (320, 240)) # size (W=320, H=240) + >>> tensor = T.PILToTensor()(img) + >>> print(tensor.shape) + torch.Size([3, 240, 320]) """ def __init__(self) -> None: diff --git a/torchvision/transforms/v2/_type_conversion.py b/torchvision/transforms/v2/_type_conversion.py index d9cbf502bb6..7cac62868b9 100644 --- a/torchvision/transforms/v2/_type_conversion.py +++ b/torchvision/transforms/v2/_type_conversion.py @@ -15,7 +15,15 @@ class PILToTensor(Transform): This transform does not support torchscript. 
- Converts a PIL Image (H x W x C) to a Tensor of shape (C x H x W). + Convert a PIL Image with H height, W width, and C channels to a Tensor of shape (C x H x W). + + Example: + >>> from PIL import Image + >>> from torchvision.transforms import v2 + >>> img = Image.new("RGB", (320, 240)) # size (W=320, H=240) + >>> tensor = v2.PILToTensor()(img) + >>> print(tensor.shape) + torch.Size([3, 240, 320]) """ _transformed_types = (PIL.Image.Image,)