pytorch
diff --git a/‎setup.py‎
Lines changed: 10 additions & 3 deletions b/‎setup.py‎
Lines changed: 10 additions & 3 deletions
diff --git a/‎test/datasets_utils.py‎
Lines changed: 18 additions & 5 deletions b/‎test/datasets_utils.py‎
Lines changed: 18 additions & 5 deletions
diff --git a/‎test/test_datasets.py‎
Lines changed: 42 additions & 0 deletions b/‎test/test_datasets.py‎
Lines changed: 42 additions & 0 deletions
diff --git a/‎test/test_transforms_v2.py‎
Lines changed: 8 additions & 2 deletions b/‎test/test_transforms_v2.py‎
Lines changed: 8 additions & 2 deletions
diff --git a/‎torchvision/datasets/clevr.py‎
Lines changed: 9 additions & 4 deletions b/‎torchvision/datasets/clevr.py‎
Lines changed: 9 additions & 4 deletions
diff --git a/‎torchvision/datasets/coco.py‎
Lines changed: 4 additions & 2 deletions b/‎torchvision/datasets/coco.py‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎torchvision/datasets/country211.py‎
Lines changed: 14 additions & 5 deletions b/‎torchvision/datasets/country211.py‎
Lines changed: 14 additions & 5 deletions
@@ -2,6 +2,7 @@
 import distutils.spawn
 import glob
 import os
+import shlex
 import shutil
 import subprocess
 import sys
@@ -95,8 +96,14 @@ def get_dist(pkgname):
             return None
 
     pytorch_dep = os.getenv("TORCH_PACKAGE_NAME", "torch")
-    if os.getenv("PYTORCH_VERSION"):
-        pytorch_dep += "==" + os.getenv("PYTORCH_VERSION")
+    if version_pin := os.getenv("PYTORCH_VERSION"):
+        pytorch_dep += "==" + version_pin
+    elif (version_pin_ge := os.getenv("PYTORCH_VERSION_GE")) and (version_pin_lt := os.getenv("PYTORCH_VERSION_LT")):
+        # This branch and the associated env vars exist to help third-party
+        # builds like in https://github.com/pytorch/vision/pull/8936. This is
+        # supported on a best-effort basis: we don't guarantee that this won't
+        # eventually break (and we don't test it).
+        pytorch_dep += f">={version_pin_ge},<{version_pin_lt}"
 
     requirements = [
         "numpy",
@@ -123,7 +130,7 @@ def get_macros_and_flags():
             if NVCC_FLAGS is None:
                 nvcc_flags = []
             else:
-                nvcc_flags = NVCC_FLAGS.split(" ")
+                nvcc_flags = shlex.split(NVCC_FLAGS)
         extra_compile_args["nvcc"] = nvcc_flags
 
     if sys.platform == "win32":
 
@@ -611,6 +611,7 @@ class ImageDatasetTestCase(DatasetTestCase):
     """
 
     FEATURE_TYPES = (PIL.Image.Image, int)
+    SUPPORT_TV_IMAGE_DECODE: bool = False
 
     @contextlib.contextmanager
     def create_dataset(
@@ -632,22 +633,34 @@ def create_dataset(
             # This problem only occurs during testing since some tests, e.g. DatasetTestCase.test_feature_types open an
             # image, but never use the underlying data. During normal operation it is reasonable to assume that the
             # user wants to work with the image he just opened rather than deleting the underlying file.
-            with self._force_load_images():
+            with self._force_load_images(loader=(config or {}).get("loader", None)):
                 yield dataset, info
 
     @contextlib.contextmanager
-    def _force_load_images(self):
-        open = PIL.Image.open
+    def _force_load_images(self, loader: Optional[Callable[[str], Any]] = None):
+        open = loader or PIL.Image.open
 
         def new(fp, *args, **kwargs):
             image = open(fp, *args, **kwargs)
-            if isinstance(fp, (str, pathlib.Path)):
+            if isinstance(fp, (str, pathlib.Path)) and isinstance(image, PIL.Image.Image):
                 image.load()
             return image
 
-        with unittest.mock.patch("PIL.Image.open", new=new):
+        with unittest.mock.patch(open.__module__ + "." + open.__qualname__, new=new):
             yield
 
+    def test_tv_decode_image_support(self):
+        if not self.SUPPORT_TV_IMAGE_DECODE:
+            pytest.skip(f"{self.DATASET_CLASS.__name__} does not support torchvision.io.decode_image.")
+
+        with self.create_dataset(
+            config=dict(
+                loader=torchvision.io.decode_image,
+            )
+        ) as (dataset, _):
+            image = dataset[0][0]
+            assert isinstance(image, torch.Tensor)
+
 
 class VideoDatasetTestCase(DatasetTestCase):
     """Abstract base class for video dataset testcases.
 
@@ -24,6 +24,7 @@
 import torch.nn.functional as F
 from common_utils import combinations_grid
 from torchvision import datasets
+from torchvision.io import decode_image
 from torchvision.transforms import v2
 
 
@@ -405,6 +406,8 @@ class ImageNetTestCase(datasets_utils.ImageDatasetTestCase):
     REQUIRED_PACKAGES = ("scipy",)
     ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val"))
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     def inject_fake_data(self, tmpdir, config):
         tmpdir = pathlib.Path(tmpdir)
 
@@ -1173,6 +1176,8 @@ class SBUTestCase(datasets_utils.ImageDatasetTestCase):
     DATASET_CLASS = datasets.SBU
     FEATURE_TYPES = (PIL.Image.Image, str)
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     def inject_fake_data(self, tmpdir, config):
         num_images = 3
 
@@ -1411,6 +1416,8 @@ class Flickr8kTestCase(datasets_utils.ImageDatasetTestCase):
     _IMAGES_FOLDER = "images"
     _ANNOTATIONS_FILE = "captions.html"
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     def dataset_args(self, tmpdir, config):
         tmpdir = pathlib.Path(tmpdir)
         root = tmpdir / self._IMAGES_FOLDER
@@ -1480,6 +1487,8 @@ class Flickr30kTestCase(Flickr8kTestCase):
 
     _ANNOTATIONS_FILE = "captions.token"
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     def _image_file_name(self, idx):
         return f"{idx}.jpg"
 
@@ -1940,6 +1949,8 @@ class LFWPeopleTestCase(datasets_utils.DatasetTestCase):
     _IMAGES_DIR = {"original": "lfw", "funneled": "lfw_funneled", "deepfunneled": "lfw-deepfunneled"}
     _file_id = {"10fold": "", "train": "DevTrain", "test": "DevTest"}
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     def inject_fake_data(self, tmpdir, config):
         tmpdir = pathlib.Path(tmpdir) / "lfw-py"
         os.makedirs(tmpdir, exist_ok=True)
@@ -1976,6 +1987,18 @@ def _create_random_id(self):
         part2 = datasets_utils.create_random_string(random.randint(4, 7))
         return f"{part1}_{part2}"
 
+    def test_tv_decode_image_support(self):
+        if not self.SUPPORT_TV_IMAGE_DECODE:
+            pytest.skip(f"{self.DATASET_CLASS.__name__} does not support torchvision.io.decode_image.")
+
+        with self.create_dataset(
+            config=dict(
+                loader=decode_image,
+            )
+        ) as (dataset, _):
+            image = dataset[0][0]
+            assert isinstance(image, torch.Tensor)
+
 
 class LFWPairsTestCase(LFWPeopleTestCase):
     DATASET_CLASS = datasets.LFWPairs
@@ -2308,6 +2331,7 @@ def inject_fake_data(self, tmpdir, config):
 class EuroSATTestCase(datasets_utils.ImageDatasetTestCase):
     DATASET_CLASS = datasets.EuroSAT
     FEATURE_TYPES = (PIL.Image.Image, int)
+    SUPPORT_TV_IMAGE_DECODE = True
 
     def inject_fake_data(self, tmpdir, config):
         data_folder = os.path.join(tmpdir, "eurosat", "2750")
@@ -2332,6 +2356,8 @@ class Food101TestCase(datasets_utils.ImageDatasetTestCase):
 
     ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test"))
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     def inject_fake_data(self, tmpdir: str, config):
         root_folder = pathlib.Path(tmpdir) / "food-101"
         image_folder = root_folder / "images"
@@ -2368,6 +2394,7 @@ class FGVCAircraftTestCase(datasets_utils.ImageDatasetTestCase):
     ADDITIONAL_CONFIGS = combinations_grid(
         split=("train", "val", "trainval", "test"), annotation_level=("variant", "family", "manufacturer")
     )
+    SUPPORT_TV_IMAGE_DECODE = True
 
     def inject_fake_data(self, tmpdir: str, config):
         split = config["split"]
@@ -2417,6 +2444,8 @@ def inject_fake_data(self, tmpdir: str, config):
 class SUN397TestCase(datasets_utils.ImageDatasetTestCase):
     DATASET_CLASS = datasets.SUN397
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     def inject_fake_data(self, tmpdir: str, config):
         data_dir = pathlib.Path(tmpdir) / "SUN397"
         data_dir.mkdir()
@@ -2448,6 +2477,8 @@ class DTDTestCase(datasets_utils.ImageDatasetTestCase):
     DATASET_CLASS = datasets.DTD
     FEATURE_TYPES = (PIL.Image.Image, int)
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     ADDITIONAL_CONFIGS = combinations_grid(
         split=("train", "test", "val"),
         # There is no need to test the whole matrix here, since each fold is treated exactly the same
@@ -2608,6 +2639,7 @@ class CLEVRClassificationTestCase(datasets_utils.ImageDatasetTestCase):
     FEATURE_TYPES = (PIL.Image.Image, (int, type(None)))
 
     ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val", "test"))
+    SUPPORT_TV_IMAGE_DECODE = True
 
     def inject_fake_data(self, tmpdir, config):
         data_folder = pathlib.Path(tmpdir) / "clevr" / "CLEVR_v1.0"
@@ -2705,6 +2737,8 @@ class StanfordCarsTestCase(datasets_utils.ImageDatasetTestCase):
     REQUIRED_PACKAGES = ("scipy",)
     ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test"))
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     def inject_fake_data(self, tmpdir, config):
         import scipy.io as io
         from numpy.core.records import fromarrays
@@ -2749,6 +2783,8 @@ class Country211TestCase(datasets_utils.ImageDatasetTestCase):
 
     ADDITIONAL_CONFIGS = combinations_grid(split=("train", "valid", "test"))
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     def inject_fake_data(self, tmpdir: str, config):
         split_folder = pathlib.Path(tmpdir) / "country211" / config["split"]
         split_folder.mkdir(parents=True, exist_ok=True)
@@ -2777,6 +2813,8 @@ class Flowers102TestCase(datasets_utils.ImageDatasetTestCase):
     ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val", "test"))
     REQUIRED_PACKAGES = ("scipy",)
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     def inject_fake_data(self, tmpdir: str, config):
         base_folder = pathlib.Path(tmpdir) / "flowers-102"
 
@@ -2835,6 +2873,8 @@ class RenderedSST2TestCase(datasets_utils.ImageDatasetTestCase):
     ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val", "test"))
     SPLIT_TO_FOLDER = {"train": "train", "val": "valid", "test": "test"}
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     def inject_fake_data(self, tmpdir: str, config):
         root_folder = pathlib.Path(tmpdir) / "rendered-sst2"
         image_folder = root_folder / self.SPLIT_TO_FOLDER[config["split"]]
@@ -3495,6 +3535,8 @@ class ImagenetteTestCase(datasets_utils.ImageDatasetTestCase):
     DATASET_CLASS = datasets.Imagenette
     ADDITIONAL_CONFIGS = combinations_grid(split=["train", "val"], size=["full", "320px", "160px"])
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     _WNIDS = [
         "n01440764",
         "n02102040",
 
@@ -3758,12 +3758,18 @@ def test_transform_errors_warnings(self):
         with pytest.raises(ValueError, match="provide only two dimensions"):
             transforms.RandomResizedCrop(size=(1, 2, 3))
 
-        with pytest.raises(TypeError, match="Scale should be a sequence"):
+        with pytest.raises(TypeError, match="Scale should be a sequence of two floats."):
             transforms.RandomResizedCrop(size=self.INPUT_SIZE, scale=123)
 
-        with pytest.raises(TypeError, match="Ratio should be a sequence"):
+        with pytest.raises(TypeError, match="Ratio should be a sequence of two floats."):
             transforms.RandomResizedCrop(size=self.INPUT_SIZE, ratio=123)
 
+        with pytest.raises(TypeError, match="Ratio should be a sequence of two floats."):
+            transforms.RandomResizedCrop(size=self.INPUT_SIZE, ratio=[1, 2, 3])
+
+        with pytest.raises(TypeError, match="Scale should be a sequence of two floats."):
+            transforms.RandomResizedCrop(size=self.INPUT_SIZE, scale=[1, 2, 3])
+
         for param in ["scale", "ratio"]:
             with pytest.warns(match="Scale and ratio should be of kind"):
                 transforms.RandomResizedCrop(size=self.INPUT_SIZE, **{param: [1, 0]})
 
@@ -3,7 +3,7 @@
 from typing import Any, Callable, List, Optional, Tuple, Union
 from urllib.parse import urlparse
 
-from PIL import Image
+from .folder import default_loader
 
 from .utils import download_and_extract_archive, verify_str_arg
 from .vision import VisionDataset
@@ -18,11 +18,14 @@ class CLEVRClassification(VisionDataset):
         root (str or ``pathlib.Path``): Root directory of dataset where directory ``root/clevr`` exists or will be saved to if download is
             set to True.
         split (string, optional): The dataset split, supports ``"train"`` (default), ``"val"``, or ``"test"``.
-        transform (callable, optional): A function/transform that takes in a PIL image and returns a transformed
-            version. E.g, ``transforms.RandomCrop``
+        transform (callable, optional): A function/transform that takes in a PIL image or torch.Tensor, depending on the given loader,
+            and returns a transformed version. E.g, ``transforms.RandomCrop``
         target_transform (callable, optional): A function/transform that takes in them target and transforms it.
         download (bool, optional): If true, downloads the dataset from the internet and puts it in root directory. If
             dataset is already downloaded, it is not downloaded again.
+        loader (callable, optional): A function to load an image given its path.
+            By default, it uses PIL as its image loader, but users could also pass in
+            ``torchvision.io.decode_image`` for decoding image data into tensors directly.
     """
 
     _URL = "https://dl.fbaipublicfiles.com/clevr/CLEVR_v1.0.zip"
@@ -35,9 +38,11 @@ def __init__(
         transform: Optional[Callable] = None,
         target_transform: Optional[Callable] = None,
         download: bool = False,
+        loader: Callable[[Union[str, pathlib.Path]], Any] = default_loader,
     ) -> None:
         self._split = verify_str_arg(split, "split", ("train", "val", "test"))
         super().__init__(root, transform=transform, target_transform=target_transform)
+        self.loader = loader
         self._base_folder = pathlib.Path(self.root) / "clevr"
         self._data_folder = self._base_folder / pathlib.Path(urlparse(self._URL).path).stem
 
@@ -65,7 +70,7 @@ def __getitem__(self, idx: int) -> Tuple[Any, Any]:
         image_file = self._image_files[idx]
         label = self._labels[idx]
 
-        image = Image.open(image_file).convert("RGB")
+        image = self.loader(image_file)
 
         if self.transform:
             image = self.transform(image)
 
@@ -10,7 +10,8 @@
 class CocoDetection(VisionDataset):
     """`MS Coco Detection <https://cocodataset.org/#detection-2016>`_ Dataset.
 
-    It requires the `COCO API to be installed <https://github.com/pdollar/coco/tree/master/PythonAPI>`_.
+    It requires `pycocotools <https://github.com/ppwwyyxx/cocoapi>`_ to be installed,
+    which can be installed via ``pip install pycocotools`` or ``conda install conda-forge::pycocotools``.
 
     Args:
         root (str or ``pathlib.Path``): Root directory where images are downloaded to.
@@ -65,7 +66,8 @@ def __len__(self) -> int:
 class CocoCaptions(CocoDetection):
     """`MS Coco Captions <https://cocodataset.org/#captions-2015>`_ Dataset.
 
-    It requires the `COCO API to be installed <https://github.com/pdollar/coco/tree/master/PythonAPI>`_.
+    It requires `pycocotools <https://github.com/ppwwyyxx/cocoapi>`_ to be installed,
+    which can be installed via ``pip install pycocotools`` or ``conda install conda-forge::pycocotools``.
 
     Args:
         root (str or ``pathlib.Path``): Root directory where images are downloaded to.
 
@@ -1,7 +1,7 @@
 from pathlib import Path
-from typing import Callable, Optional, Union
+from typing import Any, Callable, Optional, Union
 
-from .folder import ImageFolder
+from .folder import default_loader, ImageFolder
 from .utils import download_and_extract_archive, verify_str_arg
 
 
@@ -16,11 +16,14 @@ class Country211(ImageFolder):
     Args:
         root (str or ``pathlib.Path``): Root directory of the dataset.
         split (string, optional): The dataset split, supports ``"train"`` (default), ``"valid"`` and ``"test"``.
-        transform (callable, optional): A function/transform that takes in a PIL image and returns a transformed
-            version. E.g, ``transforms.RandomCrop``.
+        transform (callable, optional): A function/transform that takes in a PIL image or torch.Tensor, depending on the given loader,
+            and returns a transformed version. E.g, ``transforms.RandomCrop``
         target_transform (callable, optional): A function/transform that takes in the target and transforms it.
         download (bool, optional): If True, downloads the dataset from the internet and puts it into
             ``root/country211/``. If dataset is already downloaded, it is not downloaded again.
+        loader (callable, optional): A function to load an image given its path.
+            By default, it uses PIL as its image loader, but users could also pass in
+            ``torchvision.io.decode_image`` for decoding image data into tensors directly.
     """
 
     _URL = "https://openaipublic.azureedge.net/clip/data/country211.tgz"
@@ -33,6 +36,7 @@ def __init__(
         transform: Optional[Callable] = None,
         target_transform: Optional[Callable] = None,
         download: bool = False,
+        loader: Callable[[str], Any] = default_loader,
     ) -> None:
         self._split = verify_str_arg(split, "split", ("train", "valid", "test"))
 
@@ -46,7 +50,12 @@ def __init__(
         if not self._check_exists():
             raise RuntimeError("Dataset not found. You can use download=True to download it")
 
-        super().__init__(str(self._base_folder / self._split), transform=transform, target_transform=target_transform)
+        super().__init__(
+            str(self._base_folder / self._split),
+            transform=transform,
+            target_transform=target_transform,
+            loader=loader,
+        )
         self.root = str(root)
 
     def _check_exists(self) -> bool: