 88 |  88 | #
 89 |  89 | # Let's briefly look at a detection example with bounding boxes.
 90 |  90 |
 91 |     | -from torchvision import datapoints  # we'll describe this a bit later, bear with us
    |  91 | +from torchvision import tv_tensors  # we'll describe this a bit later, bear with us
 92 |  92 |
 93 |     | -boxes = datapoints.BoundingBoxes(
    |  93 | +boxes = tv_tensors.BoundingBoxes(
 94 |  94 |     [
 95 |  95 |         [15, 10, 370, 510],
 96 |  96 |         [275, 340, 510, 510],
111 | 111 | # %%
112 | 112 | #
113 | 113 | # The example above focuses on object detection. But if we had masks
114 |     | -# (:class:`torchvision.datapoints.Mask`) for object segmentation or semantic
115 |     | -# segmentation, or videos (:class:`torchvision.datapoints.Video`), we could have
    | 114 | +# (:class:`torchvision.tv_tensors.Mask`) for object segmentation or semantic
    | 115 | +# segmentation, or videos (:class:`torchvision.tv_tensors.Video`), we could have
116 | 116 | # passed them to the transforms in exactly the same way.
117 | 117 | #
118 |     | -# By now you likely have a few questions: what are these datapoints, how do we
    | 118 | +# By now you likely have a few questions: what are these tv_tensors, how do we
119 | 119 | # use them, and what is the expected input/output of those transforms? We'll
120 | 120 | # answer these in the next sections.
121 | 121 |
122 | 122 | # %%
123 | 123 | #
124 |     | -# .. _what_are_datapoints:
    | 124 | +# .. _what_are_tv_tensors:
125 | 125 | #
126 |     | -# What are Datapoints?
    | 126 | +# What are TVTensors?
127 | 127 | # --------------------
128 | 128 | #
129 |     | -# Datapoints are :class:`torch.Tensor` subclasses. The available datapoints are
130 |     | -# :class:`~torchvision.datapoints.Image`,
131 |     | -# :class:`~torchvision.datapoints.BoundingBoxes`,
132 |     | -# :class:`~torchvision.datapoints.Mask`, and
133 |     | -# :class:`~torchvision.datapoints.Video`.
    | 129 | +# TVTensors are :class:`torch.Tensor` subclasses. The available tv_tensors are
    | 130 | +# :class:`~torchvision.tv_tensors.Image`,
    | 131 | +# :class:`~torchvision.tv_tensors.BoundingBoxes`,
    | 132 | +# :class:`~torchvision.tv_tensors.Mask`, and
    | 133 | +# :class:`~torchvision.tv_tensors.Video`.
134 | 134 | #
135 |     | -# Datapoints look and feel just like regular tensors - they **are** tensors.
    | 135 | +# TVTensors look and feel just like regular tensors - they **are** tensors.
136 | 136 | # Everything that is supported on a plain :class:`torch.Tensor` like ``.sum()``
137 |     | -# or any ``torch.*`` operator will also work on a datapoint:
    | 137 | +# or any ``torch.*`` operator will also work on a tv_tensor:
138 | 138 |
139 |     | -img_dp = datapoints.Image(torch.randint(0, 256, (3, 256, 256), dtype=torch.uint8))
    | 139 | +img_dp = tv_tensors.Image(torch.randint(0, 256, (3, 256, 256), dtype=torch.uint8))
140 | 140 |
141 | 141 | print(f"{isinstance(img_dp, torch.Tensor) = }")
142 | 142 | print(f"{img_dp.dtype = }, {img_dp.shape = }, {img_dp.sum() = }")
143 | 143 |
144 | 144 | # %%
145 |     | -# These Datapoint classes are at the core of the transforms: in order to
    | 145 | +# These TVTensor classes are at the core of the transforms: in order to
146 | 146 | # transform a given input, the transforms first look at the **class** of the
147 | 147 | # object, and dispatch to the appropriate implementation accordingly.
148 | 148 | #
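Editor's note: to make the class-based dispatch concrete, here is a minimal sketch, not part of the diff above. It assumes torchvision 0.16+ where tv_tensors and torchvision.transforms.v2 are available; the same transform call handles an Image and a BoundingBoxes input, and each output keeps its class.

import torch
from torchvision import tv_tensors
from torchvision.transforms import v2

# One transform instance; dispatch happens per input class at call time.
transform = v2.RandomHorizontalFlip(p=1.0)

img = tv_tensors.Image(torch.randint(0, 256, (3, 32, 32), dtype=torch.uint8))
boxes = tv_tensors.BoundingBoxes(
    [[0, 0, 10, 10]], format="XYXY", canvas_size=(32, 32)
)

# Both inputs are flipped, each with the implementation matching its class,
# and the outputs keep their tv_tensor classes.
out_img, out_boxes = transform(img, boxes)
print(type(out_img).__name__, type(out_boxes).__name__)  # Image BoundingBoxes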
149 |     | -# You don't need to know much more about datapoints at this point, but advanced
    | 149 | +# You don't need to know much more about tv_tensors at this point, but advanced
150 | 150 | # users who want to learn more can refer to
151 |     | -# :ref:`sphx_glr_auto_examples_transforms_plot_datapoints.py`.
    | 151 | +# :ref:`sphx_glr_auto_examples_transforms_plot_tv_tensors.py`.
152 | 152 | #
153 | 153 | # What do I pass as input?
154 | 154 | # ------------------------
196 | 196 | # Pure :class:`torch.Tensor` objects are, in general, treated as images (or
197 | 197 | # as videos for video-specific transforms). Indeed, you may have noticed
198 | 198 | # that in the code above we haven't used the
199 |     | -# :class:`~torchvision.datapoints.Image` class at all, and yet our images
    | 199 | +# :class:`~torchvision.tv_tensors.Image` class at all, and yet our images
200 | 200 | # got transformed properly. Transforms follow the following logic to
201 | 201 | # determine whether a pure Tensor should be treated as an image (or video),
202 | 202 | # or just ignored:
203 | 203 | #
204 |     | -# * If there is an :class:`~torchvision.datapoints.Image`,
205 |     | -#   :class:`~torchvision.datapoints.Video`,
    | 204 | +# * If there is an :class:`~torchvision.tv_tensors.Image`,
    | 205 | +#   :class:`~torchvision.tv_tensors.Video`,
206 | 206 | #   or :class:`PIL.Image.Image` instance in the input, all other pure
207 | 207 | #   tensors are passed-through.
208 |     | -# * If there is no :class:`~torchvision.datapoints.Image` or
209 |     | -#   :class:`~torchvision.datapoints.Video` instance, only the first pure
    | 208 | +# * If there is no :class:`~torchvision.tv_tensors.Image` or
    | 209 | +#   :class:`~torchvision.tv_tensors.Video` instance, only the first pure
210 | 210 | #   :class:`torch.Tensor` will be transformed as image or video, while all
211 | 211 | #   others will be passed-through. Here "first" means "first in a depth-wise
212 | 212 | #   traversal".
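Editor's note: the two rules above can be illustrated with a small sketch, not part of the diff, assuming torchvision 0.16+ with transforms.v2:

import torch
from torchvision import tv_tensors
from torchvision.transforms import v2

transform = v2.RandomResizedCrop(size=(224, 224), antialias=True)

img = tv_tensors.Image(torch.randint(0, 256, (3, 256, 256), dtype=torch.uint8))
plain = torch.randint(0, 256, (3, 256, 256), dtype=torch.uint8)

# Rule 1: an Image instance is present, so the pure tensor is passed through.
out_img, out_plain = transform(img, plain)
print(out_img.shape, out_plain.shape)  # torch.Size([3, 224, 224]) torch.Size([3, 256, 256])

# Rule 2: no Image/Video instance, so only the first pure tensor is treated
# as an image; the second one is passed through untouched.
first, second = transform(plain, torch.tensor([1, 2, 3]))
print(first.shape, second)  # torch.Size([3, 224, 224]) tensor([1, 2, 3])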
234 | 234 | # Torchvision also supports datasets for object detection or segmentation like
235 | 235 | # :class:`torchvision.datasets.CocoDetection`. Those datasets predate
236 | 236 | # the existence of the :mod:`torchvision.transforms.v2` module and of the
237 |     | -# datapoints, so they don't return datapoints out of the box.
    | 237 | +# tv_tensors, so they don't return tv_tensors out of the box.
238 | 238 | #
239 |     | -# An easy way to force those datasets to return datapoints and to make them
    | 239 | +# An easy way to force those datasets to return tv_tensors and to make them
240 | 240 | # compatible with v2 transforms is to use the
241 | 241 | # :func:`torchvision.datasets.wrap_dataset_for_transforms_v2` function:
242 | 242 | #

246 | 246 | #
247 | 247 | #     dataset = CocoDetection(..., transforms=my_transforms)
248 | 248 | #     dataset = wrap_dataset_for_transforms_v2(dataset)
249 |     | -#     # Now the dataset returns datapoints!
    | 249 | +#     # Now the dataset returns tv_tensors!
250 | 250 | #
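Editor's note: spelled out a bit more fully, the wrapping pattern could look like the sketch below. It is not part of the diff; the dataset paths are placeholders and it assumes torchvision 0.16+ with a local COCO download.

import torch
from torchvision import datasets
from torchvision.datasets import wrap_dataset_for_transforms_v2
from torchvision.transforms import v2

my_transforms = v2.Compose([
    v2.RandomHorizontalFlip(p=0.5),
    v2.RandomResizedCrop(size=(224, 224), antialias=True),
])

# "path/to/coco/..." are placeholder paths pointing at an existing COCO download.
dataset = datasets.CocoDetection(
    "path/to/coco/images", "path/to/coco/annotations.json", transforms=my_transforms
)
dataset = wrap_dataset_for_transforms_v2(dataset)

# The wrapper converts the raw COCO target into tv_tensors before the
# transforms run, so boxes are transformed alongside the image.
img, target = dataset[0]
print(type(target["boxes"]))  # <class 'torchvision.tv_tensors.BoundingBoxes'>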
251 | 251 | # Using your own datasets
252 | 252 | # ^^^^^^^^^^^^^^^^^^^^^^^
253 | 253 | #
254 | 254 | # If you have a custom dataset, then you'll need to convert your objects into
255 |     | -# the appropriate Datapoint classes. Creating Datapoint instances is very easy,
256 |     | -# refer to :ref:`datapoint_creation` for more details.
    | 255 | +# the appropriate TVTensor classes. Creating TVTensor instances is very easy,
    | 256 | +# refer to :ref:`tv_tensor_creation` for more details.
257 | 257 | #
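Editor's note: for instance, a custom detection dataset could wrap its raw data into tv_tensors directly in __getitem__. The sketch below is only illustrative, not part of the diff; the class and attribute names are hypothetical and it assumes torchvision 0.16+.

import torch
from torch.utils.data import Dataset
from torchvision import tv_tensors


class MyDetectionDataset(Dataset):
    """Hypothetical dataset returning tv_tensors instead of raw tensors/lists."""

    def __init__(self, images, boxes_per_image, transforms=None):
        self.images = images                    # list of (3, H, W) uint8 tensors
        self.boxes_per_image = boxes_per_image  # list of [x1, y1, x2, y2] lists
        self.transforms = transforms

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        # Wrap the raw data into tv_tensors so v2 transforms dispatch correctly.
        img = tv_tensors.Image(self.images[idx])
        boxes = tv_tensors.BoundingBoxes(
            self.boxes_per_image[idx],
            format="XYXY",
            canvas_size=img.shape[-2:],
        )
        if self.transforms is not None:
            img, boxes = self.transforms(img, boxes)
        return img, boxes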
258 | 258 | # There are two main places where you can implement that conversion logic:
259 | 259 | #