Skip to content

Commit 1fc4ad3

Browse files
Merge branch 'main' into internallinters
2 parents d1bccdb + cdc1fee commit 1fc4ad3

File tree

6 files changed

+98
-29
lines changed

6 files changed

+98
-29
lines changed

setup.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import shutil
77
import subprocess
88
import sys
9+
import sysconfig
910
import warnings
1011
from pathlib import Path
1112

@@ -136,6 +137,8 @@ def get_macros_and_flags():
136137
if sys.platform == "win32":
137138
define_macros += [("torchvision_EXPORTS", None)]
138139
extra_compile_args["cxx"].append("/MP")
140+
if sysconfig.get_config_var("Py_GIL_DISABLED"):
141+
extra_compile_args["cxx"].append("-DPy_GIL_DISABLED")
139142

140143
if DEBUG:
141144
extra_compile_args["cxx"].append("-g")

test/test_transforms_v2.py

Lines changed: 45 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3993,7 +3993,7 @@ class TestGaussianNoise:
39933993
"make_input",
39943994
[make_image_tensor, make_image, make_video],
39953995
)
3996-
def test_kernel(self, make_input):
3996+
def test_kernel_float(self, make_input):
39973997
check_kernel(
39983998
F.gaussian_noise,
39993999
make_input(dtype=torch.float32),
@@ -4005,9 +4005,28 @@ def test_kernel(self, make_input):
40054005
"make_input",
40064006
[make_image_tensor, make_image, make_video],
40074007
)
4008-
def test_functional(self, make_input):
4008+
def test_kernel_uint8(self, make_input):
4009+
check_kernel(
4010+
F.gaussian_noise,
4011+
make_input(dtype=torch.uint8),
4012+
# This cannot pass because the noise on a batch is not per-image
4013+
check_batched_vs_unbatched=False,
4014+
)
4015+
4016+
@pytest.mark.parametrize(
4017+
"make_input",
4018+
[make_image_tensor, make_image, make_video],
4019+
)
4020+
def test_functional_float(self, make_input):
40094021
check_functional(F.gaussian_noise, make_input(dtype=torch.float32))
40104022

4023+
@pytest.mark.parametrize(
4024+
"make_input",
4025+
[make_image_tensor, make_image, make_video],
4026+
)
4027+
def test_functional_uint8(self, make_input):
4028+
check_functional(F.gaussian_noise, make_input(dtype=torch.uint8))
4029+
40114030
@pytest.mark.parametrize(
40124031
("kernel", "input_type"),
40134032
[
@@ -4023,10 +4042,11 @@ def test_functional_signature(self, kernel, input_type):
40234042
"make_input",
40244043
[make_image_tensor, make_image, make_video],
40254044
)
4026-
def test_transform(self, make_input):
4045+
def test_transform_float(self, make_input):
40274046
def adapter(_, input, __):
4028-
# This transform doesn't support uint8 so we have to convert the auto-generated uint8 tensors to float32
4029-
# Same for PIL images
4047+
# We have two different implementations for floats and uint8
4048+
# To test this implementation we'll convert the auto-generated uint8 tensors to float32
4049+
# We don't support other int dtypes nor pil images
40304050
for key, value in input.items():
40314051
if isinstance(value, torch.Tensor) and not value.is_floating_point():
40324052
input[key] = value.to(torch.float32)
@@ -4036,11 +4056,29 @@ def adapter(_, input, __):
40364056

40374057
check_transform(transforms.GaussianNoise(), make_input(dtype=torch.float32), check_sample_input=adapter)
40384058

4059+
@pytest.mark.parametrize(
    "make_input",
    [make_image_tensor, make_image, make_video],
)
def test_transform_uint8(self, make_input):
    """Run the generic transform checks against the uint8 code path of ``GaussianNoise``."""

    def adapter(_, input, __):
        # We have two different implementations for floats and uint8.
        # To test the uint8 implementation we convert every tensor of the
        # auto-generated sample to uint8.
        # We don't support other int dtypes nor PIL images, so PIL images are
        # turned into uint8 tensors as well.
        for key, value in input.items():
            # Convert only tensors that are NOT already uint8. The previous
            # condition (`not value.dtype != torch.uint8`) was a double negation
            # that matched uint8 tensors only, making the conversion a no-op.
            if isinstance(value, torch.Tensor) and value.dtype != torch.uint8:
                input[key] = value.to(torch.uint8)
            if isinstance(value, PIL.Image.Image):
                input[key] = F.pil_to_tensor(value).to(torch.uint8)
        return input

    check_transform(transforms.GaussianNoise(), make_input(dtype=torch.uint8), check_sample_input=adapter)
4076+
40394077
def test_bad_input(self):
40404078
with pytest.raises(ValueError, match="Gaussian Noise is not implemented for PIL images."):
40414079
F.gaussian_noise(make_image_pil())
4042-
with pytest.raises(ValueError, match="Input tensor is expected to be in float dtype"):
4043-
F.gaussian_noise(make_image(dtype=torch.uint8))
4080+
with pytest.raises(ValueError, match="Input tensor is expected to be in uint8 or float dtype"):
4081+
F.gaussian_noise(make_image(dtype=torch.int32))
40444082
with pytest.raises(ValueError, match="sigma shouldn't be negative"):
40454083
F.gaussian_noise(make_image(dtype=torch.float32), sigma=-1)
40464084

torchvision/datasets/caltech.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
import os
22
import os.path
3+
import shutil
34
from pathlib import Path
45
from typing import Any, Callable, Optional, Union
56

67
from PIL import Image
78

8-
from .utils import download_and_extract_archive, verify_str_arg
9+
from .utils import download_and_extract_archive, extract_archive, verify_str_arg
910
from .vision import VisionDataset
1011

1112

@@ -133,17 +134,17 @@ def download(self) -> None:
133134
return
134135

135136
download_and_extract_archive(
136-
"https://drive.google.com/file/d/137RyRjvTBkBiIfeYBNZBtViDHQ6_Ewsp",
137-
self.root,
138-
filename="101_ObjectCategories.tar.gz",
139-
md5="b224c7392d521a49829488ab0f1120d9",
140-
)
141-
download_and_extract_archive(
142-
"https://drive.google.com/file/d/175kQy3UsZ0wUEHZjqkUDdNVssr7bgh_m",
143-
self.root,
144-
filename="Annotations.tar",
145-
md5="6f83eeb1f24d99cab4eb377263132c91",
137+
"https://data.caltech.edu/records/mzrjq-6wc02/files/caltech-101.zip",
138+
download_root=self.root,
139+
filename="caltech-101.zip",
140+
md5="3138e1922a9193bfa496528edbbc45d0",
146141
)
142+
gzip_folder = os.path.join(self.root, "caltech-101")
143+
for gzip_file in os.listdir(gzip_folder):
144+
if gzip_file.endswith(".gz"):
145+
extract_archive(os.path.join(gzip_folder, gzip_file), self.root)
146+
shutil.rmtree(gzip_folder)
147+
os.remove(os.path.join(self.root, "caltech-101.zip"))
147148

148149
def extra_repr(self) -> str:
149150
return "Target type: {target_type}".format(**self.__dict__)
@@ -233,7 +234,7 @@ def download(self) -> None:
233234
return
234235

235236
download_and_extract_archive(
236-
"https://drive.google.com/file/d/1r6o0pSROcV1_VwT4oSjA2FBUSCWGuxLK",
237+
"https://data.caltech.edu/records/nyy15-4j048/files/256_ObjectCategories.tar",
237238
self.root,
238239
filename="256_ObjectCategories.tar",
239240
md5="67b4f42ca05d46448c6bb8ecd2220f6d",

torchvision/ops/drop_block.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ def drop_block2d(
3636

3737
N, C, H, W = input.size()
3838
block_size = min(block_size, W, H)
39+
if block_size % 2 == 0:
40+
raise ValueError(f"block size should be odd. Got {block_size} which is even.")
41+
3942
# compute the gamma of Bernoulli distribution
4043
gamma = (p * H * W) / ((block_size**2) * ((H - block_size + 1) * (W - block_size + 1)))
4144
noise = torch.empty((N, C, H - block_size + 1, W - block_size + 1), dtype=input.dtype, device=input.device)
@@ -82,6 +85,9 @@ def drop_block3d(
8285

8386
N, C, D, H, W = input.size()
8487
block_size = min(block_size, D, H, W)
88+
if block_size % 2 == 0:
89+
raise ValueError(f"block size should be odd. Got {block_size} which is even.")
90+
8591
# compute the gamma of Bernoulli distribution
8692
gamma = (p * D * H * W) / ((block_size**3) * ((D - block_size + 1) * (H - block_size + 1) * (W - block_size + 1)))
8793
noise = torch.empty(

torchvision/transforms/v2/_misc.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -214,13 +214,22 @@ class GaussianNoise(Transform):
214214
Each image or frame in a batch will be transformed independently i.e. the
215215
noise added to each image will be different.
216216
217-
The input tensor is also expected to be of float dtype in ``[0, 1]``.
218-
This transform does not support PIL images.
217+
The input tensor is also expected to be of float dtype in ``[0, 1]``,
218+
or of ``uint8`` dtype in ``[0, 255]``. This transform does not support PIL
219+
images.
220+
221+
Regardless of the dtype used, the parameters of the function use the same
222+
scale, so a ``mean`` parameter of 0.5 will result in an average value
223+
increase of 0.5 units for float images, and an average increase of 127.5
224+
units for ``uint8`` images.
219225
220226
Args:
221227
mean (float): Mean of the sampled normal distribution. Default is 0.
222228
sigma (float): Standard deviation of the sampled normal distribution. Default is 0.1.
223-
clip (bool, optional): Whether to clip the values in ``[0, 1]`` after adding noise. Default is True.
229+
clip (bool, optional): Whether to clip the values after adding noise, be it to
230+
``[0, 1]`` for floats or to ``[0, 255]`` for ``uint8``. Setting this parameter to
231+
``False`` may cause unsigned integer overflows with uint8 inputs.
232+
Default is True.
224233
"""
225234

226235
def __init__(self, mean: float = 0.0, sigma: float = 0.1, clip=True) -> None:

torchvision/transforms/v2/functional/_misc.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -195,16 +195,28 @@ def gaussian_noise(inpt: torch.Tensor, mean: float = 0.0, sigma: float = 0.1, cl
195195
@_register_kernel_internal(gaussian_noise, torch.Tensor)
196196
@_register_kernel_internal(gaussian_noise, tv_tensors.Image)
197197
def gaussian_noise_image(image: torch.Tensor, mean: float = 0.0, sigma: float = 0.1, clip: bool = True) -> torch.Tensor:
198-
if not image.is_floating_point():
199-
raise ValueError(f"Input tensor is expected to be in float dtype, got dtype={image.dtype}")
200198
if sigma < 0:
201199
raise ValueError(f"sigma shouldn't be negative. Got {sigma}")
202200

203-
noise = mean + torch.randn_like(image) * sigma
204-
out = image + noise
205-
if clip:
206-
out = torch.clamp(out, 0, 1)
207-
return out
201+
if image.is_floating_point():
202+
noise = mean + torch.randn_like(image) * sigma
203+
out = image + noise
204+
if clip:
205+
out = torch.clamp(out, 0, 1)
206+
return out
207+
208+
elif image.dtype == torch.uint8:
209+
# Convert to intermediate dtype int16 to add to input more efficiently
210+
# See https://github.com/pytorch/vision/pull/9169 for alternative implementations and benchmark
211+
noise = ((mean * 255) + torch.randn_like(image, dtype=torch.float32) * (sigma * 255)).to(torch.int16)
212+
out = image + noise
213+
214+
if clip:
215+
out = torch.clamp(out, 0, 255)
216+
return out.to(torch.uint8)
217+
218+
else:
219+
raise ValueError(f"Input tensor is expected to be in uint8 or float dtype, got dtype={image.dtype}")
208220

209221

210222
@_register_kernel_internal(gaussian_noise, tv_tensors.Video)

0 commit comments

Comments
 (0)