forked from daviddao/spatial-transformer-tensorflow
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathantialiasing.py
More file actions
216 lines (175 loc) · 8.22 KB
/
antialiasing.py
File metadata and controls
216 lines (175 loc) · 8.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
import tensorflow as tf
from .spatial_transformer import RestrictedTransformer
def gaussian_function(x, sigmas):
    """Evaluate unnormalized 1-d Gaussians at offsets ``x``, one row per sigma.

    ``sigmas`` gains a trailing axis so it broadcasts against ``x``.
    """
    variances = tf.square(tf.expand_dims(sigmas, -1))
    return tf.exp(-0.5 * tf.square(x) / variances)
def central_pixel_offsets(size):
    """Offsets of ``size`` pixel centers from the window center.

    E.g. size 5 gives [-2, -1, 0, 1, 2]; an even size gives half-integer offsets.
    """
    half_extent = tf.cast(size - 1, tf.float32) / 2.0
    return tf.linspace(-half_extent, half_extent, size)
def kernels1d(sigmas, size, kernel_function, normalize=True):
    """Build a batch of 1-d kernels of length ``size``, one per sigma.

    Parameters
    ----------
    sigmas : 1d Tensor of kernel widths
    size : scalar kernel length in pixels
    kernel_function : callable (offsets, sigmas) -> kernel values
    normalize : if True, each kernel is scaled to sum to 1
    """
    offsets = central_pixel_offsets(size)
    # tiny epsilon keeps sigma == 0 from dividing by zero
    kernels = kernel_function(offsets, sigmas + 1e-9)
    if not normalize:
        return kernels
    return kernels / tf.reduce_sum(kernels, -1, keepdims=True)
def gaussian_kernels1d(sigmas, size, normalize=True):
    """Batch of 1-d Gaussian kernels of length ``size``, one row per sigma."""
    return kernels1d(sigmas, size, gaussian_function, normalize=normalize)
def kernels2d(sigmas, size, kernels1d_function):
    """TF 2d (separable) normalized kernels for multiple sigmas.

    Parameters
    ----------
    sigmas : a 2d Tensor of shape [n, 2] containing x and y sigma for n kernels
    size : a 1d Tensor giving the [x, y] size of the resulting kernels
    kernels1d_function : the 1d kernel function used to create the 2d kernel
    """
    horizontal = kernels1d_function(sigmas[..., 0], size[0], normalize=False)
    vertical = kernels1d_function(sigmas[..., 1], size[1], normalize=False)
    # outer product of the two unnormalized 1d kernels gives the separable 2d kernel
    separable = tf.einsum("...i,...j->...ij", vertical, horizontal)
    # normalize once in 2d so each kernel sums to 1
    return separable / tf.reduce_sum(separable, (-1, -2), keepdims=True)
def gaussian_kernels2d(sigmas, size):
    """TF 2d normalized Gaussian kernels for multiple sigmas.

    Parameters
    ----------
    sigmas : a 2d Tensor of shape [n, 2] containing x and y sigma for n kernels
    size : a 1d Tensor giving the [x, y] size of the resulting kernels
    """
    return kernels2d(sigmas, size, kernels1d_function=gaussian_kernels1d)
def gaussian_rotated_kernels2d(sigmas, angles, size):
    """TF 2d Gaussian kernels for multiple sigmas and angles.

    Not yet implemented — placeholder for rotated (anisotropic, non-axis-aligned)
    Gaussian kernels.

    Parameters
    ----------
    sigmas : a 2d Tensor of shape [n, 2] containing x and y sigma for n kernels
    angles : a 1d Tensor giving the rotation angle of each of the n kernels
    size : a 1d Tensor giving the [x, y] size of the resulting kernels
    """
    raise NotImplementedError
def next_odd_int(a):
    """Round float Tensor ``a`` up to the nearest odd integer (ceiled odd values stay put)."""
    ceiled = tf.cast(tf.math.ceil(a), tf.int32)
    # add 1 exactly when the ceiled value is even
    return ceiled + (1 - ceiled % 2)
def kernel_size(sigmas, size_factor=6.0):
    """Odd [x, y] kernel size covering ``size_factor`` sigmas of the widest kernel in the batch."""
    widest = tf.reduce_max(sigmas, axis=0)
    return next_odd_int(widest * size_factor)
def filters2d(
    inputs, sigmas, kernels2d_function, size_factor=6.0, padding_mode="CONSTANT", padding_value=0
):
    """Convolve a batch of images, each with its own 2d kernel.

    Image ``n`` is convolved with the kernel built from ``sigmas[n]``;
    all channels of an image share the same kernel.

    Parameters
    ----------
    inputs : a 4d Tensor of shape [n, w, h, c] containing n images
    sigmas : a 2d Tensor of shape [n, 2] containing x and y sigma for n kernels
    kernels2d_function : the function used to create the 2d kernel
    size_factor : size of kernels in sigma
    padding_mode : mode to use for padding prior to convolution (see tf.pad)
    padding_value : fill value used when padding_mode == "CONSTANT"
    """
    size = kernel_size(sigmas, size_factor)
    kernels = kernels2d_function(sigmas, size)
    # depthwise_conv2d applies one filter per "channel"; move the batch axis
    # into the channel slot so each image meets its own kernel
    swapped = tf.transpose(inputs, [3, 1, 2, 0])
    half_y = size[1] // 2
    half_x = size[0] // 2
    paddings = [[0, 0], [half_y, half_y], [half_x, half_x], [0, 0]]
    padded = tf.pad(swapped, paddings, mode=padding_mode, constant_values=padding_value)
    # depthwise filter layout is [kernel_h, kernel_w, in_channels, channel_multiplier]
    filters = tf.expand_dims(tf.transpose(kernels, [1, 2, 0]), -1)
    convolved = tf.nn.depthwise_conv2d(padded, filters, strides=(1, 1, 1, 1), padding="VALID")
    # undo the batch/channel swap
    return tf.transpose(convolved, [3, 1, 2, 0])
def gaussian_filters2d(inputs, sigmas, size_factor=6.0, padding_mode="CONSTANT", padding_value=0):
    """Gaussian-blur a batch of images, one (x, y) sigma pair per image.

    Each channel of an image is convolved with that image's kernel.

    Parameters
    ----------
    inputs : a 4d Tensor of shape [n, w, h, c] containing n images
    sigmas : a 2d Tensor of shape [n, 2] containing x and y sigma for n kernels
    size_factor : size of kernels in sigma
    padding_mode : mode to use for padding prior to convolution (see tf.pad)
    padding_value : fill value used when padding_mode == "CONSTANT"
    """
    return filters2d(
        inputs,
        sigmas,
        gaussian_kernels2d,
        size_factor=size_factor,
        padding_mode=padding_mode,
        padding_value=padding_value,
    )
class AntiAliasingRestrictedTransformer(RestrictedTransformer):
    """Spatial Restricted Transformer Layer with anti-aliasing

    Version of the AffineTransformer class that is restricted to a subset of affine
    transformations: no reflection, no shear, rotations only up to +-180 degrees

    Avoids aliasing by convolving with an appropriate Gaussian kernel
    prior to interpolation
    """

    def __init__(self, out_size, **kwargs):
        """
        Parameters
        ----------
        out_size : tuple of two ints
            The size of the output of the spatial network (height, width)
        name : string
            The name of this layer
        interp_method: 'bilinear' (default) or 'bicubic'
        masked: bool (default: False)
            Should the edges of the transformed images be masked
        cval: int (default: 0)
            Value to mask edges with if masked=True
        preserve_flux: bool (default: False)
            Preserve total flux during transformation
        reverse: bool (default: False)
            Reverse order of the transformation operations:
            Regular order is: scale, then rotate, then translate
            Reverse order is: translate, then rotate, then scale
        prerotate: bool (default: False)
            Perform an additional rotation before the regular operations

        Raises
        ------
        NotImplementedError
            If ``prerotate`` or ``reverse`` is set — the anti-aliasing blur in
            ``call`` assumes the regular operation order with no pre-rotation.
        """
        super().__init__(out_size, **kwargs)
        # Fail fast on configurations the blur-sigma derivation does not cover.
        if self.prerotate:
            raise NotImplementedError("Pre-rotating is not currently supported with anti-aliasing.")
        if self.reverse:
            raise NotImplementedError(
                "Reverse transformation is not currently supported with anti-aliasing."
            )

    def call(self, tensors, mask=None):
        """
        Restricted Transformation of input tensor inp with parameters theta

        Parameters
        ----------
        tensors: list of two floats
            inp: The input tensor should have the shape
            [batch_size, height, width, num_channels].
            theta: The output of the localisation network
            should have the shape [batch_size, 5], where the parameters are:
            x_scale, y_scale, rotation, x_translation, y_translation,
            where the scales are logarithms of the actual scale factor and the rotation
            is given as tan(angle/2). If prerotate=True, then takes an additional parameter:
            the first rotation given as tan(angle/2).
        mask: currently unused

        Notes
        -----
        Reflections are prevented by the use of logarithmic scale parameters.
        Rotations are limited to +-180 degrees by the tan(angle/2) parameterization
        To initialize the network to the identity transform initialize ``theta`` to :
        identity = np.array([0., 0., 0., 0., 0.])
        theta = tf.Variable(initial_value=identity)
        """
        inp, theta = tensors
        shape = tf.shape(inp)
        in_height = shape[1]
        in_width = shape[2]
        out_height = self.out_size[0]
        out_width = self.out_size[1]
        # Resolution change from input to output.
        # NOTE(review): this stacks [height_ratio, width_ratio] (i.e. [y, x]),
        # while theta[:, :2] is documented above as [x_scale, y_scale] and
        # gaussian_filters2d expects [x, y] sigmas — confirm the intended axis
        # ordering is consistent before relying on anisotropic sigmas.
        overall_scale = tf.stack([in_height / out_height, in_width / out_width], axis=0)
        overall_scale = tf.cast(overall_scale, tf.float32)
        # Scale parameters are logarithmic; exponentiate to get actual factors.
        transfo_scale = tf.exp(theta[:, :2])
        scale = overall_scale * transfo_scale
        # Blur only where the image shrinks (scale > 1); sigma grows with the
        # shrink factor and is zero when upscaling.
        sigmas = tf.maximum((scale - 1) / 2, 0)
        # When edges are masked anyway, constant padding is fine; otherwise
        # reflect to avoid darkened borders from the blur.
        padding_mode = "CONSTANT" if self.masked else "REFLECT"
        inp = gaussian_filters2d(inp, sigmas, padding_mode=padding_mode, padding_value=self.cval)
        output = super().call([inp, theta])
        return output