forked from daviddao/spatial-transformer-tensorflow
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathantialiasing.py
More file actions
216 lines (175 loc) · 8.22 KB
/
antialiasing.py
File metadata and controls
216 lines (175 loc) · 8.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
import tensorflow as tf
from .spatial_transformer import RestrictedTransformer
def gaussian_function(x, sigmas):
    """Evaluate unnormalized 1-d Gaussians at offsets ``x``, one row per sigma.

    ``sigmas`` gains a trailing axis so it broadcasts against ``x``.
    """
    variances = tf.square(tf.expand_dims(sigmas, -1))
    return tf.exp(-0.5 * tf.square(x) / variances)
def central_pixel_offsets(size):
    """Offsets of ``size`` pixel centers from the window center.

    E.g. size 5 gives [-2, -1, 0, 1, 2]; an even size gives half-integer offsets.
    """
    half_extent = tf.cast(size - 1, tf.float32) / 2.0
    return tf.linspace(-half_extent, half_extent, size)
def kernels1d(sigmas, size, kernel_function, normalize=True):
    """Build a batch of 1-d kernels of length ``size``, one per sigma.

    Parameters
    ----------
    sigmas : 1d Tensor of kernel widths
    size : scalar kernel length in pixels
    kernel_function : callable (offsets, sigmas) -> kernel values
    normalize : if True, each kernel is scaled to sum to 1
    """
    offsets = central_pixel_offsets(size)
    # tiny epsilon keeps sigma == 0 from dividing by zero
    kernels = kernel_function(offsets, sigmas + 1e-9)
    if not normalize:
        return kernels
    return kernels / tf.reduce_sum(kernels, -1, keepdims=True)
def gaussian_kernels1d(sigmas, size, normalize=True):
    """Batch of 1-d Gaussian kernels of length ``size``, one row per sigma."""
    return kernels1d(sigmas, size, gaussian_function, normalize=normalize)
def kernels2d(sigmas, size, kernels1d_function):
    """TF 2d (separable) normalized kernels for multiple sigmas.

    Parameters
    ----------
    sigmas : a 2d Tensor of shape [n, 2] containing x and y sigma for n kernels
    size : a 1d Tensor giving the [x, y] size of the resulting kernels
    kernels1d_function : the 1d kernel function used to create the 2d kernel
    """
    horizontal = kernels1d_function(sigmas[..., 0], size[0], normalize=False)
    vertical = kernels1d_function(sigmas[..., 1], size[1], normalize=False)
    # outer product of the two unnormalized 1d kernels gives the separable 2d kernel
    separable = tf.einsum("...i,...j->...ij", vertical, horizontal)
    # normalize once in 2d so each kernel sums to 1
    return separable / tf.reduce_sum(separable, (-1, -2), keepdims=True)
def gaussian_kernels2d(sigmas, size):
    """TF 2d normalized Gaussian kernels for multiple sigmas.

    Parameters
    ----------
    sigmas : a 2d Tensor of shape [n, 2] containing x and y sigma for n kernels
    size : a 1d Tensor giving the [x, y] size of the resulting kernels
    """
    return kernels2d(sigmas, size, kernels1d_function=gaussian_kernels1d)
def gaussian_rotated_kernels2d(sigmas, angles, size):
    """TF 2d Gaussian kernels for multiple sigmas and angles.

    Not yet implemented — placeholder for rotated (anisotropic, non-axis-aligned)
    Gaussian kernels.

    Parameters
    ----------
    sigmas : a 2d Tensor of shape [n, 2] containing x and y sigma for n kernels
    angles : a 1d Tensor giving the rotation angle of each of the n kernels
    size : a 1d Tensor giving the [x, y] size of the resulting kernels
    """
    raise NotImplementedError
def next_odd_int(a):
    """Round float Tensor ``a`` up to the nearest odd integer (ceiled odd values stay put)."""
    ceiled = tf.cast(tf.math.ceil(a), tf.int32)
    # add 1 exactly when the ceiled value is even
    return ceiled + (1 - ceiled % 2)
def kernel_size(sigmas, size_factor=6.0):
    """Odd [x, y] kernel size covering ``size_factor`` sigmas of the widest kernel in the batch."""
    widest = tf.reduce_max(sigmas, axis=0)
    return next_odd_int(widest * size_factor)
def filters2d(
    inputs, sigmas, kernels2d_function, size_factor=6.0, padding_mode="CONSTANT", padding_value=0
):
    """Convolve a batch of images, each with its own 2d kernel.

    Image ``n`` is convolved with the kernel built from ``sigmas[n]``;
    all channels of an image share the same kernel.

    Parameters
    ----------
    inputs : a 4d Tensor of shape [n, w, h, c] containing n images
    sigmas : a 2d Tensor of shape [n, 2] containing x and y sigma for n kernels
    kernels2d_function : the function used to create the 2d kernel
    size_factor : size of kernels in sigma
    padding_mode : mode to use for padding prior to convolution (see tf.pad)
    padding_value : fill value used when padding_mode == "CONSTANT"
    """
    size = kernel_size(sigmas, size_factor)
    kernels = kernels2d_function(sigmas, size)
    # depthwise_conv2d applies one filter per "channel"; move the batch axis
    # into the channel slot so each image meets its own kernel
    swapped = tf.transpose(inputs, [3, 1, 2, 0])
    half_y = size[1] // 2
    half_x = size[0] // 2
    paddings = [[0, 0], [half_y, half_y], [half_x, half_x], [0, 0]]
    padded = tf.pad(swapped, paddings, mode=padding_mode, constant_values=padding_value)
    # depthwise filter layout is [kernel_h, kernel_w, in_channels, channel_multiplier]
    filters = tf.expand_dims(tf.transpose(kernels, [1, 2, 0]), -1)
    convolved = tf.nn.depthwise_conv2d(padded, filters, strides=(1, 1, 1, 1), padding="VALID")
    # undo the batch/channel swap
    return tf.transpose(convolved, [3, 1, 2, 0])
def gaussian_filters2d(inputs, sigmas, size_factor=6.0, padding_mode="CONSTANT", padding_value=0):
    """Gaussian-blur a batch of images, one (x, y) sigma pair per image.

    Each channel of an image is convolved with that image's kernel.

    Parameters
    ----------
    inputs : a 4d Tensor of shape [n, w, h, c] containing n images
    sigmas : a 2d Tensor of shape [n, 2] containing x and y sigma for n kernels
    size_factor : size of kernels in sigma
    padding_mode : mode to use for padding prior to convolution (see tf.pad)
    padding_value : fill value used when padding_mode == "CONSTANT"
    """
    return filters2d(
        inputs,
        sigmas,
        gaussian_kernels2d,
        size_factor=size_factor,
        padding_mode=padding_mode,
        padding_value=padding_value,
    )
class AntiAliasingRestrictedTransformer(RestrictedTransformer):
    """Spatial Restricted Transformer Layer with anti-aliasing

    Version of the AffineTransformer class that is restricted to a subset of affine
    transformations: no reflection, no shear, rotations only up to +-180 degrees

    Avoids aliasing by convolving with an appropriate Gaussian kernel
    prior to interpolation
    """

    def __init__(self, out_size, **kwargs):
        """
        Parameters
        ----------
        out_size : tuple of two ints
            The size of the output of the spatial network (height, width)
        name : string
            The name of this layer
        interp_method: 'bilinear' (default) or 'bicubic'
        masked: bool (default: False)
            Should the edges of the transformed images be masked
        cval: int (default: 0)
            Value to mask edges with if masked=True
        preserve_flux: bool (default: False)
            Preserve total flux during transformation
        reverse: bool (default: False)
            Reverse order of the transformation operations:
            Regular order is: scale, then rotate, then translate
            Reverse order is: translate, then rotate, then scale
        prerotate: bool (default: False)
            Perform an additional rotation before the regular operations

        Raises
        ------
        NotImplementedError
            If ``prerotate`` or ``reverse`` is set — the anti-aliasing blur in
            ``call`` assumes the regular operation order with no pre-rotation.
        """
        super().__init__(out_size, **kwargs)
        # Fail fast on configurations the blur-sigma derivation does not cover.
        if self.prerotate:
            raise NotImplementedError("Pre-rotating is not currently supported with anti-aliasing.")
        if self.reverse:
            raise NotImplementedError(
                "Reverse transformation is not currently supported with anti-aliasing."
            )

    def call(self, tensors, mask=None):
        """
        Restricted Transformation of input tensor inp with parameters theta

        Parameters
        ----------
        tensors: list of two floats
            inp: The input tensor should have the shape
            [batch_size, height, width, num_channels].
            theta: The output of the localisation network
            should have the shape [batch_size, 5], where the parameters are:
            x_scale, y_scale, rotation, x_translation, y_translation,
            where the scales are logarithms of the actual scale factor and the rotation
            is given as tan(angle/2). If prerotate=True, then takes an additional parameter:
            the first rotation given as tan(angle/2).
        mask: currently unused

        Notes
        -----
        Reflections are prevented by the use of logarithmic scale parameters.
        Rotations are limited to +-180 degrees by the tan(angle/2) parameterization
        To initialize the network to the identity transform initialize ``theta`` to :
        identity = np.array([0., 0., 0., 0., 0.])
        theta = tf.Variable(initial_value=identity)
        """
        inp, theta = tensors
        shape = tf.shape(inp)
        in_height = shape[1]
        in_width = shape[2]
        out_height = self.out_size[0]
        out_width = self.out_size[1]
        # Resolution change from input to output.
        # NOTE(review): this stacks [height_ratio, width_ratio] (i.e. [y, x]),
        # while theta[:, :2] is documented above as [x_scale, y_scale] and
        # gaussian_filters2d expects [x, y] sigmas — confirm the intended axis
        # ordering is consistent before relying on anisotropic sigmas.
        overall_scale = tf.stack([in_height / out_height, in_width / out_width], axis=0)
        overall_scale = tf.cast(overall_scale, tf.float32)
        # Scale parameters are logarithmic; exponentiate to get actual factors.
        transfo_scale = tf.exp(theta[:, :2])
        scale = overall_scale * transfo_scale
        # Blur only where the image shrinks (scale > 1); sigma grows with the
        # shrink factor and is zero when upscaling.
        sigmas = tf.maximum((scale - 1) / 2, 0)
        # When edges are masked anyway, constant padding is fine; otherwise
        # reflect to avoid darkened borders from the blur.
        padding_mode = "CONSTANT" if self.masked else "REFLECT"
        inp = gaussian_filters2d(inp, sigmas, padding_mode=padding_mode, padding_value=self.cval)
        output = super().call([inp, theta])
        return output