Add mode and alternative border parameters to CUDNN conv.

lucasb-eyer · lucasb-eyer · commit c49206a7563c · 2015-08-05T11:53:37.000-04:00
- `mode` now defaults to `cross`, which is what all other packages use;
  this allows loading their weights directly, without having to
  mirror/flip them first.
- Adds an alternative `border` parameter for these two reasons:
    1. So that we have a flexible shortcut for 'same'
    2. So that Theano can use some tricks for 'valid' and 'full'
  The old interface (`pad_w`/`pad_h`) is still available, and is used whenever `border` is left at its default of `None`.
diff --git a/DeepFried2/layers/SpatialConvolutionCUDNN.py b/DeepFried2/layers/SpatialConvolutionCUDNN.py
@@ -8,18 +8,31 @@
 
 
 class SpatialConvolutionCUDNN(Module):
-    def __init__(self, n_input_plane, n_output_plane, k_w, k_h, d_w=1, d_h=1, pad_w=0, pad_h=0, with_bias=True, initW=xavier(), initB=const(0)):
+    def __init__(self, n_input_plane, n_output_plane, k_w, k_h, d_w=1, d_h=1, pad_w=0, pad_h=0, mode='cross', with_bias=True, initW=xavier(), initB=const(0), border=None):
+        # mode='cross' is the default in Lasagne[1], Torch[2], MatConvNet[3], Caffe[4].
+        #
+        # 1: https://github.com/Lasagne/Lasagne/blob/63d44a0d/lasagne/layers/dnn.py#L299
+        # 2: https://github.com/soumith/cudnn.torch/blob/840f0228/SpatialConvolution.lua#L83
+        # 3: https://github.com/vlfeat/matconvnet/blob/b7dd9c96/matlab/src/bits/impl/nnconv_cudnn.cu#L133
+        # 4: https://github.com/BVLC/caffe/blob/50ab52cb/include/caffe/util/cudnn.hpp#L104
+        #
+        # `border` is an alternative way to specify `pad_w` and `pad_h` so that Theano strings can be used. Better documentation to follow soon.
         Module.__init__(self)
         self.n_input_plane = n_input_plane
         self.n_output_plane = n_output_plane
         self.k_w = k_w
         self.k_h = k_h
         self.d_w = d_w
         self.d_h = d_h
-        self.pad_w = pad_w
-        self.pad_h = pad_h
+        self.mode = mode
         self.with_bias = with_bias
 
+        # 'same' is a (common) shortcut for "zero-padding so that outshape == inshape".
+        self.border = border or (pad_h, pad_w)
+        if self.border == 'same':
+            assert self.k_w % 2 == 1 and self.k_h % 2 == 1, "'same' convolution only supports odd filter sizes."
+            self.border = ((self.k_h-1)//2, (self.k_w-1)//2)
+
         w_shape = (n_output_plane, n_input_plane, k_h, k_w)
         w_fan = (n_input_plane*k_w*k_h, n_output_plane*k_w*k_h)
 
@@ -30,8 +43,9 @@ def __init__(self, n_input_plane, n_output_plane, k_w, k_h, d_w=1, d_h=1, pad_w=
     def symb_forward(self, symb_input):
         conv_output = _dnn.dnn_conv(img=symb_input,
                                     kerns=self.weight,
-                                    border_mode=(self.pad_h, self.pad_w),
-                                    subsample=(self.d_h, self.d_w))
+                                    border_mode=self.border,
+                                    subsample=(self.d_h, self.d_w),
+                                    conv_mode=self.mode)
 
         if self.with_bias:
             return conv_output + self.bias.dimshuffle('x', 0, 'x', 'x')
diff --git a/examples/MNIST/model.py b/examples/MNIST/model.py
@@ -20,12 +20,12 @@ def net():
 def lenet():
     model = df.Sequential()
     model.add(df.Reshape(-1, 1, 28, 28))
-    model.add(df.SpatialConvolutionCUDNN(1, 32, 5, 5, 1, 1, 2, 2, with_bias=False))
+    model.add(df.SpatialConvolutionCUDNN(1, 32, 5, 5, 1, 1, border='same', with_bias=False))
     model.add(df.BatchNormalization(32))
     model.add(df.ReLU())
     model.add(df.SpatialMaxPoolingCUDNN(2, 2))
 
-    model.add(df.SpatialConvolutionCUDNN(32, 64, 5, 5, 1, 1, 2, 2, with_bias=False))
+    model.add(df.SpatialConvolutionCUDNN(32, 64, 5, 5, 1, 1, border='same', with_bias=False))
     model.add(df.BatchNormalization(64))
     model.add(df.ReLU())
     model.add(df.SpatialMaxPoolingCUDNN(2, 2))