Commit e309711

Merge pull request #88 from lucasb-eyer/bias-init

Unifies optional model parameter handling.

2 parents: 7beb8da + b5535ad

6 files changed (+24 -28 lines)

DeepFried2/Module.py
Lines changed: 11 additions & 2 deletions

@@ -22,15 +22,24 @@ def __init__(self):
     #def __hash__(self):
     #    raise NotImplementedError("You *need* to reimplement hash, even if it's just python's default. See the documentation for more info.")
 
-    def _addparam(self, *a, **kw):
+    def _addparam(self, shape, init, *a, **kw):
+        assert init is not None and init is not False, "`{}` requires parameter `{}` to have an initializer.".format(df.utils.typename(self), kw.get("name", "unnamed"))
+
         # Add it here because many don't even have params. This avoids misuse.
         if not hasattr(self, '_params'):
             self._params = []
 
-        param = df.Param(*a, **kw)
+        param = df.Param(shape, init, *a, **kw)
         self._params.append(param)
         return param
 
+    def _addparam_optional(self, shape, init, *a, **kw):
+        if init is None or init is False:
+            return None
+
+        return self._addparam(shape, init, *a, **kw)
+
+
     def zero_grad_parameters(self):
         for p in self.parameters(trainable_only=True):
             p.zero_grad()
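The two helpers split responsibilities: `_addparam` now asserts that a mandatory parameter actually comes with an initializer, while `_addparam_optional` maps `None`/`False` to "no parameter at all". Note the identity checks: with the new default `bias=0`, the old membership test `bias not in (None, False)` would have matched (since `0 == False` in Python) and silently disabled the bias. A minimal self-contained sketch of the control flow, with `Param` reduced to a stub (the real df.Param wraps a Theano shared variable):

    import numpy as np

    class Param:
        # Stub standing in for df.Param; only resolves the initializer.
        def __init__(self, shape, init, name=None, decay=True):
            if np.isscalar(init):
                # Scalar shortcut, mirroring Param.py's new `elif _np.isscalar(init)` branch.
                value = np.full(shape, init, dtype='float32')
            else:
                value = init(shape, None)  # callable initializer, e.g. df.init.xavier()
            self.value, self.name, self.decay = value, name, decay

    class Module:
        def _addparam(self, shape, init, *a, **kw):
            # Mandatory parameters must come with an initializer.
            assert init is not None and init is not False, \
                "`{}` requires parameter `{}` to have an initializer.".format(
                    type(self).__name__, kw.get("name", "unnamed"))
            if not hasattr(self, '_params'):
                self._params = []
            param = Param(shape, init, *a, **kw)
            self._params.append(param)
            return param

        def _addparam_optional(self, shape, init, *a, **kw):
            # None/False means the parameter is disabled: no Param is created.
            if init is None or init is False:
                return None
            return self._addparam(shape, init, *a, **kw)

    m = Module()
    print(m._addparam_optional(3, 0, name='b').value)   # [0. 0. 0.]  (0 is a valid init)
    print(m._addparam_optional(3, None, name='b'))      # None
    try:
        m._addparam(3, None, name='W')
    except AssertionError as e:
        print(e)  # `Module` requires parameter `W` to have an initializer.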

DeepFried2/Param.py
Lines changed: 4 additions & 2 deletions

@@ -10,10 +10,12 @@ def __init__(self, shape, init, fan=None, name=None, learn=True, decay=True, dty
         self.fan = fan
         self.decay = decay
 
-        # Support a useful shortcut for initializing with an array-like:
-        # TODO: It would be nicer to use Python's buffer-interface.
+        # Support a couple of useful shortcuts for initializing:
         if hasattr(init, 'shape') and hasattr(init, 'dtype'):
+            # TODO: It would be nicer to use Python's buffer-interface.
             self.init = df.init.array(init)
+        elif _np.isscalar(init):
+            self.init = df.init.const(init)
 
         val = self.init(self.shape, self.fan).astype(dtype)
         self.param = df.th.shared(val, name=name, **kw)
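The dispatch in `Param.__init__` is duck-typed: anything exposing `.shape` and `.dtype` is treated as an array-like, a bare scalar becomes a constant initializer, and everything else is assumed to already be a callable initializer such as `df.init.xavier()`. A runnable sketch of just that dispatch, using hypothetical minimal stand-ins for `df.init.array` and `df.init.const`:

    import numpy as np

    def const(value):
        # Stand-in for df.init.const: fill the requested shape with a constant.
        return lambda shape, fan=None: np.full(shape, value, dtype='float32')

    def array(arr):
        # Stand-in for df.init.array: hand back the given values.
        return lambda shape, fan=None: np.asarray(arr, dtype='float32').reshape(shape)

    def resolve_init(init):
        if hasattr(init, 'shape') and hasattr(init, 'dtype'):  # array-like
            return array(init)
        elif np.isscalar(init):                                # e.g. bias=0
            return const(init)
        return init  # already a callable initializer

    print(resolve_init(0)((3,)))            # [0. 0. 0.]
    print(resolve_init(np.eye(2))((2, 2)))  # [[1. 0.] [0. 1.]]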

DeepFried2/layers/BackwardsConvolutionCUDNN.py
Lines changed: 2 additions & 6 deletions

@@ -7,7 +7,7 @@
 
 
 class BackwardsConvolutionCUDNN(df.Module):
-    def __init__(self, nchan_in, nchan_out, filter_size, stride=1, border=0, mode='cross', init=df.init.xavier(), bias=df.init.const(0)):
+    def __init__(self, nchan_in, nchan_out, filter_size, stride=1, border=0, mode='cross', init=df.init.xavier(), bias=0):
         """
         This is the backwards path through a convolution, sometimes also
         referred to as transposed convolution and (wrongly) deconvolution.
@@ -43,11 +43,7 @@ def __init__(self, nchan_in, nchan_out, filter_size, stride=1, border=0, mode='c
         w_fan = (np.prod(self.filter_size)*nchan_out, np.prod(self.filter_size)*nchan_in)
         w_name = ('Wconv_{},{}@{}' + 'x{}'*(len(w_shape) - 3)).format(*w_shape)
         self.W = self._addparam(w_shape, init, fan=w_fan, name=w_name)
-
-        if bias not in (None, False):
-            self.b = self._addparam(nchan_out, bias, decay=False, name='bconv_{}'.format(nchan_out))
-        else:
-            self.b = None
+        self.b = self._addparam_optional(nchan_out, bias, decay=False, name='bconv_{}'.format(nchan_out))
 
 
     def symb_forward(self, symb_input):

DeepFried2/layers/Linear.py
Lines changed: 2 additions & 5 deletions

@@ -5,18 +5,15 @@
 
 class Linear(df.Module):
 
-    def __init__(self, nin, nout, init=df.init.xavier(), bias=df.init.const(0)):
+    def __init__(self, nin, nout, init=df.init.xavier(), bias=0):
         df.Module.__init__(self)
 
         self.nin = nin
         self.nout = nout
 
         shape = (nin, nout)
         self.W = self._addparam(shape, init, fan=shape, name='Wlin_{}x{}'.format(*shape))
-        if bias not in (None, False):
-            self.b = self._addparam(nout, bias, decay=False, name='blin_{}'.format(nout))
-        else:
-            self.b = None
+        self.b = self._addparam_optional(nout, bias, decay=False, name='blin_{}'.format(nout))
 
     def symb_forward(self, symb_input):
         out = df.T.dot(symb_input, self.W.param)
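For callers, the signatures of the four layers touched here change from `bias=df.init.const(0)` to `bias=0`, which the scalar shortcut in `Param` resolves to the same constant initializer; `None` or `False` still disables the bias entirely. Assuming the package is importable as `df` with the layers exposed at the top level, usage would look like:

    import DeepFried2 as df

    lin          = df.Linear(128, 64)                           # bias=0: zero-initialized bias
    lin_scalar   = df.Linear(128, 64, bias=0.1)                 # any scalar works now
    lin_explicit = df.Linear(128, 64, bias=df.init.const(0.1))  # explicit initializer still fine
    lin_nobias   = df.Linear(128, 64, bias=None)                # no bias at all; lin_nobias.b is None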

DeepFried2/layers/SpatialConvolution.py
Lines changed: 2 additions & 6 deletions

@@ -4,7 +4,7 @@
 from theano.tensor.nnet import conv3d2d
 
 class SpatialConvolution(df.Module):
-    def __init__(self, nchan_in, nchan_out, filter_size, stride=1, border='valid', mode='cross', init=df.init.xavier(), bias=df.init.const(0), imshape=None):
+    def __init__(self, nchan_in, nchan_out, filter_size, stride=1, border='valid', mode='cross', init=df.init.xavier(), bias=0, imshape=None):
         # See `SpatialConvolutionCUDNN` comment for the `mode` parameter. Only works in 2D
         df.Module.__init__(self)
         self.nchan_in = nchan_in
@@ -31,11 +31,7 @@ def __init__(self, nchan_in, nchan_out, filter_size, stride=1, border='valid', m
         w_fan = (nchan_in*np.prod(self.filter_size), nchan_out*np.prod(self.filter_size))
         w_name = ('Wconv_{},{}@{}' + 'x{}'*(len(self.w_shape) - 3)).format(*self.w_shape)
         self.W = self._addparam(self.w_shape, init, fan=w_fan, name=w_name)
-
-        if bias not in (None, False):
-            self.b = self._addparam(nchan_out, bias, decay=False, name='bconv_{}'.format(nchan_out))
-        else:
-            self.b = None
+        self.b = self._addparam_optional(nchan_out, bias, decay=False, name='bconv_{}'.format(nchan_out))
 
 
     def symb_forward(self, symb_input):

DeepFried2/layers/SpatialConvolutionCUDNN.py
Lines changed: 3 additions & 7 deletions

@@ -5,15 +5,15 @@
 import numpy as np
 
 class SpatialConvolutionCUDNN(df.Module):
-    def __init__(self, nchan_in, nchan_out, filter_size, stride=1, border=0, mode='cross', init=df.init.xavier(), bias=df.init.const(0)):
+    def __init__(self, nchan_in, nchan_out, filter_size, stride=1, border=0, mode='cross', init=df.init.xavier(), bias=0):
         # mode='cross' is the default in Lasagne[1], Torch[2], matConvNet[3], Caffe[4].
         #
         # 1: https://github.com/Lasagne/Lasagne/blob/63d44a0d/lasagne/layers/dnn.py#L299
         # 2: https://github.com/soumith/cudnn.torch/blob/840f0228/SpatialConvolution.lua#L83
         # 3: https://github.com/vlfeat/matconvnet/blob/b7dd9c96/matlab/src/bits/impl/nnconv_cudnn.cu#L133
         # 4: https://github.com/BVLC/caffe/blob/50ab52cb/include/caffe/util/cudnn.hpp#L104
         df.Module.__init__(self)
-        
+
         # Catch a probably common bug while we transition the API.
         assert isinstance(filter_size, (list, tuple)), "New conv API: filter_size needs to be a tuple!"
 
@@ -33,11 +33,7 @@ def __init__(self, nchan_in, nchan_out, filter_size, stride=1, border=0, mode='c
         w_fan = (np.prod(self.filter_size)*nchan_in, np.prod(self.filter_size)*nchan_out)
         w_name = ('Wconv_{},{}@{}' + 'x{}'*(len(w_shape) - 3)).format(*w_shape)
         self.W = self._addparam(w_shape, init, fan=w_fan, name=w_name)
-
-        if bias not in (None, False):
-            self.b = self._addparam(nchan_out, bias, decay=False, name='bconv_{}'.format(nchan_out))
-        else:
-            self.b = None
+        self.b = self._addparam_optional(nchan_out, bias, decay=False, name='bconv_{}'.format(nchan_out))
 
 
     def symb_forward(self, symb_input):
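Independent of the bias change, all three convolution layers pass an explicit fan pair to the weight initializer: fan-in and fan-out are the receptive-field size times the respective channel count. A worked example for a hypothetical 3x3 filter with 16 input and 32 output channels (the scale shown is the common Glorot formula, not necessarily what df.init.xavier() computes internally):

    import numpy as np

    filter_size, nchan_in, nchan_out = (3, 3), 16, 32

    # Fan pair as in SpatialConvolutionCUDNN.__init__:
    w_fan = (np.prod(filter_size)*nchan_in, np.prod(filter_size)*nchan_out)
    print(w_fan)  # (144, 288)

    # Common Glorot/Xavier standard deviation derived from such a fan pair:
    print(np.sqrt(2.0 / (w_fan[0] + w_fan[1])))  # ~0.068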
