import theano as _th
import theano.tensor as _T


class Module:
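    # Base class for all layers: tracks training/evaluation mode, collects
    # (parameter, gradient) pairs, and lazily compiles the Theano functions
    # behind `forward` and `accumulate_gradients`.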

    def __init__(self):
        self.training_mode = True

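        # Compiled Theano functions, built lazily on first use.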
        self.fn_forward = None
        self.fn_accum_grads = None

    def reset(self):
        pass

    #def __hash__(self):
    #    raise NotImplementedError("You *need* to reimplement hash, even if it's just python's default. See the documentation for more info.")

    def zero_grad_parameters(self):
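        # Reset every gradient accumulator to an all-zero array of the same shape and dtype.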
        _, grads = self.parameters()
        for grad in grads:
            grad.set_value(0 * grad.get_value())

    def parameters(self):
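        # Collect (parameter, gradient) shared variables by convention: a layer that
        # defines `weight`/`bias` must also define `grad_weight`/`grad_bias`.
        # Parameters are only collected while in training mode.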
        params, grads = [], []

        if self.training_mode and hasattr(self, 'weight'):
            assert hasattr(self, 'grad_weight'), "The layer {} has a `weight` variable but no `grad_weight`; you probably forgot to implement it.".format(type(self))
            params += [self.weight]
            grads += [self.grad_weight]

        if self.training_mode and hasattr(self, 'bias'):
            assert hasattr(self, 'grad_bias'), "The layer {} has a `bias` variable but no `grad_bias`; you probably forgot to implement it.".format(type(self))
            params += [self.bias]
            grads += [self.grad_bias]

        return params, grads

    def evaluate(self):
        self.training_mode = False

    def training(self):
        self.training_mode = True

    def symb_forward(self, symb_input):
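        # To be implemented by subclasses: map a symbolic input to a symbolic output.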
        raise NotImplementedError

    def forward(self, data):
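        # Compile the forward pass on first use and cache it; the symbolic input
        # type is derived from the dimensionality of the first `data` passed in.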
        if self.fn_forward is None:
            symb_in = _T.TensorType(_th.config.floatX, (False,) * data.ndim)('X')
            symb_out = self.symb_forward(symb_in)
            self.fn_forward = _th.function(inputs=[symb_in], outputs=symb_out)

        return self.fn_forward(data)

    def accumulate_gradients(self, data_in, data_tgt, loss):
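        # On first use, compile a function that evaluates the loss between the
        # module's output and the target and adds its gradients w.r.t. the
        # parameters onto the gradient accumulators.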
        if self.fn_accum_grads is None:
            symb_in = _T.TensorType(_th.config.floatX, (False,) * data_in.ndim)('X')
            symb_tgt = _T.TensorType(_th.config.floatX, (False,) * data_tgt.ndim)('T')
            symb_out = self.symb_forward(symb_in)
            symb_err = loss.symb_forward(symb_out, symb_tgt)

            params, grads = self.parameters()
            symb_grads = _th.grad(cost=symb_err, wrt=params)

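            # Accumulate: add the new gradients onto the existing accumulators
            # rather than overwriting them (see `zero_grad_parameters`).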
            grads_updates = [(grad, grad + symb_grad) for grad, symb_grad in zip(grads, symb_grads)]
            self.fn_accum_grads = _th.function(
                inputs=[symb_in, symb_tgt],
                updates=grads_updates
            )

        self.fn_accum_grads(data_in, data_tgt)