@@ -1,20 +1,24 @@
 import DeepFried2 as df
-from DeepFried2.utils import make_tensor_or_tensors, aslist
+from DeepFried2.utils import tensors_for_ndarrays, flatten
 
 import numpy as _np
 
-class Module:
+class Module(object):
 
     def __init__(self):
-        self.training_mode = True
+        self._mode = 'train'
 
         # The functions are stored in a dictionary whose keys correspond to the
-        # values that `self.training_mode` can take. That way, it would be
-        # trivial to extend to further modes, and the code avoids many branches.
+        # values that `self._mode` can take.
        self._fn_forward = {}
        self._fn_accum_grads = {}
        self._fn_accum_stats = {}
 
+        # These store the most recently received/produced symbolic input and
+        # output expressions, respectively, keyed by the current mode.
+        self._last_symb_inp = {}
+        self._last_symb_out = {}
+
    #def __hash__(self):
    #    raise NotImplementedError("You *need* to reimplement hash, even if it's just python's default. See the documentation for more info.")
 
@@ -38,44 +42,79 @@ def parameters(self, trainable_only=False):
         return params
 
     def evaluate(self):
-        self.training_mode = False
+        self._mode = 'eval'
 
     def training(self):
-        self.training_mode = True
+        self._mode = 'train'
 
     def symb_forward(self, symb_input):
         raise NotImplementedError("`{}` needs to implement `symb_forward` method.".format(df.utils.typename(self)))
 
+    def __call__(self, symb_input):
+        # Keep track of the symbolic inputs/outputs for layers such as `Backward`.
+        self._last_symb_inp[self._mode] = symb_input
+        self._last_symb_out[self._mode] = self.symb_forward(symb_input)
+        return self._last_symb_out[self._mode]
+
     def forward(self, data):
-        if self.training_mode not in self._fn_forward:
-            symb_in = make_tensor_or_tensors(data, 'X')
-            symb_out = self.symb_forward(symb_in)
-            self._fn_forward[self.training_mode] = df.th.function(
-                inputs=aslist(symb_in),
-                outputs=symb_out
+        if self._mode not in self._fn_forward:
+            symb_in = tensors_for_ndarrays(data, 'X')
+            symb_out = self(symb_in)
+            extra_out = self.get_extra_outputs()
+            fn = self._fn_forward[self._mode] = df.th.function(
+                inputs=flatten(symb_in),
+                outputs=flatten(symb_out) + flatten(extra_out)
             )
+            fn._df2_extra = extra_out
 
-        return self._fn_forward[self.training_mode](*aslist(data))
+        fn = self._fn_forward[self._mode]
+        outs = fn(*flatten(data))
+        return self._collect_extra_outputs(fn, outs)
 
-    def accumulate_gradients(self, data_in, data_tgt, loss):
-        if self.training_mode not in self._fn_accum_grads:
-            symb_in = make_tensor_or_tensors(data_in, 'X')
-            symb_tgt = make_tensor_or_tensors(data_tgt, 'T')
-            symb_out = self.symb_forward(symb_in)
-            symb_err = loss.full_symb_forward(symb_out, symb_tgt)
+    def accumulate_gradients(self, data_in, data_tgt, crit):
+        if self._mode not in self._fn_accum_grads:
+            symb_in = tensors_for_ndarrays(data_in, 'X')
+            symb_tgt = tensors_for_ndarrays(data_tgt, 'T')
+            symb_out = self(symb_in)
+            symb_cost = crit(symb_out, symb_tgt)
+            extra_out = self.get_extra_outputs() + crit.get_extra_outputs()
 
             params = self.parameters(trainable_only=True)
-            symb_grads = df.th.grad(cost=symb_err, wrt=[p.param for p in params])
+            symb_grads = df.th.grad(cost=symb_cost, wrt=[p.param for p in params])
             grads_updates = [(p.grad, p.grad + symb_grad) for p, symb_grad in zip(params, symb_grads)]
 
-            self._fn_accum_grads[self.training_mode] = df.th.function(
-                inputs=aslist(symb_in) + aslist(symb_tgt),
-                outputs=symb_err,
+            fn = self._fn_accum_grads[self._mode] = df.th.function(
+                inputs=flatten(symb_in) + flatten(symb_tgt),
+                outputs=flatten(symb_cost) + flatten(extra_out),
                 updates=grads_updates
             )
+            fn._df2_extra = extra_out
+
+        fn = self._fn_accum_grads[self._mode]
+        args = flatten(data_in) + flatten(data_tgt)
+        outs = fn(*args)
+        return self._collect_extra_outputs(fn, outs)
+
+    def get_extra_outputs(self):
+        """
+        Return a list of Theano expressions which will be passed as additional
+        `outputs` parameters. The computed value will be stored in the
+        expression's `val` attribute.
+
+        Guaranteed to be called after `symb_forward`.
+        """
+        return []
+
+    def _collect_extra_outputs(self, fn, vals):
+        # The number of non-extra outputs.
+        nout = len(vals) - len(fn._df2_extra)
+
+        # Store the extra outputs in each expression's `val` attribute so
+        # that the modules which asked for them can retrieve them.
+        for out, val in zip(fn._df2_extra, vals[nout:]):
+            out.val = val
 
-        args = aslist(data_in) + aslist(data_tgt)
-        return self._fn_accum_grads[self.training_mode](*args)
+        return vals[:nout] if nout > 1 else vals[0]
 
     def get_stat_updates(self):
         """
@@ -88,12 +127,12 @@ def get_stat_updates(self):
         return []
 
     def accumulate_statistics(self, data_in):
-        if self.training_mode not in self._fn_accum_stats:
-            symb_in = make_tensor_or_tensors(data_in, 'X')
+        if self._mode not in self._fn_accum_stats:
+            symb_in = tensors_for_ndarrays(data_in, 'X')
 
             # Call forward once so it can compute some variables it'll actually
             # use in the stat updates collection.
-            self.symb_forward(symb_in)
+            self(symb_in)
 
             stat_updates = self.get_stat_updates()
             if not stat_updates:
@@ -117,12 +156,12 @@ def accumulate_statistics(self, data_in):
                     print("WARNING: Dropped the following stat-update because that variable got multiple updates: {}".format(upd[0]))
             stat_updates = uniq_updates
 
-            self._fn_accum_stats[self.training_mode] = df.th.function(
-                inputs=aslist(symb_in),
+            self._fn_accum_stats[self._mode] = df.th.function(
+                inputs=flatten(symb_in),
                 updates=stat_updates
             )
 
-        self._fn_accum_stats[self.training_mode](*aslist(data_in))
+        self._fn_accum_stats[self._mode](*flatten(data_in))
 
     def clear(self):
         self._fn_forward.clear()
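
For reference, a minimal, Theano-free sketch (not part of the diff) of the mode-keyed lazy compilation that `forward`, `accumulate_gradients`, and `accumulate_statistics` all follow after this change: one compiled function per value of `self._mode`, built on first use and reused afterwards. `ModeCachedForward` and `compile_fn` are hypothetical stand-ins; in the real code, `df.th.function` plays the role of `compile_fn`.

class ModeCachedForward(object):
    def __init__(self, compile_fn):
        self._mode = 'train'           # switched by training()/evaluate()
        self._compile_fn = compile_fn  # stand-in for df.th.function
        self._fn_forward = {}          # mode -> compiled function

    def evaluate(self):
        self._mode = 'eval'

    def training(self):
        self._mode = 'train'

    def forward(self, data):
        # Compile at most once per mode; later calls reuse the cached function.
        if self._mode not in self._fn_forward:
            self._fn_forward[self._mode] = self._compile_fn(self._mode)
        return self._fn_forward[self._mode](data)

m = ModeCachedForward(lambda mode: lambda x: (mode, 2 * x))
print(m.forward(3))  # ('train', 6)
m.evaluate()
print(m.forward(3))  # ('eval', 6), compiled separately for 'eval' mode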
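
And a sketch of the new extra-outputs contract: `get_extra_outputs` returns expressions that are appended to the compiled function's output list, and `_collect_extra_outputs` stashes each computed value on the corresponding expression's `val` attribute before returning the main result. The mock classes below are hypothetical; only the `_df2_extra` attribute, the `val` attribute, and the `nout` logic mirror the diff.

class FakeExpr(object):
    # Stand-in for a Theano expression requested as an extra output.
    def __init__(self, compute):
        self.compute = compute  # produces the concrete value at call time
        self.val = None         # filled in by collect_extra_outputs

class FakeFn(object):
    # Stand-in for a compiled function: main outputs first, extras last.
    def __init__(self, main, extra):
        self.main = main
        self._df2_extra = extra

    def __call__(self, x):
        return [self.main(x)] + [e.compute(x) for e in self._df2_extra]

def collect_extra_outputs(fn, vals):
    # Same logic as Module._collect_extra_outputs in the diff above.
    nout = len(vals) - len(fn._df2_extra)
    for out, val in zip(fn._df2_extra, vals[nout:]):
        out.val = val
    return vals[:nout] if nout > 1 else vals[0]

mean = FakeExpr(lambda x: sum(x) / len(x))         # the "extra" output
fn = FakeFn(lambda x: [2 * v for v in x], [mean])  # main output doubles x

print(collect_extra_outputs(fn, fn([1.0, 2.0, 3.0])))  # [2.0, 4.0, 6.0]
print(mean.val)                                        # 2.0, stashed on the expression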