Skip to content

Commit 37bccf0

Browse files
committed
Merge branch 'master' of github.com:lucasb-eyer/DeepFried2
2 parents 58efd08 + ac54a8f commit 37bccf0

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

51 files changed

+997
-267
lines changed

DeepFried2/Container.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ def __init__(self, *modules):
99
df.Module.__init__(self)
1010

1111
self.modules = []
12-
self.add(*modules)
12+
if len(modules):
13+
self.add(*modules)
1314

1415
def evaluate(self):
1516
df.Module.evaluate(self)
@@ -30,6 +31,9 @@ def parameters(self, *a, **kw):
3031
# e.g. do weight sharing.
3132
return list(_OrderedDict.fromkeys(params).keys())
3233

34+
def get_extra_outputs(self):
35+
return list(_chain.from_iterable(m.get_extra_outputs() for m in self.modules))
36+
3337
def get_stat_updates(self):
3438
return list(_chain.from_iterable(m.get_stat_updates() for m in self.modules))
3539

@@ -38,17 +42,37 @@ def add(self, *modules):
3842
assert isinstance(m, df.Module), "`{}`s can only contain objects subtyping `df.Module`. You tried to add the following `{}`: {}".format(df.utils.typename(self), df.utils.typename(m), m)
3943
self.modules += modules
4044

45+
# Return the added modules to enable some nicer usage patterns.
46+
return modules
47+
4148
def __getitem__(self, key):
4249
if isinstance(key, slice):
43-
return type(self)(*df.utils.aslist(self.modules[key]))
50+
return type(self)(*self.modules[key])
4451
elif isinstance(key, (list, tuple)):
4552
return type(self)(*[self.modules[k] for k in key])
4653
else:
4754
return self.modules[key]
4855

56+
def __len__(self):
57+
# This one is needed to make __getitem__ work with negative indices.
58+
return len(self.modules)
59+
4960
def __getstate__(self):
5061
return [m.__getstate__() for m in self.modules]
5162

5263
def __setstate__(self, state):
5364
for m, s in zip(self.modules, state):
5465
m.__setstate__(s)
66+
67+
68+
class SingleModuleContainer(Container):
69+
def __init__(self, module):
70+
Container.__init__(self, module)
71+
72+
def add(self, mod):
73+
if len(self.modules):
74+
raise TypeError("Container `{}` can't hold more than one module.".format(df.utils.typename(self)))
75+
Container.add(self, mod)
76+
77+
def symb_forward(self, symb_input):
78+
return self.modules[0](symb_input)

DeepFried2/Criterion.py

Lines changed: 64 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
import DeepFried2 as df
2-
from DeepFried2.utils import make_tensor_or_tensors, aslist
2+
from DeepFried2.utils import tensors_for_ndarrays, flatten
33

44

5-
class Criterion:
5+
class Criterion(object):
66

77
def __init__(self):
88
self.penalties = []
9+
self.with_weights = False
10+
self._ret_per_sample = False
911
self._fn_forward = {}
1012

1113
def _assert_same_dim(self, symb_input, symb_target):
@@ -23,28 +25,71 @@ def add_penalty(self, weight_or_pen, pen=None):
2325
weight, pen = weight_or_pen, pen
2426
self.penalties.append((weight, pen))
2527

26-
def full_symb_forward(self, symb_input, symb_target):
28+
def __call__(self, symb_input, symb_target, with_penalties=True):
29+
# Possibly extract the weights as 2nd target.
30+
if self.with_weights is True:
31+
symb_target, symb_weights = symb_target
32+
# Or extract a 0/1 weighting using a magic value.
33+
elif self.with_weights is not False:
34+
symb_weights = df.T.neq(symb_target, self.with_weights)
35+
else:
36+
symb_weights = None
37+
2738
cost = self.symb_forward(symb_input, symb_target)
39+
self._per_sample_cost = cost
40+
41+
if symb_weights is not None:
42+
cost = symb_weights * cost
2843

29-
for w, p in self.penalties:
30-
cost += w*p.symb_forward()
44+
# Criteria may return per-sample cost which we will average
45+
# (optionally weighted) across samples, if necessary.
46+
if cost.ndim != 0:
47+
cost = df.T.mean(cost)
48+
if symb_weights is not None:
49+
# Need a very small eps to avoid 0/0 when all weights are 0!
50+
cost = cost / (1e-8 + df.T.mean(symb_weights))
51+
52+
if with_penalties:
53+
for w, p in self.penalties:
54+
cost = cost + w*p.symb_forward()
3155

3256
return cost
3357

34-
def forward(self, num_input, num_target, with_penalties=True):
58+
def enable_weights(self):
59+
self.with_weights = True
60+
return self
61+
62+
def enable_maskval(self, val):
63+
self.with_weights = val
64+
return self
65+
66+
def enable_per_sample_cost(self):
67+
self._ret_per_sample = True
68+
return self
69+
70+
def forward(self, num_input, num_target, with_penalties=True, per_sample=False):
3571
# NOTE: using the GPU for such trivial computations as most costs
36-
# is actually somewhat slower (e.g. for RMSE: 1.2ms vs. 0.2ms). So
37-
# ideally, we'd like to compile a CPU-version here, but I don't know how!
38-
if with_penalties not in self._fn_forward:
39-
symb_in = make_tensor_or_tensors(num_input, 'Y')
40-
symb_tgt = make_tensor_or_tensors(num_target, 'T')
41-
if with_penalties:
42-
symb_out = self.full_symb_forward(symb_in, symb_tgt)
43-
else:
44-
symb_out = self.symb_forward(symb_in, symb_tgt)
45-
self._fn_forward[with_penalties] = df.th.function(
46-
inputs=aslist(symb_in) + aslist(symb_tgt),
47-
outputs=symb_out
72+
# is actually somewhat slower (e.g. for RMSE: GPU 1.2ms vs. CPU 0.2ms).
73+
# So ideally, we'd like to compile a CPU-version here, but I don't know how!
74+
if (with_penalties, per_sample) not in self._fn_forward:
75+
symb_in = tensors_for_ndarrays(num_input, 'Y')
76+
symb_tgt = tensors_for_ndarrays(num_target, 'T')
77+
symb_out = self(symb_in, symb_tgt, with_penalties)
78+
self._fn_forward[with_penalties, per_sample] = df.th.function(
79+
inputs=flatten(symb_in) + flatten(symb_tgt),
80+
outputs=symb_out if not per_sample else self._per_sample_cost
4881
)
4982

50-
return self._fn_forward[with_penalties](*(aslist(num_input)+aslist(num_target)))
83+
return self._fn_forward[with_penalties, per_sample](*(flatten(num_input) + flatten(num_target)))
84+
85+
86+
# Get the per-sample cost in a similar way to the `StoreIO` container.
87+
88+
def get_extra_outputs(self):
89+
return [self._per_sample_cost] if self._ret_per_sample else []
90+
91+
def last_per_sample_cost(self):
92+
if not self._ret_per_sample:
93+
raise ValueError("Call `enable_per_sample_cost()` first!")
94+
95+
return self._per_sample_cost.val

DeepFried2/Module.py

Lines changed: 71 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,24 @@
11
import DeepFried2 as df
2-
from DeepFried2.utils import make_tensor_or_tensors, aslist
2+
from DeepFried2.utils import tensors_for_ndarrays, flatten
33

44
import numpy as _np
55

6-
class Module:
6+
class Module(object):
77

88
def __init__(self):
9-
self.training_mode = True
9+
self._mode = 'train'
1010

1111
# The functions are stored in a dictionary whose keys correspond to the
12-
# values that `self.training_mode` can take. That way, it would be
13-
# trivial to extend to further modes, and the code avoids many branches.
12+
# values that `self._mode` can take.
1413
self._fn_forward = {}
1514
self._fn_accum_grads = {}
1615
self._fn_accum_stats = {}
1716

17+
# These will store the most recently received/produced symbolic input/output
18+
# expressions, respectively. The key is the current mode.
19+
self._last_symb_inp = {}
20+
self._last_symb_out = {}
21+
1822
#def __hash__(self):
1923
# raise NotImplementedError("You *need* to reimplement hash, even if it's just python's default. See the documentation for more info.")
2024

@@ -38,44 +42,79 @@ def parameters(self, trainable_only=False):
3842
return params
3943

4044
def evaluate(self):
41-
self.training_mode = False
45+
self._mode = 'eval'
4246

4347
def training(self):
44-
self.training_mode = True
48+
self._mode = 'train'
4549

4650
def symb_forward(self, symb_input):
4751
raise NotImplementedError("`{}` needs to implement `symb_forward` method.".format(df.utils.typename(self)))
4852

53+
def __call__(self, symb_input):
54+
# Keep track of the symbolic inputs/outputs for things such as the `Backward` layer.
55+
self._last_symb_inp[self._mode] = symb_input
56+
self._last_symb_out[self._mode] = self.symb_forward(symb_input)
57+
return self._last_symb_out[self._mode]
58+
4959
def forward(self, data):
50-
if self.training_mode not in self._fn_forward:
51-
symb_in = make_tensor_or_tensors(data, 'X')
52-
symb_out = self.symb_forward(symb_in)
53-
self._fn_forward[self.training_mode] = df.th.function(
54-
inputs=aslist(symb_in),
55-
outputs=symb_out
60+
if self._mode not in self._fn_forward:
61+
symb_in = tensors_for_ndarrays(data, 'X')
62+
symb_out = self(symb_in)
63+
extra_out = self.get_extra_outputs()
64+
fn = self._fn_forward[self._mode] = df.th.function(
65+
inputs=flatten(symb_in),
66+
outputs=flatten(symb_out) + flatten(extra_out)
5667
)
68+
fn._df2_extra = extra_out
5769

58-
return self._fn_forward[self.training_mode](*aslist(data))
70+
fn = self._fn_forward[self._mode]
71+
outs = fn(*flatten(data))
72+
return self._collect_extra_outputs(fn, outs)
5973

60-
def accumulate_gradients(self, data_in, data_tgt, loss):
61-
if self.training_mode not in self._fn_accum_grads:
62-
symb_in = make_tensor_or_tensors(data_in, 'X')
63-
symb_tgt = make_tensor_or_tensors(data_tgt, 'T')
64-
symb_out = self.symb_forward(symb_in)
65-
symb_err = loss.full_symb_forward(symb_out, symb_tgt)
74+
def accumulate_gradients(self, data_in, data_tgt, crit):
75+
if self._mode not in self._fn_accum_grads:
76+
symb_in = tensors_for_ndarrays(data_in, 'X')
77+
symb_tgt = tensors_for_ndarrays(data_tgt, 'T')
78+
symb_out = self(symb_in)
79+
symb_cost = crit(symb_out, symb_tgt)
80+
extra_out = self.get_extra_outputs() + crit.get_extra_outputs()
6681

6782
params = self.parameters(trainable_only=True)
68-
symb_grads = df.th.grad(cost=symb_err, wrt=[p.param for p in params])
83+
symb_grads = df.th.grad(cost=symb_cost, wrt=[p.param for p in params])
6984
grads_updates = [(p.grad, p.grad + symb_grad) for p, symb_grad in zip(params, symb_grads)]
7085

71-
self._fn_accum_grads[self.training_mode] = df.th.function(
72-
inputs=aslist(symb_in) + aslist(symb_tgt),
73-
outputs=symb_err,
86+
fn = self._fn_accum_grads[self._mode] = df.th.function(
87+
inputs=flatten(symb_in) + flatten(symb_tgt),
88+
outputs=flatten(symb_cost) + flatten(extra_out),
7489
updates=grads_updates
7590
)
91+
fn._df2_extra = extra_out
92+
93+
fn = self._fn_accum_grads[self._mode]
94+
args = flatten(data_in) + flatten(data_tgt)
95+
outs = fn(*args)
96+
return self._collect_extra_outputs(fn, outs)
97+
98+
def get_extra_outputs(self):
99+
"""
100+
Return a list of Theano expressions which will be passed as additional
101+
`output` parameters. The computed value will be stored in the
102+
expression's `val` attribute.
103+
104+
Guaranteed to be called after `symb_forward`.
105+
"""
106+
return []
107+
108+
def _collect_extra_outputs(self, fn, vals):
109+
# The number of non-extra outputs.
110+
nout = len(vals) - len(fn._df2_extra)
111+
112+
# Store all extra outputs in their `val` attribute so that they can possibly
113+
# be retrieved by the modules that asked for them.
114+
for out, val in zip(fn._df2_extra, vals[nout:]):
115+
out.val = val
76116

77-
args = aslist(data_in) + aslist(data_tgt)
78-
return self._fn_accum_grads[self.training_mode](*args)
117+
return vals[:nout] if nout > 1 else vals[0]
79118

80119
def get_stat_updates(self):
81120
"""
@@ -88,12 +127,12 @@ def get_stat_updates(self):
88127
return []
89128

90129
def accumulate_statistics(self, data_in):
91-
if self.training_mode not in self._fn_accum_stats:
92-
symb_in = make_tensor_or_tensors(data_in, 'X')
130+
if self._mode not in self._fn_accum_stats:
131+
symb_in = tensors_for_ndarrays(data_in, 'X')
93132

94133
# Call forward once so it can compute some variables it'll actually
95134
# use in the stat updates collection.
96-
self.symb_forward(symb_in)
135+
self(symb_in)
97136

98137
stat_updates = self.get_stat_updates()
99138
if not stat_updates:
@@ -117,12 +156,12 @@ def accumulate_statistics(self, data_in):
117156
print("WARNING: Dropped the following stat-update because that variable got multiple updates: {}".format(upd[0]))
118157
stat_updates = uniq_updates
119158

120-
self._fn_accum_stats[self.training_mode] = df.th.function(
121-
inputs=aslist(symb_in),
159+
self._fn_accum_stats[self._mode] = df.th.function(
160+
inputs=flatten(symb_in),
122161
updates=stat_updates
123162
)
124163

125-
self._fn_accum_stats[self.training_mode](*aslist(data_in))
164+
self._fn_accum_stats[self._mode](*flatten(data_in))
126165

127166
def clear(self):
128167
self._fn_forward.clear()

DeepFried2/Optimizer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import DeepFried2 as df
22

33

4-
class Optimizer:
4+
class Optimizer(object):
55

66
def __init__(self, **hyperparams):
77
self.states = {}

DeepFried2/Param.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import numpy as _np
33

44

5-
class Param:
5+
class Param(object):
66

77
def __init__(self, shape, init, fan=None, name=None, learn=True, decay=True, dtype=df.floatX, **kw):
88
self.init = init

DeepFried2/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from .Module import Module
1010
from .layers import *
1111

12-
from .Container import Container
12+
from .Container import Container, SingleModuleContainer
1313
from .containers import *
1414

1515
from .Criterion import Criterion

0 commit comments

Comments
 (0)