Skip to content

Commit 054d61a

Browse files
authored
Merge pull request #104 from lucasb-eyer/crit-avg-nnz
Add non-zero averaging option to criterion.
2 parents 6ade78b + 64317b2 commit 054d61a

File tree

1 file changed

+15
-1
lines changed

1 file changed

+15
-1
lines changed

DeepFried2/Criterion.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ def __init__(self):
88
self.penalties = []
99
self.with_weights = False
1010
self._ret_per_sample = False
11+
self._nonzero_averaging = False
1112
self._fn_forward = {}
1213

1314
def _assert_same_dim(self, symb_input, symb_target):
@@ -45,7 +46,16 @@ def __call__(self, symb_input, symb_target, with_penalties=True):
4546
# Criteria may return per-sample cost which we will average
4647
# (optionally weighted) across samples, if necessary.
4748
if cost.ndim != 0:
48-
cost = df.T.mean(cost)
49+
# The default is to average the batch, regardless of the loss values.
50+
# But we also allow averaging only over non-zero losses, for some
51+
# applications. Especially in margin-losses, this may make sense as
52+
# it effectively weights the "rare non-zero" losses higher.
53+
if not self._nonzero_averaging:
54+
cost = df.T.mean(cost)
55+
else:
56+
nnz = df.th.gradient.disconnected_grad(cost.nonzero_values().shape[0])
57+
cost = df.T.sum(cost)/(1e-8 + nnz)
58+
4959
if symb_weights is not None:
5060
# Need a very small eps to avoid 0/0 when all weights are 0!
5161
cost = cost / (1e-8 + df.T.mean(symb_weights))
@@ -68,6 +78,10 @@ def enable_per_sample_cost(self):
6878
self._ret_per_sample = True
6979
return self
7080

81+
def enable_nonzero_averaging(self):
82+
self._nonzero_averaging = True
83+
return self
84+
7185
def forward(self, num_input, num_target, with_penalties=True, per_sample=False):
7286
# NOTE: using the GPU for such trivial computations as most costs
7387
# is actually somewhat slower (e.g. for RMSE: GPU 1.2ms vs. CPU 0.2ms).

0 commit comments

Comments
 (0)