18
18
from __future__ import division
19
19
from __future__ import print_function
20
20
21
+ from absl import logging
21
22
import tensorflow .compat .v2 as tf
22
23
23
24
@@ -76,7 +77,7 @@ def focal_loss(logits, targets, alpha, gamma, normalizer):
76
77
# (1 - p_t)^r = exp(-r * z * x - r * log(1 + exp(-x))).
77
78
neg_logits = - 1.0 * logits
78
79
modulator = tf .math .exp (gamma * targets * neg_logits -
79
- gamma * tf .math .log1p (tf .math .exp (neg_logits )))
80
+ gamma * tf .math .log1p (tf .math .exp (neg_logits )))
80
81
loss = modulator * cross_entropy
81
82
weighted_loss = tf .where (positive_label_mask , alpha * loss ,
82
83
(1.0 - alpha ) * loss )
@@ -89,6 +90,8 @@ class RpnScoreLoss(object):
89
90
90
91
def __init__ (self , params ):
91
92
self ._rpn_batch_size_per_im = params .rpn_batch_size_per_im
93
+ self ._binary_crossentropy = tf .keras .losses .BinaryCrossentropy (
94
+ reduction = tf .keras .losses .Reduction .SUM , from_logits = True )
92
95
93
96
def __call__ (self , score_outputs , labels ):
94
97
"""Computes total RPN detection loss.
@@ -128,17 +131,16 @@ def _rpn_score_loss(self, score_outputs, score_targets, normalizer=1.0):
128
131
# (3) score_targets[i]=-1, the anchor is don't care (ignore).
129
132
with tf .name_scope ('rpn_score_loss' ):
130
133
mask = tf .math .logical_or (tf .math .equal (score_targets , 1 ),
131
- tf .math .equal (score_targets , 0 ))
132
- score_targets = tf .math .maximum (score_targets , tf .zeros_like (score_targets ))
133
- # RPN score loss is sum over all except ignored samples.
134
- # Keep the compat.v1 loss because Keras does not have a
135
- # sigmoid_cross_entropy substitution yet.
136
- # TODO(b/143720144): replace this loss.
137
- score_loss = tf .compat .v1 .losses .sigmoid_cross_entropy (
138
- score_targets ,
139
- score_outputs ,
140
- weights = mask ,
141
- reduction = tf .compat .v1 .losses .Reduction .SUM )
134
+ tf .math .equal (score_targets , 0 ))
135
+
136
+ score_targets = tf .math .maximum (score_targets ,
137
+ tf .zeros_like (score_targets ))
138
+
139
+ score_targets = tf .expand_dims (score_targets , axis = - 1 )
140
+ score_outputs = tf .expand_dims (score_outputs , axis = - 1 )
141
+ score_loss = self ._binary_crossentropy (
142
+ score_targets , score_outputs , sample_weight = mask )
143
+
142
144
score_loss /= normalizer
143
145
return score_loss
144
146
@@ -147,7 +149,10 @@ class RpnBoxLoss(object):
147
149
"""Region Proposal Network box regression loss function."""
148
150
149
151
def __init__ (self , params ):
150
- self ._delta = params .huber_loss_delta
152
+ logging .info ('RpnBoxLoss huber_loss_delta %s' , params .huber_loss_delta )
153
+ # The delta is typically around the mean value of regression target.
154
+ # For instance, the regression targets of a 512x512 input with 6 anchors on
155
+ # the P2-P6 pyramid are about [0.1, 0.1, 0.2, 0.2].
151
156
self ._huber_loss = tf .keras .losses .Huber (
152
157
delta = params .huber_loss_delta , reduction = tf .keras .losses .Reduction .SUM )
153
158
@@ -171,35 +176,32 @@ def __call__(self, box_outputs, labels):
171
176
172
177
box_losses = []
173
178
for level in levels :
174
- box_losses .append (
175
- self ._rpn_box_loss (
176
- box_outputs [level ], labels [level ], delta = self ._delta ))
179
+ box_losses .append (self ._rpn_box_loss (box_outputs [level ], labels [level ]))
177
180
178
181
# Sum per level losses to total loss.
179
182
return tf .add_n (box_losses )
180
183
181
- def _rpn_box_loss (self , box_outputs , box_targets , normalizer = 1.0 , delta = 1. / 9 ):
184
+ def _rpn_box_loss (self , box_outputs , box_targets , normalizer = 1.0 ):
182
185
"""Computes box regression loss."""
183
- # The delta is typically around the mean value of regression target.
184
- # for instances, the regression targets of 512x512 input with 6 anchors on
185
- # P2-P6 pyramid is about [0.1, 0.1, 0.2, 0.2].
186
186
with tf .name_scope ('rpn_box_loss' ):
187
- mask = tf .math .not_equal (box_targets , 0.0 )
188
- # The loss is normalized by the sum of non-zero weights before additional
189
- # normalizer provided by the function caller.
190
- box_loss = tf .compat .v1 .losses .huber_loss (
191
- box_targets ,
192
- box_outputs ,
193
- weights = mask ,
194
- delta = delta ,
195
- reduction = tf .compat .v1 .losses .Reduction .SUM_BY_NONZERO_WEIGHTS )
196
- box_loss /= normalizer
187
+ mask = tf .cast (tf .not_equal (box_targets , 0.0 ), dtype = tf .float32 )
188
+ box_targets = tf .expand_dims (box_targets , axis = - 1 )
189
+ box_outputs = tf .expand_dims (box_outputs , axis = - 1 )
190
+ box_loss = self ._huber_loss (box_targets , box_outputs , sample_weight = mask )
191
+ # The loss is normalized by the sum of the non-zero weights and by the
192
+ # additional normalizer provided by the function caller; 0.01 is added to
193
+ # the weight sum to avoid division by zero.
194
+ box_loss /= normalizer * (tf .reduce_sum (mask ) + 0.01 )
197
195
return box_loss
198
196
199
197
200
198
class FastrcnnClassLoss (object ):
201
199
"""Fast R-CNN classification loss function."""
202
200
201
+ def __init__ (self ):
202
+ self ._categorical_crossentropy = tf .keras .losses .CategoricalCrossentropy (
203
+ reduction = tf .keras .losses .Reduction .SUM , from_logits = True )
204
+
203
205
def __call__ (self , class_outputs , class_targets ):
204
206
"""Computes the class loss (Fast-RCNN branch) of Mask-RCNN.
205
207
@@ -218,24 +220,19 @@ def __call__(self, class_outputs, class_targets):
218
220
a scalar tensor representing total class loss.
219
221
"""
220
222
with tf .name_scope ('fast_rcnn_loss' ):
221
- _ , _ , num_classes = class_outputs .get_shape ().as_list ()
223
+ batch_size , num_boxes , num_classes = class_outputs .get_shape ().as_list ()
222
224
class_targets = tf .cast (class_targets , dtype = tf .int32 )
223
225
class_targets_one_hot = tf .one_hot (class_targets , num_classes )
224
- return self ._fast_rcnn_class_loss (class_outputs , class_targets_one_hot )
226
+ return self ._fast_rcnn_class_loss (class_outputs , class_targets_one_hot ,
227
+ normalizer = batch_size * num_boxes / 2.0 )
225
228
226
229
def _fast_rcnn_class_loss (self , class_outputs , class_targets_one_hot ,
227
- normalizer = 1.0 ):
230
+ normalizer ):
228
231
"""Computes classification loss."""
229
232
with tf .name_scope ('fast_rcnn_class_loss' ):
230
- # The loss is normalized by the sum of non-zero weights before additional
231
- # normalizer provided by the function caller.
232
- # Keep the compat.v1 loss because Keras does not have a
233
- # softmax_cross_entropy substitution yet.
234
- # TODO(b/143720144): replace this loss.
235
- class_loss = tf .compat .v1 .losses .softmax_cross_entropy (
236
- class_targets_one_hot ,
237
- class_outputs ,
238
- reduction = tf .compat .v1 .losses .Reduction .SUM_BY_NONZERO_WEIGHTS )
233
+ class_loss = self ._categorical_crossentropy (class_targets_one_hot ,
234
+ class_outputs )
235
+
239
236
class_loss /= normalizer
240
237
return class_loss
241
238
@@ -244,7 +241,12 @@ class FastrcnnBoxLoss(object):
244
241
"""Fast R-CNN box regression loss function."""
245
242
246
243
def __init__ (self , params ):
247
- self ._delta = params .huber_loss_delta
244
+ logging .info ('FastrcnnBoxLoss huber_loss_delta %s' , params .huber_loss_delta )
245
+ # The delta is typically around the mean value of regression target.
246
+ # For instance, the regression targets of a 512x512 input with 6 anchors on
247
+ # the P2-P6 pyramid are about [0.1, 0.1, 0.2, 0.2].
248
+ self ._huber_loss = tf .keras .losses .Huber (
249
+ delta = params .huber_loss_delta , reduction = tf .keras .losses .Reduction .SUM )
248
250
249
251
def __call__ (self , box_outputs , class_targets , box_targets ):
250
252
"""Computes the box loss (Fast-RCNN branch) of Mask-RCNN.
@@ -296,36 +298,32 @@ def __call__(self, box_outputs, class_targets, box_targets):
296
298
dtype = box_outputs .dtype ), tf .reshape (box_outputs , [- 1 , 4 ]))
297
299
box_outputs = tf .reshape (box_outputs , [batch_size , - 1 , 4 ])
298
300
299
- return self ._fast_rcnn_box_loss (box_outputs , box_targets , class_targets ,
300
- delta = self ._delta )
301
+ return self ._fast_rcnn_box_loss (box_outputs , box_targets , class_targets )
301
302
302
303
def _fast_rcnn_box_loss (self , box_outputs , box_targets , class_targets ,
303
- normalizer = 1.0 , delta = 1. ):
304
+ normalizer = 1.0 ):
304
305
"""Computes box regression loss."""
305
- # The delta is typically around the mean value of regression target.
306
- # for instances, the regression targets of 512x512 input with 6 anchors on
307
- # P2-P6 pyramid is about [0.1, 0.1, 0.2, 0.2].
308
306
with tf .name_scope ('fast_rcnn_box_loss' ):
309
307
mask = tf .tile (tf .expand_dims (tf .greater (class_targets , 0 ), axis = 2 ),
310
308
[1 , 1 , 4 ])
311
- # The loss is normalized by the sum of non-zero weights before additional
312
- # normalizer provided by the function caller.
313
- # Keep the compat.v1 loss because Keras does not have a
314
- # Reduction.SUM_BY_NONZERO_WEIGHTS substitution yet.
315
- # TODO(b/143720144): replace this loss.
316
- box_loss = tf .compat .v1 .losses .huber_loss (
317
- box_targets ,
318
- box_outputs ,
319
- weights = mask ,
320
- delta = delta ,
321
- reduction = tf .compat .v1 .losses .Reduction .SUM_BY_NONZERO_WEIGHTS )
322
- box_loss /= normalizer
309
+ mask = tf .cast (mask , dtype = tf .float32 )
310
+ box_targets = tf .expand_dims (box_targets , axis = - 1 )
311
+ box_outputs = tf .expand_dims (box_outputs , axis = - 1 )
312
+ box_loss = self ._huber_loss (box_targets , box_outputs , sample_weight = mask )
313
+ # The loss is normalized by the number of ones in mask and by the
314
+ # additional normalizer provided by the caller; 0.01 is added to the mask
315
+ # sum to avoid division by 0.
316
+ box_loss /= normalizer * (tf .reduce_sum (mask ) + 0.01 )
323
317
return box_loss
324
318
325
319
326
320
class MaskrcnnLoss (object ):
327
321
"""Mask R-CNN instance segmentation mask loss function."""
328
322
323
+ def __init__ (self ):
324
+ self ._binary_crossentropy = tf .keras .losses .BinaryCrossentropy (
325
+ reduction = tf .keras .losses .Reduction .SUM , from_logits = True )
326
+
329
327
def __call__ (self , mask_outputs , mask_targets , select_class_targets ):
330
328
"""Computes the mask loss of Mask-RCNN.
331
329
@@ -358,11 +356,16 @@ def __call__(self, mask_outputs, mask_targets, select_class_targets):
358
356
tf .reshape (tf .greater (select_class_targets , 0 ),
359
357
[batch_size , num_masks , 1 , 1 ]),
360
358
[1 , 1 , mask_height , mask_width ])
361
- return tf .compat .v1 .losses .sigmoid_cross_entropy (
362
- mask_targets ,
363
- mask_outputs ,
364
- weights = weights ,
365
- reduction = tf .compat .v1 .losses .Reduction .SUM_BY_NONZERO_WEIGHTS )
359
+ weights = tf .cast (weights , dtype = tf .float32 )
360
+
361
+ mask_targets = tf .expand_dims (mask_targets , axis = - 1 )
362
+ mask_outputs = tf .expand_dims (mask_outputs , axis = - 1 )
363
+ mask_loss = self ._binary_crossentropy (mask_targets , mask_outputs ,
364
+ sample_weight = weights )
365
+
366
+ # The loss is normalized by the number of 1's in weights and
367
+ # + 0.01 is used to avoid division by zero.
368
+ return mask_loss / (tf .reduce_sum (weights ) + 0.01 )
366
369
367
370
368
371
class RetinanetClassLoss (object ):
0 commit comments