Skip to content

Commit 20c78a9

Browse files
Internal change
PiperOrigin-RevId: 301643231
1 parent b86ffb1 commit 20c78a9

File tree

6 files changed

+427
-79
lines changed

6 files changed

+427
-79
lines changed

official/vision/detection/executor/detection_executor.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,11 @@ def _replicated_step(inputs):
8080
all_losses = loss_fn(labels, outputs)
8181
losses = {}
8282
for k, v in all_losses.items():
83-
v = tf.reduce_mean(v) / strategy.num_replicas_in_sync
84-
losses[k] = v
85-
loss = losses['total_loss']
83+
losses[k] = tf.reduce_mean(v)
84+
per_replica_loss = losses['total_loss'] / strategy.num_replicas_in_sync
8685
_update_state(labels, outputs)
8786

88-
grads = tape.gradient(loss, trainable_variables)
87+
grads = tape.gradient(per_replica_loss, trainable_variables)
8988
optimizer.apply_gradients(zip(grads, trainable_variables))
9089
return losses
9190

official/vision/detection/modeling/base_model.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@
2121
import abc
2222
import functools
2323
import re
24-
from absl import logging
25-
2624
import tensorflow.compat.v2 as tf
2725
from official.vision.detection.modeling import checkpoint_utils
2826
from official.vision.detection.modeling import learning_rates
@@ -60,11 +58,10 @@ def __call__(self, learning_rate):
6058

6159

6260
def _make_filter_trainable_variables_fn(frozen_variable_prefix):
63-
"""Creates a function for filtering trainable variables.
64-
"""
61+
"""Creates a function for filtering trainable variables."""
6562

6663
def _filter_trainable_variables(variables):
67-
"""Filters trainable variables
64+
"""Filters trainable variables.
6865
6966
Args:
7067
variables: a list of tf.Variable to be filtered.
@@ -141,8 +138,7 @@ def build_optimizer(self):
141138
return self._optimizer_fn(self._learning_rate)
142139

143140
def make_filter_trainable_variables_fn(self):
144-
"""Creates a function for filtering trainable variables.
145-
"""
141+
"""Creates a function for filtering trainable variables."""
146142
return _make_filter_trainable_variables_fn(self._frozen_variable_prefix)
147143

148144
def weight_decay_loss(self, trainable_variables):
@@ -151,8 +147,6 @@ def weight_decay_loss(self, trainable_variables):
151147
if self._regularization_var_regex is None
152148
or re.match(self._regularization_var_regex, v.name)
153149
]
154-
logging.info('Regularization Variables: %s',
155-
[v.name for v in reg_variables])
156150

157151
return self._l2_weight_decay * tf.add_n(
158152
[tf.nn.l2_loss(v) for v in reg_variables])

official/vision/detection/modeling/factory.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,16 @@
1515
"""Factory to build detection model."""
1616

1717

18+
from official.vision.detection.modeling import maskrcnn_model
1819
from official.vision.detection.modeling import retinanet_model
1920

2021

2122
def model_generator(params):
2223
"""Model function generator."""
2324
if params.type == 'retinanet':
2425
model_fn = retinanet_model.RetinanetModel(params)
26+
elif params.type == 'mask_rcnn':
27+
model_fn = maskrcnn_model.MaskrcnnModel(params)
2528
else:
2629
raise ValueError('Model %s is not supported.'% params.type)
2730

official/vision/detection/modeling/losses.py

Lines changed: 69 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from __future__ import division
1919
from __future__ import print_function
2020

21+
from absl import logging
2122
import tensorflow.compat.v2 as tf
2223

2324

@@ -76,7 +77,7 @@ def focal_loss(logits, targets, alpha, gamma, normalizer):
7677
# (1 - p_t)^r = exp(-r * z * x - r * log(1 + exp(-x))).
7778
neg_logits = -1.0 * logits
7879
modulator = tf.math.exp(gamma * targets * neg_logits -
79-
gamma * tf.math.log1p(tf.math.exp(neg_logits)))
80+
gamma * tf.math.log1p(tf.math.exp(neg_logits)))
8081
loss = modulator * cross_entropy
8182
weighted_loss = tf.where(positive_label_mask, alpha * loss,
8283
(1.0 - alpha) * loss)
@@ -89,6 +90,8 @@ class RpnScoreLoss(object):
8990

9091
def __init__(self, params):
9192
self._rpn_batch_size_per_im = params.rpn_batch_size_per_im
93+
self._binary_crossentropy = tf.keras.losses.BinaryCrossentropy(
94+
reduction=tf.keras.losses.Reduction.SUM, from_logits=True)
9295

9396
def __call__(self, score_outputs, labels):
9497
"""Computes total RPN detection loss.
@@ -128,17 +131,16 @@ def _rpn_score_loss(self, score_outputs, score_targets, normalizer=1.0):
128131
# (3) score_targets[i]=-1, the anchor is don't care (ignore).
129132
with tf.name_scope('rpn_score_loss'):
130133
mask = tf.math.logical_or(tf.math.equal(score_targets, 1),
131-
tf.math.equal(score_targets, 0))
132-
score_targets = tf.math.maximum(score_targets, tf.zeros_like(score_targets))
133-
# RPN score loss is sum over all except ignored samples.
134-
# Keep the compat.v1 loss because Keras does not have a
135-
# sigmoid_cross_entropy substitution yet.
136-
# TODO(b/143720144): replace this loss.
137-
score_loss = tf.compat.v1.losses.sigmoid_cross_entropy(
138-
score_targets,
139-
score_outputs,
140-
weights=mask,
141-
reduction=tf.compat.v1.losses.Reduction.SUM)
134+
tf.math.equal(score_targets, 0))
135+
136+
score_targets = tf.math.maximum(score_targets,
137+
tf.zeros_like(score_targets))
138+
139+
score_targets = tf.expand_dims(score_targets, axis=-1)
140+
score_outputs = tf.expand_dims(score_outputs, axis=-1)
141+
score_loss = self._binary_crossentropy(
142+
score_targets, score_outputs, sample_weight=mask)
143+
142144
score_loss /= normalizer
143145
return score_loss
144146

@@ -147,7 +149,10 @@ class RpnBoxLoss(object):
147149
"""Region Proposal Network box regression loss function."""
148150

149151
def __init__(self, params):
150-
self._delta = params.huber_loss_delta
152+
logging.info('RpnBoxLoss huber_loss_delta %s', params.huber_loss_delta)
153+
# The delta is typically around the mean value of regression target.
154+
# For instance, the regression targets of 512x512 input with 6 anchors on
155+
# P2-P6 pyramid is about [0.1, 0.1, 0.2, 0.2].
151156
self._huber_loss = tf.keras.losses.Huber(
152157
delta=params.huber_loss_delta, reduction=tf.keras.losses.Reduction.SUM)
153158

@@ -171,35 +176,32 @@ def __call__(self, box_outputs, labels):
171176

172177
box_losses = []
173178
for level in levels:
174-
box_losses.append(
175-
self._rpn_box_loss(
176-
box_outputs[level], labels[level], delta=self._delta))
179+
box_losses.append(self._rpn_box_loss(box_outputs[level], labels[level]))
177180

178181
# Sum per level losses to total loss.
179182
return tf.add_n(box_losses)
180183

181-
def _rpn_box_loss(self, box_outputs, box_targets, normalizer=1.0, delta=1./9):
184+
def _rpn_box_loss(self, box_outputs, box_targets, normalizer=1.0):
182185
"""Computes box regression loss."""
183-
# The delta is typically around the mean value of regression target.
184-
# For instance, the regression targets of 512x512 input with 6 anchors on
185-
# P2-P6 pyramid is about [0.1, 0.1, 0.2, 0.2].
186186
with tf.name_scope('rpn_box_loss'):
187-
mask = tf.math.not_equal(box_targets, 0.0)
188-
# The loss is normalized by the sum of non-zero weights before additional
189-
# normalizer provided by the function caller.
190-
box_loss = tf.compat.v1.losses.huber_loss(
191-
box_targets,
192-
box_outputs,
193-
weights=mask,
194-
delta=delta,
195-
reduction=tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
196-
box_loss /= normalizer
187+
mask = tf.cast(tf.not_equal(box_targets, 0.0), dtype=tf.float32)
188+
box_targets = tf.expand_dims(box_targets, axis=-1)
189+
box_outputs = tf.expand_dims(box_outputs, axis=-1)
190+
box_loss = self._huber_loss(box_targets, box_outputs, sample_weight=mask)
191+
# The loss is normalized by the sum of non-zero weights and additional
192+
# normalizer provided by the function caller. Using + 0.01 here to avoid
193+
# division by zero.
194+
box_loss /= normalizer * (tf.reduce_sum(mask) + 0.01)
197195
return box_loss
198196

199197

200198
class FastrcnnClassLoss(object):
201199
"""Fast R-CNN classification loss function."""
202200

201+
def __init__(self):
202+
self._categorical_crossentropy = tf.keras.losses.CategoricalCrossentropy(
203+
reduction=tf.keras.losses.Reduction.SUM, from_logits=True)
204+
203205
def __call__(self, class_outputs, class_targets):
204206
"""Computes the class loss (Fast-RCNN branch) of Mask-RCNN.
205207
@@ -218,24 +220,19 @@ def __call__(self, class_outputs, class_targets):
218220
a scalar tensor representing total class loss.
219221
"""
220222
with tf.name_scope('fast_rcnn_loss'):
221-
_, _, num_classes = class_outputs.get_shape().as_list()
223+
batch_size, num_boxes, num_classes = class_outputs.get_shape().as_list()
222224
class_targets = tf.cast(class_targets, dtype=tf.int32)
223225
class_targets_one_hot = tf.one_hot(class_targets, num_classes)
224-
return self._fast_rcnn_class_loss(class_outputs, class_targets_one_hot)
226+
return self._fast_rcnn_class_loss(class_outputs, class_targets_one_hot,
227+
normalizer=batch_size * num_boxes / 2.0)
225228

226229
def _fast_rcnn_class_loss(self, class_outputs, class_targets_one_hot,
227-
normalizer=1.0):
230+
normalizer):
228231
"""Computes classification loss."""
229232
with tf.name_scope('fast_rcnn_class_loss'):
230-
# The loss is normalized by the sum of non-zero weights before additional
231-
# normalizer provided by the function caller.
232-
# Keep the compat.v1 loss because Keras does not have a
233-
# softmax_cross_entropy substitution yet.
234-
# TODO(b/143720144): replace this loss.
235-
class_loss = tf.compat.v1.losses.softmax_cross_entropy(
236-
class_targets_one_hot,
237-
class_outputs,
238-
reduction=tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
233+
class_loss = self._categorical_crossentropy(class_targets_one_hot,
234+
class_outputs)
235+
239236
class_loss /= normalizer
240237
return class_loss
241238

@@ -244,7 +241,12 @@ class FastrcnnBoxLoss(object):
244241
"""Fast R-CNN box regression loss function."""
245242

246243
def __init__(self, params):
247-
self._delta = params.huber_loss_delta
244+
logging.info('FastrcnnBoxLoss huber_loss_delta %s', params.huber_loss_delta)
245+
# The delta is typically around the mean value of regression target.
246+
# For instance, the regression targets of 512x512 input with 6 anchors on
247+
# P2-P6 pyramid is about [0.1, 0.1, 0.2, 0.2].
248+
self._huber_loss = tf.keras.losses.Huber(
249+
delta=params.huber_loss_delta, reduction=tf.keras.losses.Reduction.SUM)
248250

249251
def __call__(self, box_outputs, class_targets, box_targets):
250252
"""Computes the box loss (Fast-RCNN branch) of Mask-RCNN.
@@ -296,36 +298,32 @@ def __call__(self, box_outputs, class_targets, box_targets):
296298
dtype=box_outputs.dtype), tf.reshape(box_outputs, [-1, 4]))
297299
box_outputs = tf.reshape(box_outputs, [batch_size, -1, 4])
298300

299-
return self._fast_rcnn_box_loss(box_outputs, box_targets, class_targets,
300-
delta=self._delta)
301+
return self._fast_rcnn_box_loss(box_outputs, box_targets, class_targets)
301302

302303
def _fast_rcnn_box_loss(self, box_outputs, box_targets, class_targets,
303-
normalizer=1.0, delta=1.):
304+
normalizer=1.0):
304305
"""Computes box regression loss."""
305-
# The delta is typically around the mean value of regression target.
306-
# For instance, the regression targets of 512x512 input with 6 anchors on
307-
# P2-P6 pyramid is about [0.1, 0.1, 0.2, 0.2].
308306
with tf.name_scope('fast_rcnn_box_loss'):
309307
mask = tf.tile(tf.expand_dims(tf.greater(class_targets, 0), axis=2),
310308
[1, 1, 4])
311-
# The loss is normalized by the sum of non-zero weights before additional
312-
# normalizer provided by the function caller.
313-
# Keep the compat.v1 loss because Keras does not have a
314-
# Reduction.SUM_BY_NONZERO_WEIGHTS substitution yet.
315-
# TODO(b/143720144): replace this loss.
316-
box_loss = tf.compat.v1.losses.huber_loss(
317-
box_targets,
318-
box_outputs,
319-
weights=mask,
320-
delta=delta,
321-
reduction=tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
322-
box_loss /= normalizer
309+
mask = tf.cast(mask, dtype=tf.float32)
310+
box_targets = tf.expand_dims(box_targets, axis=-1)
311+
box_outputs = tf.expand_dims(box_outputs, axis=-1)
312+
box_loss = self._huber_loss(box_targets, box_outputs, sample_weight=mask)
313+
# The loss is normalized by the number of ones in mask,
314+
# additional normalizer provided by the user and using 0.01 here to avoid
315+
# division by 0.
316+
box_loss /= normalizer * (tf.reduce_sum(mask) + 0.01)
323317
return box_loss
324318

325319

326320
class MaskrcnnLoss(object):
327321
"""Mask R-CNN instance segmentation mask loss function."""
328322

323+
def __init__(self):
324+
self._binary_crossentropy = tf.keras.losses.BinaryCrossentropy(
325+
reduction=tf.keras.losses.Reduction.SUM, from_logits=True)
326+
329327
def __call__(self, mask_outputs, mask_targets, select_class_targets):
330328
"""Computes the mask loss of Mask-RCNN.
331329
@@ -358,11 +356,16 @@ def __call__(self, mask_outputs, mask_targets, select_class_targets):
358356
tf.reshape(tf.greater(select_class_targets, 0),
359357
[batch_size, num_masks, 1, 1]),
360358
[1, 1, mask_height, mask_width])
361-
return tf.compat.v1.losses.sigmoid_cross_entropy(
362-
mask_targets,
363-
mask_outputs,
364-
weights=weights,
365-
reduction=tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
359+
weights = tf.cast(weights, dtype=tf.float32)
360+
361+
mask_targets = tf.expand_dims(mask_targets, axis=-1)
362+
mask_outputs = tf.expand_dims(mask_outputs, axis=-1)
363+
mask_loss = self._binary_crossentropy(mask_targets, mask_outputs,
364+
sample_weight=weights)
365+
366+
# The loss is normalized by the number of 1's in weights and
367+
# + 0.01 is used to avoid division by zero.
368+
return mask_loss / (tf.reduce_sum(weights) + 0.01)
366369

367370

368371
class RetinanetClassLoss(object):

0 commit comments

Comments
 (0)