Skip to content

Commit df587df

Browse files
authored
clip box value (#361)
Also changes the box output format of detection (from [y, x, height, width] to [y_min, x_min, y_max, x_max]).
1 parent 2bbdf6a commit df587df

File tree

2 files changed

+21
-17
lines changed

2 files changed

+21
-17
lines changed

efficientdet/anchors.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@ def _generate_detections_tf(cls_outputs,
265265
classes,
266266
image_id,
267267
image_scale,
268+
image_size,
268269
min_score_thresh=MIN_SCORE_THRESH,
269270
max_boxes_to_draw=MAX_DETECTIONS_PER_IMAGE,
270271
soft_nms_sigma=0.0,
@@ -303,7 +304,7 @@ def _generate_detections_tf(cls_outputs,
303304
304305
Returns:
305306
detections: detection results in a tensor with each row representing
306-
[image_id, y, x, height, width, score, class]
307+
[image_id, ymin, xmin, ymax, xmax, score, class]
307308
"""
308309
logging.info('Using tf version of post-processing.')
309310
anchor_boxes = tf.gather(anchor_boxes, indices)
@@ -330,15 +331,13 @@ def _generate_detections_tf(cls_outputs,
330331
detections = tf.gather(all_detections, top_detection_idx)
331332
scores = detections[:, 4]
332333
boxes = detections[:, :4]
333-
height = boxes[:, 2] - boxes[:, 0]
334-
width = boxes[:, 3] - boxes[:, 1]
335334

336335
detections = tf.stack([
337-
tf.cast(tf.tile(image_id, [tf.shape(top_detection_idx)[0]]), tf.float32),
338-
boxes[:, 0] * image_scale,
339-
boxes[:, 1] * image_scale,
340-
height * image_scale,
341-
width * image_scale,
336+
tf.cast(tf.tile(image_id, tf.shape(top_detection_idx)), tf.float32),
337+
tf.clip_by_value(boxes[:, 0], 0, image_size[0]) * image_scale,
338+
tf.clip_by_value(boxes[:, 1], 0, image_size[1]) * image_scale,
339+
tf.clip_by_value(boxes[:, 2], 0, image_size[0]) * image_scale,
340+
tf.clip_by_value(boxes[:, 3], 0, image_size[1]) * image_scale,
342341
scores,
343342
tf.cast(tf.gather(classes, top_detection_idx) + 1, tf.float32)
344343
], axis=1)
@@ -566,6 +565,7 @@ def generate_detections(self,
566565
classes,
567566
image_id,
568567
image_scale,
568+
image_size,
569569
min_score_thresh=MIN_SCORE_THRESH,
570570
max_boxes_to_draw=MAX_DETECTIONS_PER_IMAGE,
571571
disable_pyfun=None):
@@ -579,6 +579,7 @@ def generate_detections(self,
579579
classes,
580580
image_id,
581581
image_scale,
582+
image_size,
582583
min_score_thresh=min_score_thresh,
583584
max_boxes_to_draw=max_boxes_to_draw)
584585
else:

efficientdet/inference.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -248,13 +248,15 @@ def det_post_process_combined(params, cls_outputs, box_outputs, scales,
248248
tf.tile(
249249
tf.expand_dims(tf.range(batch_size), axis=1), [1, max_boxes_to_draw]),
250250
dtype=tf.float32)
251-
y = nmsed_boxes[..., 0] * scales
252-
x = nmsed_boxes[..., 1] * scales
253-
height = nmsed_boxes[..., 2] * scales - y
254-
width = nmsed_boxes[..., 3] * scales - x
251+
image_size = params['image_size']
252+
ymin = tf.clip_by_value(nmsed_boxes[..., 0], 0, image_size[0]) * scales
253+
xmin = tf.clip_by_value(nmsed_boxes[..., 1], 0, image_size[1]) * scales
254+
ymax = tf.clip_by_value(nmsed_boxes[..., 2], 0, image_size[0]) * scales
255+
xmax = tf.clip_by_value(nmsed_boxes[..., 3], 0, image_size[1]) * scales
256+
255257
detection_list = [
256-
# Format: (image_ids, y, x, height, width, score, class)
257-
image_ids, y, x, height, width, nmsed_scores,
258+
# Format: (image_ids, ymin, xmin, ymax, xmax, score, class)
259+
image_ids, ymin, xmin, ymax, xmax, nmsed_scores,
258260
tf.cast(nmsed_classes + 1, tf.float32)
259261
]
260262
detections = tf.stack(detection_list, axis=2, name='detections')
@@ -281,7 +283,7 @@ def det_post_process(params: Dict[Any, Any], cls_outputs: Dict[int, tf.Tensor],
281283
282284
Returns:
283285
detections_batch: a batch of detection results. Each detection is a tensor
284-
with each row representing [image_id, x, y, width, height, score, class].
286+
with each row representing [image_id, ymin, xmin, ymax, xmax, score, class].
285287
"""
286288
if not params['batch_size']:
287289
# Use combined version for dynamic batch size.
@@ -318,6 +320,7 @@ def det_post_process(params: Dict[Any, Any], cls_outputs: Dict[int, tf.Tensor],
318320
classes_per_sample,
319321
image_id=[index],
320322
image_scale=[scales[index]],
323+
image_size=params['image_size'],
321324
min_score_thresh=min_score_thresh,
322325
max_boxes_to_draw=max_boxes_to_draw,
323326
disable_pyfun=params.get('disable_pyfun'))
@@ -412,7 +415,7 @@ def visualize_image_prediction(image,
412415
Args:
413416
image: Image content in shape of [height, width, 3].
414417
prediction: a list of vector, with each vector has the format of [image_id,
415-
y, x, height, width, score, class].
418+
ymin, xmin, ymax, xmax, score, class].
416419
disable_pyfun: disable pyfunc for faster post processing.
417420
label_id_mapping: a map from label id to name.
418421
**kwargs: extra parameters for vistualization, such as min_score_thresh,
@@ -430,7 +433,7 @@ def visualize_image_prediction(image,
430433
boxes[:, [0, 1, 2, 3]] = boxes[:, [1, 0, 3, 2]]
431434

432435
label_id_mapping = label_id_mapping or coco_id_mapping
433-
boxes[:, 2:4] += boxes[:, 0:2]
436+
434437
return visualize_image(image, boxes, classes, scores, label_id_mapping,
435438
**kwargs)
436439

0 commit comments

Comments
 (0)