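Move post-processing/NMS constants (MAX_DETECTION_POINTS, MAX_DETECTIONS_PER_IMAGE) into the model config as max_detection_points / max_det_per_image, and add a soft_nms config option that can be overridden from the command line. Fix #120 and the comment typo reported in #150.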

rwightman · rwightman · commit 9370d27db522 · 2020-12-14T13:32:18.000-08:00
diff --git a/effdet/anchors.py b/effdet/anchors.py
@@ -43,12 +43,6 @@
 # The score for a dummy detection
 _DUMMY_DETECTION_SCORE = -1e5
 
-# The maximum number of (anchor,class) pairs to keep for non-max suppression.
-MAX_DETECTION_POINTS = 5000
-
-# The maximum number of detections per image.
-MAX_DETECTIONS_PER_IMAGE = 100
-
 
 def decode_box_outputs(rel_codes, anchors, output_xyxy: bool=False):
     """Transforms relative regression coordinates to absolute positions.
@@ -97,17 +91,17 @@ def clip_boxes_xyxy(boxes: torch.Tensor, size: torch.Tensor):
 def generate_detections(
         cls_outputs, box_outputs, anchor_boxes, indices, classes,
         img_scale: Optional[torch.Tensor], img_size: Optional[torch.Tensor],
-        max_det_per_image: int = MAX_DETECTIONS_PER_IMAGE, soft_nms: bool = False):
+        max_det_per_image: int = 100, soft_nms: bool = False):
     """Generates detections with RetinaNet model outputs and anchors.
 
     Args:
         cls_outputs: a torch tensor with shape [N, 1], which has the highest class
             scores on all feature levels. The N is the number of selected
-            top-K total anchors on all levels.  (k being MAX_DETECTION_POINTS)
+            top-K total anchors on all levels.
 
         box_outputs: a torch tensor with shape [N, 4], which stacks box regression
             outputs on all feature levels. The N is the number of selected top-k
-            total anchors on all levels. (k being MAX_DETECTION_POINTS)
+            total anchors on all levels.
 
         anchor_boxes: a torch tensor with shape [N, 4], which stacks anchors on all
             feature levels. The N is the number of selected top-k total anchors on all levels.
@@ -124,7 +118,7 @@ def generate_detections(
         max_det_per_image: an int constant, added as argument to make torchscript happy
 
     Returns:
-        detections: detection results in a tensor with shape [MAX_DETECTION_POINTS, 6],
+        detections: detection results in a tensor with shape [max_det_per_image, 6],
             each row representing [x_min, y_min, x_max, y_max, score, class]
     """
     assert box_outputs.shape[-1] == 4
@@ -147,7 +141,7 @@ def generate_detections(
     else:
         top_detection_idx = batched_nms(boxes, scores, classes, iou_threshold=0.5)
 
-    # keep only topk scoring predictions
+    # keep only top max_det_per_image scoring predictions
     top_detection_idx = top_detection_idx[:max_det_per_image]
     boxes = boxes[top_detection_idx]
     scores = scores[top_detection_idx, None]
@@ -159,7 +153,7 @@ def generate_detections(
     # FIXME add option to convert boxes back to yxyx? Otherwise must be handled downstream if
     # that is the preferred output format.
 
-    # stack em and pad out to MAX_DETECTIONS_PER_IMAGE if necessary
+    # stack em and pad out to max_det_per_image if necessary
     num_det = len(top_detection_idx)
     detections = torch.cat([boxes, scores, classes.float()], dim=1)
     if num_det < max_det_per_image:
diff --git a/effdet/bench.py b/effdet/bench.py
@@ -5,7 +5,7 @@
 from typing import Optional, Dict, List
 import torch
 import torch.nn as nn
-from .anchors import Anchors, AnchorLabeler, generate_detections, MAX_DETECTION_POINTS
+from .anchors import Anchors, AnchorLabeler, generate_detections
 from .loss import DetectionLoss
 
 
@@ -14,7 +14,7 @@ def _post_process(
         box_outputs: List[torch.Tensor],
         num_levels: int,
         num_classes: int,
-        max_detection_points: int = MAX_DETECTION_POINTS,
+        max_detection_points: int = 5000,
 ):
     """Selects top-k predictions.
 
@@ -59,14 +59,19 @@ def _post_process(
 @torch.jit.script
 def _batch_detection(
         batch_size: int, class_out, box_out, anchor_boxes, indices, classes,
-        img_scale: Optional[torch.Tensor] = None, img_size: Optional[torch.Tensor] = None):
+        img_scale: Optional[torch.Tensor] = None,
+        img_size: Optional[torch.Tensor] = None,
+        max_det_per_image: int = 100,
+        soft_nms: bool = False,
+):
     batch_detections = []
     # FIXME we may be able to do this as a batch with some tensor reshaping/indexing, PR welcome
     for i in range(batch_size):
         img_scale_i = None if img_scale is None else img_scale[i]
         img_size_i = None if img_size is None else img_size[i]
         detections = generate_detections(
-            class_out[i], box_out[i], anchor_boxes, indices[i], classes[i], img_scale_i, img_size_i)
+            class_out[i], box_out[i], anchor_boxes, indices[i], classes[i],
+            img_scale_i, img_size_i, max_det_per_image=max_det_per_image, soft_nms=soft_nms)
         batch_detections.append(detections)
     return torch.stack(batch_detections, dim=0)
 
@@ -79,17 +84,23 @@ def __init__(self, model):
         self.num_levels = model.config.num_levels
         self.num_classes = model.config.num_classes
         self.anchors = Anchors.from_config(model.config)
+        self.max_detection_points = model.config.max_detection_points
+        self.max_det_per_image = model.config.max_det_per_image
+        self.soft_nms = model.config.soft_nms
 
     def forward(self, x, img_info: Optional[Dict[str, torch.Tensor]] = None):
         class_out, box_out = self.model(x)
         class_out, box_out, indices, classes = _post_process(
-            class_out, box_out, num_levels=self.num_levels, num_classes=self.num_classes)
+            class_out, box_out, num_levels=self.num_levels, num_classes=self.num_classes,
+            max_detection_points=self.max_detection_points)
         if img_info is None:
             img_scale, img_size = None, None
         else:
             img_scale, img_size = img_info['img_scale'], img_info['img_size']
         return _batch_detection(
-            x.shape[0], class_out, box_out, self.anchors.boxes, indices, classes, img_scale, img_size)
+            x.shape[0], class_out, box_out, self.anchors.boxes, indices, classes,
+            img_scale, img_size, max_det_per_image=self.max_det_per_image, soft_nms=self.soft_nms
+        )
 
 
 class DetBenchTrain(nn.Module):
@@ -100,6 +111,9 @@ def __init__(self, model, create_labeler=True):
         self.num_levels = model.config.num_levels
         self.num_classes = model.config.num_classes
         self.anchors = Anchors.from_config(model.config)
+        self.max_detection_points = model.config.max_detection_points
+        self.max_det_per_image = model.config.max_det_per_image
+        self.soft_nms = model.config.soft_nms
         self.anchor_labeler = None
         if create_labeler:
             self.anchor_labeler = AnchorLabeler(self.anchors, self.num_classes, match_threshold=0.5)
@@ -122,10 +136,12 @@ def forward(self, x, target: Dict[str, torch.Tensor]):
         if not self.training:
             # if eval mode, output detections for evaluation
             class_out_pp, box_out_pp, indices, classes = _post_process(
-                class_out, box_out, num_levels=self.num_levels, num_classes=self.num_classes)
+                class_out, box_out, num_levels=self.num_levels, num_classes=self.num_classes,
+                max_detection_points=self.max_detection_points)
             output['detections'] = _batch_detection(
                 x.shape[0], class_out_pp, box_out_pp, self.anchors.boxes, indices, classes,
-                target['img_scale'], target['img_size'])
+                target['img_scale'], target['img_size'],
+                max_det_per_image=self.max_det_per_image, soft_nms=self.soft_nms)
         return output
 
 
diff --git a/effdet/config/model_config.py b/effdet/config/model_config.py
@@ -70,6 +70,11 @@ def default_detection_model_configs():
     h.delta = 0.1
     h.box_loss_weight = 50.0
 
+    # nms
+    h.soft_nms = False  # use soft-nms, this is incredibly slow
+    h.max_detection_points = 5000  # max detections for post process, input to NMS
+    h.max_det_per_image = 100  # max detections per image limit, output of NMS
+
     return h
 
 
diff --git a/effdet/factory.py b/effdet/factory.py
@@ -23,7 +23,7 @@ def create_model_from_config(
         pretrained_backbone = False  # no point in loading backbone weights
 
     # Config overrides, override some config values via kwargs.
-    overrides = ('redundant_bias', 'label_smoothing', 'legacy_focal', 'jit_loss')
+    overrides = ('redundant_bias', 'label_smoothing', 'legacy_focal', 'jit_loss', 'soft_nms')
     for ov in overrides:
         value = kwargs.pop(ov, None)
         if value is not None:
diff --git a/train.py b/train.py
@@ -63,7 +63,7 @@
 parser.add_argument('--model', default='tf_efficientdet_d1', type=str, metavar='MODEL',
                     help='Name of model to train (default: "tf_efficientdet_d1"')
 add_bool_arg(parser, 'redundant-bias', default=None, help='override model config for redundant bias')
-parser.set_defaults(redundant_bias=None)
+add_bool_arg(parser, 'soft-nms', default=None, help='override model config for soft-nms')
 parser.add_argument('--val-skip', type=int, default=0, metavar='N',
                     help='Skip every N validation samples.')
 parser.add_argument('--num-classes', type=int, default=None, metavar='N',
@@ -277,6 +277,7 @@ def main():
         label_smoothing=args.smoothing,
         legacy_focal=args.legacy_focal,
         jit_loss=args.jit_loss,
+        soft_nms=args.soft_nms,
         bench_labeler=args.bench_labeler,
         checkpoint_path=args.initial_checkpoint,
     )
diff --git a/validate.py b/validate.py
@@ -51,6 +51,7 @@ def add_bool_arg(parser, name, default=False, help=''):  # FIXME move to utils
                     help='model architecture (default: tf_efficientdet_d1)')
 add_bool_arg(parser, 'redundant-bias', default=None,
                     help='override model config for redundant bias layers')
+add_bool_arg(parser, 'soft-nms', default=None, help='override model config for soft-nms')
 parser.add_argument('--num-classes', type=int, default=None, metavar='N',
                     help='Override num_classes in model config if set. For fine-tuning from pretrained.')
 parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
@@ -112,6 +113,7 @@ def validate(args):
         num_classes=args.num_classes,
         pretrained=args.pretrained,
         redundant_bias=args.redundant_bias,
+        soft_nms=args.soft_nms,
         checkpoint_path=args.checkpoint,
         checkpoint_ema=args.use_ema,
     )