supports efficientdet-d7x now

zylo117 · zylo117 · commit c533bc2de651 · 2020-07-23T15:38:46.000+08:00
diff --git a/backbone.py b/backbone.py
@@ -1,7 +1,5 @@
 # Author: Zylo117
 
-import math
-
 import torch
 from torch import nn
 
@@ -14,12 +12,13 @@ def __init__(self, num_classes=80, compound_coef=0, load_weights=False, **kwargs
         super(EfficientDetBackbone, self).__init__()
         self.compound_coef = compound_coef
 
-        self.backbone_compound_coef = [0, 1, 2, 3, 4, 5, 6, 6]
-        self.fpn_num_filters = [64, 88, 112, 160, 224, 288, 384, 384]
-        self.fpn_cell_repeats = [3, 4, 5, 6, 7, 7, 8, 8]
-        self.input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
-        self.box_class_repeats = [3, 3, 3, 4, 4, 4, 5, 5]
-        self.anchor_scale = [4., 4., 4., 4., 4., 4., 4., 5.]
+        self.backbone_compound_coef = [0, 1, 2, 3, 4, 5, 6, 6, 7]
+        self.fpn_num_filters = [64, 88, 112, 160, 224, 288, 384, 384, 384]
+        self.fpn_cell_repeats = [3, 4, 5, 6, 7, 7, 8, 8, 8]
+        self.input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
+        self.box_class_repeats = [3, 3, 3, 4, 4, 4, 5, 5, 5]
+        self.pyramid_levels = [5, 5, 5, 5, 5, 5, 5, 5, 6]
+        self.anchor_scale = [4., 4., 4., 4., 4., 4., 4., 5., 4.]
         self.aspect_ratios = kwargs.get('ratios', [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)])
         self.num_scales = len(kwargs.get('scales', [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]))
         conv_channel_coef = {
@@ -32,6 +31,7 @@ def __init__(self, num_classes=80, compound_coef=0, load_weights=False, **kwargs
             5: [64, 176, 512],
             6: [72, 200, 576],
             7: [72, 200, 576],
+            8: [80, 224, 640],
         }
 
         num_anchors = len(self.aspect_ratios) * self.num_scales
@@ -40,17 +40,22 @@ def __init__(self, num_classes=80, compound_coef=0, load_weights=False, **kwargs
             *[BiFPN(self.fpn_num_filters[self.compound_coef],
                     conv_channel_coef[compound_coef],
                     True if _ == 0 else False,
-                    attention=True if compound_coef < 6 else False)
+                    attention=True if compound_coef < 6 else False,
+                    use_p8=compound_coef > 7)
               for _ in range(self.fpn_cell_repeats[compound_coef])])
 
         self.num_classes = num_classes
         self.regressor = Regressor(in_channels=self.fpn_num_filters[self.compound_coef], num_anchors=num_anchors,
-                                   num_layers=self.box_class_repeats[self.compound_coef])
+                                   num_layers=self.box_class_repeats[self.compound_coef],
+                                   pyramid_levels=self.pyramid_levels[self.compound_coef])
         self.classifier = Classifier(in_channels=self.fpn_num_filters[self.compound_coef], num_anchors=num_anchors,
                                      num_classes=num_classes,
-                                     num_layers=self.box_class_repeats[self.compound_coef])
+                                     num_layers=self.box_class_repeats[self.compound_coef],
+                                     pyramid_levels=self.pyramid_levels[self.compound_coef])
 
-        self.anchors = Anchors(anchor_scale=self.anchor_scale[compound_coef], **kwargs)
+        self.anchors = Anchors(anchor_scale=self.anchor_scale[compound_coef],
+                               pyramid_levels=(torch.arange(self.pyramid_levels[self.compound_coef]) + 3).tolist(),
+                               **kwargs)
 
         self.backbone_net = EfficientNet(self.backbone_compound_coef[compound_coef], load_weights)
 
diff --git a/efficientdet/model.py b/efficientdet/model.py
@@ -57,7 +57,8 @@ class BiFPN(nn.Module):
     modified by Zylo117
     """
 
-    def __init__(self, num_channels, conv_channels, first_time=False, epsilon=1e-4, onnx_export=False, attention=True):
+    def __init__(self, num_channels, conv_channels, first_time=False, epsilon=1e-4, onnx_export=False, attention=True,
+                 use_p8=False):
         """
 
         Args:
@@ -70,6 +71,8 @@ def __init__(self, num_channels, conv_channels, first_time=False, epsilon=1e-4,
         """
         super(BiFPN, self).__init__()
         self.epsilon = epsilon
+        self.use_p8 = use_p8
+
         # Conv layers
         self.conv6_up = SeparableConvBlock(num_channels, onnx_export=onnx_export)
         self.conv5_up = SeparableConvBlock(num_channels, onnx_export=onnx_export)
@@ -79,6 +82,9 @@ def __init__(self, num_channels, conv_channels, first_time=False, epsilon=1e-4,
         self.conv5_down = SeparableConvBlock(num_channels, onnx_export=onnx_export)
         self.conv6_down = SeparableConvBlock(num_channels, onnx_export=onnx_export)
         self.conv7_down = SeparableConvBlock(num_channels, onnx_export=onnx_export)
+        if use_p8:
+            self.conv7_up = SeparableConvBlock(num_channels, onnx_export=onnx_export)
+            self.conv8_down = SeparableConvBlock(num_channels, onnx_export=onnx_export)
 
         # Feature scaling layers
         self.p6_upsample = nn.Upsample(scale_factor=2, mode='nearest')
@@ -90,6 +96,9 @@ def __init__(self, num_channels, conv_channels, first_time=False, epsilon=1e-4,
         self.p5_downsample = MaxPool2dStaticSamePadding(3, 2)
         self.p6_downsample = MaxPool2dStaticSamePadding(3, 2)
         self.p7_downsample = MaxPool2dStaticSamePadding(3, 2)
+        if use_p8:
+            self.p7_upsample = nn.Upsample(scale_factor=2, mode='nearest')
+            self.p8_downsample = MaxPool2dStaticSamePadding(3, 2)
 
         self.swish = MemoryEfficientSwish() if not onnx_export else Swish()
 
@@ -116,6 +125,10 @@ def __init__(self, num_channels, conv_channels, first_time=False, epsilon=1e-4,
             self.p6_to_p7 = nn.Sequential(
                 MaxPool2dStaticSamePadding(3, 2)
             )
+            if use_p8:
+                self.p7_to_p8 = nn.Sequential(
+                    MaxPool2dStaticSamePadding(3, 2)
+                )
 
             self.p4_down_channel_2 = nn.Sequential(
                 Conv2dStaticSamePadding(conv_channels[1], num_channels, 1),
@@ -172,11 +185,11 @@ def forward(self, inputs):
         # elif later phase, upsample to target phase's by nearest interpolation
 
         if self.attention:
-            p3_out, p4_out, p5_out, p6_out, p7_out = self._forward_fast_attention(inputs)
+            outs = self._forward_fast_attention(inputs)
         else:
-            p3_out, p4_out, p5_out, p6_out, p7_out = self._forward(inputs)
+            outs = self._forward(inputs)
 
-        return p3_out, p4_out, p5_out, p6_out, p7_out
+        return outs
 
     def _forward_fast_attention(self, inputs):
         if self.first_time:
@@ -258,19 +271,34 @@ def _forward(self, inputs):
 
             p6_in = self.p5_to_p6(p5)
             p7_in = self.p6_to_p7(p6_in)
+            if self.use_p8:
+                p8_in = self.p7_to_p8(p7_in)
 
             p3_in = self.p3_down_channel(p3)
             p4_in = self.p4_down_channel(p4)
             p5_in = self.p5_down_channel(p5)
 
         else:
-            # P3_0, P4_0, P5_0, P6_0 and P7_0
-            p3_in, p4_in, p5_in, p6_in, p7_in = inputs
+            if self.use_p8:
+                # P3_0, P4_0, P5_0, P6_0, P7_0 and P8_0
+                p3_in, p4_in, p5_in, p6_in, p7_in, p8_in = inputs
+            else:
+                # P3_0, P4_0, P5_0, P6_0 and P7_0
+                p3_in, p4_in, p5_in, p6_in, p7_in = inputs
 
-        # P7_0 to P7_2
+        if self.use_p8:
+            # P8_0 to P8_2
 
-        # Connections for P6_0 and P7_0 to P6_1 respectively
-        p6_up = self.conv6_up(self.swish(p6_in + self.p6_upsample(p7_in)))
+            # Connections for P7_0 and P8_0 to P7_1 respectively
+            p7_up = self.conv7_up(self.swish(p7_in + self.p7_upsample(p8_in)))
+
+            # Connections for P6_0 and P7_1 to P6_1 respectively
+            p6_up = self.conv6_up(self.swish(p6_in + self.p6_upsample(p7_up)))
+        else:
+            # P7_0 to P7_2
+
+            # Connections for P6_0 and P7_0 to P6_1 respectively
+            p6_up = self.conv6_up(self.swish(p6_in + self.p6_upsample(p7_in)))
 
         # Connections for P5_0 and P6_1 to P5_1 respectively
         p5_up = self.conv5_up(self.swish(p5_in + self.p5_upsample(p6_up)))
@@ -297,26 +325,36 @@ def _forward(self, inputs):
         p6_out = self.conv6_down(
             self.swish(p6_in + p6_up + self.p6_downsample(p5_out)))
 
-        # Connections for P7_0 and P6_2 to P7_2
-        p7_out = self.conv7_down(self.swish(p7_in + self.p7_downsample(p6_out)))
+        if self.use_p8:
+            # Connections for P7_0, P7_1 and P6_2 to P7_2 respectively
+            p7_out = self.conv7_down(
+                self.swish(p7_in + p7_up + self.p7_downsample(p6_out)))
 
-        return p3_out, p4_out, p5_out, p6_out, p7_out
+            # Connections for P8_0 and P7_2 to P8_2
+            p8_out = self.conv8_down(self.swish(p8_in + self.p8_downsample(p7_out)))
+
+            return p3_out, p4_out, p5_out, p6_out, p7_out, p8_out
+        else:
+            # Connections for P7_0 and P6_2 to P7_2
+            p7_out = self.conv7_down(self.swish(p7_in + self.p7_downsample(p6_out)))
+
+            return p3_out, p4_out, p5_out, p6_out, p7_out
 
 
 class Regressor(nn.Module):
     """
     modified by Zylo117
     """
 
-    def __init__(self, in_channels, num_anchors, num_layers, onnx_export=False):
+    def __init__(self, in_channels, num_anchors, num_layers, pyramid_levels=5, onnx_export=False):
         super(Regressor, self).__init__()
         self.num_layers = num_layers
 
         self.conv_list = nn.ModuleList(
             [SeparableConvBlock(in_channels, in_channels, norm=False, activation=False) for i in range(num_layers)])
         self.bn_list = nn.ModuleList(
             [nn.ModuleList([nn.BatchNorm2d(in_channels, momentum=0.01, eps=1e-3) for i in range(num_layers)]) for j in
-             range(5)])
+             range(pyramid_levels)])
         self.header = SeparableConvBlock(in_channels, num_anchors * 4, norm=False, activation=False)
         self.swish = MemoryEfficientSwish() if not onnx_export else Swish()
 
@@ -344,7 +382,7 @@ class Classifier(nn.Module):
     modified by Zylo117
     """
 
-    def __init__(self, in_channels, num_anchors, num_classes, num_layers, onnx_export=False):
+    def __init__(self, in_channels, num_anchors, num_classes, num_layers, pyramid_levels=5, onnx_export=False):
         super(Classifier, self).__init__()
         self.num_anchors = num_anchors
         self.num_classes = num_classes
@@ -353,7 +391,7 @@ def __init__(self, in_channels, num_anchors, num_classes, num_layers, onnx_expor
             [SeparableConvBlock(in_channels, in_channels, norm=False, activation=False) for i in range(num_layers)])
         self.bn_list = nn.ModuleList(
             [nn.ModuleList([nn.BatchNorm2d(in_channels, momentum=0.01, eps=1e-3) for i in range(num_layers)]) for j in
-             range(5)])
+             range(pyramid_levels)])
         self.header = SeparableConvBlock(in_channels, num_anchors * num_classes, norm=False, activation=False)
         self.swish = MemoryEfficientSwish() if not onnx_export else Swish()
 
diff --git a/efficientdet/utils.py b/efficientdet/utils.py
@@ -63,6 +63,8 @@ def __init__(self, anchor_scale=4., pyramid_levels=None, **kwargs):
 
         if pyramid_levels is None:
             self.pyramid_levels = [3, 4, 5, 6, 7]
+        else:
+            self.pyramid_levels = pyramid_levels
 
         self.strides = kwargs.get('strides', [2 ** x for x in self.pyramid_levels])
         self.scales = np.array(kwargs.get('scales', [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]))
diff --git a/efficientdet_test.py b/efficientdet_test.py
@@ -45,7 +45,7 @@
 
 color_list = standard_to_bgr(STANDARD_COLORS)
 # tf bilinear interpolation is different from any other's, just make do
-input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
+input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
 input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size
 ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)