From 6315729f6aa6b634c2c1ec53ea2d4543b40ec94e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Br=C3=A1zdil?= <brazdil.martin@gmail.com>
Date: Sun, 12 Jan 2020 14:58:51 +0100
Subject: [PATCH] Allow custom input resolution.

---
 demo.py                | 8 +++++---
 models/bifpn.py        | 4 +++-
 models/efficientdet.py | 4 +++-
 models/module.py       | 9 ++-------
 4 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/demo.py b/demo.py
index 7219ef4..f59370c 100644
--- a/demo.py
+++ b/demo.py
@@ -38,13 +38,13 @@ class Detect(object):
         dir_name: Folder or image_file
     """
 
-    def __init__(self, weights, num_class=21, network='efficientdet-d0', size_image=(512, 512)):
+    def __init__(self, weights, num_class=21, network='efficientdet-d0', size_image=(720, 1280)):
         super(Detect,  self).__init__()
         self.weights = weights
         self.size_image = size_image
         self.device = torch.device(
             "cuda:0" if torch.cuda.is_available() else 'cpu')
-        self.transform = get_augumentation(phase='test')
+        self.transform = get_augumentation('test', self.size_image[1], self.size_image[0])
         if(self.weights is not None):
             print('Load pretrained Model')
             checkpoint = torch.load(
@@ -61,7 +61,7 @@ def __init__(self, weights, num_class=21, network='efficientdet-d0', size_image=
                                   is_training=False
                                   )
 
-        if(self.weights is not None):
+        if self.weights is not None:
             state_dict = checkpoint['state_dict']
             self.model.load_state_dict(state_dict)
         if torch.cuda.is_available():
@@ -84,6 +84,8 @@ def process(self, file_name=None, img=None, show=False):
             bbox_scores = list()
             colors = list()
             for j in range(scores.shape[0]):
+                if scores[j] < args.threshold:
+                    continue
                 bbox = transformed_anchors[[j], :][0].data.cpu().numpy()
                 x1 = int(bbox[0]*origin_img.shape[1]/self.size_image[1])
                 y1 = int(bbox[1]*origin_img.shape[0]/self.size_image[0])
diff --git a/models/bifpn.py b/models/bifpn.py
index 0f8e6bd..d1373a4 100644
--- a/models/bifpn.py
+++ b/models/bifpn.py
@@ -186,8 +186,10 @@ def forward(self, inputs):
             inputs_clone.append(in_tensor.clone())
 
         for i in range(levels - 1, 0, -1):
+            scale_factor = [pathtd[i - 1].shape[2] / pathtd[i].shape[2],
+                            pathtd[i - 1].shape[3] / pathtd[i].shape[3]]
             pathtd[i - 1] = (w1[0, i-1]*pathtd[i - 1] + w1[1, i-1]*F.interpolate(
-                pathtd[i], scale_factor=2, mode='nearest'))/(w1[0, i-1] + w1[1, i-1] + self.eps)
+                pathtd[i], scale_factor=scale_factor, mode='nearest'))/(w1[0, i-1] + w1[1, i-1] + self.eps)
             pathtd[i - 1] = self.bifpn_convs[idx_bifpn](pathtd[i - 1])
             idx_bifpn = idx_bifpn + 1
         # build down-top
diff --git a/models/efficientdet.py b/models/efficientdet.py
index 43357c9..9612e66 100644
--- a/models/efficientdet.py
+++ b/models/efficientdet.py
@@ -1,5 +1,6 @@
 import torch
 import torch.nn as nn
+import numpy as np
 import math
 from models.efficientnet import EfficientNet
 from models.bifpn import BIFPN
@@ -63,7 +64,8 @@ def forward(self, inputs):
         outs = self.bbox_head(x)
         classification = torch.cat([out for out in outs[0]], dim=1)
         regression = torch.cat([out for out in outs[1]], dim=1)
-        anchors = self.anchors(inputs)
+        feature_shapes = [np.array([_.shape[2], _.shape[3]]) for _ in x]
+        anchors = self.anchors(inputs, feature_shapes)
         if self.is_training:
             return self.criterion(classification, regression, anchors, annotations)
         else:
diff --git a/models/module.py b/models/module.py
index 6ec9a7f..c0dda03 100644
--- a/models/module.py
+++ b/models/module.py
@@ -158,12 +158,7 @@ def __init__(self, pyramid_levels=None, strides=None, sizes=None, ratios=None, s
             self.scales = np.array(
                 [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])
 
-    def forward(self, image):
-
-        image_shape = image.shape[2:]
-        image_shape = np.array(image_shape)
-        image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x)
-                        for x in self.pyramid_levels]
+    def forward(self, image, feature_shapes):
 
         # compute anchors over all pyramid levels
         all_anchors = np.zeros((0, 4)).astype(np.float32)
@@ -172,7 +167,7 @@ def forward(self, image):
             anchors = generate_anchors(
                 base_size=self.sizes[idx], ratios=self.ratios, scales=self.scales)
             shifted_anchors = shift(
-                image_shapes[idx], self.strides[idx], anchors)
+                feature_shapes[idx], self.strides[idx], anchors)
             all_anchors = np.append(all_anchors, shifted_anchors, axis=0)
 
         all_anchors = np.expand_dims(all_anchors, axis=0)