From 6315729f6aa6b634c2c1ec53ea2d4543b40ec94e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Br=C3=A1zdil?= Date: Sun, 12 Jan 2020 14:58:51 +0100 Subject: [PATCH] Allow custom input resolution. --- demo.py | 8 +++++--- models/bifpn.py | 4 +++- models/efficientdet.py | 4 +++- models/module.py | 9 ++------- 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/demo.py b/demo.py index 7219ef4..f59370c 100644 --- a/demo.py +++ b/demo.py @@ -38,13 +38,13 @@ class Detect(object): dir_name: Folder or image_file """ - def __init__(self, weights, num_class=21, network='efficientdet-d0', size_image=(512, 512)): + def __init__(self, weights, num_class=21, network='efficientdet-d0', size_image=(720, 1280)): super(Detect, self).__init__() self.weights = weights self.size_image = size_image self.device = torch.device( "cuda:0" if torch.cuda.is_available() else 'cpu') - self.transform = get_augumentation(phase='test') + self.transform = get_augumentation('test', self.size_image[1], self.size_image[0]) if(self.weights is not None): print('Load pretrained Model') checkpoint = torch.load( @@ -61,7 +61,7 @@ def __init__(self, weights, num_class=21, network='efficientdet-d0', size_image= is_training=False ) - if(self.weights is not None): + if self.weights is not None: state_dict = checkpoint['state_dict'] self.model.load_state_dict(state_dict) if torch.cuda.is_available(): @@ -84,6 +84,8 @@ def process(self, file_name=None, img=None, show=False): bbox_scores = list() colors = list() for j in range(scores.shape[0]): + if scores[j] < args.threshold: + continue bbox = transformed_anchors[[j], :][0].data.cpu().numpy() x1 = int(bbox[0]*origin_img.shape[1]/self.size_image[1]) y1 = int(bbox[1]*origin_img.shape[0]/self.size_image[0]) diff --git a/models/bifpn.py b/models/bifpn.py index 0f8e6bd..d1373a4 100644 --- a/models/bifpn.py +++ b/models/bifpn.py @@ -186,8 +186,10 @@ def forward(self, inputs): inputs_clone.append(in_tensor.clone()) for i in range(levels - 1, 0, -1): + scale_factor = [pathtd[i - 1].shape[2] / pathtd[i].shape[2], + pathtd[i - 1].shape[3] / pathtd[i].shape[3]] pathtd[i - 1] = (w1[0, i-1]*pathtd[i - 1] + w1[1, i-1]*F.interpolate( - pathtd[i], scale_factor=2, mode='nearest'))/(w1[0, i-1] + w1[1, i-1] + self.eps) + pathtd[i], scale_factor=scale_factor, mode='nearest'))/(w1[0, i-1] + w1[1, i-1] + self.eps) pathtd[i - 1] = self.bifpn_convs[idx_bifpn](pathtd[i - 1]) idx_bifpn = idx_bifpn + 1 # build down-top diff --git a/models/efficientdet.py b/models/efficientdet.py index 43357c9..9612e66 100644 --- a/models/efficientdet.py +++ b/models/efficientdet.py @@ -1,5 +1,6 @@ import torch import torch.nn as nn +import numpy as np import math from models.efficientnet import EfficientNet from models.bifpn import BIFPN @@ -63,7 +64,8 @@ def forward(self, inputs): outs = self.bbox_head(x) classification = torch.cat([out for out in outs[0]], dim=1) regression = torch.cat([out for out in outs[1]], dim=1) - anchors = self.anchors(inputs) + feature_shapes = [np.array([_.shape[2], _.shape[3]]) for _ in x] + anchors = self.anchors(inputs, feature_shapes) if self.is_training: return self.criterion(classification, regression, anchors, annotations) else: diff --git a/models/module.py b/models/module.py index 6ec9a7f..c0dda03 100644 --- a/models/module.py +++ b/models/module.py @@ -158,12 +158,7 @@ def __init__(self, pyramid_levels=None, strides=None, sizes=None, ratios=None, s self.scales = np.array( [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]) - def forward(self, image): - - image_shape = image.shape[2:] - image_shape = np.array(image_shape) - image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) - for x in self.pyramid_levels] + def forward(self, image, feature_shapes): # compute anchors over all pyramid levels all_anchors = np.zeros((0, 4)).astype(np.float32) @@ -172,7 +167,7 @@ def forward(self, image): anchors = generate_anchors( base_size=self.sizes[idx], ratios=self.ratios, scales=self.scales) shifted_anchors = shift( - image_shapes[idx], self.strides[idx], anchors) + feature_shapes[idx], self.strides[idx], anchors) all_anchors = np.append(all_anchors, shifted_anchors, axis=0) all_anchors = np.expand_dims(all_anchors, axis=0)