
Commit f5cf4b8

khornlund authored and qubvel committed
Add inceptionv4 backbone (#88)
1 parent b83e000 commit f5cf4b8

File tree

4 files changed (+64, -10 lines)


README.md

Lines changed: 6 additions & 6 deletions

@@ -7,13 +7,13 @@ The main features of this library are:
 
 - High level API (just two lines to create neural network)
 - 4 models architectures for binary and multi class segmentation (including legendary Unet)
-- 30 available encoders for each architecture
+- 31 available encoders for each architecture
 - All encoders have pre-trained weights for faster and better convergence
 
 ### Table of content
 1. [Quick start](#start)
 2. [Examples](#examples)
-3. [Models](#models)
+3. [Models](#models)
 1. [Architectures](#architectires)
 2. [Encoders](#encoders)
 3. [Pretrained weights](#weights)
@@ -57,7 +57,7 @@ preprocess_input = get_preprocessing_fn('resnet18', pretrained='imagenet')
 - [Linknet](https://arxiv.org/abs/1707.03718)
 - [FPN](http://presentations.cocodataset.org/COCO17-Stuff-FAIR.pdf)
 - [PSPNet](https://arxiv.org/abs/1612.01105)
-
+
 #### Encoders <a name="encoders"></a>
 
 | Type | Encoder names |
@@ -82,10 +82,10 @@ preprocess_input = get_preprocessing_fn('resnet18', pretrained='imagenet')
 | [instagram](https://pytorch.org/hub/facebookresearch_WSL-Images_resnext/) | resnext101_32x8d, resnext101_32x16d, resnext101_32x32d, resnext101_32x48d |
 
 ### Models API <a name="api"></a>
-- `model.encoder` - pretrained backbone to extract features of different spatial resolution
-- `model.decoder` - segmentation head, depends on models architecture (`Unet`/`Linknet`/`PSPNet`/`FPN`)
+- `model.encoder` - pretrained backbone to extract features of different spatial resolution
+- `model.decoder` - segmentation head, depends on models architecture (`Unet`/`Linknet`/`PSPNet`/`FPN`)
 - `model.activation` - output activation function, one of `sigmoid`, `softmax`
-- `model.forward(x)` - sequentially pass `x` through model\`s encoder and decoder (return logits!)
+- `model.forward(x)` - sequentially pass `x` through model\`s encoder and decoder (return logits!)
 - `model.predict(x)` - inference method, switch model to `.eval()` mode, call `.forward(x)` and apply activation function with `torch.no_grad()`
 
 ### Installation <a name="installation"></a>
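The `model.predict(x)` bullet in the README excerpt above fully specifies its inference contract. A minimal sketch of that behavior, assuming only what the README states (the function name `predict_sketch` is illustrative, not part of the library):

import torch

def predict_sketch(model, x):
    # per the README: switch to eval mode, run forward (which returns
    # logits), and apply the model's activation function under no_grad
    model.eval()
    with torch.no_grad():
        logits = model.forward(x)
        return model.activation(logits)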

segmentation_models_pytorch/encoders/__init__.py

Lines changed: 5 additions & 3 deletions

@@ -6,7 +6,8 @@
 from .vgg import vgg_encoders
 from .senet import senet_encoders
 from .densenet import densenet_encoders
-from .inceptionresnetv2 import inception_encoders
+from .inceptionresnetv2 import inceptionresnetv2_encoders
+from .inceptionv4 import inceptionv4_encoders
 from .efficientnet import efficient_net_encoders
 
 
@@ -18,7 +19,8 @@
 encoders.update(vgg_encoders)
 encoders.update(senet_encoders)
 encoders.update(densenet_encoders)
-encoders.update(inception_encoders)
+encoders.update(inceptionresnetv2_encoders)
+encoders.update(inceptionv4_encoders)
 encoders.update(efficient_net_encoders)
 
 
@@ -43,7 +45,7 @@ def get_preprocessing_params(encoder_name, pretrained='imagenet'):
 
     if pretrained not in settings.keys():
         raise ValueError('Avaliable pretrained options {}'.format(settings.keys()))
-
+
     formatted_settings = {}
     formatted_settings['input_space'] = settings[pretrained].get('input_space')
     formatted_settings['input_range'] = settings[pretrained].get('input_range')
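With both encoder families registered under distinct keys, the new backbone is reachable by name anywhere an encoder name is accepted. A hedged usage sketch, assuming the high-level API the README documents (`get_preprocessing_fn` appears in the hunk header above; the `Unet` call mirrors the README's quick start):

import segmentation_models_pytorch as smp
from segmentation_models_pytorch.encoders import get_preprocessing_fn

# the registry update makes 'inceptionv4' a valid encoder name
model = smp.Unet('inceptionv4', encoder_weights='imagenet')
preprocess_input = get_preprocessing_fn('inceptionv4', pretrained='imagenet')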

segmentation_models_pytorch/encoders/inceptionresnetv2.py

Lines changed: 1 addition & 1 deletion

@@ -57,7 +57,7 @@ def load_state_dict(self, state_dict, **kwargs):
         super().load_state_dict(state_dict, **kwargs)
 
 
-inception_encoders = {
+inceptionresnetv2_encoders = {
     'inceptionresnetv2': {
         'encoder': InceptionResNetV2Encoder,
         'pretrained_settings': pretrained_settings['inceptionresnetv2'],
segmentation_models_pytorch/encoders/inceptionv4.py

Lines changed: 52 additions & 0 deletions

@@ -0,0 +1,52 @@
+import torch.nn as nn
+from pretrainedmodels.models.inceptionv4 import InceptionV4, BasicConv2d
+from pretrainedmodels.models.inceptionv4 import pretrained_settings
+
+
+class InceptionV4Encoder(InceptionV4):
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.in_channels = 3
+        self.features[0] = BasicConv2d(self.in_channels, 32, kernel_size=3, stride=2, padding=1)
+        self.features[1] = BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1)
+
+        self.chunks = [3, 5, 9, 15]
+
+        # correct paddings
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                if m.kernel_size == (3, 3):
+                    m.padding = (1, 1)
+            if isinstance(m, nn.MaxPool2d):
+                m.padding = (1, 1)
+
+        # remove linear layers
+        del self.last_linear
+
+    def forward(self, x):
+        x0 = self.features[:self.chunks[0]](x)
+        x1 = self.features[self.chunks[0]:self.chunks[1]](x0)
+        x2 = self.features[self.chunks[1]:self.chunks[2]](x1)
+        x3 = self.features[self.chunks[2]:self.chunks[3]](x2)
+        x4 = self.features[self.chunks[3]:](x3)
+
+        features = [x4, x3, x2, x1, x0]
+        return features
+
+    def load_state_dict(self, state_dict, **kwargs):
+        state_dict.pop('last_linear.bias')
+        state_dict.pop('last_linear.weight')
+        super().load_state_dict(state_dict, **kwargs)
+
+
+inceptionv4_encoders = {
+    'inceptionv4': {
+        'encoder': InceptionV4Encoder,
+        'pretrained_settings': pretrained_settings['inceptionv4'],
+        'out_shapes': (1536, 1024, 384, 192, 64),
+        'params': {
+            'num_classes': 1001,
+        }
+    }
+}
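The `chunks` boundaries slice the `features` Sequential into five stages, and `out_shapes` records the channel count of each returned map, deepest first. A quick smoke-test sketch of that correspondence (the 256x256 input size is an arbitrary choice, and this assumes a randomly initialized model rather than pretrained weights):

import torch
from segmentation_models_pytorch.encoders.inceptionv4 import (
    InceptionV4Encoder,
    inceptionv4_encoders,
)

# num_classes only sizes the classifier head, which the encoder deletes
params = inceptionv4_encoders['inceptionv4']['params']
encoder = InceptionV4Encoder(**params)
encoder.eval()

with torch.no_grad():
    features = encoder(torch.randn(1, 3, 256, 256))

# features come deepest first, matching out_shapes = (1536, 1024, 384, 192, 64)
for fmap, channels in zip(features, inceptionv4_encoders['inceptionv4']['out_shapes']):
    assert fmap.shape[1] == channels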
