Commit 5b105a8

laol777 authored and qubvel committed

Add scSE attention module for Unet (#53)

* added scSE module
* ability to choose type of attention for unet decoder
* update docstring

1 parent f70502e
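For context: scSE is the concurrent spatial and channel squeeze & excitation block of Roy et al. (MICCAI 2018). It computes a channel gate cSE(x) via global average pooling and 1x1 convolutions, a spatial gate sSE(x) via a 1x1 convolution, and returns x * cSE(x) + x * sSE(x). The diff below adds the module to common/blocks.py and threads an attention_type switch through the Unet decoder.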

3 files changed (+43, -7)

segmentation_models_pytorch/common/blocks.py

Lines changed: 16 additions & 0 deletions
@@ -20,3 +20,19 @@ def __init__(self, in_channels, out_channels, kernel_size, padding=0,
 
     def forward(self, x):
         return self.block(x)
+
+
+class SCSEModule(nn.Module):
+    def __init__(self, ch, re=16):
+        super().__init__()
+        self.cSE = nn.Sequential(nn.AdaptiveAvgPool2d(1),
+                                 nn.Conv2d(ch, ch//re, 1),
+                                 nn.ReLU(inplace=True),
+                                 nn.Conv2d(ch//re, ch, 1),
+                                 nn.Sigmoid()
+                                 )
+        self.sSE = nn.Sequential(nn.Conv2d(ch, ch, 1),
+                                 nn.Sigmoid())
+
+    def forward(self, x):
+        return x * self.cSE(x) + x * self.sSE(x)
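As a sanity check, a minimal sketch of the new module in isolation (import path taken from the diff; tensor sizes are illustrative). Note that this sSE variant keeps all ch channels through its 1x1 convolution, whereas the original paper squeezes to a single spatial map:

```python
import torch
from segmentation_models_pytorch.common.blocks import SCSEModule

x = torch.randn(2, 64, 32, 32)   # (batch, channels, height, width)
scse = SCSEModule(ch=64, re=16)  # cSE bottleneck: 64 -> 64//16 = 4 -> 64 channels
y = scse(x)

# cSE yields a (2, 64, 1, 1) channel gate, sSE a (2, 64, 32, 32) spatial gate;
# each gate reweights a copy of x and the two are summed, so the shape is preserved.
assert y.shape == x.shape
```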

segmentation_models_pytorch/unet/decoder.py

Lines changed: 23 additions & 7 deletions
@@ -2,13 +2,20 @@
 import torch.nn as nn
 import torch.nn.functional as F
 
-from ..common.blocks import Conv2dReLU
+from ..common.blocks import Conv2dReLU, SCSEModule
 from ..base.model import Model
 
 
 class DecoderBlock(nn.Module):
-    def __init__(self, in_channels, out_channels, use_batchnorm=True):
+    def __init__(self, in_channels, out_channels, use_batchnorm=True, attention_type=None):
         super().__init__()
+        if attention_type is None:
+            self.attention1 = nn.Identity()
+            self.attention2 = nn.Identity()
+        elif attention_type == 'scse':
+            self.attention1 = SCSEModule(in_channels)
+            self.attention2 = SCSEModule(out_channels)
+
         self.block = nn.Sequential(
             Conv2dReLU(in_channels, out_channels, kernel_size=3, padding=1, use_batchnorm=use_batchnorm),
             Conv2dReLU(out_channels, out_channels, kernel_size=3, padding=1, use_batchnorm=use_batchnorm),
@@ -19,7 +26,10 @@ def forward(self, x):
         x = F.interpolate(x, scale_factor=2, mode='nearest')
         if skip is not None:
             x = torch.cat([x, skip], dim=1)
+            x = self.attention1(x)
+
         x = self.block(x)
+        x = self.attention2(x)
         return x
 
 
@@ -38,6 +48,7 @@ def __init__(
             final_channels=1,
             use_batchnorm=True,
             center=False,
+            attention_type=None
     ):
         super().__init__()
 
@@ -50,11 +61,16 @@ def __init__(
         in_channels = self.compute_channels(encoder_channels, decoder_channels)
         out_channels = decoder_channels
 
-        self.layer1 = DecoderBlock(in_channels[0], out_channels[0], use_batchnorm=use_batchnorm)
-        self.layer2 = DecoderBlock(in_channels[1], out_channels[1], use_batchnorm=use_batchnorm)
-        self.layer3 = DecoderBlock(in_channels[2], out_channels[2], use_batchnorm=use_batchnorm)
-        self.layer4 = DecoderBlock(in_channels[3], out_channels[3], use_batchnorm=use_batchnorm)
-        self.layer5 = DecoderBlock(in_channels[4], out_channels[4], use_batchnorm=use_batchnorm)
+        self.layer1 = DecoderBlock(in_channels[0], out_channels[0],
+                                   use_batchnorm=use_batchnorm, attention_type=attention_type)
+        self.layer2 = DecoderBlock(in_channels[1], out_channels[1],
+                                   use_batchnorm=use_batchnorm, attention_type=attention_type)
+        self.layer3 = DecoderBlock(in_channels[2], out_channels[2],
+                                   use_batchnorm=use_batchnorm, attention_type=attention_type)
+        self.layer4 = DecoderBlock(in_channels[3], out_channels[3],
+                                   use_batchnorm=use_batchnorm, attention_type=attention_type)
+        self.layer5 = DecoderBlock(in_channels[4], out_channels[4],
+                                   use_batchnorm=use_batchnorm, attention_type=attention_type)
         self.final_conv = nn.Conv2d(out_channels[4], final_channels, kernel_size=(1, 1))
 
         self.initialize()
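Note the asymmetry in the wiring: attention1 gates the upsampled features right after skip concatenation (in_channels wide), while attention2 gates the output of the two Conv2dReLU layers (out_channels wide). A small sketch under assumed channel widths:

```python
from segmentation_models_pytorch.unet.decoder import DecoderBlock

# Illustrative widths: 128 channels enter after skip concatenation,
# 64 leave after the two Conv2dReLU layers.
block = DecoderBlock(128, 64, use_batchnorm=True, attention_type='scse')

print(block.attention1)  # SCSEModule over the 128 concatenated channels
print(block.attention2)  # SCSEModule over the 64 output channels
```

With attention_type=None both attributes are nn.Identity, so the forward path is unchanged for existing models.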

segmentation_models_pytorch/unet/model.py

Lines changed: 4 additions & 0 deletions
@@ -17,6 +17,8 @@ class Unet(EncoderDecoder):
         activation: activation function used in ``.predict(x)`` method for inference.
             One of [``sigmoid``, ``softmax``, callable, None]
         center: if ``True`` add ``Conv2dReLU`` block on encoder head (useful for VGG models)
+        attention_type: attention module used in decoder of the model
+            One of [``None``, ``scse``]
 
     Returns:
         ``torch.nn.Module``: **Unet**
@@ -35,6 +37,7 @@ def __init__(
             classes=1,
             activation='sigmoid',
             center=False,  # useful for VGG models
+            attention_type=None
     ):
         encoder = get_encoder(
             encoder_name,
@@ -47,6 +50,7 @@ def __init__(
             final_channels=classes,
             use_batchnorm=decoder_use_batchnorm,
             center=center,
+            attention_type=attention_type
         )
 
         super().__init__(encoder, decoder, activation)
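With the parameter threaded through Unet -> UnetDecoder -> DecoderBlock, enabling the module is a one-argument change. A minimal usage sketch ('resnet34' and the input size are illustrative assumptions; ``.predict(x)`` applies the chosen activation, per the docstring above):

```python
import torch
import segmentation_models_pytorch as smp

# scSE attention in every decoder block
model = smp.Unet('resnet34', classes=1, activation='sigmoid',
                 attention_type='scse')

x = torch.randn(1, 3, 256, 256)  # H and W divisible by 32 for the 5-stage encoder
mask = model.predict(x)          # sigmoid-activated (1, 1, 256, 256) mask
```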
