Fix issue #377: derive DeepLabV3+ high-res feature index from encoder_depth

munehiro-k · munehiro-k · commit 3179751aa326 · 2024-11-25T03:44:15.000+09:00
diff --git a/segmentation_models_pytorch/decoders/deeplabv3/decoder.py b/segmentation_models_pytorch/decoders/deeplabv3/decoder.py
@@ -71,6 +71,7 @@ class DeepLabV3PlusDecoder(nn.Module):
     def __init__(
         self,
         encoder_channels: Sequence[int, ...],
+        encoder_depth: Literal[3, 4, 5],
         out_channels: int,
         atrous_rates: Iterable[int],
         output_stride: Literal[8, 16],
@@ -104,7 +105,14 @@ def __init__(
         scale_factor = 2 if output_stride == 8 else 4
         self.up = nn.UpsamplingBilinear2d(scale_factor=scale_factor)
 
-        highres_in_channels = encoder_channels[-4]
+        if encoder_depth == 3 and output_stride == 8:
+            self.highres_input_index = -2
+        elif encoder_depth == 3 or encoder_depth == 4:
+            self.highres_input_index = -3
+        else:
+            self.highres_input_index = -4
+
+        highres_in_channels = encoder_channels[self.highres_input_index]
         highres_out_channels = 48  # proposed by authors of paper
         self.block1 = nn.Sequential(
             nn.Conv2d(
@@ -128,7 +136,7 @@ def __init__(
     def forward(self, *features):
         aspp_features = self.aspp(features[-1])
         aspp_features = self.up(aspp_features)
-        high_res_features = self.block1(features[-4])
+        high_res_features = self.block1(features[self.highres_input_index])
         concat_features = torch.cat([aspp_features, high_res_features], dim=1)
         fused_features = self.block2(concat_features)
         return fused_features
diff --git a/segmentation_models_pytorch/decoders/deeplabv3/model.py b/segmentation_models_pytorch/decoders/deeplabv3/model.py
@@ -150,7 +150,7 @@ class DeepLabV3Plus(SegmentationModel):
     def __init__(
         self,
         encoder_name: str = "resnet34",
-        encoder_depth: int = 5,
+        encoder_depth: Literal[3, 4, 5] = 5,
         encoder_weights: Optional[str] = "imagenet",
         encoder_output_stride: Literal[8, 16] = 16,
         decoder_channels: int = 256,
@@ -177,6 +177,7 @@ def __init__(
 
         self.decoder = DeepLabV3PlusDecoder(
             encoder_channels=self.encoder.out_channels,
+            encoder_depth=encoder_depth,
             out_channels=decoder_channels,
             atrous_rates=decoder_atrous_rates,
             output_stride=encoder_output_stride,