 import math
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 from functools import partial
+from typing import Dict, Sequence, List
 
 from timm.layers import DropPath, to_2tuple, trunc_normal_
 
 
 class LayerNorm(nn.LayerNorm):
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         if x.ndim == 4:
-            B, C, H, W = x.shape
-            x = x.view(B, C, -1).transpose(1, 2)
-            x = super().forward(x)
-            x = x.transpose(1, 2).view(B, C, H, W)
+            batch_size, channels, height, width = x.shape
+            x = x.view(batch_size, channels, -1).transpose(1, 2)
+            x = F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
+            x = x.transpose(1, 2).view(batch_size, channels, height, width)
         else:
-            x = super().forward(x)
+            x = F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
         return x
 
 
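Aside: the rewritten LayerNorm flattens an NCHW map to (B, N, C) and calls F.layer_norm directly rather than super().forward(x), which, like the nn.Identity placeholders added further down, looks aimed at torch.jit.script support. A minimal equivalence sketch (sizes are made up; LayerNorm refers to the subclass above):

    import torch
    import torch.nn as nn

    ln = LayerNorm(64)                      # subclass defined above
    ref = nn.LayerNorm(64)
    ref.load_state_dict(ln.state_dict())    # copy the same affine parameters

    x = torch.randn(2, 64, 8, 8)            # NCHW feature map
    # Normalizing over channels at each spatial position should match a plain
    # channels-last LayerNorm.
    expected = ref(x.permute(0, 2, 3, 1)).permute(0, 3, 1, 2)
    assert torch.allclose(ln(x), expected, atol=1e-6)
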
@@ -60,9 +62,9 @@ def _init_weights(self, m):
             if m.bias is not None:
                 m.bias.data.zero_()
 
-    def forward(self, x, H, W):
+    def forward(self, x: torch.Tensor, height: int, width: int) -> torch.Tensor:
         x = self.fc1(x)
-        x = self.dwconv(x, H, W)
+        x = self.dwconv(x, height, width)
         x = self.act(x)
         x = self.drop(x)
         x = self.fc2(x)
@@ -101,6 +103,10 @@ def __init__(
         if sr_ratio > 1:
             self.sr = nn.Conv2d(dim, dim, kernel_size=sr_ratio, stride=sr_ratio)
             self.norm = LayerNorm(dim)
+        else:
+            # for torchscript compatibility
+            self.sr = nn.Identity()
+            self.norm = nn.Identity()
 
         self.apply(self._init_weights)
 
@@ -119,27 +125,27 @@ def _init_weights(self, m):
             if m.bias is not None:
                 m.bias.data.zero_()
 
-    def forward(self, x, H, W):
-        B, N, C = x.shape
+    def forward(self, x: torch.Tensor, height: int, width: int) -> torch.Tensor:
+        batch_size, N, C = x.shape
         q = (
             self.q(x)
-            .reshape(B, N, self.num_heads, C // self.num_heads)
+            .reshape(batch_size, N, self.num_heads, C // self.num_heads)
             .permute(0, 2, 1, 3)
         )
 
         if self.sr_ratio > 1:
-            x_ = x.permute(0, 2, 1).reshape(B, C, H, W)
-            x_ = self.sr(x_).reshape(B, C, -1).permute(0, 2, 1)
+            x_ = x.permute(0, 2, 1).reshape(batch_size, C, height, width)
+            x_ = self.sr(x_).reshape(batch_size, C, -1).permute(0, 2, 1)
             x_ = self.norm(x_)
             kv = (
                 self.kv(x_)
-                .reshape(B, -1, 2, self.num_heads, C // self.num_heads)
+                .reshape(batch_size, -1, 2, self.num_heads, C // self.num_heads)
                 .permute(2, 0, 3, 1, 4)
             )
         else:
             kv = (
                 self.kv(x)
-                .reshape(B, -1, 2, self.num_heads, C // self.num_heads)
+                .reshape(batch_size, -1, 2, self.num_heads, C // self.num_heads)
                 .permute(2, 0, 3, 1, 4)
             )
         k, v = kv[0], kv[1]
@@ -148,7 +154,7 @@ def forward(self, x, H, W):
         attn = attn.softmax(dim=-1)
         attn = self.attn_drop(attn)
 
-        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
+        x = (attn @ v).transpose(1, 2).reshape(batch_size, N, C)
         x = self.proj(x)
         x = self.proj_drop(x)
 
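Aside: the sr_ratio > 1 branch above builds keys and values from a spatially reduced copy of the tokens, so the attention matrix shrinks by roughly sr_ratio² along the key/value dimension. A standalone shape sketch with assumed numbers (dim=64, sr_ratio=4, a 16×16 token grid; none of these values come from this file):

    import torch
    import torch.nn as nn

    batch_size, height, width, dim, sr_ratio = 2, 16, 16, 64, 4
    x = torch.randn(batch_size, height * width, dim)   # (B, N, C), N = 256 tokens
    sr = nn.Conv2d(dim, dim, kernel_size=sr_ratio, stride=sr_ratio)

    x_ = x.permute(0, 2, 1).reshape(batch_size, dim, height, width)  # tokens -> NCHW
    x_ = sr(x_).reshape(batch_size, dim, -1).permute(0, 2, 1)        # (B, N // sr_ratio**2, C)
    print(x_.shape)  # torch.Size([2, 16, 64]): K/V are computed from 16 tokens instead of 256
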
@@ -209,12 +215,12 @@ def _init_weights(self, m):
             if m.bias is not None:
                 m.bias.data.zero_()
 
-    def forward(self, x):
-        B, _, H, W = x.shape
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        batch_size, _, height, width = x.shape
         x = x.flatten(2).transpose(1, 2)
-        x = x + self.drop_path(self.attn(self.norm1(x), H, W))
-        x = x + self.drop_path(self.mlp(self.norm2(x), H, W))
-        x = x.transpose(1, 2).view(B, -1, H, W)
+        x = x + self.drop_path(self.attn(self.norm1(x), height, width))
+        x = x + self.drop_path(self.mlp(self.norm2(x), height, width))
+        x = x.transpose(1, 2).view(batch_size, -1, height, width)
         return x
 
 
@@ -256,7 +262,7 @@ def _init_weights(self, m):
             if m.bias is not None:
                 m.bias.data.zero_()
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         x = self.proj(x)
         x = self.norm(x)
         return x
@@ -462,7 +468,7 @@ def reset_classifier(self, num_classes, global_pool=""):
             nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity()
         )
 
-    def forward_features(self, x):
+    def forward_features(self, x: torch.Tensor) -> List[torch.Tensor]:
         outs = []
 
         # stage 1
@@ -491,21 +497,21 @@ def forward_features(self, x):
 
         return outs
 
-    def forward(self, x):
-        x = self.forward_features(x)
+    def forward(self, x: torch.Tensor) -> List[torch.Tensor]:
+        features = self.forward_features(x)
         # x = self.head(x)
 
-        return x
+        return features
 
 
 class DWConv(nn.Module):
     def __init__(self, dim=768):
         super(DWConv, self).__init__()
         self.dwconv = nn.Conv2d(dim, dim, 3, 1, 1, bias=True, groups=dim)
 
-    def forward(self, x, H, W):
-        B, _, C = x.shape
-        x = x.transpose(1, 2).view(B, C, H, W)
+    def forward(self, x: torch.Tensor, height: int, width: int) -> torch.Tensor:
+        batch_size, _, channels = x.shape
+        x = x.transpose(1, 2).view(batch_size, channels, height, width)
         x = self.dwconv(x)
         x = x.flatten(2).transpose(1, 2)
 
@@ -516,7 +522,6 @@ def forward(self, x, H, W):
 # End of NVIDIA code
 # ---------------------------------------------------------------
 
-from typing import Dict, Sequence, List  # noqa E402
 from ._base import EncoderMixin  # noqa E402
 
 
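Aside: one practical effect of the int annotations added to height and width is that torch.jit.script stops treating them as Tensors (unannotated arguments default to Tensor). A usage sketch for the DWConv module shown above; the sizes are invented:

    import torch

    dwconv = DWConv(dim=64)                 # class defined in this file
    scripted = torch.jit.script(dwconv)     # expected to script thanks to the annotations

    tokens = torch.randn(2, 16 * 16, 64)    # (B, N, C) token sequence
    out = scripted(tokens, 16, 16)          # reshaped to 16x16, depthwise conv, back to (2, 256, 64)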