qubvel-org
diff --git a/README.md b/README.md
Lines changed: 21 additions & 0 deletions
diff --git a/docs/save_load.rst b/docs/save_load.rst
Lines changed: 8 additions & 0 deletions
diff --git a/examples/segformer_inference_pretrained.ipynb b/examples/segformer_inference_pretrained.ipynb
Lines changed: 3 additions & 3 deletions
diff --git a/requirements/docs.txt b/requirements/docs.txt
Lines changed: 2 additions & 2 deletions
diff --git a/requirements/required.txt b/requirements/required.txt
Lines changed: 4 additions & 4 deletions
diff --git a/requirements/test.txt b/requirements/test.txt
Lines changed: 4 additions & 4 deletions
diff --git a/segmentation_models_pytorch/base/model.py b/segmentation_models_pytorch/base/model.py
Lines changed: 42 additions & 19 deletions
diff --git a/segmentation_models_pytorch/base/modules.py b/segmentation_models_pytorch/base/modules.py
Lines changed: 91 additions & 23 deletions
diff --git a/segmentation_models_pytorch/decoders/fpn/decoder.py b/segmentation_models_pytorch/decoders/fpn/decoder.py
Lines changed: 12 additions & 5 deletions
diff --git a/segmentation_models_pytorch/decoders/fpn/model.py b/segmentation_models_pytorch/decoders/fpn/model.py
Lines changed: 4 additions & 0 deletions
@@ -25,6 +25,27 @@ The main features of the library are:
  - 800+ **pretrained** convolution- and transform-based encoders, including [timm](https://github.com/huggingface/pytorch-image-models) support
  - Popular metrics and losses for training routines (Dice, Jaccard, Tversky, ...)
  - ONNX export and torch script/trace/compile friendly
+
+### Community-Driven Project, Supported By
+<table>
+  <tr>
+    <td align="center" valign="middle">
+      <a href="https://withoutbg.com/?utm_source=smp&utm_medium=github_readme&utm_campaign=sponsorship" >
+        <img src="https://withoutbg.com/images/logo-social.png" width="70px;" alt="withoutBG API Logo" />
+      </a>
+    </td>
+    <td align="center" valign="middle">
+      <b>withoutBG API</b>
+      <br />
+      <a href="https://withoutbg.com/?utm_source=smp&utm_medium=github_readme&utm_campaign=sponsorship">https://withoutbg.com</a>
+      <br />
+      <p width="200px">
+      High-quality background removal API
+        <br/>
+      </p>
+    </td>
+  </tr>
+</table>
 
 ### [📚 Project Documentation 📚](http://smp.readthedocs.io/)
 
 
@@ -40,6 +40,14 @@ For example:
     # Alternatively, load the model directly from the Hugging Face Hub
     model = smp.from_pretrained('username/my-model')
 
+Loading a pre-trained model with a different number of classes for fine-tuning:
+
+.. code:: python
+
+    import segmentation_models_pytorch as smp
+
+    model = smp.from_pretrained('<path-or-repo-name>', classes=5, strict=False)
+
 Saving model Metrics and Dataset Name
 -------------------------------------
 
 
@@ -13,9 +13,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# fix for HF hub download\n",
-    "# see PR https://github.com/albumentations-team/albumentations/pull/2171\n",
-    "!pip install -U git+https://github.com/qubvel/albumentations@patch-2"
+    "# make sure you have the latest version of the libraries\n",
+    "!pip install -U segmentation-models-pytorch\n",
+    "!pip install albumentations matplotlib requests pillow"
    ]
   },
   {
 
@@ -1,5 +1,5 @@
 autodocsumm==0.2.14
-huggingface-hub==0.29.1
+huggingface-hub==0.30.1
 six==1.17.0
-sphinx==8.2.1
+sphinx==8.2.3
 sphinx-book-theme==1.1.4
@@ -1,7 +1,7 @@
-huggingface_hub==0.29.1
-numpy==2.2.3
-pillow==11.1.0
-safetensors==0.5.2
+huggingface_hub==0.30.1
+numpy==2.2.4
+pillow==11.2.0
+safetensors==0.5.3
 timm==1.0.15
 torch==2.6.0
 torchvision==0.21.0
 
@@ -1,7 +1,7 @@
 gitpython==3.1.44
 packaging==24.2
-pytest==8.3.4
+pytest==8.3.5
 pytest-xdist==3.6.1
-pytest-cov==6.0.0
-ruff==0.9.7
-setuptools==75.8.0
+pytest-cov==6.1.0
+ruff==0.11.3
+setuptools==78.1.0
@@ -1,6 +1,7 @@
 import torch
-from typing import TypeVar, Type
+import warnings
 
+from typing import TypeVar, Type
 from . import initialization as init
 from .hub_mixin import SMPHubMixin
 from .utils import is_torch_compiling
@@ -96,23 +97,45 @@ def load_state_dict(self, state_dict, **kwargs):
         # timm- ported encoders with TimmUniversalEncoder
         from segmentation_models_pytorch.encoders import TimmUniversalEncoder
 
-        if not isinstance(self.encoder, TimmUniversalEncoder):
-            return super().load_state_dict(state_dict, **kwargs)
-
-        patterns = ["regnet", "res2", "resnest", "mobilenetv3", "gernet"]
-
-        is_deprecated_encoder = any(
-            self.encoder.name.startswith(pattern) for pattern in patterns
-        )
-
-        if is_deprecated_encoder:
-            keys = list(state_dict.keys())
-            for key in keys:
-                new_key = key
-                if key.startswith("encoder.") and not key.startswith("encoder.model."):
-                    new_key = "encoder.model." + key.removeprefix("encoder.")
-                if "gernet" in self.encoder.name:
-                    new_key = new_key.replace(".stages.", ".stages_")
-                state_dict[new_key] = state_dict.pop(key)
+        if isinstance(self.encoder, TimmUniversalEncoder):
+            patterns = ["regnet", "res2", "resnest", "mobilenetv3", "gernet"]
+            is_deprecated_encoder = any(
+                self.encoder.name.startswith(pattern) for pattern in patterns
+            )
+            if is_deprecated_encoder:
+                keys = list(state_dict.keys())
+                for key in keys:
+                    new_key = key
+                    if key.startswith("encoder.") and not key.startswith(
+                        "encoder.model."
+                    ):
+                        new_key = "encoder.model." + key.removeprefix("encoder.")
+                    if "gernet" in self.encoder.name:
+                        new_key = new_key.replace(".stages.", ".stages_")
+                    state_dict[new_key] = state_dict.pop(key)
+
+        # When strict=False, also drop checkpoint entries whose shapes differ from
+        # the model's, so weights with mismatched sizes can still be loaded
+        strict = kwargs.get("strict", True)
+        if not strict:
+            mismatched_keys = []
+            model_state_dict = self.state_dict()
+            common_keys = set(model_state_dict.keys()) & set(state_dict.keys())
+            for key in common_keys:
+                if model_state_dict[key].shape != state_dict[key].shape:
+                    mismatched_keys.append(
+                        (key, model_state_dict[key].shape, state_dict[key].shape)
+                    )
+                    state_dict.pop(key)
+
+            if mismatched_keys:
+                str_keys = "\n".join(
+                    [
+                        f" - {key}: {s} (weights) -> {m} (model)"
+                        for key, m, s in mismatched_keys
+                    ]
+                )
+                text = f"\n\n !!!!!! Mismatched keys !!!!!!\n\nYou should TRAIN the model to use it:\n{str_keys}\n"
+                warnings.warn(text, stacklevel=-1)
 
         return super().load_state_dict(state_dict, **kwargs)
@@ -1,3 +1,5 @@
+from typing import Any, Dict, Union
+
 import torch
 import torch.nn as nn
 
@@ -7,43 +9,109 @@
     InPlaceABN = None
 
 
+def get_norm_layer(
+    use_norm: Union[bool, str, Dict[str, Any]], out_channels: int
+) -> nn.Module:
+    supported_norms = ("inplace", "batchnorm", "identity", "layernorm", "instancenorm")
+
+    # Step 1. Convert to dict representation
+
+    ## Check boolean
+    if use_norm is True:
+        norm_params = {"type": "batchnorm"}
+    elif use_norm is False:
+        norm_params = {"type": "identity"}
+
+    ## Check string
+    elif isinstance(use_norm, str):
+        norm_str = use_norm.lower()
+        if norm_str == "inplace":
+            norm_params = {
+                "type": "inplace",
+                "activation": "leaky_relu",
+                "activation_param": 0.0,
+            }
+        elif norm_str in supported_norms:
+            norm_params = {"type": norm_str}
+        else:
+            raise ValueError(
+                f"Unrecognized normalization type string provided: {use_norm}. Should be in "
+                f"{supported_norms}"
+            )
+
+    ## Check dict
+    elif isinstance(use_norm, dict):
+        norm_params = use_norm
+
+    else:
+        raise ValueError(
+            f"Invalid type for use_norm should either be a bool (batchnorm/identity), "
+            f"a string in {supported_norms}, or a dict like {{'type': 'batchnorm', **kwargs}}"
+        )
+
+    # Step 2. Check if the dict is valid
+    if "type" not in norm_params:
+        raise ValueError(
+            f"Malformed dictionary given in use_norm: {use_norm}. Should contain key 'type'."
+        )
+    if norm_params["type"] not in supported_norms:
+        raise ValueError(
+            f"Unrecognized normalization type string provided: {use_norm}. Should be in {supported_norms}"
+        )
+    if norm_params["type"] == "inplace" and InPlaceABN is None:
+        raise RuntimeError(
+            "In order to use `use_norm='inplace'` the inplace_abn package must be installed. Use:\n"
+            "  $ pip install -U wheel setuptools\n"
+            "  $ pip install inplace_abn --no-build-isolation\n"
+            "Also see: https://github.com/mapillary/inplace_abn"
+        )
+
+    # Step 3. Initialize the norm layer
+    norm_type = norm_params["type"]
+    norm_kwargs = {k: v for k, v in norm_params.items() if k != "type"}
+
+    if norm_type == "inplace":
+        norm = InPlaceABN(out_channels, **norm_kwargs)
+    elif norm_type == "batchnorm":
+        norm = nn.BatchNorm2d(out_channels, **norm_kwargs)
+    elif norm_type == "identity":
+        norm = nn.Identity()
+    elif norm_type == "layernorm":
+        norm = nn.LayerNorm(out_channels, **norm_kwargs)
+    elif norm_type == "instancenorm":
+        norm = nn.InstanceNorm2d(out_channels, **norm_kwargs)
+    else:
+        raise ValueError(f"Unrecognized normalization type: {norm_type}")
+
+    return norm
+
+
 class Conv2dReLU(nn.Sequential):
     def __init__(
         self,
-        in_channels,
-        out_channels,
-        kernel_size,
-        padding=0,
-        stride=1,
-        use_batchnorm=True,
+        in_channels: int,
+        out_channels: int,
+        kernel_size: int,
+        padding: int = 0,
+        stride: int = 1,
+        use_norm: Union[bool, str, Dict[str, Any]] = "batchnorm",
     ):
-        if use_batchnorm == "inplace" and InPlaceABN is None:
-            raise RuntimeError(
-                "In order to use `use_batchnorm='inplace'` inplace_abn package must be installed. "
-                + "To install see: https://github.com/mapillary/inplace_abn"
-            )
+        norm = get_norm_layer(use_norm, out_channels)
 
+        is_identity = isinstance(norm, nn.Identity)
         conv = nn.Conv2d(
             in_channels,
             out_channels,
             kernel_size,
             stride=stride,
             padding=padding,
-            bias=not (use_batchnorm),
+            bias=is_identity,
         )
-        relu = nn.ReLU(inplace=True)
-
-        if use_batchnorm == "inplace":
-            bn = InPlaceABN(out_channels, activation="leaky_relu", activation_param=0.0)
-            relu = nn.Identity()
 
-        elif use_batchnorm and use_batchnorm != "inplace":
-            bn = nn.BatchNorm2d(out_channels)
-
-        else:
-            bn = nn.Identity()
+        is_inplaceabn = InPlaceABN is not None and isinstance(norm, InPlaceABN)
+        activation = nn.Identity() if is_inplaceabn else nn.ReLU(inplace=True)
 
-        super(Conv2dReLU, self).__init__(conv, bn, relu)
+        super(Conv2dReLU, self).__init__(conv, norm, activation)
 
 
 class SCSEModule(nn.Module):
 
@@ -25,12 +25,18 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 
 
 class FPNBlock(nn.Module):
-    def __init__(self, pyramid_channels: int, skip_channels: int):
+    def __init__(
+        self,
+        pyramid_channels: int,
+        skip_channels: int,
+        interpolation_mode: str = "nearest",
+    ):
         super().__init__()
         self.skip_conv = nn.Conv2d(skip_channels, pyramid_channels, kernel_size=1)
+        self.interpolation_mode = interpolation_mode
 
     def forward(self, x: torch.Tensor, skip: torch.Tensor) -> torch.Tensor:
-        x = F.interpolate(x, scale_factor=2.0, mode="nearest")
+        x = F.interpolate(x, scale_factor=2.0, mode=self.interpolation_mode)
         skip = self.skip_conv(skip)
         x = x + skip
         return x
@@ -84,6 +90,7 @@ def __init__(
         segmentation_channels: int = 128,
         dropout: float = 0.2,
         merge_policy: Literal["add", "cat"] = "add",
+        interpolation_mode: str = "nearest",
     ):
         super().__init__()
 
@@ -103,9 +110,9 @@ def __init__(
         encoder_channels = encoder_channels[: encoder_depth + 1]
 
         self.p5 = nn.Conv2d(encoder_channels[0], pyramid_channels, kernel_size=1)
-        self.p4 = FPNBlock(pyramid_channels, encoder_channels[1])
-        self.p3 = FPNBlock(pyramid_channels, encoder_channels[2])
-        self.p2 = FPNBlock(pyramid_channels, encoder_channels[3])
+        self.p4 = FPNBlock(pyramid_channels, encoder_channels[1], interpolation_mode)
+        self.p3 = FPNBlock(pyramid_channels, encoder_channels[2], interpolation_mode)
+        self.p2 = FPNBlock(pyramid_channels, encoder_channels[3], interpolation_mode)
 
         self.seg_blocks = nn.ModuleList(
             [
 
@@ -28,6 +28,8 @@ class FPN(SegmentationModel):
         decoder_merge_policy: Determines how to merge pyramid features inside FPN. Available options are **add**
             and **cat**
         decoder_dropout: Spatial dropout rate in range (0, 1) for feature pyramid in FPN_
+        decoder_interpolation: Interpolation mode used in decoder of the model. Available options are
+            **"nearest"**, **"bilinear"**, **"bicubic"**, **"area"**, **"nearest-exact"**. Default is **"nearest"**.
         in_channels: A number of input channels for the model, default is 3 (RGB images)
         classes: A number of classes for output mask (or you can think as a number of channels of output mask)
         activation: An activation function to apply after the final convolution layer.
@@ -61,6 +63,7 @@ def __init__(
         decoder_segmentation_channels: int = 128,
         decoder_merge_policy: str = "add",
         decoder_dropout: float = 0.2,
+        decoder_interpolation: str = "nearest",
         in_channels: int = 3,
         classes: int = 1,
         activation: Optional[str] = None,
@@ -91,6 +94,7 @@ def __init__(
             segmentation_channels=decoder_segmentation_channels,
             dropout=decoder_dropout,
             merge_policy=decoder_merge_policy,
+            interpolation_mode=decoder_interpolation,
         )
 
         self.segmentation_head = SegmentationHead(
Original file line number	Diff line number	Diff line change
`@@ -13,9 +13,9 @@`
`13`	`13`	`"metadata": {},`
`14`	`14`	`"outputs": [],`
`15`	`15`	`"source": [`
`16`		`- "# fix for HF hub download\n",`
`17`		`- "# see PR https://github.com/albumentations-team/albumentations/pull/2171\n",`
`18`		`- "!pip install -U git+https://github.com/qubvel/albumentations@patch-2"`
	`16`	`+ "# make sure you have the latest version of the libraries\n",`
	`17`	`+ "!pip install -U segmentation-models-pytorch\n",`
	`18`	`+ "!pip install albumentations matplotlib requests pillow"`
`19`	`19`	`]`
`20`	`20`	`},`
`21`	`21`	`{`