diff --git a/README.md b/README.md
index b3a0b3ff..d7679c08 100644
--- a/README.md
+++ b/README.md
@@ -1,18 +1,18 @@
-
-
-**Python library with Neural Networks for Image
-Segmentation based on [PyTorch](https://pytorch.org/).**
-
-[](https://github.com/qubvel/segmentation_models.pytorch/blob/main/LICENSE)
-[](https://github.com/qubvel/segmentation_models.pytorch/actions/workflows/tests.yml)
-[](https://smp.readthedocs.io/en/latest/)
+
+
+**Python library with Neural Networks for Image
+Segmentation based on [PyTorch](https://pytorch.org/).**
+
+[](https://github.com/qubvel/segmentation_models.pytorch/blob/main/LICENSE)
+[](https://github.com/qubvel/segmentation_models.pytorch/actions/workflows/tests.yml)
+[](https://smp.readthedocs.io/en/latest/)
-[](https://pypi.org/project/segmentation-models-pytorch/)
-[](https://pepy.tech/project/segmentation-models-pytorch)
+[](https://pypi.org/project/segmentation-models-pytorch/)
+[](https://pepy.tech/project/segmentation-models-pytorch)
-[](https://pepy.tech/project/segmentation-models-pytorch)
-[](https://pepy.tech/project/segmentation-models-pytorch)
+[](https://pepy.tech/project/segmentation-models-pytorch)
+[](https://pepy.tech/project/segmentation-models-pytorch)
@@ -23,7 +23,7 @@ The main features of this library are:
- 124 available encoders (and 500+ encoders from [timm](https://github.com/rwightman/pytorch-image-models))
- All encoders have pre-trained weights for faster and better convergence
- Popular metrics and losses for training routines
-
+
### [📚 Project Documentation 📚](http://smp.readthedocs.io/)
Visit [Read The Docs Project Page](https://smp.readthedocs.io/) or read the following README to learn more about the Segmentation Models Pytorch (SMP for short) library
@@ -55,7 +55,7 @@ The segmentation model is just a PyTorch `torch.nn.Module`, which can be created
import segmentation_models_pytorch as smp
model = smp.Unet(
- encoder_name="resnet34", # choose encoder, e.g. mobilenet_v2 or efficientnet-b7
+ encoder_name="resnet34", # choose encoder, e.g. mobilenet_v2 or timm-efficientnet-b7
encoder_weights="imagenet", # use `imagenet` pre-trained weights for encoder initialization
in_channels=1, # model input channels (1 for gray-scale images, 3 for RGB, etc.)
classes=3, # model output channels (number of classes in your dataset)
@@ -277,14 +277,6 @@ The following is a list of supported encoders in the SMP. Select the appropriate
|Encoder |Weights |Params, M |
|--------------------------------|:------------------------------:|:------------------------------:|
-|efficientnet-b0 |imagenet |4M |
-|efficientnet-b1 |imagenet |6M |
-|efficientnet-b2 |imagenet |7M |
-|efficientnet-b3 |imagenet |10M |
-|efficientnet-b4 |imagenet |17M |
-|efficientnet-b5 |imagenet |28M |
-|efficientnet-b6 |imagenet |40M |
-|efficientnet-b7 |imagenet |63M |
|timm-efficientnet-b0 |imagenet / advprop / noisy-student|4M |
|timm-efficientnet-b1 |imagenet / advprop / noisy-student|6M |
|timm-efficientnet-b2 |imagenet / advprop / noisy-student|7M |
@@ -361,7 +353,7 @@ The following is a list of supported encoders in the SMP. Select the appropriate
Backbone from SegFormer pretrained on ImageNet! It can be used with other decoders from the package; you can combine Mix Vision Transformer with Unet, FPN, and others (see the sketch below)!
-Limitations:
+Limitations:
- the encoder is **not** supported by Linknet and Unet++
- the encoder is supported by FPN only for encoder **depth = 5**
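A minimal sketch of the combination described above, using the smallest `mit_b0` variant; Unet and FPN accept it, while Linknet and Unet++ do not:

```python
import torch
import segmentation_models_pytorch as smp

# Mix Vision Transformer encoder with a Unet decoder
model = smp.Unet(encoder_name="mit_b0", encoder_weights="imagenet", classes=1)
mask = model(torch.ones([1, 3, 224, 224]))  # mit encoders expect 3-channel input
```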
@@ -423,18 +415,18 @@ Total number of supported encoders: 549
##### Input channels
The `in_channels` parameter allows you to create models that process tensors with an arbitrary number of channels.
If you use pretrained weights from imagenet, the weights of the first convolution will be reused. For the
-1-channel case it would be a sum of weights of first convolution layer, otherwise channels would be
+1-channel case this is the sum of the weights of the first convolution layer; otherwise channels are
populated with weights like `new_weight[:, i] = pretrained_weight[:, i % 3]` and then scaled with `new_weight * 3 / new_in_channels`.
```python
import torch
import segmentation_models_pytorch as smp

model = smp.FPN('resnet34', in_channels=1)
mask = model(torch.ones([1, 1, 64, 64]))
```
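The remapping described above can be sketched by hand; the helper below is illustrative (not the library's actual internals) and assumes a standard `(out_channels, 3, k, k)` pretrained kernel:

```python
import torch

def remap_first_conv(pretrained_weight: torch.Tensor, new_in_channels: int) -> torch.Tensor:
    out_channels, _, k1, k2 = pretrained_weight.shape
    if new_in_channels == 1:
        # 1-channel case: sum the RGB filters into one channel
        return pretrained_weight.sum(dim=1, keepdim=True)
    new_weight = torch.empty(out_channels, new_in_channels, k1, k2)
    for i in range(new_in_channels):
        new_weight[:, i] = pretrained_weight[:, i % 3]  # cycle the RGB filters
    return new_weight * 3 / new_in_channels  # rescale to preserve activation magnitude
```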
-##### Auxiliary classification output
-All models support `aux_params` parameters, which is default set to `None`.
+##### Auxiliary classification output
+All models support the `aux_params` parameter, which defaults to `None`.
If `aux_params = None`, the auxiliary classification output is not created; otherwise the
model produces not only a `mask` but also a `label` output with shape `NC`.
-Classification head consists of GlobalPooling->Dropout(optional)->Linear->Activation(optional) layers, which can be
+The classification head consists of GlobalPooling->Dropout(optional)->Linear->Activation(optional) layers, which can be
configured by `aux_params` as follows:
```python
aux_params=dict(
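A complete version of the configuration above, together with the two-output forward pass (the values here are illustrative):

```python
import torch
import segmentation_models_pytorch as smp

aux_params = dict(
    pooling="avg",         # "avg" or "max" global pooling
    dropout=0.5,           # optional dropout ratio, None to disable
    activation="sigmoid",  # optional activation, None for raw logits
    classes=4,             # number of classification labels
)
model = smp.Unet("resnet34", classes=4, aux_params=aux_params)
mask, label = model(torch.ones([1, 3, 64, 64]))  # label shape: (1, 4)
```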
@@ -472,7 +464,7 @@ $ pip install git+https://github.com/qubvel/segmentation_models.pytorch
### 🤝 Contributing
-#### Install SMP
+#### Install SMP
```bash
make install_dev # create .venv, install SMP in dev mode
@@ -484,7 +476,7 @@ make install_dev # create .venv, install SMP in dev mode
make fixup # Ruff for formatting and lint checks
```
-#### Update table with encoders
+#### Update table with encoders
```bash
make table # generate a table with encoders and print to stdout
diff --git a/docs/conf.py b/docs/conf.py
index c7dde9e5..82583c6b 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -102,7 +102,6 @@ def get_version():
"PIL",
"pretrainedmodels",
"torchvision",
- "efficientnet-pytorch",
"segmentation_models_pytorch.encoders",
"segmentation_models_pytorch.utils",
# 'segmentation_models_pytorch.base',
diff --git a/docs/encoders.rst b/docs/encoders.rst
index 652745b7..5931593a 100644
--- a/docs/encoders.rst
+++ b/docs/encoders.rst
@@ -215,22 +215,6 @@ EfficientNet
+------------------------+--------------------------------------+-------------+
| Encoder | Weights | Params, M |
+========================+======================================+=============+
-| efficientnet-b0 | imagenet | 4M |
-+------------------------+--------------------------------------+-------------+
-| efficientnet-b1 | imagenet | 6M |
-+------------------------+--------------------------------------+-------------+
-| efficientnet-b2 | imagenet | 7M |
-+------------------------+--------------------------------------+-------------+
-| efficientnet-b3 | imagenet | 10M |
-+------------------------+--------------------------------------+-------------+
-| efficientnet-b4 | imagenet | 17M |
-+------------------------+--------------------------------------+-------------+
-| efficientnet-b5 | imagenet | 28M |
-+------------------------+--------------------------------------+-------------+
-| efficientnet-b6 | imagenet | 40M |
-+------------------------+--------------------------------------+-------------+
-| efficientnet-b7 | imagenet | 63M |
-+------------------------+--------------------------------------+-------------+
| timm-efficientnet-b0 | imagenet / advprop / noisy-student | 4M |
+------------------------+--------------------------------------+-------------+
| timm-efficientnet-b1 | imagenet / advprop / noisy-student | 6M |
diff --git a/docs/quickstart.rst b/docs/quickstart.rst
index 7fc04dd7..20d974ca 100644
--- a/docs/quickstart.rst
+++ b/docs/quickstart.rst
@@ -6,11 +6,11 @@
A segmentation model is just a PyTorch nn.Module, which can be created as easily as:
.. code-block:: python
-
+
import segmentation_models_pytorch as smp
model = smp.Unet(
- encoder_name="resnet34", # choose encoder, e.g. mobilenet_v2 or efficientnet-b7
+ encoder_name="resnet34", # choose encoder, e.g. mobilenet_v2 or timm-efficientnet-b7
encoder_weights="imagenet", # use `imagenet` pre-trained weights for encoder initialization
in_channels=1, # model input channels (1 for gray-scale images, 3 for RGB, etc.)
classes=3, # model output channels (number of classes in your dataset)
diff --git a/pyproject.toml b/pyproject.toml
index 0e9310b5..70fe9394 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,7 +17,6 @@ classifiers = [
'Programming Language :: Python :: Implementation :: PyPy',
]
dependencies = [
- 'efficientnet-pytorch>=0.6.1',
'huggingface-hub>=0.24',
'numpy>=1.19.3',
'pillow>=8',
diff --git a/requirements/minimum.old b/requirements/minimum.old
index 1080bdb4..40bdc6ce 100644
--- a/requirements/minimum.old
+++ b/requirements/minimum.old
@@ -1,4 +1,3 @@
-efficientnet-pytorch==0.6.1
huggingface-hub==0.24.0
numpy==1.19.3
pillow==8.0.0
diff --git a/requirements/required.txt b/requirements/required.txt
index 9c7aa3b5..34a1db7b 100644
--- a/requirements/required.txt
+++ b/requirements/required.txt
@@ -1,4 +1,3 @@
-efficientnet-pytorch==0.7.1
huggingface_hub==0.27.0
numpy==2.2.1
pillow==11.0.0
diff --git a/segmentation_models_pytorch/encoders/__init__.py b/segmentation_models_pytorch/encoders/__init__.py
index c4a4c037..1a4e6f70 100644
--- a/segmentation_models_pytorch/encoders/__init__.py
+++ b/segmentation_models_pytorch/encoders/__init__.py
@@ -9,7 +9,6 @@
from .densenet import densenet_encoders
from .inceptionresnetv2 import inceptionresnetv2_encoders
from .inceptionv4 import inceptionv4_encoders
-from .efficientnet import efficient_net_encoders
from .mobilenet import mobilenet_encoders
from .xception import xception_encoders
from .timm_efficientnet import timm_efficientnet_encoders
@@ -34,7 +33,6 @@
encoders.update(densenet_encoders)
encoders.update(inceptionresnetv2_encoders)
encoders.update(inceptionv4_encoders)
-encoders.update(efficient_net_encoders)
encoders.update(mobilenet_encoders)
encoders.update(xception_encoders)
encoders.update(timm_efficientnet_encoders)
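With the module import and the `encoders.update(...)` call gone, the plain names drop out of the encoder registry; a quick sanity check (assuming the `get_encoder_names` helper exported by this package):

```python
import segmentation_models_pytorch as smp

names = smp.encoders.get_encoder_names()
assert "efficientnet-b0" not in names    # removed by this patch
assert "timm-efficientnet-b0" in names   # timm-based port remains available
```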
diff --git a/segmentation_models_pytorch/encoders/efficientnet.py b/segmentation_models_pytorch/encoders/efficientnet.py
deleted file mode 100644
index 4a7af6b4..00000000
--- a/segmentation_models_pytorch/encoders/efficientnet.py
+++ /dev/null
@@ -1,177 +0,0 @@
-"""Each encoder should have following attributes and methods and be inherited from `_base.EncoderMixin`
-
-Attributes:
-
- _out_channels (list of int): specify number of channels for each encoder feature tensor
- _depth (int): specify number of stages in decoder (in other words number of downsampling operations)
- _in_channels (int): default number of input channels in first Conv2d layer for encoder (usually 3)
-
-Methods:
-
- forward(self, x: torch.Tensor)
- produce list of features of different spatial resolutions, each feature is a 4D torch.tensor of
- shape NCHW (features should be sorted in descending order according to spatial resolution, starting
- with resolution same as input `x` tensor).
-
- Input: `x` with shape (1, 3, 64, 64)
- Output: [f0, f1, f2, f3, f4, f5] - features with corresponding shapes
- [(1, 3, 64, 64), (1, 64, 32, 32), (1, 128, 16, 16), (1, 256, 8, 8),
- (1, 512, 4, 4), (1, 1024, 2, 2)] (C - dim may differ)
-
- also should support number of features according to specified depth, e.g. if depth = 5,
- number of feature tensors = 6 (one with same resolution as input and 5 downsampled),
- depth = 3 -> number of feature tensors = 4 (one with same resolution as input and 3 downsampled).
-"""
-
-import torch.nn as nn
-from efficientnet_pytorch import EfficientNet
-from efficientnet_pytorch.utils import url_map, url_map_advprop, get_model_params
-
-from ._base import EncoderMixin
-
-
-class EfficientNetEncoder(EfficientNet, EncoderMixin):
- def __init__(self, stage_idxs, out_channels, model_name, depth=5):
- blocks_args, global_params = get_model_params(model_name, override_params=None)
- super().__init__(blocks_args, global_params)
-
- self._stage_idxs = stage_idxs
- self._out_channels = out_channels
- self._depth = depth
- self._in_channels = 3
-
- del self._fc
-
- def get_stages(self):
- return [
- nn.Identity(),
- nn.Sequential(self._conv_stem, self._bn0, self._swish),
- self._blocks[: self._stage_idxs[0]],
- self._blocks[self._stage_idxs[0] : self._stage_idxs[1]],
- self._blocks[self._stage_idxs[1] : self._stage_idxs[2]],
- self._blocks[self._stage_idxs[2] :],
- ]
-
- def forward(self, x):
- stages = self.get_stages()
-
- block_number = 0.0
- drop_connect_rate = self._global_params.drop_connect_rate
-
- features = []
- for i in range(self._depth + 1):
- # Identity and Sequential stages
- if i < 2:
- x = stages[i](x)
-
- # Block stages need drop_connect rate
- else:
- for module in stages[i]:
- drop_connect = drop_connect_rate * block_number / len(self._blocks)
- block_number += 1.0
- x = module(x, drop_connect)
-
- features.append(x)
-
- return features
-
- def load_state_dict(self, state_dict, **kwargs):
- state_dict.pop("_fc.bias", None)
- state_dict.pop("_fc.weight", None)
- super().load_state_dict(state_dict, **kwargs)
-
-
-def _get_pretrained_settings(encoder):
- pretrained_settings = {
- "imagenet": {
- "mean": [0.485, 0.456, 0.406],
- "std": [0.229, 0.224, 0.225],
- "url": url_map[encoder],
- "input_space": "RGB",
- "input_range": [0, 1],
- },
- "advprop": {
- "mean": [0.5, 0.5, 0.5],
- "std": [0.5, 0.5, 0.5],
- "url": url_map_advprop[encoder],
- "input_space": "RGB",
- "input_range": [0, 1],
- },
- }
- return pretrained_settings
-
-
-efficient_net_encoders = {
- "efficientnet-b0": {
- "encoder": EfficientNetEncoder,
- "pretrained_settings": _get_pretrained_settings("efficientnet-b0"),
- "params": {
- "out_channels": (3, 32, 24, 40, 112, 320),
- "stage_idxs": (3, 5, 9, 16),
- "model_name": "efficientnet-b0",
- },
- },
- "efficientnet-b1": {
- "encoder": EfficientNetEncoder,
- "pretrained_settings": _get_pretrained_settings("efficientnet-b1"),
- "params": {
- "out_channels": (3, 32, 24, 40, 112, 320),
- "stage_idxs": (5, 8, 16, 23),
- "model_name": "efficientnet-b1",
- },
- },
- "efficientnet-b2": {
- "encoder": EfficientNetEncoder,
- "pretrained_settings": _get_pretrained_settings("efficientnet-b2"),
- "params": {
- "out_channels": (3, 32, 24, 48, 120, 352),
- "stage_idxs": (5, 8, 16, 23),
- "model_name": "efficientnet-b2",
- },
- },
- "efficientnet-b3": {
- "encoder": EfficientNetEncoder,
- "pretrained_settings": _get_pretrained_settings("efficientnet-b3"),
- "params": {
- "out_channels": (3, 40, 32, 48, 136, 384),
- "stage_idxs": (5, 8, 18, 26),
- "model_name": "efficientnet-b3",
- },
- },
- "efficientnet-b4": {
- "encoder": EfficientNetEncoder,
- "pretrained_settings": _get_pretrained_settings("efficientnet-b4"),
- "params": {
- "out_channels": (3, 48, 32, 56, 160, 448),
- "stage_idxs": (6, 10, 22, 32),
- "model_name": "efficientnet-b4",
- },
- },
- "efficientnet-b5": {
- "encoder": EfficientNetEncoder,
- "pretrained_settings": _get_pretrained_settings("efficientnet-b5"),
- "params": {
- "out_channels": (3, 48, 40, 64, 176, 512),
- "stage_idxs": (8, 13, 27, 39),
- "model_name": "efficientnet-b5",
- },
- },
- "efficientnet-b6": {
- "encoder": EfficientNetEncoder,
- "pretrained_settings": _get_pretrained_settings("efficientnet-b6"),
- "params": {
- "out_channels": (3, 56, 40, 72, 200, 576),
- "stage_idxs": (9, 15, 31, 45),
- "model_name": "efficientnet-b6",
- },
- },
- "efficientnet-b7": {
- "encoder": EfficientNetEncoder,
- "pretrained_settings": _get_pretrained_settings("efficientnet-b7"),
- "params": {
- "out_channels": (3, 64, 48, 80, 224, 640),
- "stage_idxs": (11, 18, 38, 55),
- "model_name": "efficientnet-b7",
- },
- },
-}
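The deleted module's docstring spells out the encoder contract: for `depth = 5`, the forward pass returns six feature maps sorted by descending spatial resolution. A short sketch, assuming the timm-based replacement honors the same contract:

```python
import torch
import segmentation_models_pytorch as smp

encoder = smp.encoders.get_encoder("timm-efficientnet-b0", depth=5, weights=None)
features = encoder(torch.ones(1, 3, 64, 64))
assert len(features) == 6  # input-resolution map + 5 progressively downsampled stages
```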
diff --git a/tests/encoders/test_smp_encoders.py b/tests/encoders/test_smp_encoders.py
index 863537bf..e3f63a3d 100644
--- a/tests/encoders/test_smp_encoders.py
+++ b/tests/encoders/test_smp_encoders.py
@@ -22,20 +22,3 @@ class TestMixTransformerEncoder(base.BaseEncoderTester):
if not RUN_ALL_ENCODERS
else ["mit_b0", "mit_b1", "mit_b2", "mit_b3", "mit_b4", "mit_b5"]
)
-
-
-class TestEfficientNetEncoder(base.BaseEncoderTester):
- encoder_names = (
- ["efficientnet-b0"]
- if not RUN_ALL_ENCODERS
- else [
- "efficientnet-b0",
- "efficientnet-b1",
- "efficientnet-b2",
- "efficientnet-b3",
- "efficientnet-b4",
- "efficientnet-b5",
- "efficientnet-b6",
- # "efficientnet-b7", # extra large model
- ]
- )