Skip to content

Commit de335cd

Browse files
Feat/efficientnet (#68)
Adding EfficientNet models.
1 parent b3a1ea9 commit de335cd

39 files changed

+2917
-184
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Change Log
22

3+
## Unpublished
4+
5+
- Added EfficientNet and MobileNet-V2 models.
6+
37
## v0.2.6 - 2022-05-13
48

59
- Added tiny and small ConvNeXt models.

README.md

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,10 @@ weights, obtained by porting architectures from
1919
[timm](https://github.com/rwightman/pytorch-image-models) to TensorFlow. The hope is
2020
that the number of available architectures will grow over time. For now, it contains
2121
vision transformers (ViT, DeiT, CaiT, PVT and Swin Transformers), MLP-Mixer models
22-
(MLP-Mixer, ResMLP, gMLP, PoolFormer and ConvMixer) and various ResNet flavours (ResNet,
23-
ResNeXt, ECA-ResNet, SE-ResNet) as well as the recent ConvNeXt.
22+
(MLP-Mixer, ResMLP, gMLP, PoolFormer and ConvMixer), various ResNet flavours (ResNet,
23+
ResNeXt, ECA-ResNet, SE-ResNet), the EfficientNet family (including AdvProp,
24+
NoisyStudent, Edge-TPU, V2 and Lite versions), MobileNet-V2, as well as the recent
25+
ConvNeXt.
2426

2527
This work would not have been possible without Ross Wightman's `timm` library and the
2628
work on PyTorch/TensorFlow interoperability in HuggingFace's `transformers` repository.
@@ -144,6 +146,22 @@ The following architectures are currently available:
144146
[\[github\]](https://github.com/tmp-iclr/convmixer)
145147
- Patches Are All You Need?
146148
[\[ICLR 2022 submission\]](https://openreview.net/forum?id=TVHS5Y4dNvM)
149+
- EfficientNet family
150+
- EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks
151+
[\[arXiv:1905.11946\]](https://arxiv.org/abs/1905.11946)
152+
- Adversarial Examples Improve Image Recognition
153+
[\[arXiv:1911.09665\]](https://arxiv.org/abs/1911.09665)
154+
- Self-training with Noisy Student improves ImageNet classification
155+
[\[arXiv:1911.04252\]](https://arxiv.org/abs/1911.04252)
156+
- EfficientNet-EdgeTPU
157+
[\[Blog\]](https://ai.googleblog.com/2019/08/efficientnet-edgetpu-creating.html)
158+
- EfficientNet-Lite
159+
[\[Blog\]](https://blog.tensorflow.org/2020/03/higher-accuracy-on-vision-models-with-efficientnet-lite.html)
160+
- EfficientNetV2: Smaller Models and Faster Training
161+
[\[arXiv:2104.00298\]](https://arxiv.org/abs/2104.00298)
162+
- MobileNet-V2
163+
- MobileNetV2: Inverted Residuals and Linear Bottlenecks
164+
[\[arXiv:1801.04381\]](https://arxiv.org/abs/1801.04381)
147165
- Pyramid Vision Transformer
148166
[\[github\]](https://github.com/whai362/PVT)
149167
- Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
EfficientNet
2+
============
3+
4+
.. py:module:: tfimm.architectures.efficientnet
5+
6+
.. automodule:: tfimm.architectures.efficientnet
7+
8+
.. autoclass:: EfficientNetConfig
9+
10+
.. autoclass:: EfficientNet
11+
:members: call, forward_features, dummy_inputs, feature_names

docs/source/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ Contents
3333
:caption: Architectures
3434

3535
content/convnext
36+
content/efficientnet
3637
content/pit
3738
content/poolformer
3839

scripts/test_conversion.py

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
"""
22
This script is used when converting models from PyTorch to TF.
33
"""
4+
import logging
5+
46
import numpy as np
57
import tensorflow as tf
68
import timm
@@ -10,14 +12,19 @@
1012
import tfimm # noqa: F401
1113
from tfimm.utils.timm import load_pytorch_weights_in_tf2_model # noqa: F401
1214

13-
model_name = "resnet18"
15+
logging.basicConfig(level=logging.INFO)
16+
17+
model_name = "efficientnet_b0"
18+
pt_model_name = "tf_efficientnet_b0"
1419

1520
# We need to test models in both training and inference mode (BN)
1621
training = False
1722
nb_calls = 3
1823

1924
# Load PyTorch model
20-
pt_model = timm.create_model(model_name, pretrained=True)
25+
pt_model = timm.create_model(
26+
pt_model_name, pretrained=True, drop_rate=0.0, drop_path_rate=0.0
27+
)
2128
# If a model is not part of the `timm` library, we can load the state dict directly
2229
# state_dict = load_state_dict_from_url(
2330
# url="https://github.com/sail-sg/poolformer/releases/download/v1.0/poolformer_m48.pth.tar" # noqa: E501
@@ -35,17 +42,32 @@
3542
if not training: # Set PyTorch model to inference mode
3643
pt_model.eval()
3744

45+
# Create test input
46+
img = np.random.rand(5, 224, 224, 3).astype("float32")
47+
48+
# Run inference for PyTorch model
49+
pt_img = torch.Tensor(img.transpose([0, 3, 1, 2]))
50+
if training:
51+
for _ in range(nb_calls):
52+
_ = pt_model.forward(pt_img)
53+
pt_res = pt_model.forward(pt_img)
54+
pt_res = pt_res.detach().numpy()
55+
# When we look at output of intermediate layers, we have to transpose PyTorch data
56+
# format (NCHW) to TF data format (NHWC). We don't have to do this, if we only look
57+
# at the final logits
58+
# pt_res = pt_res.transpose([0, 2, 3, 1])
59+
print(pt_res.shape)
60+
3861
# Load TF model
39-
tf_model = tfimm.create_model(model_name, pretrained="timm")
62+
tf_model = tfimm.create_model(
63+
model_name, pretrained=True, drop_rate=0.0, drop_path_rate=0.0
64+
)
4065
# If we want to load the weights from a pytorch model outside the model factory:
4166
# load_pytorch_weights_in_tf2_model(tf_model, pt_model.state_dict())
4267
# For debug purposes we may want to print variable names
4368
# for w in tf_model.weights:
4469
# print(w.name)
4570

46-
# Create test input
47-
img = np.random.rand(5, 224, 224, 3).astype("float32")
48-
4971
# Run inference for TF model
5072
tf_img = tf.constant(img)
5173
if training: # If training we do multiple forward passes to test BN param updates
@@ -59,19 +81,6 @@
5981
tf_res = tf_res.numpy()
6082
print(tf_res.shape)
6183

62-
# Run inference for PyTorch model
63-
pt_img = torch.Tensor(img.transpose([0, 3, 1, 2]))
64-
if training:
65-
for _ in range(nb_calls):
66-
_ = pt_model.forward(pt_img)
67-
pt_res = pt_model.forward(pt_img)
68-
pt_res = pt_res.detach().numpy()
69-
# When we look at output of intermediate layers, we have to transpose PyTorch data
70-
# format (NCHW) to TF data format (NHWC). We don't have to do this, if we only look
71-
# at the final logits
72-
# pt_res = pt_res.transpose([0, 2, 3, 1])
73-
print(pt_res.shape)
74-
7584
# Compare outputs between PyTorch and Tensorflow. We should expect the relative error
7685
# to be <1e-5. It won't be much lower, because TF and PyTorch implement BN slightly
7786
# differently. The two formulas are mathematically, but not numerically equivalent.

tests/models/architectures.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
ConvMixerConfig,
66
ConvNeXt,
77
ConvNeXtConfig,
8+
EfficientNet,
9+
EfficientNetConfig,
810
MLPMixer,
911
MLPMixerConfig,
1012
PoolFormer,
@@ -30,6 +32,7 @@
3032
"cait_test_model", # cait.py
3133
"convmixer_test_model", # convmixer.py
3234
"convnext_test_model", # convnext.py
35+
"efficientnet_test_model", # efficientnet.py
3336
"mixer_test_model", # mlp_mixer.py
3437
"resmlp_test_model",
3538
"gmlp_test_model",
@@ -90,6 +93,21 @@ def convnext_test_model():
9093
return ConvNeXt, cfg
9194

9295

96+
@register_model
97+
def efficientnet_test_model():
98+
cfg = EfficientNetConfig(
99+
name="efficientnet_test_model",
100+
input_size=(32, 32),
101+
architecture=(
102+
("ds_r1_k3_s1_e1_c16_se0.25",),
103+
("ir_r2_k3_s2_e6_c24_se0.25",),
104+
("er_r1_k3_s1_e4_c24_fc24_noskip",),
105+
),
106+
nb_features=32,
107+
)
108+
return EfficientNet, cfg
109+
110+
93111
@register_model
94112
def mixer_test_model():
95113
cfg = MLPMixerConfig(

tests/models/test_factory.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ def test_change_in_channels(model_name, in_channels):
8787
# based on ResNetV2, because they use `StdConv`, which normalizes weight
8888
# statistics internally. The models are still adaptable, but results won't be
8989
# the same.
90-
assert (np.max(np.abs(y_1 - y_2))) / (np.max(np.abs(y_1)) + 1e-8) < 1e-5
90+
assert np.all(np.isclose(y_1, y_2, rtol=1e-5, atol=1e-5))
9191

9292

9393
@pytest.mark.parametrize("model_name", TEST_ARCHITECTURES)
@@ -96,7 +96,7 @@ def test_save_load_model(model_name):
9696
model = create_model(model_name)
9797
with tempfile.TemporaryDirectory() as tmpdir:
9898
model.save(tmpdir)
99-
loaded_model = tf.keras.models.load_model(tmpdir)
99+
loaded_model = tf.keras.models.load_model(tmpdir, compile=False)
100100

101101
assert type(model) is type(loaded_model)
102102

@@ -179,6 +179,17 @@ def test_change_input_size_inference(model_name):
179179
flexible_model(img)
180180

181181

182+
@pytest.mark.parametrize("model_name", TEST_ARCHITECTURES)
183+
def test_model_name_keras(model_name):
184+
"""
185+
We test if model.name == model.cfg.name, i.e., the keras model name is set
186+
correctly.
187+
"""
188+
tf.keras.backend.clear_session()
189+
model = create_model(model_name)
190+
assert model.name == model_name == model.cfg.name
191+
192+
182193
@pytest.mark.parametrize("model_name", TEST_ARCHITECTURES)
183194
def test_variable_prefix(model_name):
184195
"""

tests/test_timm.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from typing import Tuple, Union
2+
13
import numpy as np
24
import pytest
35
import tensorflow as tf
@@ -12,6 +14,7 @@
1214
"cait_xxs24_224", # cait.py
1315
"convmixer_768_32", # convmixer.py
1416
"convnext_tiny", # convnext.py
17+
("efficientnet_b0", "tf_efficientnet_b0"), # efficientnet.py
1518
"mixer_s32_224", # mlp_mixer.py
1619
"resmlp_12_224",
1720
"gmlp_ti16_224",
@@ -31,15 +34,21 @@
3134

3235

3336
@pytest.mark.parametrize("model_name", TIMM_ARCHITECTURES)
34-
def test_load_timm_model(model_name: str):
37+
def test_load_timm_model(model_name: Union[str, Tuple[str, str]]):
3538
"""Test if we can load models from timm."""
39+
# To cater for those models, where TIMM name differs from TFIMM name
40+
if isinstance(model_name, tuple):
41+
tf_model_name, pt_model_name = model_name
42+
else:
43+
tf_model_name = pt_model_name = model_name
44+
3645
# We don't need to load the pretrained weights from timm, we only need a PyTorch
3746
# model, that we then convert to tensorflow. This allows us to run these tests
3847
# in GitHub CI without data transfer issues.
39-
pt_model = timm.create_model(model_name, pretrained=False)
48+
pt_model = timm.create_model(pt_model_name, pretrained=False)
4049
pt_model.eval()
4150

42-
tf_model = create_model(model_name, pretrained=False)
51+
tf_model = create_model(tf_model_name, pretrained=False)
4352
load_pytorch_weights_in_tf2_model(tf_model, pt_model.state_dict())
4453

4554
rng = np.random.default_rng(2021)

tfimm/architectures/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from .cait import * # noqa: F401
22
from .convmixer import * # noqa: F401
33
from .convnext import * # noqa: F401
4+
from .efficientnet import * # noqa: F401
45
from .mlp_mixer import * # noqa: F401
56
from .pit import * # noqa: F401
67
from .poolformer import * # noqa: F401

tfimm/architectures/cait.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,7 @@ class CaiT(tf.keras.Model):
319319
cfg_class = CaiTConfig
320320

321321
def __init__(self, cfg: CaiTConfig, *args, **kwargs):
322+
kwargs["name"] = kwargs.get("name", cfg.name)
322323
super().__init__(*args, **kwargs)
323324
self.cfg = cfg
324325
self.nb_features = cfg.embed_dim

0 commit comments

Comments
 (0)