Commit 597d053

Author: Anna Grebneva
1 parent a04512d

Added efficientnet v2 models (#2982)

* Added efficientnet-v2-b0 model
* Added efficientnet-v2-s model
* Fix yamllint errors
* Updated link to paper

File tree

12 files changed: +430 −0 lines changed

demos/classification_demo/python/README.md

Lines changed: 2 additions & 0 deletions

```diff
@@ -50,6 +50,8 @@ omz_converter --list models.lst
 * efficientnet-b5-pytorch
 * efficientnet-b7_auto_aug
 * efficientnet-b7-pytorch
+* efficientnet-v2-b0
+* efficientnet-v2-s
 * googlenet-v1
 * googlenet-v1-tf
 * googlenet-v2
```

demos/classification_demo/python/models.lst

Lines changed: 2 additions & 0 deletions

```diff
@@ -18,6 +18,8 @@ efficientnet-b5
 efficientnet-b5-pytorch
 efficientnet-b7_auto_aug
 efficientnet-b7-pytorch
+efficientnet-v2-b0
+efficientnet-v2-s
 googlenet-v1
 googlenet-v1-tf
 googlenet-v2
```

models/public/device_support.md

Lines changed: 2 additions & 0 deletions

```diff
@@ -40,6 +40,8 @@
 | efficientnet-b5-pytorch | YES | YES | |
 | efficientnet-b7-pytorch | YES | YES | |
 | efficientnet-b7_auto_aug | YES | YES | |
+| efficientnet-v2-b0 | YES | YES | |
+| efficientnet-v2-s | YES | YES | |
 | f3net | YES | YES | YES |
 | face-detection-retail-0044 | YES | YES | |
 | face-recognition-resnet100-arcface-onnx | YES | YES | |
```
Lines changed: 84 additions & 0 deletions (new file)

# efficientnet-v2-b0

## Use Case and High-Level Description

The `efficientnet-v2-b0` model is a variant of EfficientNetV2, pre-trained on the ImageNet dataset for the image classification task. EfficientNetV2 is a new family of convolutional networks with faster training speed and better parameter efficiency than previous models. A combination of training-aware neural architecture search and scaling was used during development to jointly optimize training speed and parameter efficiency.

More details are provided in the [paper](https://arxiv.org/abs/2104.00298) and the [repository](https://github.com/rwightman/pytorch-image-models).

## Specification

| Metric           | Value          |
|------------------|----------------|
| Type             | Classification |
| GFlops           | 1.4641         |
| MParams          | 7.1094         |
| Source framework | PyTorch\*      |

## Accuracy

| Metric | Value  |
| ------ | ------ |
| Top 1  | 78.36% |
| Top 5  | 94.02% |

## Input

### Original Model

Image, name: `input`, shape: `1, 3, 224, 224`, format: `B, C, H, W`, where:

- `B` - batch size
- `C` - number of channels
- `H` - image height
- `W` - image width

Expected color order: `RGB`.
Mean values - [123.675, 116.28, 103.53], scale values - [58.395, 57.12, 57.375].

### Converted Model

Image, name: `input`, shape: `1, 3, 224, 224`, format: `B, C, H, W`, where:

- `B` - batch size
- `C` - number of channels
- `H` - image height
- `W` - image width

Expected color order: `BGR`.

## Output

### Original Model

Object classifier according to ImageNet classes, name: `logits`, shape: `1, 1000`, output data format: `B, C`, where:

- `B` - batch size
- `C` - vector of raw scores (logits) for all dataset classes

### Converted Model

Object classifier according to ImageNet classes, name: `logits`, shape: `1, 1000`, output data format: `B, C`, where:

- `B` - batch size
- `C` - vector of raw scores (logits) for all dataset classes

## Download a Model and Convert it into Inference Engine Format

You can download models and, if necessary, convert them into Inference Engine format using the [Model Downloader and other automation tools](../../../tools/model_tools/README.md), as shown in the examples below.

An example of using the Model Downloader:
```
omz_downloader --name <model_name>
```

An example of using the Model Converter:
```
omz_converter --name <model_name>
```

## Legal Information

The original model is distributed under the
[Apache License, Version 2.0](https://raw.githubusercontent.com/rwightman/pytorch-image-models/master/LICENSE).
A copy of the license is provided in `<omz_dir>/models/public/licenses/APACHE-2.0-PyTorch-Image-Models.txt`.
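The mean/scale normalization described in the Input section can be sketched in plain Python. This is only an illustration of the documented values, not part of the OMZ tooling; `normalize_pixel` is a hypothetical helper name:

```python
# Per-channel normalization baked into the converted model by the Model
# Optimizer (values from the Input section): out = (pixel - mean) / scale.
MEAN = [123.675, 116.28, 103.53]
SCALE = [58.395, 57.12, 57.375]

def normalize_pixel(rgb):
    """Normalize one RGB pixel the way the original model expects."""
    return [(p - m) / s for p, m, s in zip(rgb, MEAN, SCALE)]

# A pixel equal to the channel means maps to zero in every channel:
print(normalize_pixel([123.675, 116.28, 103.53]))  # → [0.0, 0.0, 0.0]
```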
Lines changed: 69 additions & 0 deletions (new file)

```yaml
models:
  - name: efficientnet-v2-b0-onnx

    launchers:
      - framework: onnx_runtime
        model: efficientnet-v2-b0.onnx
        adapter: classification

    datasets:
      - name: imagenet_1000_classes
        reader: pillow_imread

        preprocessing:
          - type: resize
            size: 256
            aspect_ratio_scale: greater
            use_pillow: True
            interpolation: BICUBIC

          - type: crop
            use_pillow: True
            size: 224

          - type: normalization
            mean: [123.675, 116.28, 103.53]
            std: [58.395, 57.12, 57.375]

        metrics:
          - name: accuracy@top1
            type: accuracy
            top_k: 1
            reference: 0.78358
          - name: accuracy@top5
            type: accuracy
            top_k: 5
            reference: 0.94024

  - name: efficientnet-v2-b0

    launchers:
      - framework: dlsdk
        adapter: classification

    datasets:
      - name: imagenet_1000_classes
        reader: pillow_imread

        preprocessing:
          - type: rgb_to_bgr

          - type: resize
            size: 256
            aspect_ratio_scale: greater
            use_pillow: True
            interpolation: BICUBIC

          - type: crop
            use_pillow: True
            size: 224

        metrics:
          - name: accuracy@top1
            type: accuracy
            top_k: 1
            reference: 0.78358
          - name: accuracy@top5
            type: accuracy
            top_k: 5
            reference: 0.94024
```
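The resize-then-crop geometry in the config above can be sketched in plain Python, assuming `aspect_ratio_scale: greater` means the shorter side is scaled to `size` and the greater side is scaled by the same ratio (the torchvision-style shortest-side resize). The helper names are illustrative, not Accuracy Checker APIs:

```python
def resize_shorter_side(width, height, size):
    """Return (new_width, new_height) with the shorter side scaled to `size`,
    preserving aspect ratio on the greater side."""
    scale = size / min(width, height)
    return round(width * scale), round(height * scale)

def center_crop_box(width, height, size):
    """Return (left, top, right, bottom) of a centered size x size crop."""
    left = (width - size) // 2
    top = (height - size) // 2
    return left, top, left + size, top + size

# A 640x480 image resized for the b0 pipeline (resize 256, crop 224):
w, h = resize_shorter_side(640, 480, 256)
print(w, h)                        # → 341 256
print(center_crop_box(w, h, 224))  # → (58, 16, 282, 240)
```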
Lines changed: 57 additions & 0 deletions (new file)

```yaml
# Copyright (c) 2021 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

description: >-
  The "efficientnet-v2-b0" model is a variant of the EfficientNetV2 pre-trained on
  ImageNet dataset for image classification task. EfficientNetV2 is a new family of
  convolutional networks that have faster training speed and better parameter efficiency
  than previous models. A combination of training-aware neural architecture search
  and scaling were used in the development to jointly optimize training speed and
  parameter efficiency.

  More details provided in the paper <https://arxiv.org/abs/2104.00298> and repository
  <https://github.com/rwightman/pytorch-image-models>.
task_type: classification
files:
  - name: timm-0.4.12-py3-none-any.whl
    size: 376973
    checksum: e5030b792501314113ca804b3b00d35d89c40d0a92d53aa0c41e2a83697d11ef5ea8c73ed1c0bec28c4791047c9dd1b3
    source: https://files.pythonhosted.org/packages/90/fc/606bc5cf46acac3aa9bd179b3954433c026aaf88ea98d6b19f5d14c336da/timm-0.4.12-py3-none-any.whl
  - name: tf_efficientnetv2_b0-c7cc451f.pth
    size: 28865269
    checksum: b3192e284c7ecf9ab766eb77682ee258152b3f9b142b3a962bdbd409a5332b7731574f8f7e6a5fe3f7af353acd6ad76a
    source: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_b0-c7cc451f.pth
postprocessing:
  - $type: unpack_archive
    format: zip
    file: timm-0.4.12-py3-none-any.whl
conversion_to_onnx_args:
  - --model-path=$dl_dir
  - --model-name=tf_efficientnetv2_b0
  - --import-module=timm.models.efficientnet
  - --weights=$dl_dir/tf_efficientnetv2_b0-c7cc451f.pth
  - --input-shape=1,3,224,224
  - --input-names=input
  - --output-names=logits
  - --output-file=$conv_dir/efficientnet-v2-b0.onnx
model_optimizer_args:
  - --input_shape=[1,3,224,224]
  - --input=input
  - --input_model=$conv_dir/efficientnet-v2-b0.onnx
  - --mean_values=input[123.675,116.28,103.53]
  - --scale_values=input[58.395,57.12,57.375]
  - --reverse_input_channels
  - --output=logits
framework: pytorch
license: https://raw.githubusercontent.com/rwightman/pytorch-image-models/master/LICENSE
```
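Each `files` entry above pairs a download URL with a size and a 96-hex-digit checksum, which suggests a SHA-384 digest (an assumption about the downloader's verification scheme, not confirmed by this diff). A minimal sketch of such a check, with a hypothetical `verify_checksum` helper:

```python
import hashlib

def verify_checksum(data: bytes, expected_hex: str) -> bool:
    """Compare the SHA-384 digest of `data` against the expected hex string."""
    return hashlib.sha384(data).hexdigest() == expected_hex

# Demonstrated on an in-memory payload rather than the real model files:
sample = b"example payload"
digest = hashlib.sha384(sample).hexdigest()
print(len(digest))                         # → 96 (same length as the config's checksums)
print(verify_checksum(sample, digest))     # → True
```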
Lines changed: 84 additions & 0 deletions (new file)

# efficientnet-v2-s

## Use Case and High-Level Description

The `efficientnet-v2-s` model is the small variant of EfficientNetV2, pre-trained on the ImageNet-21k dataset and fine-tuned on ImageNet-1k for the image classification task. EfficientNetV2 is a new family of convolutional networks with faster training speed and better parameter efficiency than previous models. A combination of training-aware neural architecture search and scaling was used during development to jointly optimize training speed and parameter efficiency.

More details are provided in the [paper](https://arxiv.org/abs/2104.00298) and the [repository](https://github.com/rwightman/pytorch-image-models).

## Specification

| Metric           | Value          |
|------------------|----------------|
| Type             | Classification |
| GFlops           | 16.9406        |
| MParams          | 21.3816        |
| Source framework | PyTorch\*      |

## Accuracy

| Metric | Value  |
| ------ | ------ |
| Top 1  | 84.29% |
| Top 5  | 97.26% |

## Input

### Original Model

Image, name: `input`, shape: `1, 3, 384, 384`, format: `B, C, H, W`, where:

- `B` - batch size
- `C` - number of channels
- `H` - image height
- `W` - image width

Expected color order: `RGB`.
Mean values - [127.5, 127.5, 127.5], scale values - [127.5, 127.5, 127.5].

### Converted Model

Image, name: `input`, shape: `1, 3, 384, 384`, format: `B, C, H, W`, where:

- `B` - batch size
- `C` - number of channels
- `H` - image height
- `W` - image width

Expected color order: `BGR`.

## Output

### Original Model

Object classifier according to ImageNet classes, name: `logits`, shape: `1, 1000`, output data format: `B, C`, where:

- `B` - batch size
- `C` - vector of raw scores (logits) for all dataset classes

### Converted Model

Object classifier according to ImageNet classes, name: `logits`, shape: `1, 1000`, output data format: `B, C`, where:

- `B` - batch size
- `C` - vector of raw scores (logits) for all dataset classes

## Download a Model and Convert it into Inference Engine Format

You can download models and, if necessary, convert them into Inference Engine format using the [Model Downloader and other automation tools](../../../tools/model_tools/README.md), as shown in the examples below.

An example of using the Model Downloader:
```
omz_downloader --name <model_name>
```

An example of using the Model Converter:
```
omz_converter --name <model_name>
```

## Legal Information

The original model is distributed under the
[Apache License, Version 2.0](https://raw.githubusercontent.com/rwightman/pytorch-image-models/master/LICENSE).
A copy of the license is provided in `<omz_dir>/models/public/licenses/APACHE-2.0-PyTorch-Image-Models.txt`.
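Both models emit raw logits over the 1000 ImageNet classes, so a consumer typically applies softmax and takes the top-k entries. A minimal sketch in plain Python (the `topk_from_logits` helper is illustrative, not part of the demos):

```python
import math

def topk_from_logits(logits, k=5):
    """Convert raw logits to probabilities and return top-k (index, prob) pairs."""
    m = max(logits)                          # subtract max for numerical stability
    exps = [math.exp(x - m) for x in logits]
    total = sum(exps)
    probs = [e / total for e in exps]
    ranked = sorted(enumerate(probs), key=lambda ip: ip[1], reverse=True)
    return ranked[:k]

# Toy example with 6 classes instead of the full 1000:
top = topk_from_logits([0.1, 2.0, -1.0, 0.5, 3.0, 0.0], k=2)
print(top[0][0])  # index of the most likely class → 4
```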
Lines changed: 69 additions & 0 deletions (new file)

```yaml
models:
  - name: efficientnet-v2-s-onnx

    launchers:
      - framework: onnx_runtime
        model: efficientnet-v2-s.onnx
        adapter: classification

    datasets:
      - name: imagenet_1000_classes
        reader: pillow_imread

        preprocessing:
          - type: resize
            size: 384
            aspect_ratio_scale: greater
            use_pillow: True
            interpolation: BICUBIC

          - type: crop
            use_pillow: True
            size: 384

          - type: normalization
            mean: [127.5, 127.5, 127.5]
            std: [127.5, 127.5, 127.5]

        metrics:
          - name: accuracy@top1
            type: accuracy
            top_k: 1
            reference: 0.84286
          - name: accuracy@top5
            type: accuracy
            top_k: 5
            reference: 0.97256

  - name: efficientnet-v2-s

    launchers:
      - framework: dlsdk
        adapter: classification

    datasets:
      - name: imagenet_1000_classes
        reader: pillow_imread

        preprocessing:
          - type: rgb_to_bgr

          - type: resize
            size: 384
            aspect_ratio_scale: greater
            use_pillow: True
            interpolation: BICUBIC

          - type: crop
            use_pillow: True
            size: 384

        metrics:
          - name: accuracy@top1
            type: accuracy
            top_k: 1
            reference: 0.84286
          - name: accuracy@top5
            type: accuracy
            top_k: 5
            reference: 0.97256
```
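As a quick sanity check (not part of the OMZ tooling), the `reference` fractions in the accuracy-check configs should round to the Top-1/Top-5 percentages quoted in the model READMEs:

```python
# Map an accuracy-check reference value to the README's percentage notation.
def as_readme_percent(reference: float) -> str:
    return f"{reference * 100:.2f}%"

print(as_readme_percent(0.84286))  # → "84.29%"  (efficientnet-v2-s Top 1)
print(as_readme_percent(0.97256))  # → "97.26%"  (efficientnet-v2-s Top 5)
print(as_readme_percent(0.78358))  # → "78.36%"  (efficientnet-v2-b0 Top 1)
```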
