
Commit a5e6bb5

Author: Anna Grebneva

Added LeViT 128s model (#3414)

1 parent e6f2a93 commit a5e6bb5

11 files changed: +255 −0 lines changed


demos/classification_benchmark_demo/cpp/README.md

Lines changed: 1 addition & 0 deletions
@@ -52,6 +52,7 @@ omz_converter --list models.lst
 * hbonet-0.25
 * hbonet-1.0
 * inception-resnet-v2-tf
+* levit-128s
 * mixnet-l
 * mobilenet-v1-0.25-128
 * mobilenet-v1-1.0-224

demos/classification_benchmark_demo/cpp/models.lst

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@ googlenet-v4-tf
 hbonet-0.25
 hbonet-1.0
 inception-resnet-v2-tf
+levit-128s
 mixnet-l
 mobilenet-v1-0.25-128
 mobilenet-v1-1.0-224

demos/classification_demo/python/README.md

Lines changed: 1 addition & 0 deletions
@@ -56,6 +56,7 @@ omz_converter --list models.lst
 * hbonet-0.25
 * hbonet-1.0
 * inception-resnet-v2-tf
+* levit-128s
 * mixnet-l
 * mobilenet-v1-0.25-128
 * mobilenet-v1-1.0-224

demos/classification_demo/python/models.lst

Lines changed: 1 addition & 0 deletions
@@ -18,6 +18,7 @@ googlenet-v4-tf
 hbonet-0.25
 hbonet-1.0
 inception-resnet-v2-tf
+levit-128s
 mixnet-l
 mobilenet-v1-0.25-128
 mobilenet-v1-1.0-224

models/public/device_support.md

Lines changed: 1 addition & 0 deletions
@@ -61,6 +61,7 @@
 | hybrid-cs-model-mri | YES | | |
 | i3d-rgb-tf | YES | YES | |
 | inception-resnet-v2-tf | YES | YES | YES |
+| levit-128s | YES | YES | |
 | license-plate-recognition-barrier-0007 | YES | | |
 | mask_rcnn_inception_resnet_v2_atrous_coco | YES | YES | |
 | mask_rcnn_resnet50_atrous_coco | YES | YES | |

models/public/index.md

Lines changed: 2 additions & 0 deletions
@@ -62,6 +62,7 @@
 omz_models_model_hbonet_0_25
 omz_models_model_hbonet_1_0
 omz_models_model_inception_resnet_v2_tf
+omz_models_model_levit_128s
 omz_models_model_mixnet_l
 omz_models_model_mobilenet_v1_0_25_128
 omz_models_model_mobilenet_v1_1_0_224
@@ -344,6 +345,7 @@ You can download models and convert them into OpenVINO™ IR format (\*.xml + \*
 | Inception (GoogleNet) V3 | TensorFlow\*<br>PyTorch\* | [googlenet-v3](./googlenet-v3/README.md) <br> [googlenet-v3-pytorch](./googlenet-v3-pytorch/README.md) | 77.904%/93.808%<br>77.69%/93.7% | 11.469 | 23.817 |
 | Inception (GoogleNet) V4 | TensorFlow\* | [googlenet-v4-tf](./googlenet-v4-tf/README.md) | 80.204%/95.21% | 24.584 | 42.648 |
 | Inception-ResNet V2 | TensorFlow\* | [inception-resnet-v2-tf](./inception-resnet-v2-tf/README.md) | 77.82%/94.03% | 22.227 | 30.223 |
+| LeViT 128S | PyTorch\* | [levit-128s](./levit-128s/README.md) | 76.54%/92.85% | 0.6177 | 8.2199 |
 | MixNet L | TensorFlow\* | [mixnet-l](./mixnet-l/README.md) | 78.30%/93.91% | 0.565 | 7.3 |
 | MobileNet V1 0.25 128 | Caffe\* | [mobilenet-v1-0.25-128](./mobilenet-v1-0.25-128/README.md) | 40.54%/65% | 0.028 | 0.468 |
 | MobileNet V1 1.0 224 | Caffe\*<br>TensorFlow\* | [mobilenet-v1-1.0-224](./mobilenet-v1-1.0-224/README.md)<br>[mobilenet-v1-1.0-224-tf](./mobilenet-v1-1.0-224-tf/README.md)| 69.496%/89.224%<br>71.03%/89.94% | 1.148 | 4.221 |

models/public/levit-128s/README.md

Lines changed: 95 additions & 0 deletions
@@ -0,0 +1,95 @@
# levit-128s

## Use Case and High-Level Description

The `levit-128s` model is one of the LeViT model family: hybrid neural networks for fast-inference image classification. The model is pre-trained on the ImageNet dataset. LeViT-128S is a small LeViT variant with 128 channels at the input of the transformer stage and 2, 3 and 4 pairs of Attention and MLP blocks in model stages 1, 2 and 3 respectively.

The model input is a blob that consists of a single image with the shape `1, 3, 224, 224` in `RGB` order.

The model output is a typical object classifier for the 1000 different classifications matching those in the ImageNet database.

For details see [repository](https://github.com/rwightman/pytorch-image-models) and [paper](https://arxiv.org/abs/2104.01136).

## Specification

| Metric           | Value          |
| ---------------- | -------------- |
| Type             | Classification |
| GFLOPs           | 0.6177         |
| MParams          | 8.2199         |
| Source framework | PyTorch\*      |

## Accuracy

| Metric | Value  |
| ------ | ------ |
| Top 1  | 76.54% |
| Top 5  | 92.85% |

## Input

### Original model

Image, name - `image`, shape - `1, 3, 224, 224`, format is `B, C, H, W`, where:

- `B` - batch size
- `C` - channel
- `H` - height
- `W` - width

Channel order is `RGB`.
Mean values - [123.675, 116.28, 103.53], scale values - [58.395, 57.12, 57.375].

### Converted model

Image, name - `image`, shape - `1, 3, 224, 224`, format is `B, C, H, W`, where:

- `B` - batch size
- `C` - channel
- `H` - height
- `W` - width

Channel order is `BGR`.

## Output

### Original model

Object classifier according to ImageNet classes, name - `probs`, shape - `1, 1000`, output data format is `B, C`, where:

- `B` - batch size
- `C` - predicted scores for each class, in logits format

### Converted model

Object classifier according to ImageNet classes, name - `probs`, shape - `1, 1000`, output data format is `B, C`, where:

- `B` - batch size
- `C` - predicted scores for each class, in logits format

## Download a Model and Convert it into OpenVINO™ IR Format

You can download models and if necessary convert them into OpenVINO™ IR format using the [Model Downloader and other automation tools](../../../tools/model_tools/README.md) as shown in the examples below.

An example of using the Model Downloader:
```
omz_downloader --name <model_name>
```

An example of using the Model Converter:
```
omz_converter --name <model_name>
```

## Demo usage

The model can be used in the following demos provided by the Open Model Zoo to show its capabilities:

* [Classification Benchmark C++ Demo](../../../demos/classification_benchmark_demo/cpp/README.md)
* [Classification Python\* Demo](../../../demos/classification_demo/python/README.md)

## Legal Information

The original model is distributed under the
[Apache License, Version 2.0](https://raw.githubusercontent.com/rwightman/pytorch-image-models/master/LICENSE).
A copy of the license is provided in `<omz_dir>/models/public/licenses/APACHE-2.0-PyTorch-Image-Models.txt`.
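
A minimal sketch of classifying one image with the converted IR via the OpenVINO Runtime Python API follows the input/output description above. It assumes the FP16 IR produced by `omz_converter --name levit-128s` at its default output path and a local test image; these paths, and the device choice, are illustrative assumptions rather than part of the commit.

```python
# Sketch only: IR path and image file are assumptions. The converted model takes
# raw BGR data because mean/scale normalization and the RGB conversion are folded
# into the IR by the Model Optimizer arguments in model.yml.
import cv2
import numpy as np
from openvino.runtime import Core

core = Core()
model = core.read_model("public/levit-128s/FP16/levit-128s.xml")
compiled = core.compile_model(model, "CPU")

image = cv2.imread("input.jpg")                              # HWC, BGR, uint8
image = cv2.resize(image, (224, 224)).astype(np.float32)
blob = image.transpose(2, 0, 1)[np.newaxis]                  # 1, 3, 224, 224 (B, C, H, W)

logits = compiled([blob])[compiled.output(0)]                # 1, 1000 raw class scores
probs = np.exp(logits) / np.exp(logits).sum()                # softmax over the logits
top5 = probs[0].argsort()[-5:][::-1]
print(top5, probs[0, top5])
```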
models/public/levit-128s/accuracy-check.yml

Lines changed: 61 additions & 0 deletions
@@ -0,0 +1,61 @@
models:
  - name: levit-128s-onnx

    launchers:
      - framework: onnx_runtime
        model: levit-128s.onnx
        adapter: classification

    datasets:
      - name: imagenet_1000_classes
        reader: pillow_imread
        preprocessing:
          - type: resize
            size: 249
            interpolation: BICUBIC
            aspect_ratio_scale: greater
            use_pillow: True
          - type: crop
            size: 224
            use_pillow: True
          - type: normalization
            mean: [123.675, 116.28, 103.53]
            std: [58.395, 57.12, 57.375]
        metrics:
          - name: accuracy@top1
            type: accuracy
            top_k: 1
            reference: 0.7654
          - name: accuracy@top5
            type: accuracy
            top_k: 5
            reference: 0.9285

  - name: levit-128s

    launchers:
      - framework: openvino
        adapter: classification

    datasets:
      - name: imagenet_1000_classes
        reader: pillow_imread
        preprocessing:
          - type: resize
            size: 249
            interpolation: BICUBIC
            aspect_ratio_scale: greater
            use_pillow: True
          - type: crop
            size: 224
            use_pillow: True
          - type: rgb_to_bgr
        metrics:
          - name: accuracy@top1
            type: accuracy
            top_k: 1
            reference: 0.7654
          - name: accuracy@top5
            type: accuracy
            top_k: 5
            reference: 0.9285
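
Both dataset sections describe the same evaluation preprocessing and differ only in the final color handling: explicit normalization for the original ONNX model, `rgb_to_bgr` for the OpenVINO IR. A rough Python equivalent of that pipeline is sketched below; it assumes `aspect_ratio_scale: greater` scales the shorter side to 249, as in the usual timm evaluation recipe, and uses a hypothetical local image file.

```python
# Sketch of the preprocessing described in the config above (original-model branch):
# resize the shorter side to 249 with bicubic interpolation, center-crop 224x224,
# then apply the ImageNet mean/std normalization. "input.jpg" is an assumption.
import numpy as np
from PIL import Image

image = Image.open("input.jpg").convert("RGB")

scale = 249 / min(image.size)                        # shorter side -> 249
new_size = (round(image.width * scale), round(image.height * scale))
image = image.resize(new_size, Image.BICUBIC)

left = (image.width - 224) // 2
top = (image.height - 224) // 2
image = image.crop((left, top, left + 224, top + 224))

data = np.asarray(image, dtype=np.float32)
data = (data - [123.675, 116.28, 103.53]) / [58.395, 57.12, 57.375]
data = data.transpose(2, 0, 1)[np.newaxis]           # 1, 3, 224, 224 for the `image` input
```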

models/public/levit-128s/model.py

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
# Copyright (c) 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from torch import load
from timm.models.levit import levit_128s


def create_levit(weights):
    model = levit_128s()

    checkpoint = load(weights, map_location='cpu')['model']
    model.load_state_dict(checkpoint)

    return model
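
`create_levit()` is the entry point the OMZ converter imports (see `conversion_to_onnx_args` in `model.yml` below). A hedged, hand-rolled equivalent of that ONNX export step might look like the following; the local checkpoint path and the opset choice are illustrative assumptions.

```python
# Rough stand-in for the OMZ ONNX conversion step: build the model from the
# downloaded checkpoint and export it with the input/output names used elsewhere
# in this commit. Paths are assumptions.
import torch
from model import create_levit

net = create_levit("LeViT-128S-96703c44.pth")
net.eval()

dummy = torch.zeros(1, 3, 224, 224)
torch.onnx.export(
    net, dummy, "levit-128s.onnx",
    input_names=["image"], output_names=["probs"], opset_version=11,
)
```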

models/public/levit-128s/model.yml

Lines changed: 66 additions & 0 deletions
@@ -0,0 +1,66 @@
# Copyright (c) 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

description: >-
  The "levit-128s" model is one of the LeViT model family: hybrid neural networks
  for fast-inference image classification. The model is pre-trained on the ImageNet
  dataset. LeViT-128S is a small LeViT variant with 128 channels at the input of the
  transformer stage and 2, 3 and 4 pairs of Attention and MLP blocks in model stages
  1, 2 and 3 respectively.

  The model input is a blob that consists of a single image with the shape "1, 3, 224, 224"
  in "RGB" order.

  The model output is a typical object classifier for the 1000 different classifications
  matching those in the ImageNet database.

  For details see repository <https://github.com/rwightman/pytorch-image-models> and
  paper <https://arxiv.org/abs/2104.01136>.
task_type: classification
files:
  - name: timm-0.5.4-py3-none-any.whl
    size: 431537
    checksum: e8f1967a8e2029fe21a43875132b4b123227b718abc35725d7f2b9fd0ef2062884ac3dd558570b51a780aad89bc375d6
    source: https://files.pythonhosted.org/packages/49/65/a83208746dc9c0d70feff7874b49780ff110810feb528df4b0ecadcbee60/timm-0.5.4-py3-none-any.whl
  - name: LeViT-128S-96703c44.pth
    size: 32152063
    checksum: ac05427904bc10921aa04e4c5970ce75429e4b77231b6735d584d570f4dfaebd9de42539d2200802f1d5a069e8e0071a
    original_source: https://dl.fbaipublicfiles.com/LeViT/LeViT-128S-96703c44.pth
    source: https://storage.openvinotoolkit.org/repositories/open_model_zoo/public/2022.2/levit-128s/LeViT-128S-96703c44.pth
postprocessing:
  - $type: unpack_archive
    format: zip
    file: timm-0.5.4-py3-none-any.whl
conversion_to_onnx_args:
  - --model-path=$dl_dir
  - --model-path=$config_dir
  - --model-name=create_levit
  - --import-module=model
  - --model-param=weights=r"$dl_dir/LeViT-128S-96703c44.pth"
  - --input-shape=1,3,224,224
  - --input-names=image
  - --output-names=probs
  - --output-file=$conv_dir/levit-128s.onnx
input_info:
  - name: image
    shape: [1, 3, 224, 224]
    layout: NCHW
model_optimizer_args:
  - --input_model=$conv_dir/levit-128s.onnx
  - --mean_values=image[123.675,116.28,103.53]
  - --scale_values=image[58.395, 57.12, 57.375]
  - --reverse_input_channels
  - --output=probs
framework: pytorch
license: https://raw.githubusercontent.com/rwightman/pytorch-image-models/master/LICENSE
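
The `model_optimizer_args` above fold the original model's preprocessing (RGB order plus ImageNet mean/scale) into the IR, which is why the converted model accepts raw BGR input. For comparison, a hedged sketch of running the intermediate ONNX model directly with ONNX Runtime, where that normalization must still be done by the caller:

```python
# Sketch only: assumes levit-128s.onnx in the working directory; replace the
# placeholder blob with a normalized RGB image (see the preprocessing sketch
# after accuracy-check.yml above).
import numpy as np
import onnxruntime as ort

data = np.zeros((1, 3, 224, 224), dtype=np.float32)   # placeholder preprocessed blob

session = ort.InferenceSession("levit-128s.onnx", providers=["CPUExecutionProvider"])
logits = session.run(["probs"], {"image": data})[0]   # shape (1, 1000), raw logits
print(int(logits.argmax()))
```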
