
Commit c435efb

Authored by mengniwa
add efficientnet, mobilenet, resnet qdq models (#552)
* add efficientnet, mobilenet, resnet qdq models
* fix readme
* update ONNX_HUB_MANIFEST.json
* fix ONNX_HUB_MANIFEST.json
* fix ONNX_HUB_MANIFEST.json
* update data
* update json info
* update model name
* update json
* remove models

Signed-off-by: mengniwa <[email protected]>
1 parent e77240a commit c435efb

File tree

11 files changed (+153, -6 lines)


ONNX_HUB_MANIFEST.json

Lines changed: 126 additions & 0 deletions

@@ -2033,6 +2033,48 @@
             "model_with_data_bytes": 12780764
         }
     },
+    {
+        "model": "EfficientNet-Lite4-qdq",
+        "model_path": "vision/classification/efficientnet-lite4/model/efficientnet-lite4-11-qdq.onnx",
+        "onnx_version": "1.10.0",
+        "opset_version": 11,
+        "metadata": {
+            "model_sha": "6837d0b19625d4aff8266d7197a7f3775afd82a8c40f9fd0283d52db4955566f",
+            "model_bytes": 13469992,
+            "tags": [
+                "vision",
+                "classification",
+                "efficientnet-lite4"
+            ],
+            "io_ports": {
+                "inputs": [
+                    {
+                        "name": "images:0",
+                        "shape": [
+                            1,
+                            224,
+                            224,
+                            3
+                        ],
+                        "type": "tensor(float)"
+                    }
+                ],
+                "outputs": [
+                    {
+                        "name": "Softmax:0",
+                        "shape": [
+                            1,
+                            1000
+                        ],
+                        "type": "tensor(float)"
+                    }
+                ]
+            },
+            "model_with_data_path": "vision/classification/efficientnet-lite4/model/efficientnet-lite4-11-qdq.tar.gz",
+            "model_with_data_sha": "f58754ff53120d937e4bc32e6a815fc0163261f7b842762868ed453947258390",
+            "model_with_data_bytes": 10193490
+        }
+    },
     {
         "model": "EfficientNet-Lite4",
         "model_path": "vision/classification/efficientnet-lite4/model/efficientnet-lite4-11.onnx",

@@ -3206,6 +3248,48 @@
             "model_with_data_bytes": 3910933
         }
     },
+    {
+        "model": "MobileNet v2-1.0-qdq",
+        "model_path": "vision/classification/mobilenet/model/mobilenetv2-12-qdq.onnx",
+        "onnx_version": "1.10.0",
+        "opset_version": 12,
+        "metadata": {
+            "model_sha": "41a36090dafe98f4ad8f9b7fe0b218c56ac3c031e547f0367c30655d2702bffe",
+            "model_bytes": 3593903,
+            "tags": [
+                "vision",
+                "classification",
+                "mobilenet"
+            ],
+            "io_ports": {
+                "inputs": [
+                    {
+                        "name": "input",
+                        "shape": [
+                            "batch_size",
+                            3,
+                            224,
+                            224
+                        ],
+                        "type": "tensor(float)"
+                    }
+                ],
+                "outputs": [
+                    {
+                        "name": "output",
+                        "shape": [
+                            "batch_size",
+                            1000
+                        ],
+                        "type": "tensor(float)"
+                    }
+                ]
+            },
+            "model_with_data_path": "vision/classification/mobilenet/model/mobilenetv2-12-qdq.tar.gz",
+            "model_with_data_sha": "29be79b1561b1bf028a064eb4061b23b92883cb7f22b50f5f52d685efd23a04c",
+            "model_with_data_bytes": 3431250
+        }
+    },
     {
         "model": "MobileNet v2-1.0-fp32",
         "model_path": "vision/classification/mobilenet/model/mobilenetv2-12.onnx",

@@ -4192,6 +4276,48 @@
             "model_with_data_bytes": 22318352
         }
     },
+    {
+        "model": "ResNet50-qdq",
+        "model_path": "vision/classification/resnet/model/resnet50-v1-12-qdq.onnx",
+        "onnx_version": "1.10.0",
+        "opset_version": 12,
+        "metadata": {
+            "model_sha": "e6429d274805654c79ba68dc886c1efd4eff95393e747c35d3fdab579febd8a0",
+            "model_bytes": 25753167,
+            "tags": [
+                "vision",
+                "classification",
+                "resnet"
+            ],
+            "io_ports": {
+                "inputs": [
+                    {
+                        "name": "data",
+                        "shape": [
+                            "N",
+                            3,
+                            224,
+                            224
+                        ],
+                        "type": "tensor(float)"
+                    }
+                ],
+                "outputs": [
+                    {
+                        "name": "resnetv17_dense0_fwd",
+                        "shape": [
+                            "N",
+                            1000
+                        ],
+                        "type": "tensor(float)"
+                    }
+                ]
+            },
+            "model_with_data_path": "vision/classification/resnet/model/resnet50-v1-12-qdq.tar.gz",
+            "model_with_data_sha": "986160ffdcf88dedba54ee1fd8ea9597a91b11ccf16016f495bffe22e83ea40e",
+            "model_with_data_bytes": 17043120
+        }
+    },
     {
         "model": "ResNet50_fp32",
         "model_path": "vision/classification/resnet/model/resnet50-v1-12.onnx",

vision/classification/efficientnet-lite4/README.md

Lines changed: 2 additions & 1 deletion

@@ -14,6 +14,7 @@ EfficientNet-Lite 4 is the largest variant and most accurate of the set of Effic
 |-------------|:--------------|:--------------|:--------------|:--------------|:--------------|
 |EfficientNet-Lite4 | [51.9 MB](model/efficientnet-lite4-11.onnx) | [48.6 MB](model/efficientnet-lite4-11.tar.gz)|1.7.0|11|80.4|
 |EfficientNet-Lite4-int8 | [13.0 MB](model/efficientnet-lite4-11-int8.onnx) | [12.2 MB](model/efficientnet-lite4-11-int8.tar.gz)|1.9.0|11|77.56|
+|EfficientNet-Lite4-qdq | [12.9 MB](model/efficientnet-lite4-11-qdq.onnx) | [9.72 MB](model/efficientnet-lite4-11-qdq.tar.gz) |1.10.0 | 11| 76.90 |
 > The fp32 Top-1 accuracy got by [Intel® Neural Compressor](https://github.com/intel/neural-compressor) is 77.70%, and compared with this value, int8 EfficientNet-Lite4's Top-1 accuracy drop ratio is 0.18% and performance improvement is 1.12x.
 >
 > **Note**
@@ -139,7 +140,7 @@ Refer to [efficientnet-lite4 conversion notebook](https://github.com/onnx/tensor
 <hr>
 
 ## Quantization
-CaffeNet-int8 is obtained by quantizing fp32 CaffeNet model. We use [Intel® Neural Compressor](https://github.com/intel/neural-compressor) with onnxruntime backend to perform quantization. View the [instructions](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq/README.md) to understand how to use Intel® Neural Compressor for quantization.
+EfficientNet-Lite4-int8 and EfficientNet-Lite4-qdq are obtained by quantizing the fp32 EfficientNet-Lite4 model. We use [Intel® Neural Compressor](https://github.com/intel/neural-compressor) with onnxruntime backend to perform quantization. View the [instructions](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq/README.md) to understand how to use Intel® Neural Compressor for quantization.
 
 ### Environment
 onnx: 1.9.0
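
For a quick smoke test of the new qdq file, here is a minimal onnxruntime sketch. The input/output names and the NHWC 1×224×224×3 shape come from the manifest entry above; the local path and the random input are assumptions (a real run would apply the EfficientNet-Lite4 preprocessing described in this README):

```python
import numpy as np
import onnxruntime as ort

# Assumes the LFS-tracked model file has been pulled to this relative path.
sess = ort.InferenceSession(
    "vision/classification/efficientnet-lite4/model/efficientnet-lite4-11-qdq.onnx",
    providers=["CPUExecutionProvider"],
)

# EfficientNet-Lite4 takes a channels-last float tensor named "images:0".
dummy = np.random.rand(1, 224, 224, 3).astype(np.float32)
(probs,) = sess.run(["Softmax:0"], {"images:0": dummy})
print(probs.shape)          # (1, 1000)
print(int(probs.argmax()))  # class index for the random input; meaningless, but proves the graph runs
```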

vision/classification/efficientnet-lite4/model/efficientnet-lite4-11-qdq.onnx (Git LFS pointer)

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6837d0b19625d4aff8266d7197a7f3775afd82a8c40f9fd0283d52db4955566f
+size 13469992

vision/classification/efficientnet-lite4/model/efficientnet-lite4-11-qdq.tar.gz (Git LFS pointer)

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f58754ff53120d937e4bc32e6a815fc0163261f7b842762868ed453947258390
+size 10193490

vision/classification/mobilenet/README.md

Lines changed: 2 additions & 1 deletion

@@ -18,6 +18,7 @@ The below model is using multiplier value as 1.0.
 |MobileNet v2-1.0| [13.6 MB](model/mobilenetv2-7.onnx) | [14.1 MB](model/mobilenetv2-7.tar.gz) | 1.2.1 | 7| 70.94 | 89.99 |
 |MobileNet v2-1.0-fp32| [13.3 MB](model/mobilenetv2-12.onnx) | [12.9 MB](model/mobilenetv2-12.tar.gz) | 1.9.0 | 12| 69.48 | 89.26 |
 |MobileNet v2-1.0-int8| [3.5 MB](model/mobilenetv2-12-int8.onnx) | [3.7 MB](model/mobilenetv2-12-int8.tar.gz) | 1.9.0 | 12| 68.30 | 88.44 |
+|MobileNet v2-1.0-qdq| [3.4 MB](model/mobilenetv2-12-qdq.onnx) | [3.3 MB](model/mobilenetv2-12-qdq.tar.gz) |1.10.0| 12 |67.40 | |
 > Compared with the fp32 MobileNet v2-1.0, int8 MobileNet v2-1.0's Top-1 accuracy decline ratio is 1.70%, Top-5 accuracy decline ratio is 0.92% and performance improvement is 1.05x.
 >
 > Note the performance depends on the test hardware.
@@ -56,7 +57,7 @@ We used MXNet as framework with gluon APIs to perform training. View the [traini
 We used MXNet as framework with gluon APIs to perform validation. Use the notebook [imagenet_validation](../imagenet_validation.ipynb) to verify the accuracy of the model on the validation set. Make sure to specify the appropriate model name in the notebook.
 
 ## Quantization
-MobileNet v2-1.0-int8 is obtained by quantizing MobileNet v2-1.0-fp32 model. We use [Intel® Neural Compressor](https://github.com/intel/neural-compressor) with onnxruntime backend to perform quantization. View the [instructions](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq/README.md) to understand how to use Intel® Neural Compressor for quantization.
+MobileNet v2-1.0-int8 and MobileNet v2-1.0-qdq are obtained by quantizing MobileNet v2-1.0-fp32 model. We use [Intel® Neural Compressor](https://github.com/intel/neural-compressor) with onnxruntime backend to perform quantization. View the [instructions](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq/README.md) to understand how to use Intel® Neural Compressor for quantization.
 
 ### Environment
 onnx: 1.9.0
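
The Top-1/Top-5 columns and the decline-ratio note above are plain top-k accuracies over the ImageNet validation set. A small numpy sketch of that metric, using toy logits and labels rather than the validation notebook's real outputs:

```python
import numpy as np

def topk_accuracy(logits: np.ndarray, labels: np.ndarray, k: int = 1) -> float:
    """Fraction of samples whose true label is among the k highest-scoring classes."""
    # Indices of the k largest scores per row; ordering inside the top-k does not matter.
    topk = np.argpartition(logits, -k, axis=1)[:, -k:]
    return float((topk == labels[:, None]).any(axis=1).mean())

# Toy example: 4 samples, 1000 ImageNet classes.
rng = np.random.default_rng(0)
logits = rng.standard_normal((4, 1000)).astype(np.float32)
labels = np.array([3, 17, 981, 42])
print(topk_accuracy(logits, labels, k=1), topk_accuracy(logits, labels, k=5))
```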

vision/classification/mobilenet/model/mobilenetv2-12-qdq.onnx (Git LFS pointer)

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41a36090dafe98f4ad8f9b7fe0b218c56ac3c031e547f0367c30655d2702bffe
+size 3593903

vision/classification/mobilenet/model/mobilenetv2-12-qdq.tar.gz (Git LFS pointer)

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29be79b1561b1bf028a064eb4061b23b92883cb7f22b50f5f52d685efd23a04c
+size 3431250

vision/classification/resnet/README.md

Lines changed: 4 additions & 3 deletions

@@ -32,8 +32,9 @@ ResNet v2 uses pre-activation function whereas ResNet v1 uses post-activation f
 |ResNet50| [97.8 MB](model/resnet50-v1-7.onnx) |[92.2 MB](model/resnet50-v1-7.tar.gz) |1.2.1 |7|74.93 | 92.38 |
 |ResNet101| [170.6 MB](model/resnet101-v1-7.onnx) | [159.8 MB](model/resnet101-v1-7.tar.gz) | 1.2.1 |7 | 76.48 | 93.20 |
 |ResNet152| [230.6 MB](model/resnet152-v1-7.onnx) |[217.2 MB](model/resnet152-v1-7.tar.gz) | 1.2.1 |7 |77.11 | 93.61 |
-|ResNet50_fp32| [97.8 MB](model/resnet50-v1-12.onnx) |[92.0 MB](model/resnet50-v1-12.tar.gz) |1.7.0 |12 |74.97 |92.33 |
-|ResNet50_int8| [24.6 MB](model/resnet50-v1-12-int8.onnx) |[22.3 MB](model/resnet50-v1-12-int8.tar.gz) |1.7.0 |12 |74.77 |92.32 |
+|ResNet50-fp32| [97.8 MB](model/resnet50-v1-12.onnx) |[92.0 MB](model/resnet50-v1-12.tar.gz) |1.7.0 |12 |74.97 |92.33 |
+|ResNet50-int8| [24.6 MB](model/resnet50-v1-12-int8.onnx) |[22.3 MB](model/resnet50-v1-12-int8.tar.gz) |1.7.0 |12 |74.77 |92.32 |
+|ResNet50-qdq | [24.6 MB](model/resnet50-v1-12-qdq.onnx) | [16.8 MB](model/resnet50-v1-12-qdq.tar.gz) | 1.10.0 | 12 |74.43 | |
 > Compared with the fp32 ResNet50, int8 ResNet50's Top-1 accuracy drop ratio is 0.27%, Top-5 accuracy drop ratio is 0.01% and performance improvement is 1.82x.
 >
 > Note the performance depends on the test hardware.
@@ -109,7 +110,7 @@ We used MXNet as framework with gluon APIs to perform training. View the [traini
 We used MXNet as framework with gluon APIs to perform validation. Use the notebook [imagenet_validation](../imagenet_validation.ipynb) to verify the accuracy of the model on the validation set. Make sure to specify the appropriate model name in the notebook.
 
 ## Quantization
-ResNet50_int8 is obtained by quantizing ResNet50_fp32 model. We use [Intel® Neural Compressor](https://github.com/intel/neural-compressor) with onnxruntime backend to perform quantization. View the [instructions](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq/README.md) to understand how to use Intel® Neural Compressor for quantization.
+ResNet50-int8 and ResNet50-qdq are obtained by quantizing ResNet50-fp32 model. We use [Intel® Neural Compressor](https://github.com/intel/neural-compressor) with onnxruntime backend to perform quantization. View the [instructions](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq/README.md) to understand how to use Intel® Neural Compressor for quantization.
 
 ### Environment
 onnx: 1.7.0
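
The README points at Intel® Neural Compressor for the actual recipe. Purely to illustrate what "QDQ format" means, here is a hedged sketch using onnxruntime's own static-quantization API instead (not the Neural Compressor flow used for the published model); the calibration reader feeds random tensors as a placeholder and the file paths are assumptions:

```python
import numpy as np
from onnxruntime.quantization import (
    CalibrationDataReader,
    QuantFormat,
    QuantType,
    quantize_static,
)

class RandomCalibrationReader(CalibrationDataReader):
    """Placeholder calibration source; a real run feeds preprocessed ImageNet batches."""

    def __init__(self, input_name: str = "data", num_batches: int = 8):
        self._batches = iter(
            {input_name: np.random.rand(1, 3, 224, 224).astype(np.float32)}
            for _ in range(num_batches)
        )

    def get_next(self):
        return next(self._batches, None)

# QuantFormat.QDQ inserts QuantizeLinear/DequantizeLinear node pairs around fp32 ops
# instead of rewriting them into fused QLinear* operators.
quantize_static(
    "resnet50-v1-12.onnx",           # fp32 input model (path is an assumption)
    "resnet50-v1-12-qdq-demo.onnx",  # quantized output
    RandomCalibrationReader(),
    quant_format=QuantFormat.QDQ,
    activation_type=QuantType.QUInt8,
    weight_type=QuantType.QInt8,
)
```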

vision/classification/resnet/model/resnet50-v1-12-qdq.onnx (Git LFS pointer)

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6429d274805654c79ba68dc886c1efd4eff95393e747c35d3fdab579febd8a0
+size 25753167

vision/classification/resnet/model/resnet50-v1-12-qdq.tar.gz (Git LFS pointer)

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:986160ffdcf88dedba54ee1fd8ea9597a91b11ccf16016f495bffe22e83ea40e
+size 17043120
