
Commit c435efb

Authored by mengniwa
add efficientnet, mobilenet, resnet qdq models (#552)
* add efficientnet, mobilenet, resnet qdq models
* fix readme
* update ONNX_HUB_MANIFEST.json
* fix ONNX_HUB_MANIFEST.json
* fix ONNX_HUB_MANIFEST.json
* update data
* update json info
* update model name
* update json
* remove models

Signed-off-by: mengniwa <[email protected]>
1 parent e77240a commit c435efb

File tree

11 files changed (+153, -6 lines)


ONNX_HUB_MANIFEST.json

Lines changed: 126 additions & 0 deletions

@@ -2033,6 +2033,48 @@
             "model_with_data_bytes": 12780764
         }
     },
+    {
+        "model": "EfficientNet-Lite4-qdq",
+        "model_path": "vision/classification/efficientnet-lite4/model/efficientnet-lite4-11-qdq.onnx",
+        "onnx_version": "1.10.0",
+        "opset_version": 11,
+        "metadata": {
+            "model_sha": "6837d0b19625d4aff8266d7197a7f3775afd82a8c40f9fd0283d52db4955566f",
+            "model_bytes": 13469992,
+            "tags": [
+                "vision",
+                "classification",
+                "efficientnet-lite4"
+            ],
+            "io_ports": {
+                "inputs": [
+                    {
+                        "name": "images:0",
+                        "shape": [
+                            1,
+                            224,
+                            224,
+                            3
+                        ],
+                        "type": "tensor(float)"
+                    }
+                ],
+                "outputs": [
+                    {
+                        "name": "Softmax:0",
+                        "shape": [
+                            1,
+                            1000
+                        ],
+                        "type": "tensor(float)"
+                    }
+                ]
+            },
+            "model_with_data_path": "vision/classification/efficientnet-lite4/model/efficientnet-lite4-11-qdq.tar.gz",
+            "model_with_data_sha": "f58754ff53120d937e4bc32e6a815fc0163261f7b842762868ed453947258390",
+            "model_with_data_bytes": 10193490
+        }
+    },
     {
         "model": "EfficientNet-Lite4",
         "model_path": "vision/classification/efficientnet-lite4/model/efficientnet-lite4-11.onnx",

@@ -3206,6 +3248,48 @@
             "model_with_data_bytes": 3910933
         }
     },
+    {
+        "model": "MobileNet v2-1.0-qdq",
+        "model_path": "vision/classification/mobilenet/model/mobilenetv2-12-qdq.onnx",
+        "onnx_version": "1.10.0",
+        "opset_version": 12,
+        "metadata": {
+            "model_sha": "41a36090dafe98f4ad8f9b7fe0b218c56ac3c031e547f0367c30655d2702bffe",
+            "model_bytes": 3593903,
+            "tags": [
+                "vision",
+                "classification",
+                "mobilenet"
+            ],
+            "io_ports": {
+                "inputs": [
+                    {
+                        "name": "input",
+                        "shape": [
+                            "batch_size",
+                            3,
+                            224,
+                            224
+                        ],
+                        "type": "tensor(float)"
+                    }
+                ],
+                "outputs": [
+                    {
+                        "name": "output",
+                        "shape": [
+                            "batch_size",
+                            1000
+                        ],
+                        "type": "tensor(float)"
+                    }
+                ]
+            },
+            "model_with_data_path": "vision/classification/mobilenet/model/mobilenetv2-12-qdq.tar.gz",
+            "model_with_data_sha": "29be79b1561b1bf028a064eb4061b23b92883cb7f22b50f5f52d685efd23a04c",
+            "model_with_data_bytes": 3431250
+        }
+    },
     {
         "model": "MobileNet v2-1.0-fp32",
         "model_path": "vision/classification/mobilenet/model/mobilenetv2-12.onnx",

@@ -4192,6 +4276,48 @@
             "model_with_data_bytes": 22318352
         }
     },
+    {
+        "model": "ResNet50-qdq",
+        "model_path": "vision/classification/resnet/model/resnet50-v1-12-qdq.onnx",
+        "onnx_version": "1.10.0",
+        "opset_version": 12,
+        "metadata": {
+            "model_sha": "e6429d274805654c79ba68dc886c1efd4eff95393e747c35d3fdab579febd8a0",
+            "model_bytes": 25753167,
+            "tags": [
+                "vision",
+                "classification",
+                "resnet"
+            ],
+            "io_ports": {
+                "inputs": [
+                    {
+                        "name": "data",
+                        "shape": [
+                            "N",
+                            3,
+                            224,
+                            224
+                        ],
+                        "type": "tensor(float)"
+                    }
+                ],
+                "outputs": [
+                    {
+                        "name": "resnetv17_dense0_fwd",
+                        "shape": [
+                            "N",
+                            1000
+                        ],
+                        "type": "tensor(float)"
+                    }
+                ]
+            },
+            "model_with_data_path": "vision/classification/resnet/model/resnet50-v1-12-qdq.tar.gz",
+            "model_with_data_sha": "986160ffdcf88dedba54ee1fd8ea9597a91b11ccf16016f495bffe22e83ea40e",
+            "model_with_data_bytes": 17043120
+        }
+    },
     {
         "model": "ResNet50_fp32",
         "model_path": "vision/classification/resnet/model/resnet50-v1-12.onnx",

vision/classification/efficientnet-lite4/README.md

Lines changed: 2 additions & 1 deletion

@@ -14,6 +14,7 @@ EfficientNet-Lite 4 is the largest variant and most accurate of the set of Effic
 |-------------|:--------------|:--------------|:--------------|:--------------|:--------------|
 |EfficientNet-Lite4 | [51.9 MB](model/efficientnet-lite4-11.onnx) | [48.6 MB](model/efficientnet-lite4-11.tar.gz)|1.7.0|11|80.4|
 |EfficientNet-Lite4-int8 | [13.0 MB](model/efficientnet-lite4-11-int8.onnx) | [12.2 MB](model/efficientnet-lite4-11-int8.tar.gz)|1.9.0|11|77.56|
+|EfficientNet-Lite4-qdq | [12.9 MB](model/efficientnet-lite4-11-qdq.onnx) | [9.72 MB](model/efficientnet-lite4-11-qdq.tar.gz) |1.10.0 | 11| 76.90 |
 > The fp32 Top-1 accuracy got by [Intel® Neural Compressor](https://github.com/intel/neural-compressor) is 77.70%, and compared with this value, int8 EfficientNet-Lite4's Top-1 accuracy drop ratio is 0.18% and performance improvement is 1.12x.
 >
 > **Note**
@@ -139,7 +140,7 @@ Refer to [efficientnet-lite4 conversion notebook](https://github.com/onnx/tensor
 <hr>
 
 ## Quantization
-CaffeNet-int8 is obtained by quantizing fp32 CaffeNet model. We use [Intel® Neural Compressor](https://github.com/intel/neural-compressor) with onnxruntime backend to perform quantization. View the [instructions](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq/README.md) to understand how to use Intel® Neural Compressor for quantization.
+EfficientNet-Lite4-int8 and EfficientNet-Lite4-qdq are obtained by quantizing the fp32 EfficientNet-Lite4 model. We use [Intel® Neural Compressor](https://github.com/intel/neural-compressor) with onnxruntime backend to perform quantization. View the [instructions](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq/README.md) to understand how to use Intel® Neural Compressor for quantization.
 
 ### Environment
 onnx: 1.9.0
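
For a quick smoke test of the new qdq file, here is a minimal onnxruntime sketch. The input/output names and the NHWC 1×224×224×3 shape come from the manifest entry above; the local path and the random input are assumptions (a real run would apply the EfficientNet-Lite4 preprocessing described in this README):

```python
import numpy as np
import onnxruntime as ort

# Assumes the LFS-tracked model file has been pulled to this relative path.
sess = ort.InferenceSession(
    "vision/classification/efficientnet-lite4/model/efficientnet-lite4-11-qdq.onnx",
    providers=["CPUExecutionProvider"],
)

# EfficientNet-Lite4 takes a channels-last float tensor named "images:0".
dummy = np.random.rand(1, 224, 224, 3).astype(np.float32)
(probs,) = sess.run(["Softmax:0"], {"images:0": dummy})
print(probs.shape)          # (1, 1000)
print(int(probs.argmax()))  # class index for the random input; meaningless, but proves the graph runs
```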

vision/classification/efficientnet-lite4/model/efficientnet-lite4-11-qdq.onnx (Git LFS pointer)

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6837d0b19625d4aff8266d7197a7f3775afd82a8c40f9fd0283d52db4955566f
+size 13469992

vision/classification/efficientnet-lite4/model/efficientnet-lite4-11-qdq.tar.gz (Git LFS pointer)

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f58754ff53120d937e4bc32e6a815fc0163261f7b842762868ed453947258390
+size 10193490

vision/classification/mobilenet/README.md

Lines changed: 2 additions & 1 deletion

@@ -18,6 +18,7 @@ The below model is using multiplier value as 1.0.
 |MobileNet v2-1.0| [13.6 MB](model/mobilenetv2-7.onnx) | [14.1 MB](model/mobilenetv2-7.tar.gz) | 1.2.1 | 7| 70.94 | 89.99 |
 |MobileNet v2-1.0-fp32| [13.3 MB](model/mobilenetv2-12.onnx) | [12.9 MB](model/mobilenetv2-12.tar.gz) | 1.9.0 | 12| 69.48 | 89.26 |
 |MobileNet v2-1.0-int8| [3.5 MB](model/mobilenetv2-12-int8.onnx) | [3.7 MB](model/mobilenetv2-12-int8.tar.gz) | 1.9.0 | 12| 68.30 | 88.44 |
+|MobileNet v2-1.0-qdq| [3.4 MB](model/mobilenetv2-12-qdq.onnx) | [3.3 MB](model/mobilenetv2-12-qdq.tar.gz) |1.10.0| 12 |67.40 | |
 > Compared with the fp32 MobileNet v2-1.0, int8 MobileNet v2-1.0's Top-1 accuracy decline ratio is 1.70%, Top-5 accuracy decline ratio is 0.92% and performance improvement is 1.05x.
 >
 > Note the performance depends on the test hardware.
@@ -56,7 +57,7 @@ We used MXNet as framework with gluon APIs to perform training. View the [traini
 We used MXNet as framework with gluon APIs to perform validation. Use the notebook [imagenet_validation](../imagenet_validation.ipynb) to verify the accuracy of the model on the validation set. Make sure to specify the appropriate model name in the notebook.
 
 ## Quantization
-MobileNet v2-1.0-int8 is obtained by quantizing MobileNet v2-1.0-fp32 model. We use [Intel® Neural Compressor](https://github.com/intel/neural-compressor) with onnxruntime backend to perform quantization. View the [instructions](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq/README.md) to understand how to use Intel® Neural Compressor for quantization.
+MobileNet v2-1.0-int8 and MobileNet v2-1.0-qdq are obtained by quantizing MobileNet v2-1.0-fp32 model. We use [Intel® Neural Compressor](https://github.com/intel/neural-compressor) with onnxruntime backend to perform quantization. View the [instructions](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq/README.md) to understand how to use Intel® Neural Compressor for quantization.
 
 ### Environment
 onnx: 1.9.0
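
The Top-1/Top-5 columns and the decline-ratio note above are plain top-k accuracies over the ImageNet validation set. A small numpy sketch of that metric, using toy logits and labels rather than the validation notebook's real outputs:

```python
import numpy as np

def topk_accuracy(logits: np.ndarray, labels: np.ndarray, k: int = 1) -> float:
    """Fraction of samples whose true label is among the k highest-scoring classes."""
    # Indices of the k largest scores per row; ordering inside the top-k does not matter.
    topk = np.argpartition(logits, -k, axis=1)[:, -k:]
    return float((topk == labels[:, None]).any(axis=1).mean())

# Toy example: 4 samples, 1000 ImageNet classes.
rng = np.random.default_rng(0)
logits = rng.standard_normal((4, 1000)).astype(np.float32)
labels = np.array([3, 17, 981, 42])
print(topk_accuracy(logits, labels, k=1), topk_accuracy(logits, labels, k=5))
```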

vision/classification/mobilenet/model/mobilenetv2-12-qdq.onnx (Git LFS pointer)

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41a36090dafe98f4ad8f9b7fe0b218c56ac3c031e547f0367c30655d2702bffe
+size 3593903

vision/classification/mobilenet/model/mobilenetv2-12-qdq.tar.gz (Git LFS pointer)

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29be79b1561b1bf028a064eb4061b23b92883cb7f22b50f5f52d685efd23a04c
+size 3431250

vision/classification/resnet/README.md

Lines changed: 4 additions & 3 deletions

@@ -32,8 +32,9 @@ ResNet v2 uses pre-activation function whereas ResNet v1 uses post-activation f
 |ResNet50| [97.8 MB](model/resnet50-v1-7.onnx) |[92.2 MB](model/resnet50-v1-7.tar.gz) |1.2.1 |7|74.93 | 92.38 |
 |ResNet101| [170.6 MB](model/resnet101-v1-7.onnx) | [159.8 MB](model/resnet101-v1-7.tar.gz) | 1.2.1 |7 | 76.48 | 93.20 |
 |ResNet152| [230.6 MB](model/resnet152-v1-7.onnx) |[217.2 MB](model/resnet152-v1-7.tar.gz) | 1.2.1 |7 |77.11 | 93.61 |
-|ResNet50_fp32| [97.8 MB](model/resnet50-v1-12.onnx) |[92.0 MB](model/resnet50-v1-12.tar.gz) |1.7.0 |12 |74.97 |92.33 |
-|ResNet50_int8| [24.6 MB](model/resnet50-v1-12-int8.onnx) |[22.3 MB](model/resnet50-v1-12-int8.tar.gz) |1.7.0 |12 |74.77 |92.32 |
+|ResNet50-fp32| [97.8 MB](model/resnet50-v1-12.onnx) |[92.0 MB](model/resnet50-v1-12.tar.gz) |1.7.0 |12 |74.97 |92.33 |
+|ResNet50-int8| [24.6 MB](model/resnet50-v1-12-int8.onnx) |[22.3 MB](model/resnet50-v1-12-int8.tar.gz) |1.7.0 |12 |74.77 |92.32 |
+|ResNet50-qdq | [24.6 MB](model/resnet50-v1-12-qdq.onnx) | [16.8 MB](model/resnet50-v1-12-qdq.tar.gz) | 1.10.0 | 12 |74.43 | |
 > Compared with the fp32 ResNet50, int8 ResNet50's Top-1 accuracy drop ratio is 0.27%, Top-5 accuracy drop ratio is 0.01% and performance improvement is 1.82x.
 >
 > Note the performance depends on the test hardware.
@@ -109,7 +110,7 @@ We used MXNet as framework with gluon APIs to perform training. View the [traini
 We used MXNet as framework with gluon APIs to perform validation. Use the notebook [imagenet_validation](../imagenet_validation.ipynb) to verify the accuracy of the model on the validation set. Make sure to specify the appropriate model name in the notebook.
 
 ## Quantization
-ResNet50_int8 is obtained by quantizing ResNet50_fp32 model. We use [Intel® Neural Compressor](https://github.com/intel/neural-compressor) with onnxruntime backend to perform quantization. View the [instructions](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq/README.md) to understand how to use Intel® Neural Compressor for quantization.
+ResNet50-int8 and ResNet50-qdq are obtained by quantizing ResNet50-fp32 model. We use [Intel® Neural Compressor](https://github.com/intel/neural-compressor) with onnxruntime backend to perform quantization. View the [instructions](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq/README.md) to understand how to use Intel® Neural Compressor for quantization.
 
 ### Environment
 onnx: 1.7.0
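
The README points at Intel® Neural Compressor for the actual recipe. Purely to illustrate what "QDQ format" means, here is a hedged sketch using onnxruntime's own static-quantization API instead (not the Neural Compressor flow used for the published model); the calibration reader feeds random tensors as a placeholder and the file paths are assumptions:

```python
import numpy as np
from onnxruntime.quantization import (
    CalibrationDataReader,
    QuantFormat,
    QuantType,
    quantize_static,
)

class RandomCalibrationReader(CalibrationDataReader):
    """Placeholder calibration source; a real run feeds preprocessed ImageNet batches."""

    def __init__(self, input_name: str = "data", num_batches: int = 8):
        self._batches = iter(
            {input_name: np.random.rand(1, 3, 224, 224).astype(np.float32)}
            for _ in range(num_batches)
        )

    def get_next(self):
        return next(self._batches, None)

# QuantFormat.QDQ inserts QuantizeLinear/DequantizeLinear node pairs around fp32 ops
# instead of rewriting them into fused QLinear* operators.
quantize_static(
    "resnet50-v1-12.onnx",           # fp32 input model (path is an assumption)
    "resnet50-v1-12-qdq-demo.onnx",  # quantized output
    RandomCalibrationReader(),
    quant_format=QuantFormat.QDQ,
    activation_type=QuantType.QUInt8,
    weight_type=QuantType.QInt8,
)
```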

vision/classification/resnet/model/resnet50-v1-12-qdq.onnx (Git LFS pointer)

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6429d274805654c79ba68dc886c1efd4eff95393e747c35d3fdab579febd8a0
+size 25753167

vision/classification/resnet/model/resnet50-v1-12-qdq.tar.gz (Git LFS pointer)

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:986160ffdcf88dedba54ee1fd8ea9597a91b11ccf16016f495bffe22e83ea40e
+size 17043120
