Commit 376af18

add googlenet, inception_v1 and shufflenet-v2 qdq models (#570)
* add googlenet, inception_v1 and shufflenet-v2 qdq models
* update test data
* update json

Signed-off-by: mengniwa <[email protected]>
Co-authored-by: Chun-Wei Chen <[email protected]>
1 parent ba62990 commit 376af18

File tree

10 files changed: +152, -3 lines

ONNX_HUB_MANIFEST.json

Lines changed: 128 additions & 0 deletions

@@ -2185,6 +2185,49 @@
             "model_with_data_bytes": 5724344
         }
     },
+    {
+        "model": "GoogleNet-qdq",
+        "model_path": "vision/classification/inception_and_googlenet/googlenet/model/googlenet-12-qdq.onnx",
+        "onnx_version": "1.12",
+        "opset_version": 12,
+        "metadata": {
+            "model_sha": "f764ae1ed52e5fca319a43a19e9526d2f028e7f10b9d17a8eefcb098cd90d36d",
+            "model_bytes": 7135204,
+            "tags": [
+                "vision",
+                "classification",
+                "inception and googlenet",
+                "googlenet"
+            ],
+            "io_ports": {
+                "inputs": [
+                    {
+                        "name": "data_0",
+                        "shape": [
+                            1,
+                            3,
+                            224,
+                            224
+                        ],
+                        "type": "tensor(float)"
+                    }
+                ],
+                "outputs": [
+                    {
+                        "name": "prob_1",
+                        "shape": [
+                            1,
+                            1000
+                        ],
+                        "type": "tensor(float)"
+                    }
+                ]
+            },
+            "model_with_data_path": "vision/classification/inception_and_googlenet/googlenet/model/googlenet-12-qdq.tar.gz",
+            "model_with_data_sha": "5587255e94438edd7c0017f5355178546b150deea05ed5e59109a4d4ae9aa4b3",
+            "model_with_data_bytes": 5562451
+        }
+    },
     {
         "model": "GoogleNet",
         "model_path": "vision/classification/inception_and_googlenet/googlenet/model/googlenet-12.onnx",
@@ -2564,6 +2607,49 @@
             "model_with_data_bytes": 9474526
         }
     },
+    {
+        "model": "Inception-1-qdq",
+        "model_path": "vision/classification/inception_and_googlenet/inception_v1/model/inception-v1-12-qdq.onnx",
+        "onnx_version": "1.12",
+        "opset_version": 12,
+        "metadata": {
+            "model_sha": "c6a8e07c53ea417a0001ebba885a3a623f1c3c51fac105485aca638e2f3de25d",
+            "model_bytes": 7135424,
+            "tags": [
+                "vision",
+                "classification",
+                "inception and googlenet",
+                "inception v1"
+            ],
+            "io_ports": {
+                "inputs": [
+                    {
+                        "name": "data_0",
+                        "shape": [
+                            1,
+                            3,
+                            224,
+                            224
+                        ],
+                        "type": "tensor(float)"
+                    }
+                ],
+                "outputs": [
+                    {
+                        "name": "prob_1",
+                        "shape": [
+                            1,
+                            1000
+                        ],
+                        "type": "tensor(float)"
+                    }
+                ]
+            },
+            "model_with_data_path": "vision/classification/inception_and_googlenet/inception_v1/model/inception-v1-12-qdq.tar.gz",
+            "model_with_data_sha": "c981fd944a125d3626cb79720c7f0a57309da5a24994c0378e2657fdca87a869",
+            "model_with_data_bytes": 5559367
+        }
+    },
     {
         "model": "Inception-1",
         "model_path": "vision/classification/inception_and_googlenet/inception_v1/model/inception-v1-12.onnx",
@@ -4820,6 +4906,48 @@
             "model_with_data_bytes": 2488137
         }
     },
+    {
+        "model": "ShuffleNet-v2-qdq",
+        "model_path": "vision/classification/shufflenet/model/shufflenet-v2-12-qdq.onnx",
+        "onnx_version": "1.12",
+        "opset_version": 12,
+        "metadata": {
+            "model_sha": "7c536d02e2f6af9569e5f3c7a4d8282060072119524c93c8da71e63876b4722b",
+            "model_bytes": 2415805,
+            "tags": [
+                "vision",
+                "classification",
+                "shufflenet"
+            ],
+            "io_ports": {
+                "inputs": [
+                    {
+                        "name": "input",
+                        "shape": [
+                            1,
+                            3,
+                            224,
+                            224
+                        ],
+                        "type": "tensor(float)"
+                    }
+                ],
+                "outputs": [
+                    {
+                        "name": "output",
+                        "shape": [
+                            1,
+                            1000
+                        ],
+                        "type": "tensor(float)"
+                    }
+                ]
+            },
+            "model_with_data_path": "vision/classification/shufflenet/model/shufflenet-v2-12-qdq.tar.gz",
+            "model_with_data_sha": "071594e233cedf5688501c9b67cad30c4babb1b46771fb87afe7fd1beb1cc008",
+            "model_with_data_bytes": 2245304
+        }
+    },
     {
         "model": "ShuffleNet-v2-fp32",
         "model_path": "vision/classification/shufflenet/model/shufflenet-v2-12.onnx",

vision/classification/inception_and_googlenet/googlenet/README.md

Lines changed: 2 additions & 1 deletion

@@ -11,6 +11,7 @@
 |GoogleNet| [28 MB](model/googlenet-9.onnx) | [31 MB](model/googlenet-9.tar.gz) | 1.4 | 9| | |
 |GoogleNet| [27 MB](model/googlenet-12.onnx) | [25 MB](model/googlenet-12.tar.gz) | 1.9 | 12|67.78|88.34|
 |GoogleNet-int8| [7 MB](model/googlenet-12-int8.onnx) | [5 MB](model/googlenet-12-int8.tar.gz) | 1.9 | 12|67.73|88.32|
+|GoogleNet-qdq| [7 MB](model/googlenet-12-qdq.onnx) | [5 MB](model/googlenet-12-qdq.tar.gz) | 1.12 | 12 | 67.73 | 88.31 |
 > Compared with the fp32 GoogleNet, int8 GoogleNet's Top-1 accuracy drop ratio is 0.07%, Top-5 accuracy drop ratio is 0.02% and performance improvement is 1.27x.
 >
 > **Note**
@@ -103,7 +104,7 @@ just the center crop. (Using the average of 10 crops,
 (4 + 1 center) * 2 mirror, should obtain a bit higher accuracy.)
 
 ## Quantization
-GoogleNet-int8 is obtained by quantizing fp32 GoogleNet model. We use [Intel® Neural Compressor](https://github.com/intel/neural-compressor) with onnxruntime backend to perform quantization. View the [instructions](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq/README.md) to understand how to use Intel® Neural Compressor for quantization.
+GoogleNet-int8 and GoogleNet-qdq are obtained by quantizing the fp32 GoogleNet model. We use [Intel® Neural Compressor](https://github.com/intel/neural-compressor) with the onnxruntime backend to perform quantization. View the [instructions](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq/README.md) to understand how to use Intel® Neural Compressor for quantization.
 
 ### Environment
 onnx: 1.9.0
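
For a quick sanity check of the new QDQ file outside the zoo's test harness, the manifest's io_ports (input "data_0", output "prob_1") are enough to drive onnxruntime. A hedged sketch, with random data standing in for the real preprocessed ImageNet crops this README describes, so only shapes are exercised, not accuracy:

```python
# Smoke test of googlenet-12-qdq.onnx with onnxruntime on CPU.
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("googlenet-12-qdq.onnx",
                            providers=["CPUExecutionProvider"])
x = np.random.rand(1, 3, 224, 224).astype(np.float32)  # input "data_0"
(prob,) = sess.run(["prob_1"], {"data_0": x})
print(prob.shape)  # expect (1, 1000): one probability per ImageNet class
```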

vision/classification/inception_and_googlenet/googlenet/model/googlenet-12-qdq.onnx

Lines changed: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f764ae1ed52e5fca319a43a19e9526d2f028e7f10b9d17a8eefcb098cd90d36d
+size 7135204
vision/classification/inception_and_googlenet/googlenet/model/googlenet-12-qdq.tar.gz

Lines changed: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5587255e94438edd7c0017f5355178546b150deea05ed5e59109a4d4ae9aa4b3
+size 5562451

vision/classification/inception_and_googlenet/inception_v1/README.md

Lines changed: 2 additions & 1 deletion

@@ -11,6 +11,7 @@
 |Inception-1| [28 MB](model/inception-v1-9.onnx) | [29 MB](model/inception-v1-9.tar.gz) | 1.4 | 9| |
 |Inception-1| [27 MB](model/inception-v1-12.onnx) | [25 MB](model/inception-v1-12.tar.gz) | 1.9 | 12| 67.23|
 |Inception-1-int8| [10 MB](model/inception-v1-12-int8.onnx) | [9 MB](model/inception-v1-12-int8.tar.gz) | 1.9 | 12| 67.24|
+|Inception-1-qdq| [7 MB](model/inception-v1-12-qdq.onnx) | [5 MB](model/inception-v1-12-qdq.tar.gz) | 1.12 | 12 | 67.21 |
 > Compared with the fp32 Inception-1, int8 Inception-1's Top-1 accuracy drop ratio is -0.01% and performance improvement is 1.26x.
 >
 > **Note**
@@ -51,7 +52,7 @@ random generated sampe test data:
 ## Results/accuracy on test set
 
 ## Quantization
-Inception-1-int8 is obtained by quantizing fp32 Inception-1 model. We use [Intel® Neural Compressor](https://github.com/intel/neural-compressor) with onnxruntime backend to perform quantization. View the [instructions](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq/README.md) to understand how to use Intel® Neural Compressor for quantization.
+Inception-1-int8 and Inception-1-qdq are obtained by quantizing the fp32 Inception-1 model. We use [Intel® Neural Compressor](https://github.com/intel/neural-compressor) with the onnxruntime backend to perform quantization. View the [instructions](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq/README.md) to understand how to use Intel® Neural Compressor for quantization.
 
 ### Environment
 onnx: 1.9.0
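
The zoo's QDQ models were produced with Intel® Neural Compressor per the instructions linked above; as a rough illustration of what the QDQ format itself is, onnxruntime's built-in static quantizer can emit the same QuantizeLinear/DequantizeLinear pattern. A sketch only, not the recipe used here: the paths are placeholders and the dummy calibration reader should be replaced by real ImageNet batches.

```python
# Illustrative QDQ quantization with onnxruntime (not the Intel Neural
# Compressor flow used by the zoo). Paths below are placeholders.
import numpy as np
from onnxruntime.quantization import (CalibrationDataReader, QuantFormat,
                                      quantize_static)

class RandomCalibration(CalibrationDataReader):
    """Feeds a few random batches; real calibration needs ImageNet data."""
    def __init__(self, n=8):
        self.batches = iter(
            {"data_0": np.random.rand(1, 3, 224, 224).astype(np.float32)}
            for _ in range(n))
    def get_next(self):
        return next(self.batches, None)

quantize_static(
    "inception-v1-12.onnx",        # fp32 input model (placeholder path)
    "inception-v1-12-qdq.onnx",    # quantized output
    RandomCalibration(),
    quant_format=QuantFormat.QDQ,  # insert QuantizeLinear/DequantizeLinear pairs
)
```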

vision/classification/inception_and_googlenet/inception_v1/model/inception-v1-12-qdq.onnx

Lines changed: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6a8e07c53ea417a0001ebba885a3a623f1c3c51fac105485aca638e2f3de25d
+size 7135424
vision/classification/inception_and_googlenet/inception_v1/model/inception-v1-12-qdq.tar.gz

Lines changed: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c981fd944a125d3626cb79720c7f0a57309da5a24994c0378e2657fdca87a869
+size 5559367

vision/classification/shufflenet/README.md

Lines changed: 2 additions & 1 deletion

@@ -31,6 +31,7 @@ ONNX ShuffleNet-v2 ==> Quantized ONNX ShuffleNet-v2
 |ShuffleNet-v2 |[9.2MB](model/shufflenet-v2-10.onnx) | [8.7MB](model/shufflenet-v2-10.tar.gz) | 1.6 | 10 | 30.64 | 11.68|
 |ShuffleNet-v2-fp32 |[8.79MB](model/shufflenet-v2-12.onnx) |[8.69MB](model/shufflenet-v2-12.tar.gz) |1.9 |12 |33.65 |13.43|
 |ShuffleNet-v2-int8 |[2.28MB](model/shufflenet-v2-12-int8.onnx) |[2.37MB](model/shufflenet-v2-12-int8.tar.gz) |1.9 |12 |33.85 |13.66 |
+|ShuffleNet-v2-qdq |[2.30MB](model/shufflenet-v2-12-qdq.onnx) |[2.68MB](model/shufflenet-v2-12-qdq.tar.gz) |1.12 |12 |33.88 |19.94 |
 > Compared with the fp32 ShuffleNet-v2, int8 ShuffleNet-v2's Top-1 error rising ratio is 0.59%, Top-5 error rising ratio is 1.71% and performance improvement is 1.62x.
 >
 > Note the performance depends on the test hardware.
@@ -79,7 +80,7 @@ Details of performance on COCO object detection are provided in [this paper](htt
 <hr>
 
 ## Quantization
-ShuffleNet-v2-int8 is obtained by quantizing ShuffleNet-v2-fp32 model. We use [Intel® Neural Compressor](https://github.com/intel/neural-compressor) with onnxruntime backend to perform quantization. View the [instructions](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq/README.md) to understand how to use Intel® Neural Compressor for quantization.
+ShuffleNet-v2-int8 and ShuffleNet-v2-qdq are obtained by quantizing the ShuffleNet-v2-fp32 model. We use [Intel® Neural Compressor](https://github.com/intel/neural-compressor) with the onnxruntime backend to perform quantization. View the [instructions](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq/README.md) to understand how to use Intel® Neural Compressor for quantization.
 
 ### Environment
 onnx: 1.9.0
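
The "error rising ratio" quoted in this README is a relative change in error rate; the figures check out against the table's fp32 and int8 columns. A sketch of the arithmetic:

```python
# Relative error increase of int8 vs. fp32 ShuffleNet-v2, using the
# Top-1/Top-5 error rates from the table above.
fp32_top1, int8_top1 = 33.65, 33.85
fp32_top5, int8_top5 = 13.43, 13.66
print(f"Top-1: {(int8_top1 - fp32_top1) / fp32_top1:.2%}")  # -> 0.59%
print(f"Top-5: {(int8_top5 - fp32_top5) / fp32_top5:.2%}")  # -> 1.71%
```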

vision/classification/shufflenet/model/shufflenet-v2-12-qdq.onnx

Lines changed: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c536d02e2f6af9569e5f3c7a4d8282060072119524c93c8da71e63876b4722b
+size 2415805
vision/classification/shufflenet/model/shufflenet-v2-12-qdq.tar.gz

Lines changed: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:071594e233cedf5688501c9b67cad30c4babb1b46771fb87afe7fd1beb1cc008
+size 2245304
