
Commit e49c41d

yuwenzho and jcwchen authored

Add bidaf and arcface int8 model (#598)

* upload arcface and bidaf int8 onnx model

  Signed-off-by: yuwenzho <[email protected]>

* add bidaf and arcface int8 model

  Signed-off-by: yuwenzho <[email protected]>

---------

Signed-off-by: yuwenzho <[email protected]>
Co-authored-by: Chun-Wei Chen <[email protected]>

1 parent 3d125fa · commit e49c41d

File tree

7 files changed: +292 −3 lines


ONNX_HUB_MANIFEST.json

Lines changed: 190 additions & 0 deletions
@@ -311,6 +311,154 @@
             "model_with_data_bytes": 403400046
         }
     },
+    {
+        "model": "BiDAF-int8",
+        "model_path": "text/machine_comprehension/bidirectional_attention_flow/model/bidaf-11-int8.onnx",
+        "onnx_version": "1.13.1",
+        "opset_version": 11,
+        "metadata": {
+            "model_sha": "c2bbfd7568f4f19c8db82395c81d8d6199f3c0237f49e0f669d47c82643ef29e",
+            "model_bytes": 12452924,
+            "tags": [
+                "text",
+                "machine comprehension",
+                "bidirectional attention flow"
+            ],
+            "io_ports": {
+                "inputs": [
+                    {
+                        "name": "context_word",
+                        "shape": [
+                            "c",
+                            1
+                        ],
+                        "type": "tensor(string)"
+                    },
+                    {
+                        "name": "context_char",
+                        "shape": [
+                            "c",
+                            1,
+                            1,
+                            16
+                        ],
+                        "type": "tensor(string)"
+                    },
+                    {
+                        "name": "query_word",
+                        "shape": [
+                            "q",
+                            1
+                        ],
+                        "type": "tensor(string)"
+                    },
+                    {
+                        "name": "query_char",
+                        "shape": [
+                            "q",
+                            1,
+                            1,
+                            16
+                        ],
+                        "type": "tensor(string)"
+                    }
+                ],
+                "outputs": [
+                    {
+                        "name": "start_pos",
+                        "shape": [
+                            1
+                        ],
+                        "type": "tensor(int32)"
+                    },
+                    {
+                        "name": "end_pos",
+                        "shape": [
+                            1
+                        ],
+                        "type": "tensor(int32)"
+                    }
+                ]
+            },
+            "model_with_data_path": "text/machine_comprehension/bidirectional_attention_flow/model/bidaf-11-int8.tar.gz",
+            "model_with_data_sha": "571410c31445882ea9ed7b9f48fe8c2ed6ccb72b925281a1be82a75c0c12b6ab",
+            "model_with_data_bytes": 9086295
+        }
+    },
+    {
+        "model": "BiDAF",
+        "model_path": "text/machine_comprehension/bidirectional_attention_flow/model/bidaf-9.onnx",
+        "onnx_version": "1.4",
+        "opset_version": 9,
+        "metadata": {
+            "model_sha": "dfc317b56d065a3e297240a9e9b9118ff2260790b5850f4be2bc6ea1bcc65e80",
+            "model_bytes": 43522228,
+            "tags": [
+                "text",
+                "machine comprehension",
+                "bidirectional attention flow"
+            ],
+            "io_ports": {
+                "inputs": [
+                    {
+                        "name": "context_word",
+                        "shape": [
+                            "c",
+                            1
+                        ],
+                        "type": "tensor(string)"
+                    },
+                    {
+                        "name": "context_char",
+                        "shape": [
+                            "c",
+                            1,
+                            1,
+                            16
+                        ],
+                        "type": "tensor(string)"
+                    },
+                    {
+                        "name": "query_word",
+                        "shape": [
+                            "q",
+                            1
+                        ],
+                        "type": "tensor(string)"
+                    },
+                    {
+                        "name": "query_char",
+                        "shape": [
+                            "q",
+                            1,
+                            1,
+                            16
+                        ],
+                        "type": "tensor(string)"
+                    }
+                ],
+                "outputs": [
+                    {
+                        "name": "start_pos",
+                        "shape": [
+                            1
+                        ],
+                        "type": "tensor(int32)"
+                    },
+                    {
+                        "name": "end_pos",
+                        "shape": [
+                            1
+                        ],
+                        "type": "tensor(int32)"
+                    }
+                ]
+            },
+            "model_with_data_path": "text/machine_comprehension/bidirectional_attention_flow/model/bidaf-9.tar.gz",
+            "model_with_data_sha": "c74387eec257f2cb37cefc2846e1c4078bfebf06cd6486e9dafe6c9f7cdc1ef3",
+            "model_with_data_bytes": 39092248
+        }
+    },
     {
         "model": "GPT-2",
         "model_path": "text/machine_comprehension/gpt-2/model/gpt2-10.onnx",
@@ -841,6 +989,48 @@
             "model_with_data_bytes": 194535656
         }
     },
+    {
+        "model": "LResNet100E-IR-int8",
+        "model_path": "vision/body_analysis/arcface/model/arcfaceresnet100-11-int8.onnx",
+        "onnx_version": "1.13.1",
+        "opset_version": 11,
+        "metadata": {
+            "model_sha": "c625ca68a422418c48aa84f73341337e0a92b111f327909005d1eec07c95f936",
+            "model_bytes": 65764892,
+            "tags": [
+                "vision",
+                "body analysis",
+                "arcface"
+            ],
+            "io_ports": {
+                "inputs": [
+                    {
+                        "name": "data",
+                        "shape": [
+                            1,
+                            3,
+                            112,
+                            112
+                        ],
+                        "type": "tensor(float)"
+                    }
+                ],
+                "outputs": [
+                    {
+                        "name": "fc1",
+                        "shape": [
+                            1,
+                            512
+                        ],
+                        "type": "tensor(float)"
+                    }
+                ]
+            },
+            "model_with_data_path": "vision/body_analysis/arcface/model/arcfaceresnet100-11-int8.tar.gz",
+            "model_with_data_sha": "d560f59c57fa4784771ba520b5b2f380097d7b2210e6c8b02ca203c2e9784f8a",
+            "model_with_data_bytes": 47945269
+        }
+    },
     {
         "model": "LResNet100E-IR",
         "model_path": "vision/body_analysis/arcface/model/arcfaceresnet100-8.onnx",
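Each manifest entry pins its artifact with a SHA-256 digest (`model_sha` / `model_with_data_sha`) and a byte count (`model_bytes` / `model_with_data_bytes`). As an illustrative sketch (not part of this commit), a downloaded file can be checked against those two fields before use; the function name below is hypothetical:

```python
import hashlib
import os

def verify_artifact(path, expected_sha256, expected_bytes):
    """Check a downloaded model file against its manifest digest and size."""
    # Cheap check first: the byte count from the manifest.
    if os.path.getsize(path) != expected_bytes:
        return False
    # Then hash the file in chunks so large models don't need to fit in memory.
    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            digest.update(chunk)
    return digest.hexdigest() == expected_sha256
```

For example, a downloaded `bidaf-11-int8.onnx` should hash to the `model_sha` value shown in the hunk above and weigh in at exactly `model_bytes` bytes.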

text/machine_comprehension/bidirectional_attention_flow/README.md

Lines changed: 49 additions & 2 deletions
@@ -9,7 +9,11 @@ This model is a neural network for answering a query about a given context parag

 |Model |Download |Download (with sample test data)|ONNX version|Opset version|Accuracy |
 |-------------|:--------------|:--------------|:--------------|:--------------|:--------------|
-|BiDAF |[41.5 MB](model/bidaf-9.onnx) |[37.3 MB](model/bidaf-9.tar.gz)|1.4 |ONNX 9, ONNX.ML 1 |EM of 68.1 in SQuAD v1.1 |
+|BiDAF |[41.5 MB](model/bidaf-9.onnx) |[37.3 MB](model/bidaf-9.tar.gz)|1.4 |9 |EM of 68.1 in SQuAD v1.1 |
+|BiDAF-int8 |[12 MB](model/bidaf-11-int8.onnx) |[8.7 MB](model/bidaf-11-int8.tar.gz)|1.13.1 |11 |EM of 65.93 in SQuAD v1.1 |
+> Compared with the fp32 BiDAF model, the int8 model's accuracy drop ratio is 0.23% and its performance improvement is 0.89x on SQuAD v1.1.
+>
+> Performance depends on the test hardware. The data here was collected on an Intel® Xeon® Platinum 8280 processor (1 socket, 4 cores per instance) running CentOS Linux 8.3, with a data batch size of 1.

 <hr>

@@ -77,6 +81,40 @@ The model is trained with [SQuAD v1.1](https://rajpurkar.github.io/SQuAD-explore

 ## Validation accuracy
 Metric is Exact Matching (EM) of 68.1, computed over SQuAD v1.1 dev data.
+<hr>
+
+## Quantization
+BiDAF-int8 is obtained by quantizing the fp32 BiDAF model. We use [Intel® Neural Compressor](https://github.com/intel/neural-compressor) with the onnxruntime backend to perform quantization. See the [instructions](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/README.md) for how to use Intel® Neural Compressor for quantization.
+
+### Prepare Model
+Download the model from the [ONNX Model Zoo](https://github.com/onnx/models).
+
+```shell
+wget https://github.com/onnx/models/raw/main/text/machine_comprehension/bidirectional_attention_flow/model/bidaf-9.onnx
+```
+
+Convert the opset version to 11 for broader quantization support.
+
+```python
+import onnx
+from onnx import version_converter
+
+model = onnx.load('bidaf-9.onnx')
+model = version_converter.convert_version(model, 11)
+onnx.save_model(model, 'bidaf-11.onnx')
+```
+
+### Quantize Model
+
+Dynamic quantization:
+
+```bash
+# --input_model: model path as *.onnx
+bash run_tuning.sh --input_model=path/to/model \
+                   --dataset_location=path/to/squad/dev-v1.1.json \
+                   --output_model=path/to/model_tune
+```
 <hr>

 ## Publication/Attribution
@@ -85,7 +123,16 @@ Minjoon Seo, Aniruddha Kembhavi, Ali Farhadi, Hannaneh Hajishirzi. Bidirectional
 <hr>

 ## References
-This model is converted from a CNTK model trained from [this implementation](https://github.com/microsoft/CNTK/tree/nikosk/bidaf/Examples/Text/BidirectionalAttentionFlow/squad).
+* This model is converted from a CNTK model trained from [this implementation](https://github.com/microsoft/CNTK/tree/nikosk/bidaf/Examples/Text/BidirectionalAttentionFlow/squad).
+* [Intel® Neural Compressor](https://github.com/intel/neural-compressor)
+<hr>
+
+## Contributors
+* [mengniwang95](https://github.com/mengniwang95) (Intel)
+* [yuwenzho](https://github.com/yuwenzho) (Intel)
+* [airMeng](https://github.com/airMeng) (Intel)
+* [ftian1](https://github.com/ftian1) (Intel)
+* [hshen14](https://github.com/hshen14) (Intel)
 <hr>

 ## License
text/machine_comprehension/bidirectional_attention_flow/model/bidaf-11-int8.onnx

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2bbfd7568f4f19c8db82395c81d8d6199f3c0237f49e0f669d47c82643ef29e
+size 12452924
text/machine_comprehension/bidirectional_attention_flow/model/bidaf-11-int8.tar.gz

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:571410c31445882ea9ed7b9f48fe8c2ed6ccb72b925281a1be82a75c0c12b6ab
+size 9086295
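As the manifest hunk earlier in this commit shows, BiDAF takes string tensors: context/query words of shape `(n, 1)` and per-word characters of shape `(n, 1, 1, 16)`. A minimal preprocessing sketch of shaping text into those tensors (illustrative only: the regex tokenizer and padding scheme here are assumptions, not this repo's official preprocessing):

```python
import re

def tokenize_for_bidaf(text, max_chars=16):
    """Turn raw text into word and char arrays matching the manifest's
    (n, 1) word and (n, 1, 1, 16) char input shapes."""
    # Naive tokenizer: words and standalone punctuation, lowercased.
    words = [w.lower() for w in re.findall(r"\w+|[^\w\s]", text)]
    # Per-word character lists, truncated/padded to max_chars entries.
    chars = [list(w[:max_chars]) + [''] * (max_chars - len(w[:max_chars]))
             for w in words]
    word_tensor = [[w] for w in words]        # shape (n, 1)
    char_tensor = [[[c]] for c in chars]      # shape (n, 1, 1, 16)
    return word_tensor, char_tensor
```

The same function would be applied to both the context paragraph and the query before feeding `context_word`, `context_char`, `query_word`, and `query_char`.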

vision/body_analysis/arcface/README.md

Lines changed: 41 additions & 1 deletion
@@ -14,6 +14,10 @@ The model LResNet100E-IR is an ArcFace model that uses ResNet100 as a backend wi
 |Model |Download |Download (with sample test data)|ONNX version|Opset version|LFW * accuracy (%)|CFP-FF * accuracy (%)|CFP-FP * accuracy (%)|AgeDB-30 * accuracy (%)|
 |-------------|:--------------|:--------------|:--------------|:--------------|:--------------|:--------------|:--------------|:--------------|
 |LResNet100E-IR| [248.9 MB](model/arcfaceresnet100-8.onnx)|[226.6 MB](model/arcfaceresnet100-8.tar.gz) | 1.3 |8|99.77 | 99.83 | 94.21 | 97.87|
+|LResNet100E-IR-int8| [63 MB](model/arcfaceresnet100-11-int8.onnx)|[46 MB](model/arcfaceresnet100-11-int8.tar.gz) | 1.13.1 |11|99.80 | | | |
+> Compared with the fp32 LResNet100E-IR model, the int8 model's accuracy drop ratio is 0% and its performance improvement is 1.78x on the LFW dataset.
+>
+> Performance depends on the test hardware. The data here was collected on an Intel® Xeon® Platinum 8280 processor (1 socket, 4 cores per instance) running CentOS Linux 8.3, with a data batch size of 1.

 \* each of the accuracy metrics correspond to accuracies on different [validation sets](#val_data) each with their own [validation methods](#val_method).

@@ -66,13 +70,49 @@ The validation techniques for the three validation sets are described below:

 We used MXNet as framework to perform validation. Use the notebook [arcface_validation](dependencies/arcface_validation.ipynb) to verify the accuracy of the model on the validation set. Make sure to specify the appropriate model name in the notebook.

+## Quantization
+LResNet100E-IR-int8 is obtained by quantizing the fp32 LResNet100E-IR model. We use [Intel® Neural Compressor](https://github.com/intel/neural-compressor) with the onnxruntime backend to perform quantization. See the [instructions](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/README.md) for how to use Intel® Neural Compressor for quantization.
+
+### Prepare Model
+Download the model from the [ONNX Model Zoo](https://github.com/onnx/models).
+
+```shell
+wget https://github.com/onnx/models/raw/main/vision/body_analysis/arcface/model/arcfaceresnet100-8.onnx
+```
+
+Convert the opset version to 11 for broader quantization support.
+
+```python
+import onnx
+from onnx import version_converter
+
+model = onnx.load('arcfaceresnet100-8.onnx')
+model = version_converter.convert_version(model, 11)
+onnx.save_model(model, 'arcfaceresnet100-11.onnx')
+```
+
+### Quantize Model
+
+```bash
+cd neural-compressor/examples/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static
+# --input_model: model path as *.onnx
+bash run_tuning.sh --input_model=path/to/model \
+                   --dataset_location=/path/to/faces_ms1m_112x112/task.bin \
+                   --output_model=path/to/save
+```
+
 ## References
 * All models are from the paper [ArcFace: Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698).
 * Original training dataset from the paper [MS-Celeb-1M: A Dataset and Benchmark for Large-Scale Face Recognition](https://arxiv.org/abs/1607.08221).
 * [InsightFace repo](https://github.com/deepinsight/insightface), [MXNet](http://mxnet.incubator.apache.org)
+* [Intel® Neural Compressor](https://github.com/intel/neural-compressor)

 ## Contributors
-[abhinavs95](https://github.com/abhinavs95) (Amazon AI)
+* [abhinavs95](https://github.com/abhinavs95) (Amazon AI)
+* [mengniwang95](https://github.com/mengniwang95) (Intel)
+* [yuwenzho](https://github.com/yuwenzho) (Intel)
+* [airMeng](https://github.com/airMeng) (Intel)
+* [ftian1](https://github.com/ftian1) (Intel)
+* [hshen14](https://github.com/hshen14) (Intel)

 ## License
 Apache 2.0
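The `fc1` output listed in the manifest for both ArcFace models is a 512-dimensional face embedding; ArcFace-style verification typically compares two such embeddings by cosine similarity. A minimal, dependency-free sketch (illustrative; the threshold value is an assumption for this example, not a value from this repo):

```python
import math

def cosine_similarity(a, b):
    """Cosine similarity between two embedding vectors (e.g. fc1 outputs)."""
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    return dot / (norm_a * norm_b)

def same_identity(emb1, emb2, threshold=0.3):
    # The threshold is a placeholder; real systems tune it on a validation set
    # (the LFW protocol referenced in the table above does exactly this).
    return cosine_similarity(emb1, emb2) >= threshold
```

Quantization should leave this comparison logic unchanged: only the network producing the embeddings differs between LResNet100E-IR and LResNet100E-IR-int8.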
vision/body_analysis/arcface/model/arcfaceresnet100-11-int8.onnx

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c625ca68a422418c48aa84f73341337e0a92b111f327909005d1eec07c95f936
+size 65764892
vision/body_analysis/arcface/model/arcfaceresnet100-11-int8.tar.gz

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d560f59c57fa4784771ba520b5b2f380097d7b2210e6c8b02ca203c2e9784f8a
+size 47945269
